def D_asm(self, runtime):
    """Build the assembly fragment that evaluates this distribution term.

    Register contract (from the original comments): ``eax`` points to the
    hitpoint and ``ebx`` points to H (the half vector).  The fragment leaves
    its scalar result in ``xmm0`` and uses ``xmm1`` as scratch.

    With ``a = acos(normal . H)`` the emitted code computes
    ``exp(-(tan(a) / m) ** 2) / (m ** 2 * cos(a) ** 4)`` -- assuming the
    ``fast_*_ss`` helpers compute the functions their names suggest (they
    are not defined in this block).

    Every data label is prefixed with ``"DistBeck" + str(hash(self))``.  The
    ``m`` slot is declared without an initial value here; presumably the
    caller fills it in after loading -- confirm.

    Args:
        runtime: forwarded to ``util.load_func`` so the helper routines the
            fragment calls are loaded.

    Returns:
        The assembly source (``#DATA`` section followed by ``#CODE``) as a
        string.
    """
    util.load_func(runtime, "fast_acos_ss", "fast_cos_ss", "fast_tan_ss", "fast_exp_ss")

    # eax pointer to hitpoint
    # ebx pointer to H (half vector)
    prefix = "DistBeck" + str(hash(self))

    header = """
    #DATA
    """

    data_lines = [
        "float " + prefix + "m \n",
        "float " + prefix + "minus_one = -1.0 \n",
        "float " + prefix + "alpha \n",
        "float " + prefix + "temp \n",
    ]

    code_lines = [
        "#CODE \n",
        # alpha = acos(normal . H)
        "macro dot xmm0 = eax.hitpoint.normal * ebx \n",
        "call fast_acos_ss \n",
        "macro eq32 " + prefix + "alpha = xmm0 \n",
        # temp = exp(-(tan(alpha) / m)^2)
        "call fast_tan_ss \n",
        "macro eq32 xmm0 = xmm0 / " + prefix + "m \n",
        "macro eq32 xmm0 = xmm0 * xmm0 \n",
        "macro eq32 xmm0 = xmm0 * " + prefix + "minus_one \n",
        "call fast_exp_ss \n",
        "macro eq32 " + prefix + "temp = xmm0 \n",
        # xmm1 = cos(alpha)^4 * m^2
        "macro eq32 xmm0 = " + prefix + "alpha \n",
        "call fast_cos_ss \n",
        "macro eq32 xmm0 = xmm0 * xmm0 \n",
        "macro eq32 xmm1 = xmm0 * xmm0 \n",
        "macro eq32 xmm1 = xmm1 * " + prefix + "m \n",
        "macro eq32 xmm1 = xmm1 * " + prefix + "m \n",
        # result = temp / xmm1
        "macro eq32 xmm0 = " + prefix + "temp \n",
        "macro eq32 xmm0 = xmm0 / xmm1 \n",
    ]

    return header + "".join(data_lines) + "".join(code_lines)
def D_asm(self, runtime):
    """Return assembly source for the distribution term of this object.

    On entry the fragment expects ``eax`` to point to the hitpoint and
    ``ebx`` to point to H (the half vector), as stated by the original
    comments.  The scalar result is left in ``xmm0``; ``xmm1`` is used as
    scratch.

    Emitted computation, with ``a = acos(normal . H)``::

        xmm0 = exp(-(tan(a) / m)^2) / (cos(a)^4 * m^2)

    This reading assumes the ``fast_acos_ss`` / ``fast_tan_ss`` /
    ``fast_exp_ss`` / ``fast_cos_ss`` routines behave as their names suggest;
    they are loaded through ``util.load_func`` and are not visible here.

    All data labels carry the prefix ``"DistBeck" + str(hash(self))``.  The
    ``m`` value has no initializer in the data section -- presumably it is
    written by the caller once the code is loaded (TODO confirm).

    Args:
        runtime: passed to ``util.load_func``.

    Returns:
        str: the ``#DATA`` / ``#CODE`` assembly text.
    """
    util.load_func(runtime, "fast_acos_ss", "fast_cos_ss", "fast_tan_ss", "fast_exp_ss")

    #eax pointer to hitpoint
    #ebx pointer to H (half vector)
    label_prefix = "DistBeck" + str(hash(self))

    preamble = """
    #DATA
    """

    # ``{p}`` is replaced by the per-instance label prefix; the template
    # itself contains no other braces.
    template = (
        "float {p}m \n"
        "float {p}minus_one = -1.0 \n"
        "float {p}alpha \n"
        "float {p}temp \n"
        "#CODE \n"
        "macro dot xmm0 = eax.hitpoint.normal * ebx \n"
        "call fast_acos_ss \n"
        "macro eq32 {p}alpha = xmm0 \n"
        "call fast_tan_ss \n"
        "macro eq32 xmm0 = xmm0 / {p}m \n"
        "macro eq32 xmm0 = xmm0 * xmm0 \n"
        "macro eq32 xmm0 = xmm0 * {p}minus_one \n"
        "call fast_exp_ss \n"
        "macro eq32 {p}temp = xmm0 \n"
        "macro eq32 xmm0 = {p}alpha \n"
        "call fast_cos_ss \n"
        "macro eq32 xmm0 = xmm0 * xmm0 \n"
        "macro eq32 xmm1 = xmm0 * xmm0 \n"
        "macro eq32 xmm1 = xmm1 * {p}m \n"
        "macro eq32 xmm1 = xmm1 * {p}m \n"
        "macro eq32 xmm0 = {p}temp \n"
        "macro eq32 xmm0 = xmm0 / xmm1 \n"
    )

    return preamble + template.format(p=label_prefix)
def light_sample_asm(self, runtime):
    """Assemble and load the routine that samples a point on this light.

    The generated routine expects ``eax`` to point to a hitpoint structure.
    It saves that pointer, calls ``random``, broadcasts lane 0 and lane 1 of
    the value returned in ``xmm0`` and computes
    ``point + r0 * edge_a + r1 * edge_b``.  The result is written to
    ``hitpoint.light_sample``; the ``normal`` and ``pdf`` data slots are
    copied to ``hitpoint.light_normal`` and ``hitpoint.light_pdf``.

    Side effects on ``self``:
        * ``self.ds`` is set to the object returned by ``runtime.load``.
        * ``self._populate_ds()`` is called (presumably to fill ``normal``,
          ``edge_a``, ``edge_b``, ``point`` and ``pdf`` -- not visible here).
        * ``self.sample_ptr`` is set to ``runtime.modules[name][0]``.

    Args:
        runtime: object used to load the helper function and the assembled
            module.

    Returns:
        None.
    """
    util.load_func(runtime, "random")

    # eax - pointer to hitpoint structure
    hitpoint_structs = util.structs("hitpoint")

    data_header = """
    #DATA
    """
    routine = """
    float normal[4]
    float edge_a[4]
    float edge_b[4]
    float point[4]
    float pdf
    uint32 hp_ptr
    #CODE
    mov dword [hp_ptr], eax
    call random
    macro eq128 xmm1 = xmm0
    macro broadcast xmm0 = xmm0[0]
    macro broadcast xmm1 = xmm1[1]
    macro eq128 xmm0 = xmm0 * edge_a {xmm1}
    macro eq128 xmm1 = xmm1 * edge_b {xmm0}
    macro eq128 xmm0 = xmm0 + point {xmm1}
    macro eq128 xmm0 = xmm0 + xmm1
    mov eax, dword [hp_ptr]
    macro eq128 eax.hitpoint.light_sample = xmm0
    macro eq128 eax.hitpoint.light_normal = normal
    macro eq32 eax.hitpoint.light_pdf = pdf
    ret

    """
    source = data_header + hitpoint_structs + routine

    machine_code = util.get_asm().assemble(source, True)

    # The module name (including its historical spelling) is only used as a
    # key for loading and looking up this module.
    module_name = "recangle_sample" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()

    #FIXME - add method to runtime class so we can ask runtime for address of module
    self.sample_ptr = runtime.modules[module_name][0]
def brdf_asm(self, runtime):
    """Build the assembly fragment that evaluates this BRDF for a hitpoint.

    On entry ``eax`` points to the hitpoint.  The fragment leaves a 4-float
    result in ``xmm0``.

    Emitted computation::

        r = 2 * ndotwi * normal - wi
        d = r . wo
        if d > 0:  xmm0 = pow(d, e) * spectrum / ndotwi   [* k if self.k set]
        else:      xmm0 = 0

    ``pow`` is whatever ``fast_pow_ss`` computes from ``xmm0`` and ``xmm1``
    (presumably ``xmm0 ** xmm1`` -- the routine is not visible here).

    Changes compared to the previous version:
        * ``eax`` is reloaded from the saved ``hp_ptr`` after
          ``call fast_pow_ss``.  The pointer was already stored at entry but
          never restored, while ``eax.hitpoint.ndotwi`` is read after the
          call.
        * A missing space between ``*`` and the spectrum label was added so
          the line is spelled like every other operand line.

    All labels are prefixed with ``"phong" + str(hash(self))``.  The
    ``spectrum``, ``k`` and ``e`` slots have no initializer here; presumably
    the caller fills them after loading -- confirm.

    Args:
        runtime: forwarded to ``util.load_func``.

    Returns:
        str: the ``#DATA`` / ``#CODE`` assembly text.
    """
    util.load_func(runtime, "fast_pow_ss")

    # eax pointer to hitpoint
    name = "phong" + str(hash(self))

    header = """
    #DATA
    """

    lines = [
        "float " + name + "spectrum[4] \n",
        "float " + name + "k[4] \n",
        "float " + name + "zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0 \n",
        "float " + name + "e\n",
        "float " + name + "two = 2.0 \n",
        "uint32 " + name + "hp_ptr \n",
        "#CODE \n",
        "mov dword [" + name + "hp_ptr], eax \n",
        # r = 2 * ndotwi * normal - wi ; d = r . wo
        "macro eq32 xmm0 = " + name + "two * eax.hitpoint.ndotwi \n",
        "macro broadcast xmm0 = xmm0[0] \n",
        "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal\n",
        "macro eq128 xmm0 = xmm0 - eax.hitpoint.wi \n",
        "macro dot xmm0 = xmm0 * eax.hitpoint.wo \n",
        "macro if xmm0 > " + name + "zero_spectrum goto " + name + "accept \n",
        "macro eq128 xmm0 = " + name + "zero_spectrum \n",
        "jmp " + name + "end \n",
        name + "accept:\n",
        "macro eq32 xmm1 = " + name + "e\n",
        "call fast_pow_ss \n",
        # Restore the hitpoint pointer saved at entry: fast_pow_ss is defined
        # elsewhere and is not known to preserve eax, and the lines below
        # still dereference it.
        "mov eax, dword [" + name + "hp_ptr] \n",
        "macro broadcast xmm0 = xmm0[0] \n",
        "macro eq128 xmm0 = xmm0 * " + name + "spectrum \n",
        "macro eq32 xmm1 = eax.hitpoint.ndotwi \n",
        "macro broadcast xmm1 = xmm1[0] \n",
        "macro eq128 xmm0 = xmm0 / xmm1 \n",
    ]

    if self.k is not None:
        lines.append("macro eq128 xmm0 = xmm0 * " + name + "k\n")

    lines.append(name + "end: \n")

    return header + "".join(lines)
def brdf_asm(self, runtime):
    """Return the assembly fragment evaluating this BRDF at a hitpoint.

    Register contract: ``eax`` points to the hitpoint on entry; the 4-float
    result is left in ``xmm0``.

    What the emitted code does::

        r = 2 * ndotwi * normal - wi
        d = r . wo
        if d > 0:  xmm0 = pow(d, e) * spectrum / ndotwi   [* k if self.k set]
        else:      xmm0 = 0

    ``pow`` stands for the result of ``fast_pow_ss`` called with ``d`` in
    ``xmm0`` and ``e`` in ``xmm1`` (presumably ``d ** e``; the routine is
    defined elsewhere).

    Fixes in this version:
        * After ``call fast_pow_ss`` the hitpoint pointer is reloaded from
          ``hp_ptr`` into ``eax``.  The fragment already saved the pointer at
          entry but never restored it, even though ``eax.hitpoint.ndotwi`` is
          read after the call.
        * The spectrum multiply now has a space after ``*`` like all other
          operand lines.

    Labels are prefixed with ``"phong" + str(hash(self))``.  ``spectrum``,
    ``k`` and ``e`` are declared without initial values; presumably they are
    written by the caller after loading (TODO confirm).

    Args:
        runtime: passed to ``util.load_func``.

    Returns:
        str: assembly text consisting of a ``#DATA`` and a ``#CODE`` section.
    """
    util.load_func(runtime, "fast_pow_ss")

    #eax pointer to hitpoint
    name = "phong" + str(hash(self))

    preamble = """
    #DATA
    """

    data_section = [
        "float " + name + "spectrum[4] \n",
        "float " + name + "k[4] \n",
        "float " + name + "zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0 \n",
        "float " + name + "e\n",
        "float " + name + "two = 2.0 \n",
        "uint32 " + name + "hp_ptr \n",
    ]

    code_section = [
        "#CODE \n",
        "mov dword [" + name + "hp_ptr], eax \n",
        "macro eq32 xmm0 = " + name + "two * eax.hitpoint.ndotwi \n",
        "macro broadcast xmm0 = xmm0[0] \n",
        "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal\n",
        "macro eq128 xmm0 = xmm0 - eax.hitpoint.wi \n",
        "macro dot xmm0 = xmm0 * eax.hitpoint.wo \n",
        # Non-positive dot product: return a zero spectrum.
        "macro if xmm0 > " + name + "zero_spectrum goto " + name + "accept \n",
        "macro eq128 xmm0 = " + name + "zero_spectrum \n",
        "jmp " + name + "end \n",
        name + "accept:\n",
        "macro eq32 xmm1 = " + name + "e\n",
        "call fast_pow_ss \n",
        # eax may not survive the call (fast_pow_ss is not visible here);
        # reload the pointer that was stored at entry before using it again.
        "mov eax, dword [" + name + "hp_ptr] \n",
        "macro broadcast xmm0 = xmm0[0] \n",
        "macro eq128 xmm0 = xmm0 * " + name + "spectrum \n",
        "macro eq32 xmm1 = eax.hitpoint.ndotwi \n",
        "macro broadcast xmm1 = xmm1[0] \n",
        "macro eq128 xmm0 = xmm0 / xmm1 \n",
    ]

    if self.k is not None:
        code_section.append("macro eq128 xmm0 = xmm0 * " + name + "k\n")
    code_section.append(name + "end: \n")

    return preamble + "".join(data_section) + "".join(code_section)
def brdf_asm(self, runtime):
    """Build the assembly fragment that evaluates this BRDF for a hitpoint.

    On entry ``eax`` points to the hitpoint.  The 4-float result is left in
    ``xmm0``.

    Emitted computation (assuming the ``fast_*`` helpers compute the
    functions their names suggest and ``util.normalization`` normalizes its
    first register using the other two as scratch -- neither is visible
    here)::

        a1, a2 = acos(normal . wo), acos(ndotwi)
        alpha, beta = max(a1, a2), min(a1, a2)
        v1 = normalize(wo - (normal . wo) * normal)
        v2 = normalize(wi - ndotwi * normal)
        f  = A + B * max(0, v1 . v2) * sin(alpha) * tan(beta)
        xmm0 = f * spectrum            [* k when self.k is not None]

    Labels are prefixed with ``"oren" + str(hash(self))``.  ``spectrum``,
    ``k``, ``A`` and ``B`` have no initializer here; presumably the caller
    fills them after loading -- confirm.

    Args:
        runtime: forwarded to ``util.load_func``.

    Returns:
        str: the ``#DATA`` / ``#CODE`` assembly text.
    """
    # eax pointer to hitpoint
    prefix = "oren" + str(hash(self))
    util.load_func(runtime, "fast_acos_ps", "fast_sin_ss", "fast_tan_ss")

    parts = ["""
    #DATA
    """]

    parts.extend([
        "float " + prefix + "spectrum[4] \n",
        "float " + prefix + "k[4] \n",
        "float " + prefix + "zero[4] = 0.0, 0.0, 0.0, 0.0 \n",
        "float " + prefix + "A \n",
        "float " + prefix + "B \n",
        "float " + prefix + "alpha \n",
        "float " + prefix + "beta \n",
        "uint32 " + prefix + "hp_ptr \n",
        "#CODE \n",
        "mov dword [" + prefix + "hp_ptr], eax \n",
        # Pack (normal . wo) and ndotwi into one register so a single packed
        # acos call handles both angles.
        "macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n",
        "macro eq32 xmm1 = eax.hitpoint.ndotwi \n",
        "movlhps xmm0, xmm1 \n",
        "call fast_acos_ps \n",
        "movhlps xmm1, xmm0 \n",
        "macro eq32 xmm2 = xmm0 \n",
        "macro eq32 xmm3 = xmm1 \n",
        "minss xmm0, xmm1 \n",  # beta
        "maxss xmm2, xmm3 \n",  # alpha
        "macro eq32 " + prefix + "alpha = xmm2 \n",
        "call fast_tan_ss \n",
        "macro eq32 " + prefix + "beta = xmm0 \n",
        "macro eq32 xmm0 = " + prefix + "alpha \n",
        "call fast_sin_ss \n",
        "macro eq32 xmm0 = xmm0 * " + prefix + "beta \n",
        # the alpha slot now holds sin(alpha) * tan(beta)
        "macro eq32 " + prefix + "alpha = xmm0 \n",
        "mov eax, dword [" + prefix + "hp_ptr]\n",
        "macro dot xmm0 = eax.hitpoint.normal * eax.hitpoint.wo \n",
        "macro broadcast xmm0 = xmm0[0] \n",
        "macro eq128 xmm0 = xmm0 * eax.hitpoint.normal \n",
        "macro eq128 xmm1 = eax.hitpoint.wo \n",
        "macro eq128 xmm1 = xmm1 - xmm0 \n",
    ])

    # v1
    parts.append(util.normalization("xmm1", "xmm4", "xmm5"))

    parts.extend([
        "macro eq32 xmm2 = eax.hitpoint.ndotwi \n",
        "macro broadcast xmm2 = xmm2[0] \n",
        "macro eq128 xmm2 = xmm2 * eax.hitpoint.normal {xmm1} \n",
        "macro eq128 xmm3 = eax.hitpoint.wi \n",
        "macro eq128 xmm3 = xmm3 - xmm2 {xmm1} \n",
    ])

    # v2
    parts.append(util.normalization("xmm3", "xmm5", "xmm6"))

    parts.extend([
        "macro dot xmm3 = xmm3 * xmm1 \n",
        "maxss xmm3, dword [" + prefix + "zero] \n",
        "macro eq32 xmm3 = xmm3 * " + prefix + "alpha \n",
        "macro eq32 xmm3 = xmm3 * " + prefix + "B \n",
        "macro eq32 xmm3 = xmm3 + " + prefix + "A \n",
        "macro broadcast xmm0 = xmm3[0] \n",
        "macro eq128 xmm0 = xmm0 * " + prefix + "spectrum \n",
    ])

    if self.k is not None:
        parts.append("macro eq128 xmm0 = xmm0 * " + prefix + "k\n")

    return "".join(parts)
def get_sample_asm(self, runtime):
    """Assemble and load the routine that generates a sampled direction.

    The generated routine expects ``eax`` to point to a hitpoint.  It writes
    the sampled direction to ``hitpoint.wi`` and its dot product with
    ``hitpoint.normal`` to ``hitpoint.ndotwi``.

    Outline of the emitted code:

    * ``idx`` counts the precomputed samples still available.  When the
      decrement goes negative the routine jumps to ``_calculate_samples``,
      which fills the four lanes of ``pu``, ``pv`` and ``pw`` from two calls
      to ``random``, sets ``idx`` to 3 and jumps back.
    * ``_gen_direction`` derives two vectors from ``tvector`` and the normal
      via ``util.cross_product`` / ``util.normalization`` (helper semantics
      not visible here), combines them with the normal weighted by
      ``pu[idx]``, ``pv[idx]`` and ``pw[idx]``, and normalizes the sum.
    * ``pw`` is ``sqrt(r)`` when ``self.e == 1`` and otherwise the result of
      ``fast_pow_ps`` with ``ie`` in ``xmm1`` (presumably ``r ** ie``).
      ``pu`` / ``pv`` are ``sqrt(1 - pw^2)`` scaled by the two outputs of
      ``fast_sincos_ps`` in ``xmm6`` / ``xmm0`` for the angle ``2 * pi * r``.

    Side effects on ``self``:
        * ``self.ds`` is set to the object returned by ``runtime.load`` and
          its ``"ie"`` entry is set to ``self.ie`` in all four lanes.
        * ``self.func_ptr`` is set to ``runtime.modules[name][0]``.

    Args:
        runtime: object used to load helper functions and the assembled
            module.

    Returns:
        None.
    """
    # eax - pointer to hitpoint
    hitpoint_structs = renmas.utils.structs("hitpoint")
    util.load_func(runtime, "random")
    util.load_func(runtime, "fast_sincos_ps")

    # Use the VEX-encoded mnemonics when util.AVX is set.
    load_scalar = ("vmovss" if util.AVX else "movss") + " xmm1, dword [ecx + 4*ebx] \n"
    sqrt_op = "vsqrtps" if util.AVX else "sqrtps"

    chunks = ["""
    #DATA
    """]

    chunks.append(hitpoint_structs + """
    float ie[4]
    float pi[4] = 3.14159265359, 3.14159265359, 3.14159265359, 3.14159265359
    float one[4] = 1.0, 1.0, 1.0, 1.0
    float two[4] = 2.0, 2.0, 2.0, 2.0
    float tvector[4] = 0.0034, 1.0, 0.0071, 0.0
    float pu[4]
    float pv[4]
    float pw[4]
    uint32 ptr_hp
    uint32 idx = 0
    #CODE
    """)

    chunks.append("""
    mov dword [ptr_hp], eax
    sub dword [idx], 1
    js _calculate_samples
    _gen_direction:
    mov eax, dword [ptr_hp]
    macro eq128 xmm1 = eax.hitpoint.normal
    macro eq128 xmm7 = xmm1
    macro eq128 xmm0 = tvector
    """)

    chunks.append(util.cross_product("xmm0", "xmm1", "xmm2", "xmm3"))
    chunks.append(util.normalization("xmm0", "xmm1", "xmm2") + """
    macro eq128 xmm1 = xmm7
    macro eq128 xmm6 = xmm0
    """)

    chunks.append(util.cross_product("xmm0", "xmm1", "xmm2", "xmm3"))
    chunks.append("""
    mov ebx, dword [idx]
    mov ecx, pu ; in line we load pu, pv or pw
    """)

    chunks.append(load_scalar + """
    macro broadcast xmm1 = xmm1[0]
    macro eq128 xmm0 = xmm0 * xmm1 {xmm6, xmm7}
    mov ecx, pv
    """)

    chunks.append(load_scalar + """
    macro broadcast xmm1 = xmm1[0]
    macro eq128 xmm6 = xmm6 * xmm1 {xmm0, xmm7}
    macro eq128 xmm0 = xmm0 + xmm6
    mov ecx, pw
    """)

    chunks.append(load_scalar + """
    macro broadcast xmm1 = xmm1[0]
    macro eq128 xmm1 = xmm1 * xmm7 {xmm0}
    macro eq128 xmm0 = xmm0 + xmm1 {xmm7}
    """)

    chunks.append(util.normalization("xmm0", "xmm1", "xmm2") + """
    macro eq128 eax.hitpoint.wi = xmm0
    macro dot xmm0 = xmm0 * xmm7
    macro eq32 eax.hitpoint.ndotwi = xmm0
    ret

    _calculate_samples:
    call random
    """)

    if self.e == 1:
        # Exponent 1: a plain square root replaces the pow call.
        chunks.append(sqrt_op + " xmm0, xmm0 \n")
    else:
        util.load_func(runtime, "fast_pow_ps")
        chunks.append("macro eq128 xmm1 = ie \n")
        chunks.append("call fast_pow_ps \n")

    chunks.append("""
    macro eq128 pw = xmm0
    macro eq128 xmm0 = xmm0 * xmm0
    macro eq128 xmm1 = one - xmm0
    """)

    chunks.append(sqrt_op + " xmm0, xmm1 \n")

    chunks.append("""
    macro eq128 pu = xmm0
    macro eq128 pv = xmm0
    call random
    macro eq128 xmm0 = xmm0 * pi
    macro eq128 xmm0 = xmm0 * two
    call fast_sincos_ps
    macro eq128 xmm0 = xmm0 * pv {xmm6}
    macro eq128 xmm6 = xmm6 * pu {xmm0}
    macro eq128 pv = xmm0
    macro eq128 pu = xmm6
    mov dword [idx], 3
    jmp _gen_direction

    """)

    source = "".join(chunks)
    machine_code = util.get_asm().assemble(source, True)

    module_name = "brdf_hemisphere" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self.ds["ie"] = (self.ie,) * 4

    #FIXME - add method to runtime class so we can ask runtime for address of module
    self.func_ptr = runtime.modules[module_name][0]
def _ray_tri_asm(self, runtime, label):
    """Assemble and load the routine that intersects a ray with triangles.

    The generated routine is exported under the global label ``label``.
    Per the comment in the assembly, it is entered with ``eax`` = ray,
    ``ebx`` = hitpoint, ``esi`` = array of triangle indexes and ``edi`` =
    number of objects (``ecx`` is overwritten with the address of the local
    ``min_dist`` slot).

    Outline of the emitted code:

    * ``min_dist`` is set to ``max_dist + one`` and ``hitpoint.t`` is set to
      the ``minus_nesto`` constant.
    * The five registers are pushed, giving the stack layout
      ``[esp]`` = count, ``[esp+4]`` = index array, ``[esp+8]`` = hitpoint,
      ``[esp+12]`` = ray, ``[esp+16]`` = address of ``min_dist``.
    * For each index the triangle record address is
      ``tr_ptr + index * triangle_size``; the three vertex addresses are
      ``vb_ptr + v * vertices_size``.  The material index, the address of
      the normal (record + 16) and the three vertex addresses are pushed
      and ``ray_triangle_mesh`` is called.
    * When the call returns non-zero in ``eax``, ``hitpoint.t`` is copied to
      ``min_dist``.
    * At the end ``eax`` is 1 when ``min_dist < max_dist`` and ``min_dist``
      is not below ``epsilon``; otherwise 0.

    Side effects on ``self``: ``self.ds`` is set to the object returned by
    ``runtime.load`` and its ``vertices_size``, ``triangle_size``,
    ``vb_ptr`` and ``tr_ptr`` entries are filled from
    ``self.vertex_buffer`` and ``self.triangles``.

    Args:
        runtime: object used to load the helper function and the assembled
            module.
        label: name of the global label placed at the routine's entry.

    Returns:
        None.
    """
    util.load_func(runtime, "ray_triangle_mesh")
    struct_defs = util.structs("ray", "hitpoint")

    data_header = """
    #DATA
    """

    data_section = """
    float min_dist = 999999.0
    float max_dist = 999999.0
    float zero = 0.0
    float one = 1.0
    float epsilon = 0.00001
    float minus_nesto = 0.0001
    ; pointer to vertex and triangle buffers
    uint32 vb_ptr
    uint32 tr_ptr
    uint32 vertices_size
    uint32 triangle_size
    #CODE
    """

    entry_point = " global " + label + ":\n"

    routine = """
    ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj
    ; 64-bit version will bi i little different beacuse of different size of array
    macro eq32 min_dist = max_dist + one
    mov ecx, min_dist
    push ecx
    push eax
    push ebx
    push esi
    push edi
    mov edx, dword [minus_nesto]
    mov dword [ebx + hitpoint.t], edx

    _objects_loop:
    mov eax, dword [esp + 12] ; address of ray
    mov ecx, dword [esp + 16] ; address of minimum distance
    mov edx, dword [esp + 8] ; address of hitpoint
    mov esi, dword [esp + 4] ; array of indexes of triangles
    mov ebx, dword [esi] ; put index of triangle in ebx

    ; prepeare call - address of parameters
    ;addres of points, normal a value of material index
    ;addr = self.address.ptr() + index * self.tri_size
    imul ebx, dword [triangle_size]
    add ebx, dword [tr_ptr]
    ; trbuffer tr_ptr=v0, tr_ptr+4=v1, tr_ptr+8=v2, tr_ptr+12=mat_idx , tr_ptr+16=normal
    mov ebp, dword [ebx + 12]
    push ebp
    mov ebp, ebx
    add ebp, 16
    push ebp
    mov ebp, dword [ebx + 8]
    imul ebp, dword [vertices_size]
    add ebp, dword [vb_ptr]
    push ebp
    mov ebp, dword [ebx + 4]
    imul ebp, dword [vertices_size]
    add ebp, dword [vb_ptr]
    push ebp
    mov ebp, dword [ebx]
    imul ebp, dword [vertices_size]
    add ebp, dword [vb_ptr]
    push ebp
    call ray_triangle_mesh
    add esp, 20
    cmp eax, 0 ; 0 - no intersection ocur 1 - intersection ocur
    jne _update_distance
    _next_object:
    sub dword [esp], 1
    jz _end_objects
    add dword [esp + 4], 4 ;increment array by 4 - index of triangle
    jmp _objects_loop

    _update_distance:
    mov eax, dword [esp + 8]
    mov ebx, dword [eax + hitpoint.t]
    mov edx, dword [esp + 16] ;populate new minimum distance
    mov dword [edx], ebx
    jmp _next_object

    _end_objects:
    add esp, 20
    macro eq32 xmm0 = min_dist
    macro if xmm0 < max_dist goto _accept
    mov eax, 0
    ret

    _accept:
    macro if xmm0 < epsilon goto _reject
    mov eax, 1
    ret

    _reject:
    mov eax, 0
    ret

    """

    source = "".join([data_header, struct_defs, data_section, entry_point, routine])

    machine_code = util.get_asm().assemble(source, True)

    module_name = "ray_tri_intersection" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)

    # Publish the buffer geometry to the data section of the loaded module.
    self.ds['vertices_size'] = self.vertex_buffer.vsize()
    self.ds['triangle_size'] = self.triangles.tsize()
    self.ds['vb_ptr'] = self.vertex_buffer.addr()
    self.ds['tr_ptr'] = self.triangles.addr()
def get_sample_asm(self, runtime, label):
    """Assemble and load the routine that produces the next image sample.

    The generated routine is exported under the global label ``label`` and
    expects ``eax`` to point to a sample structure.  It returns 1 in ``eax``
    after filling ``sample.xyxy``, ``sample.ix`` and ``sample.iy``, or 0
    when the tile is exhausted.

    Outline of the emitted code:

    * While ``curn`` is non-zero another sample is taken in the current
      pixel and ``curn`` is decremented.
    * Otherwise ``curn`` is reset to ``n - 1`` and the routine advances to
      the next pixel: x is incremented until it equals ``tile_endx``, then y
      is incremented and x is reset to ``tilex``; when y equals
      ``tile_endy`` the routine returns 0.
    * A sample position is ``(float(cur_xy) + w2h2 + random) * pixel_size``.

    The code relies on ``random`` leaving ``eax``, ``ebx`` and ``ecx``
    intact, since those registers are read after the call.

    Side effects on ``self``: ``self.ds`` is set to the object returned by
    ``runtime.load`` and ``self._populate_ds()`` is called (presumably to
    fill the tile and pixel data slots -- not visible here).

    Args:
        runtime: object used to load ``random`` and the assembled module.
        label: name of the global label placed at the routine's entry.

    Returns:
        True.
    """
    # eax - pointer to sample structure
    util.load_func(runtime, "random")

    to_float = ("vcvtdq2ps" if util.AVX else "cvtdq2ps") + " xmm4, xmm4 \n"
    sample_structs = util.structs("sample")

    # Shared by all three sampling paths: convert the integer pixel
    # coordinates in xmm4 to float, then add w2h2 and the random offset held
    # in xmm0.  xmm1 is left holding pixel_size.
    offset_chunk = to_float + """
    macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
    macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
    macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
    """

    pieces = ["""
    #DATA
    """]

    pieces.append(sample_structs + """
    uint32 n, curn
    uint32 tile_endx, tile_endy
    uint32 tilex, tiley
    uint32 cur_xyxy[4] ; we just use first two numbers for now
    float pixel_size[4]
    float w2h2[4]
    #CODE
    """)

    pieces.append(" global " + label + ":\n" + """
    cmp dword [curn], 0
    jbe _next_pixel
    ; calculate sample
    call random
    ; random number is in xmm0
    macro eq128 xmm4 = cur_xyxy {xmm0}
    """)

    pieces.append(offset_chunk + """
    mov ebx, dword [cur_xyxy]
    mov ecx, dword [cur_xyxy + 4]
    macro eq128 eax.sample.xyxy = xmm3 * xmm1
    mov dword [eax + sample.ix] ,ebx
    mov dword [eax + sample.iy] ,ecx
    sub dword [curn], 1
    mov eax, 1
    ret

    _next_pixel:
    mov edx, dword [n]
    ; self.curn = self.n - 1
    sub edx, 1
    mov dword [curn], edx
    mov ebx, dword [cur_xyxy]
    cmp ebx, dword [tile_endx]
    je _checky
    ; increase curx
    add ebx, 1
    mov dword [cur_xyxy], ebx
    ; calculate sample
    call random
    macro eq128 xmm4 = cur_xyxy {xmm0}
    """)

    pieces.append(offset_chunk + """
    mov ecx, dword [cur_xyxy + 4]
    macro eq128 eax.sample.xyxy = xmm3 * xmm1
    mov dword [eax + sample.ix] ,ebx
    mov dword [eax + sample.iy] ,ecx
    mov eax, 1
    ret

    _checky:
    mov ecx, dword [cur_xyxy + 4]
    cmp ecx, dword [tile_endy]
    je _end_sampling
    ; increase cury
    add ecx, 1
    mov ebx, dword [tilex]
    mov dword [cur_xyxy+ 4], ecx
    mov dword [cur_xyxy], ebx
    call random
    macro eq128 xmm4 = cur_xyxy {xmm0}
    """)

    pieces.append(offset_chunk + """
    macro eq128 eax.sample.xyxy = xmm3 * xmm1
    mov dword [eax + sample.ix] ,ebx
    mov dword [eax + sample.iy] ,ecx
    mov eax, 1
    ret

    _end_sampling:
    xor eax, eax
    ret
    """)

    source = "".join(pieces)
    machine_code = util.get_asm().assemble(source, True)

    module_name = "get_sample" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()
    return True