def le_asm(self, runtime):
    """Assemble and load the routine that writes emitted radiance.

    The generated code expects ``eax`` to hold a pointer to a hitpoint
    structure.  When the material has no emitter, a zero spectrum is copied
    into ``hitpoint.le``; otherwise the code is supplied by the emitter's
    own ``le_asm``.

    The data section returned by ``runtime.load`` is stored in
    ``self.emiter_ds`` (and passed to the emitter's ``populate_ds`` when an
    emitter exists); the module address is stored in ``self.le_ptr``.
    """
    hitpoint_struct = util.structs("hitpoint")

    if self.emiter is None:
        emission_code = "macro eq128 eax.hitpoint.le = zero_spectrum"
    else:
        emission_code = self.emiter.le_asm(runtime)

    source = "".join((
        """
        #DATA
        """,
        hitpoint_struct,
        """
            float zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0
        #CODE
        """,
        emission_code,
        """
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "material_le" + str(util.unique())
    self.emiter_ds = runtime.load(module_name, machine_code)
    if self.emiter is not None:
        self.emiter.populate_ds(self.emiter_ds)
    self.le_ptr = runtime.address_module(module_name)
def ray_asm(self, runtime, label):
    """Assemble and load the ray generation routine under ``label``.

    Register contract of the generated code: ``eax`` points to the ray
    structure to fill in and ``ebx`` points to the sample structure.
    The direction is ``sample.x * u + sample.y * v - wdistance`` divided by
    the square root of its dot product with itself; the origin is ``eye``.

    The instructions used for that length computation depend on
    ``util.AVX`` / ``util.SSE41``.  The loaded data section is stored in
    ``self.ds`` and then filled by ``self._populate_ds()``.
    """
    structs = util.structs("ray", "sample")

    # Pick the length computation (xmm5 = |xmm4|) for the available ISA.
    if util.AVX:
        length_code = """
            vdpps xmm5, xmm4, xmm4, 0x7F
            vsqrtps xmm5, xmm5
        """
    elif util.SSE41:
        length_code = """
            movaps xmm5, xmm4
            dpps xmm5, xmm5, 0x7F
            sqrtps xmm5, xmm5
        """
    else:
        length_code = """
            macro dot xmm5 = xmm4 * xmm4
            macro broadcast xmm5 = xmm5[0]
            sqrtps xmm5, xmm5
        """

    source = "".join((
        """
        #DATA
        """,
        structs,
        """
            float u[4]
            float v[4]
            float wdistance[4]
            float eye[4]
        #CODE
        """,
        " global " + label + ":\n",
        """
            macro eq128 xmm0 = ebx.sample.xyxy
            macro broadcast xmm1 = xmm0[0]
            macro eq128 xmm2 = xmm1 * u {xmm0}
            macro broadcast xmm0 = xmm0[1]
            macro eq128 xmm3 = xmm0 * v {xmm2}
            macro eq128 xmm4 = xmm2 + xmm3
            macro eq128 xmm4 = xmm4 - wdistance
        """,
        length_code,
        """
            macro eq128 xmm4 = xmm4 / xmm5
            macro eq128_128 eax.ray.dir = xmm4, eax.ray.origin = eye
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "generate_ray" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()
def struct(cls):
    """Return the assembler's description of the ``triangle`` structure.

    A minimal program containing only the structure definitions is
    assembled and the ``triangle`` entry is read back from the result.
    """
    source = "".join((
        """
        #DATA
        """,
        util.structs("triangle"),
        """
        #CODE
        #END
        """,
    ))
    return util.get_asm().assemble(source).get_struct("triangle")
def struct(cls):
    """Return the assembler's description of the ``grid`` structure.

    A minimal program containing only the structure definitions is
    assembled and the ``grid`` entry is read back from the result.
    """
    source = "".join((
        """
        #DATA
        """,
        util.structs("grid"),
        """
        #CODE
        #END
        """,
    ))
    return util.get_asm().assemble(source).get_struct("grid")
def next_direction_asm(self, runtime):
    """Assemble and load the "sample a direction, then evaluate" routine.

    The generated code expects ``eax`` to point to a hitpoint.  It calls
    the sampling function of the first sampler, sums the pdf code emitted
    by every sampler in ``self.sampling``, scales the sum by
    ``1 / len(self.sampling)``, stores it in ``hitpoint.pdf`` and finally
    calls the routine whose address is ``self.func_ptr``.

    The module address is stored in ``self.sampling_brdf_ptr``.
    """
    first_sampler = self.sampling[0]
    first_sampler.get_sample_asm(runtime)
    sample_func_ptr = first_sampler.func_ptr

    parts = [
        """
        #DATA
        """,
        util.structs("hitpoint"),
        """
            float zero = 0.0
            float pdf
            float inv_c
            uint32 sampling_ptr
            uint32 brdf_ptr
            uint32 hp_ptr
        #CODE
            mov dword [hp_ptr], eax ;save pointer to hitpoint
            ; call get sample
            call dword [sampling_ptr]
            ; init pdf with zero - calculate pdf we can have multiple sampler
            mov eax, dword [zero]
            mov dword [pdf], eax
            mov eax, dword [hp_ptr]
        """,
    ]

    # Each sampler leaves its pdf in xmm0; accumulate and restore eax.
    for sampler in self.sampling:
        parts.append(sampler.pdf_asm())
        parts.append("""
            macro eq32 pdf = pdf + xmm0
            mov eax, dword [hp_ptr]
        """)

    parts.append("""
            macro eq32 xmm0 = pdf * inv_c
            macro eq32 eax.hitpoint.pdf = xmm0
            call dword [brdf_ptr]
            ret
    """)

    machine_code = util.get_asm().assemble("".join(parts), True)
    module_name = "material_dir" + str(util.unique())
    data_section = runtime.load(module_name, machine_code)
    data_section["inv_c"] = 1.0 / len(self.sampling)
    data_section["sampling_ptr"] = sample_func_ptr
    data_section["brdf_ptr"] = self.func_ptr
    self.sampling_brdf_ptr = runtime.address_module(module_name)
def isect_asm(cls, runtime, label, populate=True):
    """Assemble and load the ray/plane intersection routine as ``label``.

    Register contract of the generated code:
    ``eax`` = ray, ``ebx`` = plane, ``ecx`` = pointer to the minimum
    distance, ``edx`` = hitpoint.  It returns 1 in ``eax`` on a hit that is
    farther than ``epsilon`` and not farther than the minimum distance,
    otherwise 0.

    When ``populate`` is true the hitpoint's normal, ``t``, material index
    and hit position are written before returning 1.
    """
    parts = [
        """
        #DATA
        float epsilon = 0.0001
        """,
        util.structs("ray", "plane", "hitpoint"),
        """
            ;eax = pointer to ray structure
            ;ebx = pointer to plane structure
            ;ecx = pointer to minimum distance
            ;edx = pointer to hitpoint
        #CODE
        """,
        " global " + label + ":\n",
        """
            macro eq128 xmm0 = ebx.plane.normal
            macro dot xmm1 = eax.ray.dir * xmm0
            macro eq128 xmm2 = ebx.plane.point - eax.ray.origin {xmm0, xmm1}
            macro dot xmm3 = xmm2 * xmm0 {xmm1}
            macro eq32 xmm4 = xmm3 / xmm1
            macro if xmm4 > epsilon goto populate_hitpoint
            mov eax, 0
            ret

            populate_hitpoint:
            ; in ecx is minimum distance
            macro if xmm4 > ecx goto _reject
        """,
    ]

    if populate:
        parts.append("""
            macro broadcast xmm5 = xmm4[0]
            macro eq128_128 edx.hitpoint.normal = ebx.plane.normal, xmm6 = xmm5 * eax.ray.dir
            macro eq32 edx.hitpoint.t = xmm4 {xmm6}
            macro eq32_128 edx.hitpoint.mat_index = ebx.plane.mat_index, edx.hitpoint.hit = xmm6 + eax.ray.origin
        """)

    parts.append("""
            mov eax, 1
            ret

            _reject:
            mov eax, 0
            ret
    """)

    machine_code = util.get_asm().assemble("".join(parts), True)
    module_name = "ray_plane_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
def intersect_ray_shape_array(name_struct, runtime, lbl_arr_intersect, lbl_ray_intersect):
    """Assemble and load a loop that intersects a ray with an array of shapes.

    ``name_struct`` is the structure name of one array element (its
    ``sizeof`` is the stride), ``lbl_ray_intersect`` is the label of the
    single-shape routine that gets called for every element, and
    ``lbl_arr_intersect`` is both the exported label and the module name.

    Register contract of the generated code:
    ``eax`` = ray, ``ebx`` = hitpoint, ``ecx`` = pointer to the minimum
    distance, ``esi`` = pointer to the array, ``edi`` = element count.
    After every reported hit the minimum distance is overwritten with
    ``hitpoint.t``.
    """
    source = "".join((
        """
        #DATA
        """,
        util.structs("ray", name_struct, "hitpoint"),
        """
        #CODE
        """,
        " global " + lbl_arr_intersect + ":\n",
        """
            ; eax - ray, ebx - hp , ecx - min_dist, esi - ptr_planes, edi - nplanes
            push ecx
            push eax
            push ebx
            push esi
            push edi

            _objects_loop:
            mov eax, dword [esp + 12] ; mov eax, ray
            mov ebx, dword [esp + 4] ; mov ebx, plane
            mov ecx, dword [esp + 16]; address of minimum distance
            mov edx, dword [esp + 8] ; mov edx, hp
        """,
        " call " + lbl_ray_intersect + "\n",
        """
            cmp eax, 0 ; 0 - no intersection ocur
            je _next_object
            mov eax, dword [esp + 8]
            mov ebx, dword [eax + hitpoint.t]
            mov edx, dword [esp + 16] ;populate new minimum distance
            mov dword [edx], ebx

            _next_object:
            sub dword [esp], 1
            jz _end_objects
        """,
        "add dword [esp + 4], sizeof " + name_struct + "\n",
        """
            jmp _objects_loop

            _end_objects:
            add esp, 20
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    runtime.load(lbl_arr_intersect, machine_code)
def brdf_asm(self, runtime):
    """Assemble and load the routine that sums the BRDF of all components.

    The generated code expects ``eax`` to point to a hitpoint.  The code
    emitted by each component leaves its spectrum in ``xmm0``; those
    spectra are accumulated and the total is written to ``hitpoint.brdf``.

    Side effects: ``self.ds`` receives the loaded data section,
    ``self.func_ptr`` the module address, every component gets a chance to
    fill the data section through ``populate_ds``, and finally
    ``self.next_direction_asm(runtime)`` is invoked.
    """
    parts = [
        """
        #DATA
        """,
        util.structs("hitpoint"),
        """
            float zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0
            float spectrum[4]
            uint32 hp_ptr
        #CODE
            mov dword [hp_ptr], eax ;save pointer to hitpoint
            macro eq128 spectrum = zero_spectrum
        """,
    ]

    for component in self.components:
        parts.append(component.brdf_asm(runtime))
        # The component's spectrum is in xmm0: add it to the running total
        # and restore the hitpoint pointer for the next component.
        parts.append("""
            macro eq128 spectrum = spectrum + xmm0
            mov eax, dword [hp_ptr]
        """)

    parts.append("""
            mov eax, dword [hp_ptr]
            macro eq128 eax.hitpoint.brdf = spectrum
            ret
    """)

    machine_code = util.get_asm().assemble("".join(parts), True)
    module_name = "material" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self.func_ptr = runtime.address_module(module_name)
    for component in self.components:
        component.populate_ds(self.ds)
    self.next_direction_asm(runtime)
def isect_asm(self, runtime, label, populate=True):
    """Assemble and load the mesh intersection trampoline as ``label``.

    The generated code performs an indirect call through the
    ``ptr_isect`` field of the mesh3d structure pointed to by ``ebx`` and
    returns.  ``populate`` is accepted but not used by this routine.
    """
    source = "".join((
        """
        #DATA
        """,
        util.structs("ray", "mesh3d", "hitpoint"),
        """
        #CODE
        """,
        " global " + label + ":\n",
        """
            ;we just use indirect call
            call dword [ebx + mesh3d.ptr_isect]
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_mesh_intersection" + str(util.unique())
    runtime.load(module_name, machine_code)
def light_sample_asm(self, runtime):
    """Assemble and load the routine that samples a point on the light.

    The generated code expects ``eax`` to point to a hitpoint.  It calls
    ``random`` (loaded into ``runtime`` first), scales ``edge_a`` and
    ``edge_b`` by the first two random numbers, adds ``point`` and writes
    the result to ``hitpoint.light_sample`` together with the stored
    ``normal`` (``hitpoint.light_normal``) and ``pdf``
    (``hitpoint.light_pdf``).

    Side effects: ``self.ds`` receives the loaded data section, which is
    then filled by ``self._populate_ds()``; ``self.sample_ptr`` receives
    the module address.
    """
    util.load_func(runtime, "random")

    source = "".join((
        """
        #DATA
        """,
        util.structs("hitpoint"),
        """
            float normal[4]
            float edge_a[4]
            float edge_b[4]
            float point[4]
            float pdf
            uint32 hp_ptr
        #CODE
            mov dword [hp_ptr], eax
            call random
            macro eq128 xmm1 = xmm0
            macro broadcast xmm0 = xmm0[0]
            macro broadcast xmm1 = xmm1[1]
            macro eq128 xmm0 = xmm0 * edge_a {xmm1}
            macro eq128 xmm1 = xmm1 * edge_b {xmm0}
            macro eq128 xmm0 = xmm0 + point {xmm1}
            macro eq128 xmm0 = xmm0 + xmm1
            mov eax, dword [hp_ptr]
            macro eq128 eax.hitpoint.light_sample = xmm0
            macro eq128 eax.hitpoint.light_normal = normal
            macro eq32 eax.hitpoint.light_pdf = pdf
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "recangle_sample" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()
    # Ask the runtime for the module address instead of indexing its
    # internal ``modules`` table directly.
    self.sample_ptr = runtime.address_module(module_name)
def isect_asm(cls, runtime, label, populate=True):
    """Assemble and load the ray/grid traversal routine as ``label``.

    Before assembling, the multi-object intersection helper is loaded
    under the label ``multiple_isect``.  The traversal body itself comes
    from the module-level ``AVX_ASM`` or ``SSE_ASM`` text, selected by
    ``util.AVX``.  ``populate`` is accepted but not used here.
    """
    structs = util.structs("ray", "grid", "hitpoint")
    multiple_isect_asm(runtime, "multiple_isect")

    header = "".join((
        """
        #DATA
        """,
        structs,
        """
            float one[4] = 1.0, 1.0, 1.0, 0.0
            float zero[4] = 0.0, 0.0, 0.0, 0.0

            int32 ixyz[4]
            float dtxyz[4]

            int32 istep[4]
            int32 istop[4]
            float tnext[4]
            int32 n[4]

            uint32 ones = 0xFFFFFFFF
            float khuge = 999999.999

            uint32 hp_ptr
            uint32 ray_ptr
            uint32 grid_ptr
        #CODE
        """,
        " global " + label + ":\n",
    ))
    traversal = AVX_ASM if util.AVX else SSE_ASM

    machine_code = util.get_asm().assemble(header + traversal, True)
    module_name = "ray_grid_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
def prepare_isect_asm(self, runtime):
    """Build the intersection code for this mesh and record its address.

    Two uniquely named routines are generated first: the ray/triangle
    routine (``self._ray_tri_asm``) and the grid traversal that uses it
    (``self.grid.isect_asm``).  A small module that just calls the grid
    traversal is then loaded, and its address is stored in
    ``self.ptr_isect``.
    """
    structs = util.structs("ray", "mesh3d", "hitpoint")

    # Labels must be unique per mesh, hence the hash and the counter.
    tri_label = "ray_mesh" + str(hash(self)) + str(util.unique())
    grid_label = "ray_gridmesh" + str(hash(self)) + str(util.unique())

    self._ray_tri_asm(runtime, tri_label)
    self.grid.isect_asm(runtime, grid_label, tri_label)

    source = "".join((
        """
        #DATA
        """,
        structs,
        """
        #CODE
        """,
        "call " + grid_label + " \n",
        "ret \n ",
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_mesh_intersection" + str(util.unique())
    runtime.load(module_name, machine_code)
    self.ptr_isect = runtime.address_module(module_name)
def isect_asm(cls, runtime, label, populate=True):
    """Assemble and load the ray/rectangle intersection routine as ``label``.

    Register contract of the generated code:
    ``eax`` = ray, ``ebx`` = rectangle, ``ecx`` = pointer to the minimum
    distance, ``edx`` = hitpoint.  The routine returns 1 in ``eax`` when
    the ray hits the rectangle's plane farther than ``epsilon``, the hit
    lies within both edges, and it is not farther than the minimum
    distance; otherwise it returns 0.

    When ``populate`` is true the hitpoint's normal, ``t``, hit position
    and material index are written before returning 1.
    """
    parts = [
        """
        #DATA
        float epsilon = 0.0001
        float zero = 0.0
        """,
        util.structs("ray", "rectangle", "hitpoint"),
        """
            ;eax = pointer to ray structure
            ;ebx = pointer to rectangle structure
            ;ecx = pointer to minimum distance
            ;edx = pointer to hitpoint
        #CODE
        """,
        " global " + label + ":\n",
        """
            macro eq128 xmm0 = ebx.rectangle.point - eax.ray.origin
            macro eq128 xmm1 = ebx.rectangle.normal
            macro dot xmm0 = xmm0 * xmm1
            macro eq128 xmm2 = eax.ray.dir
            macro dot xmm2 = xmm2 * xmm1 {xmm0}
            macro eq32 xmm0 = xmm0 / xmm2 {xmm1}
            macro if xmm0 > epsilon goto _next
            mov eax, 0
            ret

            _next:
            macro broadcast xmm0 = xmm0[0]
            macro eq128 xmm2 = xmm0 * eax.ray.dir {xmm1}
            macro eq128 xmm2 = xmm2 + eax.ray.origin {xmm0, xmm1}
            macro eq128 xmm3 = xmm2 - ebx.rectangle.point {xmm0, xmm1}
            macro eq128 xmm4 = ebx.rectangle.edge_a {xmm0, xmm1, xmm2, xmm3}
            macro dot xmm4 = xmm4 * xmm3 {xmm0, xmm1, xmm2}
            macro if xmm4 < zero goto _reject
            macro eq32 xmm5 = ebx.rectangle.edge_a_squared {xmm0, xmm1, xmm2, xmm3}
            macro if xmm4 > xmm5 goto _reject

            macro eq128 xmm4 = ebx.rectangle.edge_b {xmm0, xmm1, xmm2, xmm3}
            macro dot xmm4 = xmm4 * xmm3 {xmm0, xmm1, xmm2}
            macro if xmm4 < zero goto _reject
            macro eq32 xmm5 = ebx.rectangle.edge_b_squared {xmm0, xmm1, xmm2, xmm3}
            macro if xmm4 > xmm5 goto _reject

            ; distance checking
            macro if xmm0 > ecx goto _reject
        """,
    ]

    if populate:
        parts.append("""
            macro eq128 edx.hitpoint.normal = xmm1
            macro eq32 edx.hitpoint.t = xmm0
            macro eq128 edx.hitpoint.hit = xmm2
            macro eq32 edx.hitpoint.mat_index = ebx.rectangle.mat_index
        """)

    parts.append("""
            mov eax, 1
            ret

            _reject:
            mov eax, 0
            ret
    """)

    machine_code = util.get_asm().assemble("".join(parts), True)
    module_name = "ray_rectangle_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
print("Python = (", sample.ix, sample.x, ")", " (", sample.iy, sample.y, ")") print("===============") print("===============") runtime.run("test") print_sample(ds, "sam") runtime.run("test") print_sample(ds, "sam") runtime.run("test") print_sample(ds, "sam") ASM = """ #DATA """ ASM += util.structs("sample") + """ sample sam uint32 have #CODE mov eax, sam call get_sample mov dword [have], eax #END """ def print_sample(ds, name): x, y, temp1, temp2 = ds[name + ".xyxy"] print("ASM = ", "(", ds[name + ".ix"], x, ") (", ds[name + ".iy"], y,
from tdasm import Tdasm, Runtime
from renmas.maths import Vector3
from renmas.shapes import Triangle, intersect_ray_shape_array
from renmas.core import Ray
import random
import renmas.utils as util
import timeit

# Assembly structure definitions for ray, triangle and hitpoint.
asm_structs = util.structs("ray", "triangle", "hitpoint")


def create_triangle():
    """Return the fixed test triangle (fourth ``Triangle`` argument is 3)."""
    vertex_a = Vector3(0.1, 0.0, -2.0)
    vertex_b = Vector3(4.0, 0.5, 0.2)
    vertex_c = Vector3(2.2, 4.3, -1.0)
    return Triangle(vertex_a, vertex_b, vertex_c, 3)


def create_ray():
    """Return the fixed test ray starting at the origin.

    The direction components are passed to ``Ray`` exactly as written;
    no normalisation is performed here.
    """
    start = Vector3(0.0, 0.0, 0.0)
    heading = Vector3(0.985906665972, 0.165777376892, 0.0224923832256)
    return Ray(start, heading)
from tdasm import Tdasm, Runtime import random from renmas.shapes import Sphere, intersect_ray_shape_array from renmas.core import Ray from renmas.maths import Vector3 import renmas.utils as util if util.AVX: line1 = " vsqrtss xmm5, xmm5, xmm5 \n" else: line1 = " sqrtss xmm5, xmm5 \n" asm_structs = util.structs("ray", "sphere", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray r1 sphere sph hitpoint hp float min_dist = 999999.0 float epsilon = 0.0001 float t uint32 hit float two = 2.0 float minus_four = -4.0 float zero = 0.0 float one = 1.0 float minus_one = -1.0
def get_sample_asm(self, runtime, label):
    """Assemble and load the random-sampler routine under ``label``.

    The generated code expects ``eax`` to point to a sample structure.
    While ``curn`` is non-zero it produces another jittered sample for the
    current pixel; otherwise it resets ``curn`` to ``n - 1`` and advances
    to the next pixel in x, then to the next row, and returns 0 in ``eax``
    once the tile end has been reached.  On success it fills
    ``sample.xyxy``, ``sample.ix`` and ``sample.iy`` and returns 1.

    ``random`` is loaded into ``runtime`` first.  The loaded data section
    is stored in ``self.ds`` and filled by ``self._populate_ds()``.
    Always returns ``True``.
    """
    util.load_func(runtime, "random")

    # Integer pixel coordinates -> float, in the encoding the CPU supports.
    if util.AVX:
        to_float = "vcvtdq2ps xmm4, xmm4 \n"
    else:
        to_float = "cvtdq2ps xmm4, xmm4 \n"

    source = "".join((
        """
        #DATA
        """,
        util.structs("sample"),
        """
            uint32 n, curn
            uint32 tile_endx, tile_endy
            uint32 tilex, tiley
            uint32 cur_xyxy[4] ; we just use first two numbers for now
            float pixel_size[4]
            float w2h2[4]
        #CODE
        """,
        " global " + label + ":\n",
        """
            cmp dword [curn], 0
            jbe _next_pixel
            ; calculate sample
            call random
            ; random number is in xmm0
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """,
        to_float,
        """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ebx, dword [cur_xyxy]
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            sub dword [curn], 1
            mov eax, 1
            ret

            _next_pixel:
            mov edx, dword [n]
            ; self.curn = self.n - 1
            sub edx, 1
            mov dword [curn], edx

            mov ebx, dword [cur_xyxy]
            cmp ebx, dword [tile_endx]
            je _checky
            ; increase curx
            add ebx, 1
            mov dword [cur_xyxy], ebx
            ; calculate sample
            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """,
        to_float,
        """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1
            ret

            _checky:
            mov ecx, dword [cur_xyxy + 4]
            cmp ecx, dword [tile_endy]
            je _end_sampling
            ; increase cury
            add ecx, 1
            mov ebx, dword [tilex]
            mov dword [cur_xyxy+ 4], ecx
            mov dword [cur_xyxy], ebx
            call random
            macro eq128 xmm4 = cur_xyxy {xmm0}
        """,
        to_float,
        """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2 {xmm0}
            macro eq128 xmm3 = xmm4 + xmm2 {xmm1, xmm0}
            macro eq128 xmm3 = xmm3 + xmm0 {xmm1}
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1
            ret

            _end_sampling:
            xor eax, eax
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "get_sample" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()
    return True
# Debug helpers that print sampler output from Python next to the values the
# assembly test program left in its data section.
#
# print_regular(reg, runtime, ds): repeatedly asks ``reg`` for a sample and runs
#   the runtime module named "test"; stops when ``reg.get_sample`` returns None.
# ASM: test program that calls ``get_sample`` with ``eax`` pointing at ``sam``
#   and stores the returned ``eax`` in ``have``.
# print_sample(ds, name): prints ``ix``/``x``, ``iy``/``y`` of the structure
#   ``name`` in the data section ``ds`` plus the ``have`` flag.
#
# NOTE(review): this line has lost its original line breaks and indentation, so
# it is unclear which of the statements after ``break`` belong to the ``while``
# loop body - restore the formatting from version control before editing.
def print_regular(reg, runtime, ds): sample = Sample() while True: sam = reg.get_sample(sample) runtime.run("test") if sam is None: break print ("Python = (", sample.ix, sample.x, ")", " (", sample.iy, sample.y, ")") print_sample(ds, "sam") runtime.run("test") print_sample(ds, "sam") ASM = """ #DATA """ ASM += util.structs("sample") + """ sample sam uint32 have #CODE mov eax, sam call get_sample mov dword [have], eax #END """ def print_sample(ds, name): x, y, temp1, temp2 = ds[name + ".xyxy"] print("ASM = ", "(",ds[name+".ix"], x, ") (", ds[name+".iy"], y, ") have =", ds["have"])
def intersect_ray_triangle(runtime, label, populate=True):
    """Assemble and load the ray/triangle intersection routine as ``label``.

    Calling convention of the generated code:
    ``eax`` = ray, ``ecx`` = pointer to the minimum distance,
    ``edx`` = hitpoint.  The triangle is passed on the stack:
    ``[esp+4]``, ``[esp+8]``, ``[esp+12]`` are pointers to the three
    vertices, ``[esp+16]`` is a pointer to the normal and ``[esp+20]``
    holds the material index.  The routine returns 1 in ``eax`` on a hit
    and 0 otherwise.

    When ``populate`` is true the hitpoint's ``t``, normal, material index
    and hit position are written before returning 1.

    The AVX variant is emitted when ``util.AVX`` is set; otherwise plain
    SSE instructions are used.
    """
    use_avx = util.AVX

    parts = [
        """
        #DATA
        float epsilon = 0.00001
        float neg_epsilon = -0.00001
        float one = 1.0
        float zero = 0.0
        uint32 mask_abs[4] = 0x7FFFFFFF, 0, 0, 0
        float minus_one = -1.0
        """,
        util.structs("ray", "hitpoint"),
        """
            ;eax = pointer to ray structure
            ;ecx = pointer to minimum distance
            ;edx = pointer to hitpoint
        #CODE
        ;macro eq128_128 xmm0 = ebx.triangle.p1 - ebx.triangle.p0, xmm1 = ebx.triangle.p2 - ebx.triangle.p0
        """,
        " global " + label + ":\n",
    ]

    # Edge vectors: xmm0 = p1 - p0, xmm1 = p2 - p0.
    if use_avx:
        parts.append("""
            mov ebp, dword [esp + 8]
            vmovaps xmm0, oword [ebp]
            mov ebp, dword [esp+4]
            vsubps xmm0, xmm0, oword [ebp]
            mov ebp, dword [esp + 12]
            vmovaps xmm1, oword [ebp]
            mov ebp, dword [esp+4]
            vsubps xmm1, xmm1, oword [ebp]
        """)
    else:
        parts.append("""
            mov ebp, dword [esp + 8]
            movaps xmm0, oword [ebp]
            mov ebp, dword [esp+4]
            subps xmm0, oword [ebp]
            mov ebp, dword [esp + 12]
            movaps xmm1, oword [ebp]
            mov ebp, dword [esp+4]
            subps xmm1, oword [ebp]
        """)

    parts.append("""
            ; e1 = xmm0 , e2 = xmm1
            macro eq128_128 xmm2 = eax.ray.dir, xmm3 = xmm1 {xmm0, xmm1}

            ; p = d x e2
            macro eq128_128 xmm4 = xmm2, xmm5 = xmm3 {xmm0, xmm1}
    """)

    if use_avx:
        parts.append("""
            vshufps xmm2, xmm2, xmm2, 0xC9
            vshufps xmm3, xmm3, xmm3, 0xD2
            macro eq128 xmm2 = xmm2 * xmm3 {xmm0, xmm1}
            vshufps xmm4, xmm4, xmm4, 0xD2
            vshufps xmm5, xmm5, xmm5, 0xC9
        """)
    else:
        parts.append("""
            shufps xmm2, xmm2, 0xC9
            shufps xmm3, xmm3, 0xD2
            macro eq128 xmm2 = xmm2 * xmm3 {xmm0, xmm1}
            shufps xmm4, xmm4, 0xD2
            shufps xmm5, xmm5, 0xC9
        """)

    parts.append("""
            macro eq128 xmm4 = xmm4 * xmm5 {xmm0, xmm1, xmm2}
            macro eq128 xmm2 = xmm2 - xmm4 {xmm0, xmm1}

            macro dot xmm3 = xmm0 * xmm2 {xmm0, xmm1}
    """)

    # |determinant| in xmm4.  The value is a float, so the sign bit has to be
    # cleared with a mask; an integer absolute value (vpabsd) would
    # two's-complement negate the bit pattern of a negative float and yield
    # an unrelated number.
    if use_avx:
        parts.append("vmovaps xmm4, oword [mask_abs] \n")
        parts.append("vandps xmm4, xmm4, xmm3 \n")
    else:
        parts.append("movaps xmm4, oword [mask_abs] \n")
        parts.append("andps xmm4, xmm3 \n")

    parts.append("""
            macro if xmm4 < epsilon goto reject
            macro eq32 xmm4 = one / xmm3 {xmm0, xmm1, xmm2, xmm3}

            ; f = xmm4
            ;macro eq128 xmm5 = eax.ray.origin - ebx.triangle.p0 {xmm0, xmm1, xmm2, xmm3, xmm4}
    """)

    if use_avx:
        parts.append("""
            mov ebp, dword [esp+4]
            macro eq128 xmm5 = eax.ray.origin
            vsubps xmm5, xmm5, oword [ebp]
        """)
    else:
        parts.append("""
            mov ebp, dword [esp+4]
            macro eq128 xmm5 = eax.ray.origin
            subps xmm5, oword [ebp]
        """)

    parts.append("""
            ; s = xmm5
            macro dot xmm2 = xmm2 * xmm5 {xmm0, xmm1, xmm3, xmm4}

            ;s * p(s dot p) = xmm2
            macro eq32 xmm6 = xmm4 * xmm2 {xmm0, xmm1, xmm2, xmm3, xmm4, xmm5}

            macro if xmm6 < zero goto reject
            macro if xmm6 > one goto reject

            ; q = s x e1
            macro eq128_128 xmm3 = xmm5, xmm7 = xmm0
    """)

    if use_avx:
        parts.append("""
            vshufps xmm5, xmm5, xmm5, 0xC9
            vshufps xmm0, xmm0, xmm0, 0xD2
            macro eq128 xmm0 = xmm0 * xmm5
            vshufps xmm3, xmm3, xmm3, 0xD2
            vshufps xmm7, xmm7, xmm7, 0xC9
        """)
    else:
        parts.append("""
            shufps xmm5, xmm5, 0xC9
            shufps xmm0, xmm0, 0xD2
            macro eq128 xmm0 = xmm0 * xmm5
            shufps xmm3, xmm3, 0xD2
            shufps xmm7, xmm7, 0xC9
        """)

    parts.append("""
            macro eq128 xmm3 = xmm3 * xmm7
            macro eq128 xmm0 = xmm0 - xmm3

            macro dot xmm7 = xmm0 * eax.ray.dir {xmm1}
            macro eq32 xmm7 = xmm7 * xmm4

            macro if xmm7 < zero goto reject
            macro eq32 xmm7 = xmm7 + xmm6
            macro if xmm7 > one goto reject

            macro dot xmm6 = xmm1 * xmm0
            macro eq32 xmm6 = xmm6 * xmm4

            ;populate hitpoint structure
            ; t is in xmm6 , t can be negative so we eleminate those
            macro if xmm6 < zero goto reject
            macro if xmm6 > ecx goto reject
    """)

    if populate:
        parts.append("""
            macro eq32 edx.hitpoint.t = xmm6
            macro broadcast xmm7 = xmm6[0]
            ;macro eq128_32 edx.hitpoint.normal = ebx.triangle.normal, edx.hitpoint.mat_index = ebx.triangle.mat_index
        """)
        if use_avx:
            parts.append("""
            mov ebp, dword [esp + 16]
            vmovaps xmm0, oword [ebp]
            vmovss xmm1, dword [esp + 20]
            """)
        else:
            parts.append("""
            mov ebp, dword [esp + 16]
            movaps xmm0, oword [ebp]
            movss xmm1, dword [esp + 20]
            """)
        parts.append("""
            macro eq128 edx.hitpoint.normal = xmm0
            macro eq32 edx.hitpoint.mat_index = xmm1
            macro eq128 xmm5 = xmm7 * eax.ray.dir
            macro eq128 edx.hitpoint.hit = xmm5 + eax.ray.origin
        """)

    parts.append("""
            mov eax, 1
            ret

            reject:
            mov eax, 0
            ret
    """)

    machine_code = util.get_asm().assemble("".join(parts), True)
    module_name = "ray_triangle_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
def intersect_ray_triangle_new(runtime, label, populate=True):
    """Assemble and load the SSE ray/triangle intersection routine as ``label``.

    Calling convention of the generated code:
    ``eax`` = ray, ``ecx`` = pointer to the minimum distance,
    ``edx`` = hitpoint.  The triangle is passed on the stack:
    ``[esp+4]``, ``[esp+8]``, ``[esp+12]`` are pointers to the three
    vertices, ``[esp+16]`` is a pointer to the normal and ``[esp+20]``
    holds the material index.  On a hit the hitpoint's ``t``, normal,
    material index and hit position are written and 1 is returned in
    ``eax``; otherwise 0 is returned.

    ``populate`` is accepted but not used: this variant always fills the
    hitpoint.
    """
    source = "".join((
        """
        #DATA
        float epsilon= 0.00001
        float one = 1.0
        """,
        util.structs("ray", "hitpoint"),
        """
            ;eax = pointer to ray structure
            ;ecx = pointer to minimum distance
            ;edx = pointer to hitpoint
        #CODE
        ;macro eq128_128 xmm0 = ebx.triangle.p1 - ebx.triangle.p0, xmm1 = ebx.triangle.p2 - ebx.triangle.p0
        """,
        " global " + label + ":\n",
        """
            mov ebp, dword [esp+4]
            movaps xmm0, oword [ebp]
            ;movaps xmm0, oword [ebx + triangle.p0]
            movaps xmm2, oword [eax + ray.dir]
            movaps xmm1, xmm0
            mov ebp, dword [esp+12]
            subps xmm1, oword [ebp]
            ;subps xmm1, oword [ebx + triangle.p2]
            movaps xmm3, xmm0
            subps xmm3, oword [eax + ray.origin]
            mov ebp, dword [esp+8]
            subps xmm0, oword [ebp]
            ;subps xmm0, oword [ebx + triangle.p1]

            ; f f h f
            movaps xmm4, xmm1
            movlhps xmm4, xmm3
            shufps xmm4, xmm4, 01110101B

            ; k k k l
            movaps xmm5, xmm2
            movhlps xmm5, xmm3
            shufps xmm5, xmm5, 00101010B

            ; f f h f * k k k l
            movaps xmm7, xmm4
            mulps xmm7, xmm5

            ; g g g h
            movaps xmm6, xmm2
            movaps xmm4, xmm1
            movlhps xmm6, xmm3
            movhlps xmm4, xmm3
            shufps xmm6, xmm6, 11010101B
            shufps xmm4, xmm4, 10001010B
            ; j j l j

            ; g g g h * j j l j
            mulps xmm4, xmm6

            ; f f h f * k k k l - g g g h * j j l j
            subps xmm7, xmm4

            ; a d a a
            movaps xmm5, xmm0
            movlhps xmm5, xmm3
            shufps xmm5, xmm5, 00001000B

            ; a d a a * (f f h f * k k k l - g g g h * j j l j)
            mulps xmm7, xmm5

            ; i l i i
            movaps xmm5, xmm0
            movhlps xmm5, xmm3
            shufps xmm5, xmm5, 10100010B

            ; g g g h * i l i i
            mulps xmm6, xmm5

            ; e h e e
            movaps xmm4, xmm0
            movlhps xmm4, xmm3
            shufps xmm4, xmm4, 01011101B

            ; k k k l
            movaps xmm5, xmm2
            movhlps xmm5, xmm3
            shufps xmm5, xmm5, 00101010B

            ; e h e e * k k k l
            mulps xmm5, xmm4

            ; g g g h * i l i i - e h e e * k k k l
            subps xmm6, xmm5

            ; b b d b
            movaps xmm5, xmm1
            movlhps xmm5, xmm3
            shufps xmm5, xmm5, 00100000B

            ; b b d b * (g g g h * i l i i - e h e e * k k k l)
            mulps xmm6, xmm5

            addps xmm7, xmm6

            ; j j l j
            movaps xmm5, xmm1
            movhlps xmm5, xmm3
            shufps xmm5, xmm5, 10001010B

            ; e e h e * j j l j
            mulps xmm4, xmm5

            ; f f h f
            movaps xmm6, xmm1
            movlhps xmm6, xmm3
            shufps xmm6, xmm6, 01110101B

            ; i l i i
            movaps xmm5, xmm0
            movhlps xmm5, xmm3
            shufps xmm5, xmm5, 10100010B

            ; f f h f * i l i i
            mulps xmm6, xmm5

            ; e h e e * j j l j - f f h f * i l i i
            subps xmm4, xmm6

            ; c c c d
            movaps xmm5, xmm2
            movlhps xmm5, xmm3
            shufps xmm5, xmm5, 10000000B

            ; c c c d * (e h e e * j j l j - f f h f * i l i i)
            mulps xmm4, xmm5

            addps xmm7, xmm4

            macro broadcast xmm3 = xmm7[0]
            divps xmm7, xmm3

            movhlps xmm5, xmm7

            movaps xmm4, xmm7
            shufps xmm4, xmm4, 0x55

            movaps xmm6, xmm7
            shufps xmm6, xmm6, 0xFF

            ; xmm7 = d
            ; xmm6 = td
            ; xmm5 = gamma
            ; xmm4 = beta

            pxor xmm3, xmm3
            macro if xmm4 < xmm3 goto _reject
            macro if xmm5 < xmm3 goto _reject
            addss xmm4, xmm5
            macro if xmm4 > one goto _reject

            comiss xmm6, dword [epsilon]
            jc _reject
            comiss xmm6, dword [ecx] ;minimum distance
            jnc _reject

            ;populate hitpoint structure
            ; t is in xmm6
            movaps xmm2, oword [eax + ray.dir]
            mov ebp, dword [esp + 16]
            movaps xmm3, oword [ebp]
            ;movaps xmm3, oword [ebx + triangle.normal]
            movss xmm4, dword [esp+20]
            ;movss xmm4, dword [ebx + triangle.mat_index]

            movss dword [edx + hitpoint.t], xmm6
            movaps oword [edx + hitpoint.normal], xmm3
            movss dword [edx + hitpoint.mat_index], xmm4
            macro broadcast xmm5 = xmm6[0]
            mulps xmm5, xmm2
            macro eq128 edx.hitpoint.hit = xmm5 + eax.ray.origin

            mov eax, 1
            ret

            _reject:
            xor eax, eax
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_triangle_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
from tdasm import Runtime
import renmas.samplers
from renmas.camera import PinholeCamera
import renmas.interface as ren
import renmas.utils as util

asm_structs = util.structs("ray", "sample")

# Test program: calls ``generate_ray`` with eax -> r1 and ebx -> sp1, then
# saves xmm4 into ``_xmm`` so it can be inspected from Python.
ASM = "".join((
    """
    #DATA
    """,
    asm_structs,
    """
        sample sp1
        ray r1
        float _xmm[4]
    #CODE
        mov eax, r1
        mov ebx, sp1
        call generate_ray
        movaps oword [_xmm], xmm4
    """,
))

if __name__ == "__main__":
    runtime = Runtime()
    asm = util.get_asm()
    mc = asm.assemble(ASM)
def isect_asm(self, runtime, label, label_ray_tri):
    """Assemble and load the ray/grid traversal for a mesh as ``label``.

    The traversal body is produced by the module-level ``avx_asm`` or
    ``sse_asm`` function (selected by ``util.AVX``), which receives
    ``label_ray_tri``, the label of the ray/triangle routine.

    After loading, the data section (kept in ``self.ds``) is filled with
    the grid's bounding box, the ``n_1``, ``one_overn`` and ``nbox_width``
    vectors, the grid dimensions and the pointers returned by
    ``self.asm_cells.ptr()`` and ``self.lin_array.ptr()``.
    """
    header = "".join((
        """
        #DATA
        """,
        util.structs("ray", "mesh3d", "hitpoint"),
        """
            float one[4] = 1.0, 1.0, 1.0, 0.0
            float zero[4] = 0.0, 0.0, 0.0, 0.0

            int32 ixyz[4]
            float dtxyz[4]

            int32 istep[4]
            int32 istop[4]
            float tnext[4]
            int32 n[4]

            uint32 ones = 0xFFFFFFFF
            float khuge = 999999.999

            uint32 hp_ptr
            uint32 ray_ptr
            uint32 mesh3d_ptr

            float bbox_min[4]
            float bbox_max[4]
            float n_1[4]
            float nbox_width[4]
            float one_overn[4]
            int32 grid_size[4]
            uint32 grid_ptr
            uint32 arr_ptr
            uint32 min_dist_ptr
            hitpoint hp2
            float epsilon = 0.00001
        #CODE
        """,
        " global " + label + ":\n",
    ))
    if util.AVX:
        source = header + avx_asm(label_ray_tri)
    else:
        source = header + sse_asm(label_ray_tri)

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_grid_isect" + str(util.unique())
    data = self.ds = runtime.load(module_name, machine_code)

    box = self.bbox
    data["bbox_min"] = (box.x0, box.y0, box.z0, 0.0)
    data["bbox_max"] = (box.x1, box.y1, box.z1, 0.0)

    # These data-section names match the attribute names on the grid.
    for field in ("n_1", "one_overn", "nbox_width"):
        vec = getattr(self, field)
        data[field] = (vec.x, vec.y, vec.z, 0.0)

    data["grid_size"] = (self.nx, self.ny, self.nz, 0)
    data["grid_ptr"] = self.asm_cells.ptr()
    data["arr_ptr"] = self.lin_array.ptr()
return ray idx_material = 2 mesh = renmas.shapes.Mesh3D(idx_material) #mesh.load_ply("dragon_vrip_res4.ply") mesh.load_ply("dragon_vrip_res3.ply") mesh.prepare_isect() runtime = Runtime() mesh._ray_tri_asm(runtime, "ray_tri_isect") asm_structs = util.structs("ray", "triangle", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray ray1 hitpoint hp float min_dist = 9999.99 ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj uint32 n = 10360 ;uint32 n = 10225 uint32 arr[12000] = 0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12 uint32 ptr_array #CODE
def get_sample_asm(self, runtime, label):
    """Assemble and load the regular-sampler routine under ``label``.

    The generated code expects ``eax`` to point to a sample structure.
    Each call advances the current pixel in x, or wraps to ``tilex`` on
    the next row once ``tile_endx`` is reached, fills ``sample.xyxy``,
    ``sample.ix`` and ``sample.iy`` and returns 1 in ``eax``.  When the
    row index has reached ``tile_endy`` it returns 0 instead.

    The loaded data section is stored in ``self.ds`` and filled by
    ``self._populate_ds()``.  Always returns ``True``.
    """
    # Integer pixel coordinates -> float, in the encoding the CPU supports.
    if util.AVX:
        to_float = "vcvtdq2ps xmm0, xmm0 \n"
    else:
        to_float = "cvtdq2ps xmm0, xmm0 \n"

    source = "".join((
        """
        #DATA
        """,
        util.structs("sample"),
        """
            uint32 tile_endx, tile_endy
            uint32 tilex, tiley
            uint32 cur_xyxy[4] ; we just use first two numbers for now
            float pixel_size[4]
            float w2h2[4]
        #CODE
        """,
        " global " + label + ":\n",
        """
            mov ebx, dword [cur_xyxy]
            cmp ebx, dword [tile_endx]
            je _checky
            ; increase curx
            add ebx, 1
            mov dword [cur_xyxy], ebx
            ; calculate sample
            macro eq128 xmm0 = cur_xyxy
        """,
        to_float,
        """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2
            macro eq128 xmm3 = xmm0 + xmm2 {xmm1}
            mov ecx, dword [cur_xyxy + 4]
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1
            ret

            _checky:
            mov ecx, dword [cur_xyxy + 4]
            cmp ecx, dword [tile_endy]
            je _end_sampling
            ; increase cury
            add ecx, 1
            mov ebx, dword [tilex]
            mov dword [cur_xyxy+ 4], ecx
            mov dword [cur_xyxy], ebx
            macro eq128 xmm0 = cur_xyxy
        """,
        to_float,
        """
            macro eq128_128 xmm1 = pixel_size, xmm2 = w2h2
            macro eq128 xmm3 = xmm0 + xmm2 {xmm1}
            macro eq128 eax.sample.xyxy = xmm3 * xmm1
            mov dword [eax + sample.ix] ,ebx
            mov dword [eax + sample.iy] ,ecx
            mov eax, 1
            ret

            _end_sampling:
            xor eax, eax
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "get_sample" + str(util.unique())
    self.ds = runtime.load(module_name, machine_code)
    self._populate_ds()
    return True
def _ray_tri_asm(self, runtime, label):
    """Assemble and load the "ray versus list of triangle indices" loop.

    Register contract of the generated code:
    ``eax`` = ray, ``ebx`` = hitpoint, ``esi`` = pointer to an array of
    32-bit triangle indices, ``edi`` = number of indices.  For each index
    the triangle record is located in the triangle buffer, the addresses
    of its three vertices and of its normal plus its material index are
    pushed, and ``ray_triangle_mesh`` is called.  The routine returns 1 in
    ``eax`` when a hit closer than ``max_dist`` and not closer than
    ``epsilon`` was found, otherwise 0.

    ``ray_triangle_mesh`` is loaded into ``runtime`` first.  The loaded
    data section is stored in ``self.ds`` and receives the element sizes
    and base addresses of the vertex and triangle buffers.
    """
    util.load_func(runtime, "ray_triangle_mesh")

    source = "".join((
        """
        #DATA
        """,
        util.structs("ray", "hitpoint"),
        """
            float min_dist = 999999.0
            float max_dist = 999999.0
            float zero = 0.0
            float one = 1.0
            float epsilon = 0.00001
            float minus_nesto = 0.0001

            ; pointer to vertex and triangle buffers
            uint32 vb_ptr
            uint32 tr_ptr
            uint32 vertices_size
            uint32 triangle_size
        #CODE
        """,
        " global " + label + ":\n",
        """
            ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj
            ; 64-bit version will bi i little different beacuse of different size of array
            macro eq32 min_dist = max_dist + one
            mov ecx, min_dist

            push ecx
            push eax
            push ebx
            push esi
            push edi
            mov edx, dword [minus_nesto]
            mov dword [ebx + hitpoint.t], edx

            _objects_loop:
            mov eax, dword [esp + 12] ; address of ray
            mov ecx, dword [esp + 16] ; address of minimum distance
            mov edx, dword [esp + 8] ; address of hitpoint
            mov esi, dword [esp + 4] ; array of indexes of triangles

            mov ebx, dword [esi] ; put index of triangle in ebx

            ; prepeare call - address of parameters
            ;addres of points, normal a value of material index
            ;addr = self.address.ptr() + index * self.tri_size
            imul ebx, dword [triangle_size]
            add ebx, dword [tr_ptr]
            ; trbuffer tr_ptr=v0, tr_ptr+4=v1, tr_ptr+8=v2, tr_ptr+12=mat_idx , tr_ptr+16=normal
            mov ebp, dword [ebx + 12]
            push ebp
            mov ebp, ebx
            add ebp, 16
            push ebp
            mov ebp, dword [ebx + 8]
            imul ebp, dword [vertices_size]
            add ebp, dword [vb_ptr]
            push ebp
            mov ebp, dword [ebx + 4]
            imul ebp, dword [vertices_size]
            add ebp, dword [vb_ptr]
            push ebp
            mov ebp, dword [ebx]
            imul ebp, dword [vertices_size]
            add ebp, dword [vb_ptr]
            push ebp

            call ray_triangle_mesh
            add esp, 20

            cmp eax, 0 ; 0 - no intersection ocur 1 - intersection ocur
            jne _update_distance

            _next_object:
            sub dword [esp], 1
            jz _end_objects
            add dword [esp + 4], 4 ;increment array by 4 - index of triangle
            jmp _objects_loop

            _update_distance:
            mov eax, dword [esp + 8]
            mov ebx, dword [eax + hitpoint.t]

            mov edx, dword [esp + 16] ;populate new minimum distance
            mov dword [edx], ebx
            jmp _next_object

            _end_objects:
            add esp, 20
            macro eq32 xmm0 = min_dist
            macro if xmm0 < max_dist goto _accept
            mov eax, 0
            ret

            _accept:
            macro if xmm0 < epsilon goto _reject
            mov eax, 1
            ret

            _reject:
            mov eax, 0
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_tri_intersection" + str(util.unique())
    data = self.ds = runtime.load(module_name, machine_code)
    data['vertices_size'] = self.vertex_buffer.vsize()
    data['triangle_size'] = self.triangles.tsize()
    data['vb_ptr'] = self.vertex_buffer.addr()
    data['tr_ptr'] = self.triangles.addr()
# ray = random_ray() # hp = isect(ray, lst_triangles, 999999.0) # if hp is not None: # print(ray) # break runtime = Runtime() mesh.prepare_isect_asm(runtime) print(mesh.attributes()) renmas.shapes.Mesh3D.isect_asm(runtime, "mesh_isect") asm_structs = util.structs("ray", "mesh3d", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray ray1 hitpoint hp float min_dist = 9999.99 ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj mesh3d mesh1 uint32 result #CODE mov eax, ray1 mov ebx, mesh1
def multiple_isect_asm(runtime, label):
    """Assemble and load the "ray versus list of objects" loop as ``label``.

    Register contract of the generated code:
    ``eax`` = ray, ``ebx`` = hitpoint, ``esi`` = pointer to an array of
    8-byte entries (object pointer followed by the pointer of its
    intersection function), ``edi`` = number of entries.  Each function is
    called with ``eax`` = ray, ``ebx`` = object, ``ecx`` = pointer to the
    minimum distance and ``edx`` = hitpoint; after a reported hit the
    minimum distance is overwritten with ``hitpoint.t``.  The routine
    returns 1 in ``eax`` when the final minimum distance is below
    ``max_dist`` and not below ``epsilon``, otherwise 0.
    """
    source = "".join((
        """
        #DATA
        """,
        util.structs("ray", "hitpoint"),
        """
            float min_dist = 999999.0
            float max_dist = 999999.0
            float zero = 0.0
            float one = 1.0
            float epsilon = 0.00001
        #CODE
        """,
        " global " + label + ":\n",
        """
            ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj
            ; 64-bit version will bi i little different beacuse of different size of array
            macro eq32 min_dist = max_dist + one
            mov ecx, min_dist

            push ecx
            push eax
            push ebx
            push esi
            push edi
            mov edx, dword [zero]
            mov dword [ebx + hitpoint.t], edx

            _objects_loop:
            mov eax, dword [esp + 12] ; address of ray
            mov ecx, dword [esp + 16] ; address of minimum distance
            mov edx, dword [esp + 8] ; address of hitpoint
            mov esi, dword [esp + 4] ; array of objects and functions obj_ptr:func_ptr
            mov ebx, dword [esi] ; put in ebx address of object
            call dword [esi + 4] ; function pointer
            cmp eax, 0 ; 0 - no intersection ocur 1 - intersection ocur
            jne _update_distance

            _next_object:
            sub dword [esp], 1
            jz _end_objects
            add dword [esp + 4], 8 ;increment array by 8
            jmp _objects_loop

            _update_distance:
            mov eax, dword [esp + 8]
            mov ebx, dword [eax + hitpoint.t]

            mov edx, dword [esp + 16] ;populate new minimum distance
            mov dword [edx], ebx
            jmp _next_object

            _end_objects:
            add esp, 20
            macro eq32 xmm0 = min_dist
            macro if xmm0 < max_dist goto _accept
            mov eax, 0
            ret

            _accept:
            macro if xmm0 < epsilon goto _reject
            mov eax, 1
            ret

            _reject:
            mov eax, 0
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "ray_objects_intersection" + str(util.unique())
    runtime.load(module_name, machine_code)
def visible_asm(runtime, label, ray_scene_isect):
    """Assemble and load the two-point visibility test as ``label``.

    The generated code expects the two points in ``xmm0`` (p1) and
    ``xmm1`` (p2).  It builds a ray from p1 towards p2, stores the
    distance between the points minus ``epsilon``, and calls the routine
    labelled ``ray_scene_isect`` with ``eax`` = ray and ``ebx`` = hitpoint.
    It returns 1 in ``eax`` when nothing was hit or when the stored
    distance is smaller than the hit's ``t``; otherwise it returns 0.
    """
    # The original code requested this snippet but never used the result;
    # the call is kept so the function's behaviour stays unchanged.
    util.normalization("xmm1", "xmm2", "xmm3")
    structs = util.structs("ray", "hitpoint")

    # xmm2 = length of xmm1, using the best instructions available.
    vec, tmp = "xmm1", "xmm2"
    if util.AVX:
        length_lines = (
            "vdpps " + tmp + "," + vec + "," + vec + ", 0x7f \n",
            "vsqrtps " + tmp + "," + tmp + "\n",
        )
    elif util.SSE41:
        length_lines = (
            "movaps " + tmp + "," + vec + "\n",
            "dpps " + tmp + "," + tmp + ", 0x7F\n",
            "sqrtps " + tmp + "," + tmp + "\n",
        )
    else:
        length_lines = (
            "macro dot " + tmp + " = " + vec + "*" + vec + "\n",
            "macro broadcast " + tmp + " = " + tmp + "[0]\n",
            "sqrtps " + tmp + "," + tmp + "\n",
        )

    source = "".join((
        """
        #DATA
        """,
        structs,
        """
            hitpoint hp
            ray r1
            float distance
            float epsilon = 0.0005
        #CODE
        """,
        " global " + label + ":\n",
        """
            macro eq128 xmm1 = xmm1 - xmm0
            macro eq128 r1.origin = xmm0
        """,
        "".join(length_lines),
        """
            macro eq32 distance = xmm2 - epsilon {xmm0, xmm1}
            macro eq128 xmm1 = xmm1 / xmm2 {xmm0, xmm1}
            macro eq128 r1.dir = xmm1

            ; call ray scene intersection
            mov eax, r1
            mov ebx, hp
        """,
        "call " + ray_scene_isect,
        """
            cmp eax, 0
            jne _maybe_visible
            ;no intersection ocure that mean that points are visible
            mov eax, 1
            ret

            _maybe_visible:
            macro eq32 xmm0 = distance
            macro if xmm0 < hp.t goto accept
            xor eax, eax
            ret

            accept:
            mov eax, 1
            ret
        """,
    ))

    machine_code = util.get_asm().assemble(source, True)
    module_name = "visible" + str(util.unique())
    runtime.load(module_name, machine_code)
from tdasm import Tdasm, Runtime import random from renmas.shapes import Plane, intersect_ray_shape_array from renmas.core import Ray from renmas.maths import Vector3 import renmas.utils as util import timeit asm_structs = util.structs("ray", "plane", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray r1 hitpoint hp float min_dist = 999999.0 uint32 ptr_planes uint32 nplanes #CODE ; eax = pointer_to_ray, ebx = pointer_to_hitpoint mov eax, r1 mov ebx, hp mov ecx, min_dist mov esi, dword [ptr_planes] mov edi, dword [nplanes] ; eax - ray, ebx - hp , ecx - min_dist, esi - ptr_planes, edi - nplanes call plane_array
def intersect_ray_triangle_avx(runtime, label, populate=True):
    """Assemble and load the AVX ray/triangle intersection routine.

    The routine is exported under the global label ``label`` and loaded into
    ``runtime`` under a unique module name.  ``populate`` is accepted but is
    not consulted by this implementation.

    Inputs of the generated routine, as read by the assembly below:
        eax        - pointer to the ray
        ecx        - pointer to the current minimum distance
        edx        - pointer to the hitpoint to fill in
        [esp + 4]  - pointer to the first vertex
        [esp + 8]  - pointer to the second vertex
        [esp + 12] - pointer to the third vertex
        [esp + 16] - pointer to the normal copied into the hitpoint
        [esp + 20] - material index copied into the hitpoint
    ebp is used as a scratch register and is not restored.

    The routine returns 0 in eax when either of the two computed
    coefficients is negative, when their sum is not below one, when the
    distance is below ``epsilon`` or when it is not below the current
    minimum distance.  Otherwise it fills the hitpoint (t, hit, normal,
    mat_index) and returns 1.
    """
    struct_defs = util.structs("ray", "hitpoint")

    data_section = """
    #DATA
    float one = 1.0
    float zero = 0.0
    float epsilon = 0.00001
    float beta
    float coff
    """

    code_header = """
        ;eax = pointer to ray structure
        ;ecx = pointer to minimum distance
        ;edx = pointer to hitpoint
    #CODE
    """

    entry_label = " global " + label + ":\n"

    routine = """
    mov ebp, dword [esp+4]
    vmovaps xmm0, oword [ebp]
    ;vmovaps xmm0, oword [ebx + triangle.p0]
    vmovaps xmm2, oword [eax + ray.dir]
    mov ebp, dword [esp+12]
    vsubps xmm1, xmm0, oword [ebp]
    ;vsubps xmm1, xmm0, oword [ebx + triangle.p2]
    vsubps xmm3, xmm0, oword [eax + ray.origin]
    mov ebp, dword [esp+8]
    vsubps xmm0, xmm0, oword [ebp]
    ;vsubps xmm0, xmm0, oword [ebx + triangle.p1]

    vpermilps xmm0, xmm0, 11010010B ;rotate by 1
    vpermilps xmm2, xmm2, 11001001B ; rotate by 2

    vblendps xmm4, xmm0, xmm1, 0010b
    vblendps xmm4, xmm4, xmm2, 1

    vblendps xmm5, xmm0, xmm1, 0100b
    vblendps xmm5, xmm5, xmm2, 0010b
    vpermilps xmm6, xmm4, 11010010B
    vpermilps xmm7, xmm5, 11001001B
    vmulps xmm4, xmm4, xmm5
    vmulps xmm6, xmm6, xmm7
    vblendps xmm5, xmm0, xmm1, 0001B
    vblendps xmm5, xmm5, xmm2, 0100B
    vsubps xmm4, xmm4, xmm6
    vdpps xmm7, xmm4, xmm5, 0xf1

    vpermilps xmm3, xmm3, 11010010B ;rotate by 1
    vblendps xmm4, xmm1, xmm2, 0001B
    vmovss xmm6, dword [one]
    vblendps xmm4, xmm4, xmm3, 0100B
    vblendps xmm5, xmm1, xmm2, 0010B
    vdivss xmm6, xmm6, xmm7
    vblendps xmm5, xmm5, xmm3, 0001B
    vpermilps xmm7, xmm5, 11001001B
    vmovss dword [coff], xmm6
    vpermilps xmm6, xmm4, 11010010B
    vmulps xmm4, xmm4, xmm5
    vmulps xmm6, xmm6, xmm7
    vblendps xmm5, xmm1, xmm2, 0100B
    vblendps xmm5, xmm5, xmm3, 0010B
    vsubps xmm4, xmm4, xmm6
    vdpps xmm7, xmm4, xmm5, 0xf1
    vmulss xmm7, xmm7, dword [coff]
    vcomiss xmm7, dword [zero]
    jc _reject ;beta less then zero reject
    vmovss dword [beta], xmm7

    vpermilps xmm3, xmm3, 11001001B ; rotate by 2
    vblendps xmm4, xmm0, xmm2, 0001B
    vblendps xmm4, xmm4, xmm3, 0010B
    vpermilps xmm6, xmm4, 11010010B
    vblendps xmm5, xmm0, xmm2, 0010B
    vblendps xmm5, xmm5, xmm3, 0100B
    vpermilps xmm7, xmm5, 11001001B
    vmulps xmm4, xmm4, xmm5
    vmulps xmm6, xmm6, xmm7
    vblendps xmm5, xmm0, xmm2, 0100B
    vblendps xmm5, xmm5, xmm3, 0001B
    vsubps xmm4, xmm4, xmm6
    vdpps xmm7, xmm4, xmm5, 0xf1
    vmulss xmm7, xmm7, dword [coff]
    vcomiss xmm7, dword [zero]
    jc _reject ;beta less then zero reject
    vaddss xmm7, xmm7, dword [beta]
    vcomiss xmm7, dword [one]
    jnc _reject

    vpermilps xmm3, xmm3, 11001001B ; rotate by 2
    vblendps xmm4, xmm0, xmm1, 0010B
    vblendps xmm4, xmm4, xmm3, 0001B
    vpermilps xmm6, xmm4, 11010010B
    vblendps xmm5, xmm0, xmm1, 0100B
    vblendps xmm5, xmm5, xmm3, 0010B
    vpermilps xmm7, xmm5, 11001001B
    vmulps xmm4, xmm4, xmm5
    vmulps xmm6, xmm6, xmm7
    vblendps xmm5, xmm0, xmm1, 0001B
    vblendps xmm5, xmm5, xmm3, 0100B
    vsubps xmm4, xmm4, xmm6
    vdpps xmm7, xmm4, xmm5, 0xf1
    vmulss xmm7, xmm7, dword [coff]
    vcomiss xmm7, dword [epsilon]
    jc _reject
    vcomiss xmm7, dword [ecx] ;minimum distance
    jnc _reject

    ;populate hitpoint structure
    ; t is in xmm7
    vmovss dword [edx + hitpoint.t], xmm7
    macro broadcast xmm6 = xmm7[0]
    ;macro eq128_32 edx.hitpoint.normal = ebx.triangle.normal, edx.hitpoint.mat_index = ebx.triangle.mat_index
    ;vpermilps xmm2, xmm2, 11001001B ; rotate by 2
    macro eq128 xmm2 = eax.ray.dir
    vmulps xmm6, xmm6, xmm2
    macro eq128 edx.hitpoint.hit = xmm6 + eax.ray.origin
    mov ebp, dword [esp + 16]
    vmovaps xmm0, oword [ebp]
    vmovss xmm1, dword [esp + 20]
    macro eq128 edx.hitpoint.normal = xmm0
    macro eq32 edx.hitpoint.mat_index = xmm1

    mov eax, 1
    ret

    _reject:
    mov eax, 0
    ret
    """

    source = "".join((data_section, struct_defs, code_header, entry_label, routine))

    machine_code = util.get_asm().assemble(source, True)
    #machine_code.print_machine_code()
    module_name = "ray_triangle_isect" + str(util.unique())
    runtime.load(module_name, machine_code)
isect = renmas.shapes.isect #intersection rutine #for x in range(100): # ray = random_ray() # hp = isect(ray, lst_triangles, 999999.0) # if hp is not None: # print(ray) # break runtime = Runtime() mesh.prepare_isect_asm(runtime) print(mesh.attributes()) renmas.shapes.Mesh3D.isect_asm(runtime, "mesh_isect") asm_structs = util.structs("ray", "mesh3d", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray ray1 hitpoint hp float min_dist = 9999.99 ; eax - ray, ebx - hp, ecx - min_dist, esi - ptr_arr, edi - nobj mesh3d mesh1 uint32 result #CODE mov eax, ray1 mov ebx, mesh1
#sphere = random_sphere() hp = sphere.isect(ray) hp = renmas.shapes.isect(ray, mdl.lst_shapes()) #print(hp.t) grid = Grid() grid.setup(mdl.lst_shapes()) hp3 = grid.isect(ray) #print(hp3.t) runtime = Runtime() grid.isect_asm(runtime, "grid_intersect") asm_structs = util.structs("ray", "grid", "hitpoint") ASM = """ #DATA """ ASM += asm_structs + """ ray ray1 hitpoint hp grid grid1 float min_dist = 99999.0 float _xmm0[4] float _xmm1[4] float _xmm2[4] float _xmm3[4] float _xmm5[4]
def linear_isect_asm(runtime, label, dyn_arrays):
    """Assemble and load a routine that intersects a ray with every shape array.

    ``dyn_arrays`` maps a shape type (an object providing ``name()`` and
    ``isect_asm()``) to the dynamic array holding the shapes of that type.
    For each shape type this function also builds, in ``runtime``, the
    ``<name>_intersect`` routine and the ``<name>_array`` routine that the
    generated code calls.

    The generated routine is exported under the global label ``label``.  It
    takes eax = pointer to ray and ebx = pointer to hitpoint, calls every
    ``<name>_array`` routine in turn, and returns 1 in eax when the final
    ``min_dist`` is below ``max_dist`` and not below ``epsilon``, otherwise 0.
    After loading, the address and size of every array are written into the
    module's ``ptr_<name>`` and ``n_<name>`` variables.
    """
    scalar_decls = """
    uint32 r1
    uint32 hp
    float min_dist = 999999.0
    float max_dist = 999999.0
    float zero = 0.0
    float one = 1.0
    float epsilon = 0.00001
    """
    data_header = """
    #DATA
    """

    # Structure definitions plus one pointer/count pair per shape type.
    struct_defs = util.structs("ray", "hitpoint")
    array_decls = []
    for shape_type, _ in dyn_arrays.items():
        kind = shape_type.name()
        struct_defs += util.structs(kind)
        array_decls.append("uint32 ptr_" + kind + "\n")
        array_decls.append("uint32 n_" + kind + "\n")

    pieces = [data_header, struct_defs, scalar_decls]
    pieces.extend(array_decls)
    pieces.extend((
        "#CODE \n",
        "global " + label + ":\n",
        "mov dword [r1], eax \n",
        "mov dword [hp], ebx \n",
        "mov edx , dword [zero] \n",
        "macro eq32 min_dist = max_dist + one\n",
        "mov dword [ebx + hitpoint.t], edx \n",
    ))

    # One call per shape type; the per-shape routines are compiled as we go.
    for shape_type, _ in dyn_arrays.items():
        kind = shape_type.name()
        pieces.append("""
        ;=== intersection of array
        mov eax, dword [r1]
        mov ebx, dword [hp]
        mov ecx, min_dist
        """)
        pieces.append("mov esi, dword [" + "ptr_" + kind + "] \n")
        pieces.append("mov edi, dword [" + "n_" + kind + "]\n")
        pieces.append("call " + kind + "_array \n")
        shape_type.isect_asm(runtime, kind + "_intersect")
        intersect_ray_shape_array(kind, runtime, kind + "_array", kind + "_intersect")

    pieces.extend((
        "macro eq32 xmm0 = min_dist \n",
        "macro if xmm0 < max_dist goto _accept\n",
        "mov eax, 0\n",
        "ret \n",
        "_accept: \n",
        "macro if xmm0 < epsilon goto _reject\n",
        "mov eax, 1 \n",
        "ret\n",
        "_reject:\n",
        "mov eax, 0\n",
        "ret\n",
    ))

    machine_code = util.get_asm().assemble("".join(pieces), True)
    #machine_code.print_machine_code()
    module_name = "ray_scene_intersection" + str(util.unique())
    data_section = runtime.load(module_name, machine_code)

    # Tell the loaded module where each array lives and how many items it has.
    for shape_type, array in dyn_arrays.items():
        kind = shape_type.name()
        data_section["ptr_" + kind] = array.get_addr()
        data_section["n_" + kind] = array.size
import timeit
from tdasm import Tdasm, Runtime
import random
from renmas.shapes import Plane, Sphere, intersect_ray_shape_array, isect, linear_isect_asm, isect_ray_scene
from renmas.core import Ray, ShapeDatabase
from renmas.maths import Vector3
import renmas.utils as util
import renmas.interface as mdl

asm_structs = util.structs("ray", "plane", "sphere", "hitpoint")


def create_random_shapes(n, shape_db):
    """Add ``n`` random shapes to ``shape_db``.

    Each shape is a random sphere when ``random.random()`` exceeds 0.5 and a
    random plane otherwise.
    """
    for _ in range(n):
        if random.random() > 0.5:
            make_shape = mdl.create_random_sphere
        else:
            make_shape = mdl.create_random_plane
        shape_db.add_shape(make_shape())


def generate_ray():
    x = random.random() * 10.0
    y = random.random() * 10.0
    z = random.random() * 10.0
    dir_x = random.random() * 10.0 - 5.0
def generate_shade(runtime, label, visible_label):
    """Assemble and load the routine that shades a hitpoint with every light.

    Lights and materials come from ``ren.lst_lights()`` and
    ``ren.lst_materials()``.  The generated routine is exported under the
    global label ``label`` and takes a pointer to the hitpoint in eax.  For
    each light it calls that light's routine; when ``hitpoint.visible`` is
    non-zero afterwards it calls the routine of the material selected by
    ``hitpoint.mat_index`` and adds ``hitpoint.spectrum`` to a running sum.
    The sum is written back to ``hitpoint.spectrum`` before returning.

    ``visible_label`` is forwarded to every light's ``L_asm``.
    """
    materials = ren.lst_materials()
    lights = ren.lst_lights()
    material_count = len(materials)
    light_count = len(lights)

    struct_defs = util.structs("hitpoint")

    # TODO: study a smarter scheme that shades with one randomly picked light
    # instead of looping over all of them.
    data_header = """
    #DATA
    """
    table_decls = (
        "uint32 lights_ptrs[" + str(light_count) + "]\n"
        + "uint32 materials_ptrs[" + str(material_count) + "]\n"
        + "uint32 nlights \n"
        + "uint32 cur_light \n"
    )
    spectrum_decls = """
    float zero_spectrum[4] = 0.0, 0.0, 0.0, 0.0
    float curr_spectrum[4]
    uint32 hp_ptr
    #CODE
    """
    entry_label = "global " + label + ":\n"
    routine = """
    macro eq128 curr_spectrum = zero_spectrum
    mov dword [cur_light], 0
    mov dword [hp_ptr], eax

    next_light:
    ; check if include all lights and finish shading if we are
    mov ebx, dword [cur_light]
    cmp ebx, dword [nlights]
    je _end_shading

    ; call shading for current light
    mov eax, dword [hp_ptr]
    call dword [lights_ptrs + ebx*4]
    add dword [cur_light], 1 ;move to next light in next iteration

    ; check to see if we must call brdf of material
    mov eax, dword [hp_ptr]
    mov edx, dword [eax + hitpoint.visible]
    cmp edx, 0
    je next_light

    ; call brdf of material
    mov eax, dword [hp_ptr]
    mov ebx, dword [eax + hitpoint.mat_index]
    call dword [materials_ptrs + 4*ebx]

    mov eax, dword [hp_ptr]
    macro eq128 curr_spectrum = curr_spectrum + eax.hitpoint.spectrum
    jmp next_light

    _end_shading:
    mov eax, dword [hp_ptr]
    macro eq128 eax.hitpoint.spectrum = curr_spectrum
    ret
    """
    source = "".join(
        (data_header, struct_defs, table_decls, spectrum_decls, entry_label, routine)
    )

    # Compile every light routine, then every material routine, collecting
    # the function pointers in list order.
    light_ptrs = []
    for light in lights:
        light.L_asm(runtime, visible_label)
        light_ptrs.append(light.func_ptr)

    material_ptrs = []
    for material in materials:
        material.brdf_asm(runtime)
        material_ptrs.append(material.func_ptr)

    machine_code = util.get_asm().assemble(source, True)
    module_name = "shade" + str(util.unique())
    data_section = runtime.load(module_name, machine_code)
    data_section["lights_ptrs"] = tuple(light_ptrs)
    data_section["materials_ptrs"] = tuple(material_ptrs)
    data_section["nlights"] = light_count
from tdasm import Runtime import renmas.samplers from renmas.camera import PinholeCamera import renmas.interface as ren import renmas.utils as util asm_structs = util.structs("ray", "sample") ASM = """ #DATA """ ASM += asm_structs + """ sample sp1 ray r1 float _xmm[4] #CODE mov eax, r1 mov ebx, sp1 call generate_ray movaps oword [_xmm], xmm4 """ if __name__ == "__main__": runtime = Runtime()
def isect_asm(cls, runtime, label, populate=True):
    """Assemble and load the ray/sphere intersection routine.

    The routine is exported under the global label ``label`` and loaded into
    ``runtime`` under a unique module name.

    Register contract of the generated routine, as used by the assembly below:
        eax - pointer to the ray
        ebx - pointer to the sphere (the embedded comment says "plane", but
              the code reads ``ebx.sphere.*``)
        ecx - pointer to the minimum distance
        edx - pointer to the hitpoint
    The routine returns 0 in eax when the discriminant is negative, when
    neither root exceeds ``epsilon`` or when the accepted root is greater
    than the minimum distance; otherwise it returns 1.

    When ``populate`` is true the generated code also fills the hitpoint
    (t, hit, normal, mat_index) before returning 1; when false that part is
    left out.
    """
    struct_defs = util.structs("ray", "sphere", "hitpoint")

    # Scalar square root of the discriminant, in AVX or SSE encoding.
    sqrt_line = " vsqrtss xmm5, xmm5, xmm5 \n" if util.AVX else " sqrtss xmm5, xmm5 \n"

    data_section = """
    #DATA
    float epsilon = 0.0001
    float two = 2.0
    float minus_four = -4.0
    float zero = 0.0
    float one = 1.0
    float minus_one = -1.0
    """
    code_header = """
        ;eax = pointer to ray structure
        ;ebx = pointer to plane structure
        ;ecx = pointer to minimum distance
        ;edx = pointer to hitpoint
    #CODE
    """
    entry_label = " global " + label + ":\n"
    discriminant = """
        macro eq128_128 xmm1 = eax.ray.dir, xmm2 = eax.ray.origin - ebx.sphere.origin
        macro dot xmm3 = xmm1 * xmm1 {xmm2}
        macro dot xmm4 = xmm2 * xmm1 {xmm3}
        macro eq32_32 xmm4 = xmm4 * two, xmm5 = ebx.sphere.radius * ebx.sphere.radius {xmm1, xmm2, xmm3, xmm4}
        macro dot xmm6 = xmm2 * xmm2 {xmm1, xmm3, xmm4, xmm5}
        macro eq32 xmm5 = xmm6 - xmm5 {xmm1, xmm2, xmm3, xmm4}
        macro eq32_32 xmm5 = xmm5 * xmm3, xmm6 = xmm4 * xmm4 {xmm1, xmm2, xmm3}
        macro eq32 xmm5 = xmm5 * minus_four {xmm1, xmm2, xmm3, xmm4, xmm6}
        macro eq32 xmm5 = xmm5 + xmm6 {xmm1, xmm2, xmm3, xmm4}

        ; temp = xmm2, a = xmm3 , b = xmm4, disc = xmm5, ray.dir = xmm1
        macro if xmm5 < zero goto _reject
    """
    roots = """
        macro eq32 xmm3 = xmm3 * two {xmm1, xmm2, xmm4, xmm5}
        macro eq32_32 xmm3 = one / xmm3, xmm4 = xmm4 * minus_one {xmm1, xmm2, xmm5}
        macro eq32 xmm6 = xmm4 - xmm5 {xmm1, xmm2, xmm3, xmm5}
        macro eq32 xmm6 = xmm6 * xmm3 {xmm1, xmm2, xmm3, xmm4, xmm5}
        macro if xmm6 > epsilon goto populate_hitpoint
        macro eq32 xmm6 = xmm4 + xmm5 {xmm1, xmm2, xmm3}
        macro eq32 xmm6 = xmm6 * xmm3 {xmm1, xmm2, xmm3, xmm4, xmm5}
        macro if xmm6 > epsilon goto populate_hitpoint
        mov eax, 0
        ret

        populate_hitpoint:
        macro if xmm6 > ecx goto _reject
    """
    fill_hitpoint = """
        macro broadcast xmm5 = xmm6[0]
        macro eq128_32 xmm4 = xmm5 * xmm1, xmm7 = ebx.sphere.radius {xmm2}
        macro eq32 edx.hitpoint.t = xmm6 {xmm2, xmm4, xmm7}
        macro eq128_128 edx.hitpoint.hit = xmm4 + eax.ray.origin, xmm5 = xmm2 + xmm4 {xmm7}
        macro broadcast xmm7 = xmm7[0]
        macro eq128_32 edx.hitpoint.normal = xmm5 / xmm7, edx.hitpoint.mat_index = ebx.sphere.mat_index
        """
    epilogue = """
        mov eax, 1
        ret

        _reject:
        mov eax, 0
        ret
    """

    pieces = [
        data_section,
        struct_defs,
        code_header,
        entry_label,
        discriminant,
        sqrt_line,
        roots,
    ]
    if populate:
        pieces.append(fill_hitpoint)
    pieces.append(epilogue)

    machine_code = util.get_asm().assemble("".join(pieces), True)
    #machine_code.print_machine_code()
    module_name = "ray_sphere_isect" + str(util.unique())
    runtime.load(module_name, machine_code)