Ejemplo n.º 1
0
def build_simple_bvh(degree):
    """Build a simple BVH of the companion cube and print each layer's area.

    For every layer of the tree, the summed ``node_areas`` of the nodes in
    that layer is multiplied by ``world_scale**2`` and printed after the
    layer index.

    Args:
        degree: forwarded unchanged to ``make_simple_bvh``.
    """
    # chroma.models.lionsolid() can be swapped in here as the input mesh.
    mesh = chroma.models.companioncube()
    bvh = make_simple_bvh(mesh, degree)

    nodes = bvh.nodes
    # Appending len(nodes) closes the final layer, so each consecutive pair
    # of bounds is the [start, end) slice of one layer.
    layer_bounds = np.append(bvh.layer_offsets, len(nodes))
    area_scale = bvh.world_coords.world_scale**2

    layer_slices = zip(layer_bounds[:-1], layer_bounds[1:])
    for i, (layer_start, layer_end) in enumerate(layer_slices):
        layer_area = node_areas(nodes[layer_start:layer_end]).sum()
        # One pre-formatted argument prints identically on Python 2 and 3.
        print('%s %s' % (i, layer_area * area_scale))
Ejemplo n.º 2
0
def build_simple_bvh(degree):
    """Build a simple BVH of the lion solid and print each layer's area.

    For every layer of the tree, the summed ``node_areas`` of the nodes in
    that layer is multiplied by ``world_scale**2`` and printed after the
    layer index. Finally the result is checked to be a ``BVH`` instance.

    Args:
        degree: forwarded unchanged to ``make_simple_bvh``.

    Raises:
        AssertionError: if ``make_simple_bvh`` did not return a ``BVH``.
    """
    mesh = chroma.models.lionsolid()
    bvh = make_simple_bvh(mesh, degree)

    nodes = bvh.nodes
    # Appending len(nodes) closes the final layer, so each consecutive pair
    # of bounds is the [start, end) slice of one layer.
    layer_bounds = np.append(bvh.layer_offsets, len(nodes))
    area_scale = bvh.world_coords.world_scale**2

    layer_slices = zip(layer_bounds[:-1], layer_bounds[1:])
    for i, (layer_start, layer_end) in enumerate(layer_slices):
        layer_area = node_areas(nodes[layer_start:layer_end]).sum()
        # One pre-formatted argument prints identically on Python 2 and 3.
        print('%s %s' % (i, layer_area * area_scale))

    assert isinstance(bvh, BVH)
Ejemplo n.º 3
0
def build_simple_bvh(degree):
    """Build a BVH of the companion cube and print each layer's area.

    Despite the function name, the tree is produced by
    ``make_recursive_grid_bvh``, which is also asked to save its Morton
    codes to ``mortonout.cu.nudot.txt``. For every layer, the summed
    ``node_areas`` of that layer's nodes is multiplied by
    ``world_scale**2`` and printed after the layer index.

    Args:
        degree: forwarded unchanged to ``make_recursive_grid_bvh``.

    Returns:
        The BVH object returned by ``make_recursive_grid_bvh``.
    """
    # chroma.models.lionsolid() can be swapped in here as the input mesh.
    mesh = chroma.models.companioncube()
    bvh = make_recursive_grid_bvh(mesh,
                                  degree,
                                  save_morton_codes="mortonout.cu.nudot.txt")

    nodes = bvh.nodes
    # Appending len(nodes) closes the final layer, so each consecutive pair
    # of bounds is the [start, end) slice of one layer.
    layer_bounds = np.append(bvh.layer_offsets, len(nodes))
    area_scale = bvh.world_coords.world_scale**2

    layer_slices = zip(layer_bounds[:-1], layer_bounds[1:])
    for i, (layer_start, layer_end) in enumerate(layer_slices):
        layer_area = node_areas(nodes[layer_start:layer_end]).sum()
        # One pre-formatted argument prints identically on Python 2 and 3.
        print('%s %s' % (i, layer_area * area_scale))

    return bvh
Ejemplo n.º 4
0
def merge_nodes(nodes, degree, max_ratio=None):
    """Create the parent layer for ``nodes`` with the ``make_parents`` kernel.

    The kernel is loaded from ``bvh.cu`` (CUDA) or ``bvh.cl`` (OpenCL) and
    produces one parent per group of up to ``degree`` nodes. When
    ``max_ratio`` is not None, every parent whose children's summed area is
    less than 0.3 of the parent's own area is replaced in the layer by
    copies of its children.

    Args:
        nodes: array of child nodes; indexed by position and handed to the
            kernel as its input buffer.
        degree: number of children grouped under one parent.
        max_ratio: only tested against None to enable the splitting pass.
            NOTE(review): its numeric value is never used; the threshold is
            the hard-coded 0.3 below. Confirm whether that is intended.

    Returns:
        The parent nodes fetched from the GPU, or the expanded layer when
        splitting ran and the result is still shorter than ``nodes``.

    Raises:
        RuntimeError: if the active GPU API is neither CUDA nor OpenCL.
    """
    nthreads_per_block = 256
    nnodes = len(nodes)

    use_cuda = gpuapi.is_gpu_api_cuda()
    use_opencl = gpuapi.is_gpu_api_opencl()

    context = None
    queue = None
    if use_opencl:
        context = cltools.get_last_context()
        queue = cl.CommandQueue(context)

    # Load GPU functions
    if use_cuda:
        bvh_module = get_module('bvh.cu',
                                options=api_options,
                                include_source_directory=True)
    elif use_opencl:
        # don't like the last context method. trouble. trouble.
        bvh_module = get_module('bvh.cl',
                                context,
                                options=api_options,
                                include_source_directory=True)
    else:
        raise RuntimeError('API is neither CUDA nor OpenCL?!')
    bvh_funcs = GPUFuncs(bvh_module)

    # Number of parents is ceil(nnodes / degree). divmod keeps this an
    # integer on Python 3, where "/" would yield a float.
    nparent, leftover = divmod(nnodes, degree)
    if leftover != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        nparent_pad = round_up_to_multiple(nparent, 1)

    # Allocate the output buffer and run the kernel. From here on exactly
    # one of the two backends is active (anything else raised above).
    if use_cuda:
        gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)
        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
            bvh_funcs.make_parents(np.uint32(first_index),
                                   np.uint32(elements_this_iter),
                                   np.uint32(degree),
                                   gpu_parent_nodes,
                                   cuda.In(nodes),
                                   np.uint32(0),
                                   np.uint32(nnodes),
                                   block=(nthreads_per_block, 1, 1),
                                   grid=(nblocks_this_iter, 1))
    else:
        parent_nodes_np = np.zeros(shape=nparent, dtype=ga.vec.uint4)
        gpu_parent_nodes = ga.to_device(queue, parent_nodes_np)
        gpu_nodes = ga.to_device(queue, nodes)
        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(nparent, nthreads_per_block, max_blocks=1):
            bvh_funcs.make_parents(queue, (elements_this_iter, 1, 1), None,
                                   np.uint32(first_index),
                                   np.uint32(elements_this_iter),
                                   np.uint32(degree), gpu_parent_nodes.data,
                                   gpu_nodes.data, np.uint32(0),
                                   np.uint32(nnodes)).wait()

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is None:
        return parent_nodes

    areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    # Flag parents whose children cover less than this fraction of the
    # parent's own area.
    min_child_fraction = 0.3
    excessive_area = np.zeros(shape=len(areas), dtype=bool)
    for i, parent_area in enumerate(areas):
        # The 'w' field packs the child count in its high bits and the
        # index of the first child in its low bits.
        packed = parent_nodes['w'][i]
        nchild = packed >> CHILD_BITS
        child_index = packed & ~NCHILD_MASK
        child_area = child_areas[child_index:child_index + nchild].sum()
        if child_area / parent_area < min_child_fraction:
            excessive_area[i] = True

    # Reserve room for the worst case: every flagged parent turns into
    # ``degree`` nodes, i.e. ``degree - 1`` additional slots each.
    extra_slots = round_up_to_multiple(
        (degree - 1) * np.count_nonzero(excessive_area), 1)
    print('Extra slots: %s' % (extra_slots,))
    new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                dtype=parent_nodes.dtype)
    new_parent_nodes[:len(parent_nodes)] = parent_nodes

    def shift_child_index(slot):
        # Re-pack 'w' with the same child count but the child index moved
        # up by len(nodes). NOTE(review): the reason for this shift is not
        # visible in this function; presumably it matches the node layout
        # used by the caller - confirm.
        packed = new_parent_nodes['w'][slot]
        count = packed >> CHILD_BITS
        first = packed & ~NCHILD_MASK
        new_parent_nodes['w'][slot] = (count << CHILD_BITS) | (first + nnodes)

    # Slots inserted by earlier replacements shift all later positions.
    offset = 0
    for flagged in np.flatnonzero(excessive_area):
        index = flagged + offset
        nchild = new_parent_nodes['w'][index] >> CHILD_BITS
        child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK

        # The first child takes over the parent's slot.
        # NOTE(review): this one is shifted unconditionally, while the
        # siblings below are only shifted when their 'x' field is non-zero.
        new_parent_nodes[index] = nodes[child_index]
        shift_child_index(index)

        if nchild == 1:
            continue

        # Slide everyone over to open ``nchild - 1`` slots after ``index``.
        # Source and destination overlap, hence the explicit copy.
        new_parent_nodes[index + nchild:] = \
            new_parent_nodes[index + 1:-nchild + 1].copy()
        offset += nchild - 1
        for sibling in range(nchild - 1):
            new_parent_index = index + 1 + sibling
            new_parent_nodes[new_parent_index] = nodes[child_index +
                                                       sibling + 1]
            if new_parent_nodes['x'][new_parent_index] != 0:
                shift_child_index(new_parent_index)

    print('old: %e' % node_areas(parent_nodes).max())
    print('new: %e' % node_areas(new_parent_nodes).max())
    if len(new_parent_nodes) < nnodes:
        # Only adopt the expanded layer if it is still shorter than the
        # child layer it summarizes.
        parent_nodes = new_parent_nodes

    return parent_nodes
Ejemplo n.º 5
0
def merge_nodes(nodes, degree, max_ratio=None):
    """Create the parent layer for ``nodes`` with the CUDA ``make_parents`` kernel.

    The kernel is loaded from ``bvh.cu`` and produces one parent per group
    of up to ``degree`` nodes. When ``max_ratio`` is not None, every parent
    whose children's summed area is less than 0.3 of the parent's own area
    is replaced in the layer by copies of its children.

    Args:
        nodes: array of child nodes; indexed by position and handed to the
            kernel through ``cuda.In``.
        degree: number of children grouped under one parent.
        max_ratio: only tested against None to enable the splitting pass.
            NOTE(review): its numeric value is never used; the threshold is
            the hard-coded 0.3 below. Confirm whether that is intended.

    Returns:
        The parent nodes fetched from the GPU, or the expanded layer when
        splitting ran and the result is still shorter than ``nodes``.
    """
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    nnodes = len(nodes)

    # Number of parents is ceil(nnodes / degree). divmod keeps this an
    # integer on Python 3, where "/" would yield a float.
    nparent, leftover = divmod(nnodes, degree)
    if leftover != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        nparent_pad = round_up_to_multiple(nparent, 1)
    gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               gpu_parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               np.uint32(nnodes),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is None:
        return parent_nodes

    areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    # Flag parents whose children cover less than this fraction of the
    # parent's own area.
    min_child_fraction = 0.3
    excessive_area = np.zeros(shape=len(areas), dtype=bool)
    for i, parent_area in enumerate(areas):
        # The 'w' field packs the child count in its high bits and the
        # index of the first child in its low bits.
        packed = parent_nodes['w'][i]
        nchild = packed >> CHILD_BITS
        child_index = packed & ~NCHILD_MASK
        child_area = child_areas[child_index:child_index + nchild].sum()
        if child_area / parent_area < min_child_fraction:
            excessive_area[i] = True

    # Reserve room for the worst case: every flagged parent turns into
    # ``degree`` nodes, i.e. ``degree - 1`` additional slots each.
    extra_slots = round_up_to_multiple(
        (degree - 1) * np.count_nonzero(excessive_area), 1)
    print('Extra slots: %s' % (extra_slots,))
    new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                dtype=parent_nodes.dtype)
    new_parent_nodes[:len(parent_nodes)] = parent_nodes

    def shift_child_index(slot):
        # Re-pack 'w' with the same child count but the child index moved
        # up by len(nodes). NOTE(review): the reason for this shift is not
        # visible in this function; presumably it matches the node layout
        # used by the caller - confirm.
        packed = new_parent_nodes['w'][slot]
        count = packed >> CHILD_BITS
        first = packed & ~NCHILD_MASK
        new_parent_nodes['w'][slot] = (count << CHILD_BITS) | (first + nnodes)

    # Slots inserted by earlier replacements shift all later positions.
    offset = 0
    for flagged in np.flatnonzero(excessive_area):
        index = flagged + offset
        nchild = new_parent_nodes['w'][index] >> CHILD_BITS
        child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK

        # The first child takes over the parent's slot.
        # NOTE(review): this one is shifted unconditionally, while the
        # siblings below are only shifted when their 'x' field is non-zero.
        new_parent_nodes[index] = nodes[child_index]
        shift_child_index(index)

        if nchild == 1:
            continue

        # Slide everyone over to open ``nchild - 1`` slots after ``index``.
        # Source and destination overlap, hence the explicit copy.
        new_parent_nodes[index + nchild:] = \
            new_parent_nodes[index + 1:-nchild + 1].copy()
        offset += nchild - 1
        for sibling in range(nchild - 1):
            new_parent_index = index + 1 + sibling
            new_parent_nodes[new_parent_index] = nodes[child_index +
                                                       sibling + 1]
            if new_parent_nodes['x'][new_parent_index] != 0:
                shift_child_index(new_parent_index)

    print('old: %e' % node_areas(parent_nodes).max())
    print('new: %e' % node_areas(new_parent_nodes).max())
    if len(new_parent_nodes) < nnodes:
        # Only adopt the expanded layer if it is still shorter than the
        # child layer it summarizes.
        parent_nodes = new_parent_nodes

    return parent_nodes
Ejemplo n.º 6
0
def merge_nodes(nodes, degree, max_ratio=None):
    """Create the parent layer for ``nodes`` with the CUDA ``make_parents`` kernel.

    The kernel is loaded from ``bvh.cu`` and produces one parent per group
    of up to ``degree`` nodes. When ``max_ratio`` is not None, every parent
    whose children's summed area is less than 0.3 of the parent's own area
    is replaced in the layer by copies of its children.

    Args:
        nodes: array of child nodes; indexed by position and handed to the
            kernel through ``cuda.In``.
        degree: number of children grouped under one parent.
        max_ratio: only tested against None to enable the splitting pass.
            NOTE(review): its numeric value is never used; the threshold is
            the hard-coded 0.3 below. Confirm whether that is intended.

    Returns:
        The parent nodes fetched from the GPU, or the expanded layer when
        splitting ran and the result is still shorter than ``nodes``.
    """
    bvh_module = get_cu_module('bvh.cu',
                               options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    nnodes = len(nodes)

    # Number of parents is ceil(nnodes / degree). divmod keeps this an
    # integer on Python 3, where "/" would yield a float.
    nparent, leftover = divmod(nnodes, degree)
    if leftover != 0:
        nparent += 1

    if nparent == 1:
        nparent_pad = nparent
    else:
        nparent_pad = round_up_to_multiple(nparent, 1)
    gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               gpu_parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               np.uint32(nnodes),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    parent_nodes = gpu_parent_nodes.get()

    if max_ratio is None:
        return parent_nodes

    areas = node_areas(parent_nodes)
    child_areas = node_areas(nodes)

    # Flag parents whose children cover less than this fraction of the
    # parent's own area.
    min_child_fraction = 0.3
    excessive_area = np.zeros(shape=len(areas), dtype=bool)
    for i, parent_area in enumerate(areas):
        # The 'w' field packs the child count in its high bits and the
        # index of the first child in its low bits.
        packed = parent_nodes['w'][i]
        nchild = packed >> CHILD_BITS
        child_index = packed & ~NCHILD_MASK
        child_area = child_areas[child_index:child_index + nchild].sum()
        if child_area / parent_area < min_child_fraction:
            excessive_area[i] = True

    # Reserve room for the worst case: every flagged parent turns into
    # ``degree`` nodes, i.e. ``degree - 1`` additional slots each.
    extra_slots = round_up_to_multiple(
        (degree - 1) * np.count_nonzero(excessive_area), 1)
    print('Extra slots: %s' % (extra_slots,))
    new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots,
                                dtype=parent_nodes.dtype)
    new_parent_nodes[:len(parent_nodes)] = parent_nodes

    def shift_child_index(slot):
        # Re-pack 'w' with the same child count but the child index moved
        # up by len(nodes). NOTE(review): the reason for this shift is not
        # visible in this function; presumably it matches the node layout
        # used by the caller - confirm.
        packed = new_parent_nodes['w'][slot]
        count = packed >> CHILD_BITS
        first = packed & ~NCHILD_MASK
        new_parent_nodes['w'][slot] = (count << CHILD_BITS) | (first + nnodes)

    # Slots inserted by earlier replacements shift all later positions.
    offset = 0
    for flagged in np.flatnonzero(excessive_area):
        index = flagged + offset
        nchild = new_parent_nodes['w'][index] >> CHILD_BITS
        child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK

        # The first child takes over the parent's slot.
        # NOTE(review): this one is shifted unconditionally, while the
        # siblings below are only shifted when their 'x' field is non-zero.
        new_parent_nodes[index] = nodes[child_index]
        shift_child_index(index)

        if nchild == 1:
            continue

        # Slide everyone over to open ``nchild - 1`` slots after ``index``.
        # Source and destination overlap, hence the explicit copy.
        new_parent_nodes[index + nchild:] = \
            new_parent_nodes[index + 1:-nchild + 1].copy()
        offset += nchild - 1
        for sibling in range(nchild - 1):
            new_parent_index = index + 1 + sibling
            new_parent_nodes[new_parent_index] = nodes[child_index +
                                                       sibling + 1]
            if new_parent_nodes['x'][new_parent_index] != 0:
                shift_child_index(new_parent_index)

    print('old: %e' % node_areas(parent_nodes).max())
    print('new: %e' % node_areas(new_parent_nodes).max())
    if len(new_parent_nodes) < nnodes:
        # Only adopt the expanded layer if it is still shorter than the
        # child layer it summarizes.
        parent_nodes = new_parent_nodes

    return parent_nodes