def get_s32(buf, offset, length):
    """Read four bytes from ``buf`` at ``offset`` as a little-endian value.

    ``buf[offset]`` supplies the lowest 8 bits and ``buf[offset + 3]`` the
    highest; every byte is cast with ``nb.i4`` before being shifted.

    Returns a tuple ``(value, new_offset, remaining_length)``.  When fewer
    than four bytes are available (``length < 4``) nothing is consumed and
    ``(0, offset, length)`` is returned.
    """
    if length < 4:
        return (0, offset, length)

    # Combine from the most significant byte down to the least significant.
    value = (
        nb.i4(buf[offset + 3]) << 24
        | nb.i4(buf[offset + 2]) << 16
        | nb.i4(buf[offset + 1]) << 8
        | nb.i4(buf[offset + 0]) << 0
    )
    return value, offset + 4, length - 4
def fit_tree(tree, config, iterative=False):
    '''
    Refits the tree from its DataStats.

    Starting from the root context produced by ``build_root``, contexts are
    popped from a stack one at a time.  For each context the split with the
    smallest value in column 0 of ``c.impurities`` is chosen; if it does not
    reduce the impurity the node becomes a leaf, otherwise two children are
    created with ``new_node`` and a ``SplitData`` record is appended to the
    node.

    Parameters
    ----------
    tree :
        Tree object handed to ``build_root``, ``update_nominal_impurities``,
        ``extract_nominal_split_info`` and ``new_node``.
    config :
        Not read anywhere in this function.
    iterative : bool
        Forwarded to ``update_nominal_impurities`` and ``new_node``.  When
        false, each processed context is destroyed with
        ``SplitterContext_dtor``.

    Returns
    -------
    int
        Always 0.
    '''
    # Node caching by sample indices is switched off here.
    cache_nodes = False
    # NOTE(review): ``iterative`` is not forwarded to build_root, so the root
    # context is built with build_root's default -- confirm this is intended.
    context_stack, node_dict, nodes = \
        build_root(tree)
    while (len(context_stack) > 0):
        c = context_stack.pop()
        update_nominal_impurities(tree, c, iterative)
        # Column 0 of the impurity table is used to rank candidate splits.
        best_split = np.argmin(c.impurities[:, 0])
        # Single-element loop: only the best split is evaluated.
        for split in [best_split]:
            inds_l, inds_r, y_counts_l, y_counts_r, imp_tot, imp_l, imp_r, val = \
                extract_nominal_split_info(tree, c, split)
            # No impurity decrease from splitting -> stop here.
            if (c.impurity - imp_tot <= 0):
                c.node.ttype = TTYPE_LEAF
            else:
                # NOTE(review): ``ptr`` is never used afterwards (the matching
                # _decref_pointer call below is commented out) -- confirm
                # whether _pointer_from_struct is needed for its side effects.
                ptr = _pointer_from_struct(c)
                locs = (c, best_split, val, iterative, node_dict, context_stack, cache_nodes)
                # The last argument marks the left (0) / right (1) child.
                node_l = new_node(locs, tree, inds_l, y_counts_l, imp_l, 0)
                node_r = new_node(locs, tree, inds_r, y_counts_r, imp_r, 1)
                split_data = SplitData(u1(False), i4(split), i4(val), i4(node_l), i4(node_r))
                c.node.split_data.append(split_data)
                c.node.op_enum = OP_EQ
        # Contexts are only kept alive when fitting iteratively.
        if (not iterative):
            SplitterContext_dtor(c)
        # _decref_pointer(ptr)
    return 0
def experimental_sum_grad_cpu(new_grad, grad, k_cov):
    """Scatter per-pair gradients in ``grad`` onto ``new_grad`` in place.

    For every pair index ``k`` along the first axis of ``grad``, the pair
    ``(i, j) = k_to_ij(k + k_cov)`` is looked up; ``grad[k]`` is then
    subtracted from ``new_grad[i]`` and added to ``new_grad[j]`` for the
    three components of axis 1 and every index of axis 2.

    Nothing is returned; ``new_grad`` is modified.
    """
    n_pairs = grad.shape[0]
    n_q = grad.shape[2]
    for pair in range(n_pairs):
        # k_cov shifts the local pair index before it is decoded.
        atom_i, atom_j = k_to_ij(i4(pair + k_cov))
        for q_idx in range(n_q):
            for axis in range(3):
                new_grad[atom_i, axis, q_idx] -= grad[pair, axis, q_idx]
                new_grad[atom_j, axis, q_idx] += grad[pair, axis, q_idx]
def get_grad_omega(grad_omega, omega, r, d, qbin):
    """
    Fill ``grad_omega`` with the gradient of the Debye sum.

    Each thread handles one ``(k, qx)`` index pair from ``cuda.grid(2)`` and
    writes, for ``w`` in 0..2::

        sv = qx * qbin
        grad_omega[k, w, qx] = (sv * cos(sv * r[k]) - omega[k, qx])
                               / r[k]**2 * d[k, w]

    Parameters
    ----------
    grad_omega: kx3xQ array
        Output array, written in place
    omega: kxQ array
        Debye sum
    r: k array
        The pair distance array
    d: kx3 array
        The pair displacements
    qbin: float
        The qbin size
    """
    n_pairs, _, n_q = grad_omega.shape
    pair, q_idx = cuda.grid(2)
    # Threads that fall outside the output array do nothing.
    if pair < n_pairs and q_idx < n_q:
        q_val = f4(q_idx) * qbin
        dist = r[pair]
        scale = (q_val * math.cos(q_val * dist)) - omega[pair, q_idx]
        scale /= dist * dist
        for axis in range(i4(3)):
            grad_omega[pair, axis, q_idx] = scale * d[pair, axis]
def build_root(tree, iterative=False):
    """Create the root node and root splitter context for ``tree``.

    A fresh node list containing a single ``TTYPE_NODE`` root (id 0) is
    stored on ``tree.nodes``.  The root context is taken from
    ``tree.context_cache`` when ``iterative`` is true and an entry for
    ``empty_u8`` exists, otherwise it is newly constructed; in both cases it
    is re-initialised with all sample indices.  When ``tree.ifit_enabled`` is
    set the context is stored back into the cache.

    Returns ``(context_stack, node_dict, nodes)`` where ``context_stack``
    holds only the root context.
    """
    stats = tree.data_stats
    n_samples = len(stats.Y)
    all_inds = np.arange(n_samples, dtype=np.uint32)
    root_impurity = gini(n_samples, stats.y_counts)

    # Root node
    node_dict = new_akd(u4, i4)
    nodes = List.empty_list(TreeNodeType)
    root = TreeNode_ctor(TTYPE_NODE, i4(0), stats.y_counts)
    nodes.append(root)
    tree.nodes = nodes

    # Root context: reuse the cached one only when fitting iteratively.
    if not (iterative and empty_u8 in tree.context_cache):
        root_ctx = SplitterContext_ctor(empty_u8)
    else:
        root_ctx = tree.context_cache[empty_u8]

    reinit_splittercontext(root_ctx, root, all_inds, stats.y_counts, root_impurity)
    if tree.ifit_enabled:
        tree.context_cache[empty_u8] = root_ctx

    context_stack = List.empty_list(SplitterContextType)
    context_stack.append(root_ctx)
    return context_stack, node_dict, nodes
def experimental_sum_grad_fq1(new_grad, grad, k_cov):
    """Atomically scatter per-pair gradients onto ``new_grad``.

    Each thread takes one ``(k, qx)`` index pair from ``cuda.grid(2)``,
    decodes ``(i, j) = cuda_k_to_ij(k + k_cov)`` and, for the three
    components of axis 1, atomically adds ``grad[k, tz, qx]`` to
    ``new_grad[j, tz, qx]`` and its negation to ``new_grad[i, tz, qx]``.
    """
    pair, q_idx = cuda.grid(2)
    # Only threads that map onto an element of ``grad`` do any work.
    if pair < len(grad) and q_idx < grad.shape[2]:
        atom_i, atom_j = cuda_k_to_ij(i4(pair + k_cov))
        for axis in range(3):
            contribution = grad[pair, axis, q_idx]
            cuda.atomic.add(new_grad, (atom_j, axis, q_idx), contribution)
            cuda.atomic.add(new_grad, (atom_i, axis, q_idx), f4(-1.) * contribution)
def __VolProj3(I, x, r, y0, y1, y2, rad, u):
    """Evaluate a sum of ``__if3`` terms on a sub-block of the output ``u``.

    Each thread takes one ``(j, k, l)`` cell from ``cuda.grid(3)`` and reads
    that cell's entries ``I[j, k, l]``, ``x[j, k, l]`` and ``r[j, k, l]``.
    It then visits the output indices starting at
    ``(rad*j, rad*k, rad*l)`` and spanning up to ``rad`` entries per axis
    (clipped to the sizes of ``y0``, ``y1``, ``y2``).  For each output point
    it sums ``__if3`` over the cell's entries, stopping at the first entry
    whose index is ``-1``, and stores the sum in ``u``.
    """
    cj, ck, cl = cuda.grid(3)
    if cj >= I.shape[0] or ck >= I.shape[1] or cl >= I.shape[2]:
        return

    j_start, k_start, l_start = i4(rad * cj), i4(rad * ck), i4(rad * cl)
    s = abs(y1[1] - y1[0]) / 2  # assume uniform grid size
    cell_ids, cell_x, cell_r = I[cj, ck, cl], x[cj, ck, cl], r[cj, ck, cl]
    irad = i4(rad)
    point = cuda.local.array((3, ), f4)

    for oj in range(j_start, min(j_start + irad, y0.size)):
        point[0] = y0[oj]
        for ok_ in range(k_start, min(k_start + irad, y1.size)):
            point[1] = y1[ok_]
            for ol in range(l_start, min(l_start + irad, y2.size)):
                point[2] = y2[ol]
                total = 0
                for n in range(cell_ids.size):
                    # An index of -1 ends the list of entries for this cell.
                    if cell_ids[n] == -1:
                        break
                    total += __if3(cell_ids[n], cell_x[n], cell_r[n], point, s)
                u[oj, ok_, ol] = total
def new_node(locs, tree, sample_inds, y_counts, impurity, is_right):
    """Create (or look up) a child node and return its id.

    Parameters
    ----------
    locs : tuple
        ``(c, best_split, best_val, iterative, node_dict, context_stack,
        cache_nodes)`` as packed by the caller.
    tree :
        Tree whose ``nodes`` list is appended to and whose ``context_cache``
        / ``ifit_enabled`` attributes are consulted.
    sample_inds :
        Sample indices belonging to the new node; also used as the key into
        ``node_dict`` when ``cache_nodes`` is true.
    y_counts :
        Passed to ``TreeNode_ctor`` and ``reinit_splittercontext``.
    impurity : float
        A value greater than 0 produces an internal node with a new splitter
        context pushed on ``context_stack``; otherwise a leaf is appended.
    is_right :
        Forwarded to ``next_split_chain``.

    Returns
    -------
    The id of the node: either a cached id found in ``node_dict`` or the
    index of the newly appended node.
    """
    c, best_split, best_val, iterative, node_dict, context_stack, cache_nodes = locs
    nodes = tree.nodes
    # -1 means "no existing node found".
    node_id = i4(-1)
    if (cache_nodes): node_id = node_dict.get(sample_inds, -1)
    if (node_id == -1):
        # New nodes are appended, so the next id is the current list length.
        node_id = i4(len(nodes))
        if (cache_nodes): node_dict[sample_inds] = node_id
        if (impurity > 0.0):
            node = TreeNode_ctor(TTYPE_NODE, node_id, y_counts)
            nodes.append(node)
            split_chain = next_split_chain(c, is_right, 0, best_split, best_val)
            # Reuse a cached context only when fitting iteratively.
            if (iterative and split_chain in tree.context_cache):
                new_c = tree.context_cache[split_chain]
                # NOTE(review): ``ok`` is computed but never read -- looks
                # like a leftover consistency check.
                ok = np.array_equal(new_c.split_chain, split_chain)
            else:
                new_c = SplitterContext_ctor(split_chain)
                if (tree.ifit_enabled): tree.context_cache[split_chain] = new_c
            reinit_splittercontext(new_c, node, sample_inds, y_counts, impurity)
            # Queue the new context for splitting by the caller's loop.
            context_stack.append(new_c)
        else:
            nodes.append(TreeNode_ctor(TTYPE_LEAF, node_id, y_counts))
    return node_id
def fast_fast_flat_sum(new_grad, grad, k_cov):
    """Accumulate per-pair gradients onto ``new_grad``, one thread per (i, j, qx).

    Each thread takes ``(i, j, qx)`` from ``cuda.grid(3)``.  For ``i != j``
    the pair index is ``cuda_ij_to_k(max, min)`` and the sign is ``-1`` when
    ``j < i`` and ``+1`` otherwise.  After subtracting ``k_cov``, pairs that
    fall inside ``grad`` contribute ``sign * grad[k, tz, qx]`` to
    ``new_grad[i, tz, qx]`` through an atomic add.
    """
    row, col, q_idx = cuda.grid(3)
    n_atoms = len(new_grad)
    if row >= n_atoms or col >= n_atoms or q_idx >= grad.shape[2] or row == col:
        return

    # The larger index always goes first into cuda_ij_to_k.
    if col < row:
        pair = cuda_ij_to_k(row, col)
        sign = float32(-1)
    else:
        pair = cuda_ij_to_k(col, row)
        sign = float32(1)

    # Shift into the local window of pairs held by ``grad``.
    pair -= k_cov
    if pair < 0 or pair >= len(grad):
        return

    for axis in range(i4(3)):
        cuda.atomic.add(new_grad, (row, axis, q_idx), grad[pair, axis, q_idx] * sign)
def get_normalization_array(norm_array, scat, offset):
    """
    Fill ``norm_array`` with products of scatter factors for each pair.

    Each thread takes one ``(k, qx)`` index pair from ``cuda.grid(2)``,
    decodes ``(i, j) = cuda_k_to_ij(k + offset)`` and writes
    ``norm_array[k, qx] = scat[i, qx] * scat[j, qx]``.

    Parameters
    -----------
    norm_array: kxQ array
        Normalization array, written in place
    scat: NxQ array
        The scatter factor array
    offset: int
        The amount of previously covered pairs
    """
    pair, q_idx = cuda.grid(2)
    # Threads outside the output array do nothing.
    if pair < norm_array.shape[0] and q_idx < norm_array.shape[1]:
        atom_i, atom_j = cuda_k_to_ij(i4(pair + offset))
        norm_array[pair, q_idx] = scat[atom_i, q_idx] * scat[atom_j, q_idx]
def get_grad_fq_inplace(grad_omega, norm):
    """
    Scale ``grad_omega`` in place by the normalization array.

    Each thread takes one ``(k, qx)`` index pair from ``cuda.grid(2)`` and
    multiplies ``grad_omega[k, w, qx]`` by ``norm[k, qx]`` for ``w`` in 0..2.

    Parameters
    ------------
    grad_omega: kx3xQ array
        Input and output: overwritten with the scaled values
    norm: kxQ array
        The normalization array
    """
    n_pairs, _, n_q = grad_omega.shape
    pair, q_idx = cuda.grid(2)
    # Threads outside the array do nothing.
    if pair < n_pairs and q_idx < n_q:
        factor = norm[pair, q_idx]
        for axis in range(i4(3)):
            grad_omega[pair, axis, q_idx] *= factor
def get_d_array(d, q, offset):
    """
    Generate the kx3 array which holds the pair displacements.

    Each thread takes one pair index ``k`` from ``cuda.grid(1)``, decodes
    ``(i, j) = cuda_k_to_ij(k + offset)`` and writes
    ``d[k, w] = q[i, w] - q[j, w]`` for ``w`` in 0..2.

    Parameters
    ----------
    d: kx3 array
        The displacement array, written in place (indexed as ``d[k, w]``)
    q: Nx3 array
        The atomic positions
    offset: int
        The amount of previously covered pairs
    """
    pair = cuda.grid(1)
    # Threads beyond the last pair do nothing.
    if pair < len(d):
        atom_i, atom_j = cuda_k_to_ij(i4(pair + offset))
        for axis in range(3):
            d[pair, axis] = q[atom_i, axis] - q[atom_j, axis]
def get_grad_fq(grad, grad_omega, norm):
    """
    Write the normalized gradient into ``grad``.

    Each thread takes one ``(k, qx)`` index pair from ``cuda.grid(2)`` and
    stores ``grad[k, w, qx] = norm[k, qx] * grad_omega[k, w, qx]`` for ``w``
    in 0..2.

    Parameters
    ------------
    grad: kx3xQ array
        Output array which will store the FQ gradient
    grad_omega: kx3xQ array
        The gradient of the Debye sum
    norm: kxQ array
        Outer product of the scatter factors
    """
    n_pairs, _, n_q = grad.shape
    pair, q_idx = cuda.grid(2)
    # Threads outside the output array do nothing.
    if pair < n_pairs and q_idx < n_q:
        factor = norm[pair, q_idx]
        for axis in range(i4(3)):
            grad[pair, axis, q_idx] = factor * grad_omega[pair, axis, q_idx]
return min( temp_distance, distance_to_point_on_equator( px_retrans_rad[0], px_retrans_rad[1], max(min(px_retrans_rad[0], lng_p1_rad), 0))) # @cc.export('int2coord', f8(i4)) @jit(f8(i4), nopython=True, cache=True) def int2coord(i4): return float(i4 / 10**7) # @cc.export('coord2int', i4(f8)) @jit(i4(f8), nopython=True, cache=True) def coord2int(double): return int(double * 10**7) # @cc.export('distance_to_polygon_exact', f8(f8, f8, i4, i4[:, :], f8[:, :])) @jit(f8(f8, f8, i4, i4[:, :], f8[:, :]), nopython=True, cache=True) def distance_to_polygon_exact(lng_rad, lat_rad, nr_points, points, trans_points): # transform all points (int) to coords (float) for i in range(nr_points): trans_points[0][i] = radians(int2coord(points[0][i])) trans_points[1][i] = radians(int2coord(points[1][i])) # check points -2, -1, 0 first pm1_lng = trans_points[0][0]
return signal_number*factor_number+offset_number # big endian assumption @njit(numba.u8(numba.u1[:],numba.u1,numba.u1, numba.u4)) def getBigEndiNumberFromBitNpArr(blist, idx, size, id): signal_number = 0 for i in range (0,size): signal_number = signal_number | (blist[idx+i] << (size - i - 1)) return signal_number @njit(numba.u1(numba.u1[:],numba.u1)) def getIsNegativeBigEndianNumberFormBitNpArr(blist, idx): return blist[idx] @njit(numba.i4(numba.u1[:], numba.u1, numba.u1[:], numba.u1[:], numba.u1[:], numba.u1[:], numba.f8[:], numba.f8[:], numba.u4)) # @njit((numba.u1[:], numba.u1, numba.u1[:], numba.u1[:], numba.u1[:], numba.u1[:], numba.f8[:], numba.f8[:], numba.u4)) def ppParseSignal(barray_unpacked, signal_no, signal_is_signed_types ,signal_start_bits ,signal_is_integers ,signal_sizes ,signal_offsets ,signal_factors , id): start_bit_idx = getArrayIdxFromStartBit(signal_start_bits[signal_no]) this_signal_number = getBigEndiNumberFromBitNpArr(barray_unpacked, start_bit_idx, signal_sizes[signal_no], id) # if id == 0x4e0: # print signal_no, signal_start_bits[signal_no], start_bit_idx, signal_sizes[signal_no], this_signal_number if signal_is_signed_types[signal_no] and getIsNegativeBigEndianNumberFormBitNpArr(barray_unpacked, start_bit_idx): this_signal_number = twosComplement(this_signal_number, signal_sizes[signal_no]) # if id == 0x4e0: # print this_signal_number # if signal_is_integers[signal_no]: # this_signal_number = this_signal_number*int(signal_factors[signal_no]) + int(signal_offsets[signal_no]) # else: # this_signal_number = this_signal_number*float(signal_factors[signal_no]) + float(signal_offsets[signal_no]) # if id == 0x4e0:
import torch import numpy as np import numpy.random as npr import numba as nb from tqdm import tqdm from attrdict import AttrDict #import pandas as pd import wget import os.path as osp from utils.paths import datasets_path @nb.njit(nb.i4(nb.f8[:])) def catrnd(prob): cprob = prob.cumsum() u = npr.rand() for i in range(len(cprob)): if u < cprob[i]: return i return i @nb.njit(nb.types.Tuple((nb.f8[:,:,:], nb.f8[:,:,:], nb.i4)) \ (nb.i4, nb.i4, nb.i4, \ nb.f8, nb.f8, nb.f8, nb.f8, nb.f8, nb.f8)) def _simulate_task(batch_size, num_steps, max_num_points, X0, Y0, theta0, theta1, theta2, theta3): time = np.zeros((batch_size, num_steps, 1)) pop = np.zeros((batch_size, num_steps, 2)) length = num_steps * np.ones((batch_size))
def k_to_ij(k):
    """Decode a flat pair index ``k`` into the index pair ``(i, j)``.

    Computes ``i = floor((1 + sqrt(1 + 8*k)) / 2)`` and
    ``j = k - i*(i - 1)/2`` and returns both cast with ``i4``.
    """
    root = math.sqrt(1 + 8. * k)
    row = math.floor(float((1 + root)) / 2.)
    # Pairs with a smaller first index occupy the first row*(row-1)/2 slots.
    col = k - row * (row - 1) / 2.
    return i4(row), i4(col)
def cuda_k_to_ij(k):
    """Decode a flat pair index ``k`` into ``(i, j)`` using ``f4`` arithmetic.

    Computes ``i = floor((1 + sqrt(1 + 8*k)) * 0.5)`` and
    ``j = k - i*(i - 1)*0.5`` with every operand cast through ``f4``, and
    returns both cast with ``i4``.

    NOTE(review): all intermediate values go through ``f4``; if that is
    single precision, large ``k`` may not be represented exactly -- confirm
    the supported range of ``k``.
    """
    one = f4(1)
    half = f4(.5)
    discriminant = one + f4(8.) * f4(k)
    row = math.floor((one + f4(math.sqrt(discriminant))) * half)
    col = f4(k) - f4(row) * (f4(row) - one) * half
    return i4(row), i4(col)
return min( temp_distance, distance_to_point_on_equator( px_retrans_rad[0], px_retrans_rad[1], max(min(px_retrans_rad[0], lng_p1_rad), 0))) # @cc.export('int2coord', f8(i4)) @njit(f8(i4), cache=True) def int2coord(i4): return float(i4 * INT2COORD_FACTOR) # @cc.export('coord2int', i4(f8)) @njit(i4(f8), cache=True) def coord2int(double): return int(double * COORD2INT_FACTOR) # @cc.export('distance_to_polygon_exact', f8(f8, f8, i4, i4[:, :], f8[:, :])) @njit(f8(f8, f8, i4, i4[:, :], f8[:, :]), cache=True) def distance_to_polygon_exact(lng_rad, lat_rad, nr_points, points, trans_points): # transform all points (int) to coords (float) for i in range(nr_points): trans_points[0][i] = radians(int2coord(points[0][i])) trans_points[1][i] = radians(int2coord(points[1][i])) # check points -2, -1, 0 first pm1_lng = trans_points[0][0]
p1_cartesian = x_rotate(rotation_rad, pm1_cartesian) lng_p1_rad = atan2(p1_cartesian[1], p1_cartesian[0]) px_retrans_rad = cartesian2rad(*x_rotate(rotation_rad, px_cartesian)) return min(temp_distance, distance_to_point_on_equator(px_retrans_rad[0], px_retrans_rad[1], max(min(px_retrans_rad[0], lng_p1_rad), 0))) # @cc.export('int2coord', f8(i4)) @njit(f8(i4), cache=True) def int2coord(i4): return float(i4 * INT2COORD_FACTOR) # @cc.export('coord2int', i4(f8)) @njit(i4(f8), cache=True) def coord2int(double): return int(double * COORD2INT_FACTOR) # @cc.export('distance_to_polygon_exact', f8(f8, f8, i4, i4[:, :], f8[:, :])) @njit(f8(f8, f8, i4, i4[:, :], f8[:, :]), cache=True) def distance_to_polygon_exact(lng_rad, lat_rad, nr_points, points, trans_points): # transform all points (int) to coords (float) for i in range(nr_points): trans_points[0][i] = radians(int2coord(points[0][i])) trans_points[1][i] = radians(int2coord(points[1][i])) # check points -2, -1, 0 first pm1_lng = trans_points[0][0] pm1_lat = trans_points[1][0]