def gpuChambolleProjectionStopCriterion(f, mi=100, tau=0.25, tol=1e-5):
    '''
    The 2D case of the Chambolle projection algorithm. This version uses a
    stop criterion.

    Source
    ------
    Cywińska, Maria, Maciej Trusiak, and Krzysztof Patorski. "Automatized
    fringe pattern preprocessing using unsupervised variational image
    decomposition." Optics Express 27.16 (2019): 22542-22562.

    Parameters
    ----------
    f : cupy.ndarray
        Image which is the input for the Chambolle projection.
    mi : float
        Regularization parameter that defines the separation of the energy
        between the fringes and noise components.
    tau : float
        Chambolle projection step value.
    tol : float
        Error tolerance at which the algorithm should stop.

    Returns
    -------
    x2 : cupy.ndarray
        Image with the filtered background function.
    n : int
        Number of iterations needed to reach the result image.
    g_err : float
        Error of the result image.
    '''
    n = 1
    xi = cp.array([cp.zeros(f.shape), cp.zeros(f.shape)])
    x1 = cp.zeros(f.shape)
    x2 = cp.zeros(f.shape)
    cp.cuda.Stream.null.synchronize()
    err_n = 0
    err = []
    pp = []
    pr = 1
    for _ in iter(int, 1):  # loop until the stop criterion triggers a break
        gdv = cp.array(gradient2DGPU(divergence2DGPU(xi) - f / mi))
        d = cp.sqrt(cp.power(gdv[0], 2) + cp.power(gdv[1], 2))
        d = cp.tile(d, [2, 1, 1])
        xi = cp.divide(xi + tau * gdv, 1 + tau * d)
        # Reconstruction
        x2 = mi * divergence2DGPU(xi)
        # Tolerance
        num1 = cp.linalg.norm(x2 - x1, 2)
        num2 = cp.linalg.norm(f, 2)
        err.append(num1 / num2)
        g_err = cp.abs((err_n - err[n - 1]) / 2)
        err_n = err[n - 1]
        pp.append(g_err / err[0])
        pr = pp[n - 1]
        x1 = x2
        n = n + 1
        if pr < tol:
            break
    return [x2, n, g_err]
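# A minimal usage sketch (not from the source): calling the Chambolle
# projection on a noisy image. It assumes the gradient2DGPU and
# divergence2DGPU helpers used above are importable from this module;
# `fringe` and the parameter values are illustrative placeholders.
import cupy as cp
fringe = cp.random.rand(256, 256).astype(cp.float32)
background, n_iters, final_err = gpuChambolleProjectionStopCriterion(
    fringe, mi=100, tau=0.25, tol=1e-5)
print(n_iters, float(final_err))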
def evaluate_chunks(
        results: [cp.ndarray, cp.ndarray, cp.ndarray],  # closest triangle, distance, projection
        all_pts: cp.ndarray = None,
        vertices: cp.ndarray = None,
        edges: cp.ndarray = None,
        edge_norms: cp.ndarray = None,
        edge_normssq: cp.ndarray = None,
        normals: cp.ndarray = None,
        norms: cp.ndarray = None,
        normssq: cp.ndarray = None,
        zero_tensor: cp.ndarray = None,
        one_tensor: cp.ndarray = None,
        tris: cp.ndarray = None,
        vertex_normals: cp.ndarray = None,
        bounding_box: dict = None,
        chunk_size: int = None,
        num_verts: int = None) -> None:
    #
    # Expand vertex normals if non-empty
    if vertex_normals is not None:
        vertex_normals = vertex_normals[tris]
        vertex_normals = cp.tile(cp.expand_dims(vertex_normals, axis=2),
                                 (1, 1, chunk_size, 1))
    # begin = time.time()
    #
    # Load and extend the batch
    num_chunks = all_pts.shape[0] // chunk_size
    for i in range(num_chunks):
        #
        # Get subset of the query points
        start_index = i * chunk_size
        end_index = (i + 1) * chunk_size
        pts = all_pts[start_index:end_index, :]
        #
        # Match the dimensions to those assumed above.
        # [triangle_index, vert_index, querypoint_index, coordinates]
        pts = cp.tile(cp.expand_dims(pts, axis=(0, 1)), (num_verts, 3, 1, 1))
        #
        # Compute the differences between
        # vertices on each triangle and the
        # points of interest
        #
        # [triangle_index, vert_index, querypoint_index, coordinates]
        # ===================
        # [:,0,:,:] = p - p1
        # [:,1,:,:] = p - p2
        # [:,2,:,:] = p - p3
        diff_vectors = pts - vertices
        #
        # Compute alpha, beta, gamma
        barycentric = cp.empty(diff_vectors.shape)
        #
        # gamma = u x (p - p1)
        barycentric[:, 2, :, :] = cp.cross(edges[:, 0, :, :],
                                           diff_vectors[:, 0, :, :])
        # beta = (p - p1) x v
        barycentric[:, 1, :, :] = cp.cross(diff_vectors[:, 0, :, :],
                                           edges[:, 1, :, :])
        # alpha = w x (p - p2)
        barycentric[:, 0, :, :] = cp.cross(edges[:, 2, :, :],
                                           diff_vectors[:, 1, :, :])
        barycentric = cp.divide(
            cp.sum(cp.multiply(barycentric, normals), axis=3), normssq)
        #
        # Test conditions
        less_than_one = cp.less_equal(barycentric, one_tensor)
        more_than_zero = cp.greater_equal(barycentric, zero_tensor)
        #
        # if 0 <= gamma and gamma <= 1
        # and 0 <= beta and beta <= 1
        # and 0 <= alpha and alpha <= 1:
        cond1 = cp.logical_and(less_than_one, more_than_zero)
        #
        # if gamma <= 0:
        cond2 = cp.logical_not(more_than_zero[:, 2, :])
        cond2 = cp.tile(cp.expand_dims(cond2, axis=1), (1, 3, 1))
        #
        # if beta <= 0:
        cond3 = cp.logical_not(more_than_zero[:, 1, :])
        cond3 = cp.tile(cp.expand_dims(cond3, axis=1), (1, 3, 1))
        #
        # if alpha <= 0:
        cond4 = cp.logical_not(more_than_zero[:, 0, :])
        cond4 = cp.tile(cp.expand_dims(cond4, axis=1), (1, 3, 1))
        #
        # Get the projections for each case
        xi = cp.empty(barycentric.shape)
        barycentric_ext = cp.tile(cp.expand_dims(barycentric, axis=3),
                                  (1, 1, 1, 3))
        proj = cp.sum(cp.multiply(barycentric_ext, vertices), axis=1)
        #
        # if 0 <= gamma and gamma <= 1
        # and 0 <= beta and beta <= 1
        # and 0 <= alpha and alpha <= 1:
        xi[cond1] = barycentric[cond1]
        #
        # if gamma <= 0:
        #     x = p - p1
        #     u = p2 - p1
        #     a = p1
        #     b = p2
        t2 = cp.divide(
            # u.dot(x)
            cp.sum(cp.multiply(edges[:, 0, :, :], diff_vectors[:, 0, :, :]),
                   axis=2),
            edge_normssq[:, 0])
        xi2 = cp.zeros((t2.shape[0], 3, t2.shape[1]))
        xi2[:, 0, :] = -t2 + 1
        xi2[:, 1, :] = t2
        #
        t2 = cp.tile(cp.expand_dims(t2, axis=2), (1, 1, 3))
        lz = cp.less(t2, cp.zeros(t2.shape))
        go = cp.greater(t2, cp.ones(t2.shape))
        proj2 = vertices[:, 0, :, :] + cp.multiply(t2, edges[:, 0, :, :])
        proj2[lz] = vertices[:, 0, :, :][lz]
        proj2[go] = vertices[:, 1, :, :][go]
        #
        xi[cond2] = xi2[cond2]
        proj[cp.swapaxes(cond2, 1, 2)] = proj2[cp.swapaxes(cond2, 1, 2)]
        #
        # if beta <= 0:
        #     x = p - p1
        #     v = p3 - p1
        #     a = p1
        #     b = p3
        t3 = cp.divide(
            # v.dot(x)
            cp.sum(cp.multiply(edges[:, 1, :, :], diff_vectors[:, 0, :, :]),
                   axis=2),
            edge_normssq[:, 1])
        xi3 = cp.zeros((t3.shape[0], 3, t3.shape[1]))
        xi3[:, 0, :] = -t3 + 1
        xi3[:, 2, :] = t3
        #
        t3 = cp.tile(cp.expand_dims(t3, axis=2), (1, 1, 3))
        lz = cp.less(t3, cp.zeros(t3.shape))
        go = cp.greater(t3, cp.ones(t3.shape))
        proj3 = vertices[:, 0, :, :] + cp.multiply(t3, edges[:, 1, :, :])
        proj3[lz] = vertices[:, 0, :, :][lz]
        proj3[go] = vertices[:, 2, :, :][go]
        #
        xi[cond3] = xi3[cond3]
        proj[cp.swapaxes(cond3, 1, 2)] = proj3[cp.swapaxes(cond3, 1, 2)]
        #
        # if alpha <= 0:
        #     y = p - p2
        #     w = p3 - p2
        #     a = p2
        #     b = p3
        t4 = cp.divide(
            # w.dot(y)
            cp.sum(cp.multiply(edges[:, 2, :, :], diff_vectors[:, 1, :, :]),
                   axis=2),
            edge_normssq[:, 2])
        xi4 = cp.zeros((t4.shape[0], 3, t4.shape[1]))
        xi4[:, 1, :] = -t4 + 1
        xi4[:, 2, :] = t4
        #
        t4 = cp.tile(cp.expand_dims(t4, axis=2), (1, 1, 3))
        lz = cp.less(t4, cp.zeros(t4.shape))
        go = cp.greater(t4, cp.ones(t4.shape))
        proj4 = vertices[:, 1, :, :] + cp.multiply(t4, edges[:, 2, :, :])
        proj4[lz] = vertices[:, 1, :, :][lz]
        proj4[go] = vertices[:, 2, :, :][go]
        #
        xi[cond4] = xi4[cond4]
        proj[cp.swapaxes(cond4, 1, 2)] = proj4[cp.swapaxes(cond4, 1, 2)]

        vec_to_point = pts[:, 0, :, :] - proj
        distances = cp.linalg.norm(vec_to_point, axis=2)
        # n = "\n"
        # print(f"{pts[:,0,:,:]=}")
        # print(f"{proj=}")
        # print(f"{pts[:,0,:,:] - proj=}")
        # print(f"{distances=}")
        min_distances = cp.min(distances, axis=0)
        closest_triangles = cp.argmin(distances, axis=0)
        projections = proj[closest_triangles, np.arange(chunk_size), :]
        #
        # Distinguish close triangles
        is_close = cp.isclose(distances, min_distances)
        #
        # Determine sign
        signed_normal = normals[:, 0, :, :]
        if vertex_normals is not None:
            signed_normal = cp.sum(vertex_normals.transpose() *
                                   xi.transpose(),
                                   axis=2).transpose()
        is_negative = cp.less_equal(
            cp.sum(cp.multiply(vec_to_point, signed_normal), axis=2), 0.)
        #
        # Combine
        is_close_and_negative = cp.logical_and(is_close, is_negative)
        #
        # Determine if inside
        is_inside = cp.all(cp.logical_or(is_close_and_negative,
                                         cp.logical_not(is_close)),
                           axis=0)
        #
        # Overwrite the signs of points
        # that are outside of the box
        if bounding_box is not None:
            #
            # Extract
            rotation_matrix = cp.asarray(bounding_box['rotation_matrix'])
            translation_vector = cp.asarray(bounding_box['translation_vector'])
            size = cp.asarray(bounding_box['size'])
            #
            # Transform
            transformed_pts = cp.dot(
                all_pts[start_index:end_index, :] - translation_vector,
                rotation_matrix)
            #
            # Determine if outside bbox
            inside_bbox = cp.all(cp.logical_and(
                cp.less_equal(0., transformed_pts),
                cp.less_equal(transformed_pts, size)),
                axis=1)
            #
            # Treat points outside bbox as
            # being outside of lumen
            print(f"{inside_bbox=}")
            is_inside = cp.logical_and(is_inside, inside_bbox)
        #
        # Apply sign to indicate whether the distance is
        # inside or outside the mesh.
        min_distances[is_inside] = -1 * min_distances[is_inside]
        #
        # Emplace results
        # [triangle_index, vert_index, querypoint_index, coordinates]
        results[0][start_index:end_index] = closest_triangles
        results[1][start_index:end_index] = min_distances
        results[2][start_index:end_index, :] = projections
def forward_pool(A_previous, stride, f, mode="max"):
    '''
    A forward pooling step.

    Output shape: (1 + (x - f) / stride)

    Parameters
    ----------
    A_previous : cp.array(examples, height, width, depth)
        Input images from the previous layer.
    stride : int
        Stride number.
    f : int
        Square filter dimension.
    mode : string, optional
        Pool mode 'mean' or 'max'. The default is "max".

    Returns
    -------
    A : cp.array(examples, 1 + (height - f) / stride, 1 + (width - f) / stride, depth)
        Output layer image.
    '''
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    n_H = int(1 + (n_H_prev - f) / stride)
    n_W = int(1 + (n_W_prev - f) / stride)
    n_C = n_C_prev
    A = cp.zeros((m, n_H, n_W, n_C))
    # Build the gather indices for every pooling window at once
    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    R = A_previous[:, i, j, :]
    if mode == "max":
        pl = cp.max(R, axis=1)
    elif mode == "mean":
        pl = cp.mean(R, axis=1)
    A = cp.reshape(pl, (m, n_H, n_W, n_C))
    '''
    Naive reference implementation:
    for i in range(m):
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride+f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride+f
                for c in range(n_C):
                    a_prev_slice = A_previous[i, vert_start:vert_end,
                                              horiz_start:horiz_end, c]
                    if mode == "max":
                        A[i, h, w, c] = cp.max(a_prev_slice)
                    elif mode == "mean":
                        A[i, h, w, c] = cp.mean(a_prev_slice)
    '''
    return A
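# Illustrative shape check for forward_pool (a sketch, not part of the
# source): max-pooling a random batch of four 8x8x3 images with a 2x2 window
# and stride 2 should follow the 1 + (x - f) / stride formula and yield a
# (4, 4, 4, 3) output.
import cupy as cp
batch = cp.random.rand(4, 8, 8, 3).astype(cp.float32)
pooled = forward_pool(batch, stride=2, f=2, mode="max")
assert pooled.shape == (4, 4, 4, 3)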
def from_vec3(v: Vec3, length: int) -> Vec3List:
    vl = cp.tile(v.e, (length, 1))
    return Vec3List(vl)
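# Quick illustrative check of the tiling used by from_vec3 (stand-in data,
# since Vec3/Vec3List are defined elsewhere): tiling a 3-vector `length`
# times produces a (length, 3) array whose rows all equal the source vector.
import cupy as cp
e = cp.array([1.0, 2.0, 3.0])
vl = cp.tile(e, (5, 1))  # what from_vec3 does with v.e and length=5
assert vl.shape == (5, 3) and bool((vl == e).all())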
def mandelbrot_boundary_gpu(res_c, res, res_tile, iter_scale, growth_rate):
    ######## parameters ##########
    # res_c - initial resolution tested. I recommend using a value between 20 and 100.
    # res - final resolution tested. Values between 1000-5000 show that the set has a
    #   fractal dimension of 2.
    # res_tile - due to a high memory complexity (approx n^2), "tiling" is needed for
    #   testing high resolutions (see paper). This parameter controls the tile
    #   resolution. Set this parameter as high as memory allows.
    #   Using a 1080 ti with 11GB of VRAM, I can calculate the set for resolutions up
    #   to 10000x10000. For 8GB of VRAM, try res_tile = 7500; 6GB, 6000; 4GB, 5000.
    #   If you have less than 4GB of VRAM, I recommend using your CPU instead of the GPU.
    # iter_scale - instead of calculating the Mandelbrot set with a fixed amount of
    #   iterations, the amount of iterations is based on the resolution being
    #   calculated: iter = current_res * iter_scale. I recommend values between 0.5-5.
    # growth_rate - instead of testing all resolutions between res_c and res, an
    #   exponentially increasing loop counter is used for the resolutions. growth_rate
    #   is the exponent. It must be greater than 1; the larger the value, the fewer
    #   resolutions are tested. I recommend values between 1.05-1.3.
    ##############################
    # point counter; holds the total amount of points in the set for a given resolution
    point_c = 0
    # used to init the x and y vars that store calculation results
    init_res = res_c
    # x and y tile counters, used as loop vars for looping a tile over the complex plane interval
    tile_c_x = 0
    tile_c_y = 0
    # determines the final tile resolution
    tile_stop = int(res / res_tile)
    # tile counter, loops from 1 to the tile_stop value
    tile_stop_c = 1
    # main resolution loop; res_c works as both the initial resolution parameter and the loop counter
    while res_c < res:
        # this statement controls whether or not to tile
        if res_c < res_tile:
            # calc iterations for a given res_c value
            iterations = int(res_c * iter_scale)
            # clear gpu memory
            mempool = cp.get_default_memory_pool()
            mempool.free_all_blocks()
            # message to user on the current res being calculated
            print("Next Resolution:", res_c, "x", res_c, ";Testing ",
                  res_c**2, " points")
            # generate X and Y vectors; these make up the real and imag parts
            # of the complex plane matrix C
            X = cp.linspace(-2, 2, num=res_c).reshape((1, res_c))
            Y = cp.linspace(-2, 2, num=res_c).reshape((res_c, 1))
            C = cp.tile(X, (res_c, 1)) + 1j * cp.tile(Y, (1, res_c))
            # Z matrix of zeros from the Mandelbrot set definition
            Z = cp.zeros((res_c, res_c), dtype=complex)
            # M is a matrix of bools storing whether or not a given point of C is in
            # the set. If the abs value of a point of Z is greater than 2, that entry
            # in M is made False, halting further multiplication.
            M = cp.full((res_c, res_c), True, dtype=bool)
            # perform calculations
            for i in tqdm(range(iterations)):
                Z[M] = Z[M] * Z[M] + C[M]
                M[cp.abs(Z) > 2] = False
            # move the M array to the CPU
            M_cpu = cp.asnumpy(M)
            # update user
            print("Now finding the set")
            # these nested loops go over the logic set (now on the cpu) to count
            # the points on the boundary of the set
            for i in tqdm(range(res_c - 1)):
                for j in range(res_c - 1):
                    a = M_cpu[i, j]
                    b = M_cpu[(i + 1), j]
                    c = M_cpu[i, (j + 1)]
                    d = M_cpu[(i + 1), (j + 1)]
                    if (a != b) or (a != c) or (a != d) or (b != c) or (
                            b != d) or (c != d):
                        point_c = point_c + 1
                    else:
                        pass
            # creates arrays x and y the first time through the loop,
            # otherwise appends new entries to x and y.
            # x and y are used for a linear regression; the slope of this
            # regression is the estimate of the fractal dimension
            if res_c == init_res:
                x = np.array([m.log10(res_c)])
                y = np.array([m.log10(point_c)])
            else:
                x = np.append(x, [m.log10(res_c)], axis=0)
                y = np.append(y, [m.log10(point_c)], axis=0)
            # continue loop
            res_c = m.floor(res_c * growth_rate) + 1
            # reset point counter
            point_c = 0
        else:
            # for any resolution greater than res_tile (user specified parameter),
            # the complex plane is "tiled" into res_tile x res_tile sections, then
            # the same calculations are performed on these tiles.
            # If res_tile = 5000 and res = 10000, then tile_stop will be 2.
            res_c = res_tile
            point_c = 0
            # this loop controls the amount of tiles needed for a given resolution
            while tile_stop_c <= tile_stop:
                # update user
                print("Next Resolution: ", (res_c * tile_stop_c), "x",
                      (res_c * tile_stop_c), ";Testing ",
                      (res_c * tile_stop_c)**2, " points")
                # tot_c records the total amount of tiles that have been calculated,
                # solely for the user update
                tot_c = 1
                # tile counters for x and y loop the tile over the complex plane
                # from -2 to 2 on both the real and imag axes
                tile_c_x = 0
                tile_c_y = 0
                # since the complex plane has been broken up into tile_stop_c by
                # tile_stop_c tiles, a double loop is needed to calculate the set
                # over the whole complex plane
                while tile_c_y < tile_stop_c:
                    while tile_c_x < tile_stop_c:
                        # update user
                        print("Calculating", res_tile, "x", res_tile, "tile",
                              tot_c, "out of", tile_stop_c**2)
                        # dynamically calculate iterations
                        iterations = int(res_tile * iter_scale)
                        # step_tile calculates the offset of the tile from -2
                        step_tile = 4 / tile_stop_c
                        # clear GPU memory
                        mempool = cp.get_default_memory_pool()
                        mempool.free_all_blocks()
                        # generate the tile of the complex plane, which spans
                        # -2 to 2 on both the real and imag axes overall
                        X = cp.linspace((-2 + (step_tile * tile_c_x)),
                                        (-2 + (step_tile * (tile_c_x + 1))),
                                        num=res_tile).reshape((1, res_tile))
                        Y = cp.linspace((-2 + (step_tile * tile_c_y)),
                                        (-2 + (step_tile * (tile_c_y + 1))),
                                        num=res_tile).reshape((res_tile, 1))
                        C = cp.tile(X, (res_tile, 1)) + 1j * cp.tile(
                            Y, (1, res_tile))
                        Z = cp.zeros((res_tile, res_tile), dtype=complex)
                        M = cp.full((res_tile, res_tile), True, dtype=bool)
                        # calculate set
                        for i in tqdm(range(iterations)):
                            Z[M] = Z[M] * Z[M] + C[M]
                            M[cp.abs(Z) > 2] = False
                        # move M to the cpu and calculate the set size
                        M_cpu = cp.asnumpy(M)
                        # update user
                        print("Now finding the set")
                        # these nested loops go over the logic set (now on the cpu)
                        # to count the points on the boundary of the set
                        for i in tqdm(range(res_tile - 1)):
                            for j in range(res_tile - 1):
                                a = M_cpu[i, j]
                                b = M_cpu[(i + 1), j]
                                c = M_cpu[i, (j + 1)]
                                d = M_cpu[(i + 1), (j + 1)]
                                if (a != b) or (a != c) or (a != d) or (
                                        b != c) or (b != d) or (c != d):
                                    point_c = point_c + 1
                                else:
                                    pass
                        # continue inner loop
                        tile_c_x = tile_c_x + 1
                        tot_c = tot_c + 1
                    # continue outer loop
                    tile_c_y = tile_c_y + 1
                    tile_c_x = 0
                # outside of the doubly nested loop, record the total plot
                # resolution and the total amount of points in the set
                x = np.append(x, [m.log10(res_c * tile_stop_c)], axis=0)
                y = np.append(y, [m.log10(point_c)], axis=0)
                # exponentially iterate loop
                tile_stop_c = m.floor(tile_stop_c * growth_rate) + 1
                # reset point counter
                point_c = 0
            # exit main loop
            res_c = res
    # prepare x and y for linear regression
    x = x.reshape((-1, 1))
    y = y.flatten()
    # calculate linear regression
    model = LinearRegression()
    model.fit(x, y)
    r_sq = model.score(x, y)
    # print results to user
    print('r^2:', r_sq)
    print("The Dimensionality is:", model.coef_)
    print("The Scaling Factor is:", model.intercept_)
    # show loglog plot to user
    plt.plot(x, y, 'ro')
    plt.axis([0, 10, 0, 10])
    plt.show()
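# Hedged usage sketch (not from the source): a quick low-resolution run of
# mandelbrot_boundary_gpu. It assumes this module's imports (cupy as cp,
# numpy as np, math as m, tqdm, sklearn's LinearRegression, and
# matplotlib.pyplot as plt) are in place. The small values keep the run fast;
# the fitted slope only approaches the fractal dimension of ~2 for large res.
mandelbrot_boundary_gpu(res_c=20, res=200, res_tile=500,
                        iter_scale=1.0, growth_rate=1.2)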
def __call__(self, input_x, t):
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(F.reshape(
        output, (batch_size, self.predictor.n_boxes,
                 self.predictor.n_classes + 5, grid_h, grid_w)),
        (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)  # activation of x
    y = F.sigmoid(y)  # activation of y
    conf = F.sigmoid(conf)  # activation of conf
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)  # activation of probability

    # prepare the training targets
    # train w and h toward 0 (e^w and e^h approach 1 -> scale factor of 1 for
    # the responsible bbox)
    tw = xp.zeros(w.shape, dtype=xp.float32)
    th = xp.zeros(h.shape, dtype=xp.float32)
    # train the activated x and y toward 0.5
    tx = xp.tile(0.5, x.shape).astype(xp.float32)
    ty = xp.tile(0.5, y.shape).astype(xp.float32)

    # the error-learning scale for bboxes without a center defaults to 0.1
    if self.seen < self.unstable_seen:
        box_learning_scale = xp.tile(0.1, x.shape).astype(xp.float32)
    else:
        box_learning_scale = xp.tile(0, x.shape).astype(xp.float32)

    # the confidence truth is 0 by default; anchors whose iou exceeds thresh
    # are not trained, except that the best_box of a grid cell containing an
    # object is pushed toward the true IOU
    tconf = xp.zeros(conf.shape, dtype=xp.float32)
    conf_learning_scale = xp.tile(0.1, conf.shape).astype(xp.float32)

    # train nothing except the best anchor (squared error against itself = 0)
    tprob = prob.data.copy()

    # compute the iou of every bbox against the truth (batch-wise)
    x_shift = Variable(
        xp.broadcast_to(xp.arange(grid_w, dtype=xp.float32), x.shape[1:]))
    y_shift = Variable(
        xp.broadcast_to(
            xp.arange(grid_h, dtype=xp.float32).reshape(grid_h, 1),
            y.shape[1:]))
    w_anchor = Variable(
        xp.broadcast_to(
            xp.reshape(
                xp.array(self.anchors, dtype=xp.float32)[:, 0],
                (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
    h_anchor = Variable(
        xp.broadcast_to(
            xp.reshape(
                xp.array(self.anchors, dtype=xp.float32)[:, 1],
                (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
    x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu()
    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = F.exp(w[batch]) * w_anchor / grid_w
        box_h = F.exp(h[batch]) * h_anchor / grid_h
        ious = []
        for truth_index in range(n_truth_boxes):
            truth_box_x = Variable(
                xp.broadcast_to(
                    xp.array(t[batch][truth_index]["x"], dtype=xp.float32),
                    box_x.shape))
            truth_box_y = Variable(
                xp.broadcast_to(
                    xp.array(t[batch][truth_index]["y"], dtype=xp.float32),
                    box_y.shape))
            truth_box_w = Variable(
                xp.broadcast_to(
                    xp.array(t[batch][truth_index]["w"], dtype=xp.float32),
                    box_w.shape))
            truth_box_h = Variable(
                xp.broadcast_to(
                    xp.array(t[batch][truth_index]["h"], dtype=xp.float32),
                    box_h.shape))
            truth_box_x.to_gpu(), truth_box_y.to_gpu()
            truth_box_w.to_gpu(), truth_box_h.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w,
                        truth_box_h)).data.get())
        ious = xp.array(ious)
        best_ious.append(xp.max(ious, axis=0))
    best_ious = xp.array(best_ious)

    # for anchors whose iou exceeds the threshold, do not push conf down to 0
    # (grid cells around the truth keep their conf as-is)
    tconf[best_ious > self.thresh] = conf.data.get()[best_ious > self.thresh]
    conf_learning_scale[best_ious > self.thresh] = 0

    # only for anchor boxes containing an object, individually correct
    # x, y, w, h, conf, and prob
    abs_anchors = self.anchors / xp.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # for anchors that contain an object, push the center toward the
            # true coordinates instead of 0.5, push the anchor scale toward
            # the true scale instead of 1, and set the learning scale to 1
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = xp.log(
                float(truth_box["w"]) / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = xp.log(
                float(truth_box["h"]) / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n, truth_h,
                  truth_w] = 1

            # observe the IOU
            full_truth_box = Box(float(truth_box["x"]),
                                 float(truth_box["y"]),
                                 float(truth_box["w"]),
                                 float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_w) / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                 truth_h) / grid_h,
                xp.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][0],
                xp.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

            # debug prints
            maps = F.transpose(prob[batch], (2, 3, 1, 0)).data
            print("best confidences and best conditional probability and "
                  "predicted class of each grid:")
            for i in range(grid_h):
                for j in range(grid_w):
                    print("%2d" %
                          (int(conf[batch, :, :, i, j].data.max() * 100)),
                          end=" ")
                print(" ", end="")
                for j in range(grid_w):
                    print("%2d" % (maps[i][j][int(
                        maps[i][j].max(axis=1).argmax())].argmax()),
                          end=" ")
                print(" ", end="")
                for j in range(grid_w):
                    print("%2d" % (maps[i][j][int(
                        maps[i][j].max(axis=1).argmax())].max() * 100),
                          end=" ")
                print()
            print("best default iou: %.2f predicted iou: %.2f "
                  "confidence: %.2f class: %s" %
                  (best_iou, predicted_iou,
                   conf[batch][truth_n][0][truth_h][truth_w].data,
                   t[batch][0]["label"]))
            print("-------------------------------")
    print("seen = %d" % self.seen)

    # loss computation
    tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), Variable(
        tw), Variable(th), Variable(tconf), Variable(tprob)
    box_learning_scale, conf_learning_scale = Variable(
        box_learning_scale), Variable(conf_learning_scale)
    tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu(
    ), tprob.to_gpu()
    box_learning_scale.to_gpu()
    conf_learning_scale.to_gpu()

    x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
    y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
    w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
    h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
    c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
    p_loss = F.sum((tprob - prob)**2) / 2
    print("x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f "
          "p_loss: %f" %
          (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data,
           F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data))
    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    return loss
def tile(arr, rep):
    return cp.tile(arr, rep)
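# Illustrative check of the wrapper above, mirroring numpy.tile semantics:
# repeating a length-3 vector with reps (2, 3) yields a (2, 9) array.
import cupy as cp
out = tile(cp.arange(3), (2, 3))
assert out.shape == (2, 9)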
def forward(self, inputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    '''
    Original implementation:
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        y = numpy.einsum('ij,ik,jkl->il', e1, e2, W)
    else:
        i_len, j_len = e1.shape
        k_len = e2.shape[1]
        # 'ij,ik->ijk'
        e1e2 = e1[:, :, None] * e2[:, None, :]
        # ijk->i[jk]
        e1e2 = e1e2.reshape(i_len, j_len * k_len)
        # jkl->[jk]l
        W_mat = W.reshape(-1, W.shape[2])
        # 'i[jk],[jk]l->il'
        y = e1e2.dot(W_mat)
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        y += e1.dot(V1)
        y += e2.dot(V2)
        y += b
    '''
    # modified forward calculation
    e1_cube = e1.reshape(len(e1), 1, -1).astype(dtype=e1.dtype, copy=False)
    e1_tile = cupy.tile(e1_cube, (1, time_span, 1)).astype(dtype=e1.dtype,
                                                           copy=False)
    e2_cube = e2.reshape(len(e2), time_span, -1).astype(dtype=e1.dtype,
                                                        copy=False)
    y_cube = e1_tile * e2_cube
    y_sum = cupy.sum(y_cube, axis=2).astype(dtype=e1.dtype, copy=False)
    y = y_sum.reshape(len(e1), -1).astype(dtype=e1.dtype, copy=False)
    return y,
def _get_nearplane_gradients(
    nearplane,
    patches,
    psi,
    scan,
    probe,
    recover_psi=True,
    recover_probe=True,
    recover_positions=True,
    op=None,
):
    psi_update_numerator = cp.zeros(psi.shape, dtype='complex64')
    psi_update_denominator = cp.zeros(psi.shape, dtype='complex64')
    probe_update_numerator = cp.zeros(probe.shape, dtype='complex64')
    position_update_numerator = cp.zeros(scan.shape, dtype='float32')
    position_update_denominator = cp.zeros(scan.shape, dtype='float32')

    grad_x, grad_y = tike.ptycho.position._image_grad(op, patches)

    for m in range(probe.shape[-3]):

        diff = nearplane[..., [m], :, :] - (probe[..., [m], :, :] * patches)

        if recover_psi:
            grad_psi = cp.conj(probe[..., [m], :, :]) * diff
            psi_update_numerator = op.diffraction.patch.adj(
                patches=grad_psi[..., 0, 0, :, :],
                images=psi_update_numerator,
                positions=scan,
            )
            probe_amp = probe[..., 0, m, :, :] * probe[..., 0, m, :, :].conj()
            # TODO: Allow this kind of broadcasting inside the patch operator
            if probe_amp.shape[-3] == 1:
                probe_amp = cp.tile(probe_amp, (scan.shape[-2], 1, 1))
            psi_update_denominator = op.diffraction.patch.adj(
                patches=probe_amp,
                images=psi_update_denominator,
                positions=scan,
            )

        if recover_probe:
            probe_update_numerator[..., [m], :, :] = cp.sum(
                cp.conj(patches) * diff,
                axis=-5,
                keepdims=True,
            )

        if recover_positions:
            position_update_numerator[..., 0] += cp.sum(
                cp.real(cp.conj(grad_x * probe[..., [m], :, :]) * diff),
                axis=(-2, -1),
            )[..., 0, 0]
            position_update_denominator[..., 0] += cp.sum(
                cp.abs(grad_x * probe[..., [m], :, :])**2,
                axis=(-2, -1),
            )[..., 0, 0]
            position_update_numerator[..., 1] += cp.sum(
                cp.real(cp.conj(grad_y * probe[..., [m], :, :]) * diff),
                axis=(-2, -1),
            )[..., 0, 0]
            position_update_denominator[..., 1] += cp.sum(
                cp.abs(grad_y * probe[..., [m], :, :])**2,
                axis=(-2, -1),
            )[..., 0, 0]

    if recover_probe:
        probe_update_denominator = cp.sum(
            patches * patches.conj(),
            axis=-5,
            keepdims=True,
        )
    else:
        probe_update_denominator = None

    return (
        psi_update_numerator,
        psi_update_denominator,
        probe_update_numerator,
        probe_update_denominator,
        position_update_numerator,
        position_update_denominator,
    )
# print(B_agg)
for ref_dist in range(0, len(dist_id_sel)):
    # Step 1: standardization of the climate data with the reference period
    # (1895-1994, on a monthly basis) ICV at ICV proxy ref_dist
    B_j = B_agg[:, ref_dist]
    C_j = cp.transpose(cp.concatenate((
        locals()['ppt_sp_' + mdl + '_C'][:, ref_dist],
        locals()['ppt_sm_' + mdl + '_C'][:, ref_dist],
        locals()['ppt_fl_' + mdl + '_C'][:, ref_dist],
        locals()['ppt_wt_' + mdl + '_C'][:, ref_dist],
        locals()['tmax_sp_' + mdl + '_C'][:, ref_dist],
        locals()['tmax_sm_' + mdl + '_C'][:, ref_dist],
        locals()['tmax_fl_' + mdl + '_C'][:, ref_dist],
        locals()['tmax_wt_' + mdl + '_C'][:, ref_dist],
        locals()['tmin_sp_' + mdl + '_C'][:, ref_dist],
        locals()['tmin_sm_' + mdl + '_C'][:, ref_dist],
        locals()['tmin_fl_' + mdl + '_C'][:, ref_dist],
        locals()['tmin_wt_' + mdl + '_C'][:, ref_dist],
        locals()['npp_' + mdl + '_C'][:, ref_dist],
    )).reshape(13, len(locals()['ppt_sp_' + mdl + '_C'])))
    # C_j should be T_mon x n(var), i.e. T x K in Mohany's paper
    C_j_sd = C_j.std(axis=0)
    A_prime = A_agg / C_j_sd[:, None]
    B_j_prime = B_j / C_j_sd
    C_j_prime = C_j / C_j_sd
    # Step 2: principal component analysis on the reference matrix C,
    # and principal component extraction
    C_j_prime_avg = cp.mean(C_j_prime, axis=0)
    C_j_prime_temp = cp.asnumpy(C_j_prime)
    m, n = np.shape(C_j_prime_temp)
    C_adj = []
    C_j_prime_p_avg = cp.tile(C_j_prime_avg, (m, 1))
    C_adj = C_j_prime - C_j_prime_p_avg
    # calculate the covariance matrix
    covC = cp.cov(C_adj.T)
    # solve its eigenvalues and eigenvectors
    covC_np = cp.asnumpy(covC)
    C_eigen_val, C_eigen_vec = np.linalg.eig(covC_np)
    # rank the eigenvalues; the truncation rule is not applied here because
    # of the limited variable availability
    # (the eigendecomposition lives on the host, so sort with numpy)
    index = np.argsort(-C_eigen_val)
    C_eigen_val = C_eigen_val[index]
    C_eigen_vec = C_eigen_vec[:, index]
    finalData = []
    # C matrix, corrected with PCA
    C_pca_vec = C_eigen_vec.T
    # A and B matrices, corrected with PCA
    A_prime_np = cp.asnumpy(A_prime)
def optimize(self, training_features, training_targets, weight_matrix):
    training_features = cupy.array(training_features)
    training_targets = cupy.array(training_targets)
    N = training_features.shape[0]
    M = weight_matrix.shape[1]
    tensor_of_x_features = cupy.tile(0.0, (N, 1, training_features.shape[1]))
    tensor_of_x_squared = cupy.tile(
        0.0, (N, training_features.shape[1], training_features.shape[1]))
    matrix_set_diag_to_zero = cupy.tile(
        1.0, (training_features.shape[1], training_features.shape[1]))
    cupy.fill_diagonal(matrix_set_diag_to_zero, 0.0)
    for i in range(N):
        tensor_of_x_features[i] = training_features[i]
        tensor_of_x_squared[i] = training_features[i].dot(
            training_features[i])
    historical_gradient = cupy.tile(0.0, (weight_matrix.shape))
    tensor_of_x_squared = tensor_of_x_squared * matrix_set_diag_to_zero
    tensor_of_x_features_squared = tensor_of_x_features * tensor_of_x_features
    tensor_of_proto_vx = cupy.tile(0.0, (N, 1, M))
    tensor_of_proto_square = cupy.tile(0.0, (N, 1, M))
    vector_of_prediction = cupy.tile(0.0, N)
    vector_of_sum = cupy.tile(1.0, (M, 1))
    vector_of_gradient = cupy.tile(0.0, N)
    weight_matrix_square = cupy.tile(0.0, (weight_matrix.shape))
    update_step = cupy.tile(0.0, (weight_matrix.shape))
    # batch_size = numpy.floor(N/batch_count).astype(numpy.int32)
    batch_count = numpy.floor(N / self.batch_size).astype(numpy.int32)
    seed = 0
    idxs = cupy.linspace(0, self.batch_size, self.batch_size,
                         dtype=numpy.int32)
    patience_counter = 0
    last_iteration_error = 0
    # error_iter_array = numpy.tile(1, (iterations, 1))
    error_iter_array = numpy.empty(self.iterations, dtype=numpy.float32)
    for i in range(self.iterations):
        seed = seed + 1
        cupy.random.seed(seed)
        numpy_rand_idx_list = numpy.random.permutation(N)
        random_idx_list = cupy.array(numpy_rand_idx_list)
        idxs = 0
        init = 0
        ending = 0
        error_sum = 0
        for j in range(batch_count):
            init = j * self.batch_size
            ending = (j + 1) * self.batch_size
            idxs = random_idx_list[init:ending]
            weight_matrix[cupy.abs(weight_matrix) < 0.0000001] = 0
            weight_matrix_square = weight_matrix * weight_matrix
            tensor_of_proto_vx = cupy.tensordot(
                tensor_of_x_features[idxs], weight_matrix, axes=1)
            tensor_of_proto_square = cupy.tensordot(
                tensor_of_x_features_squared[idxs], weight_matrix_square,
                axes=1)
            vector_of_prediction = cupy.tensordot(
                ((tensor_of_proto_vx * tensor_of_proto_vx) -
                 tensor_of_proto_square),
                vector_of_sum, axes=1).sum(axis=1) * 0.5
            b = training_targets[idxs] - vector_of_prediction
            # print(b.mean())
            error_sum = error_sum + cupy.mean(b)
            vector_of_gradient = -2 * b
            vrau = cupy.tensordot(tensor_of_x_squared[idxs], weight_matrix,
                                  axes=1)
            update_step = ((vector_of_gradient.T * vrau.T).T).sum(axis=0) + \
                weight_matrix_square * self.regularization
            # ADAGRAD UPDATE
            historical_gradient += update_step * update_step
            weight_matrix -= self.alpha / (
                cupy.sqrt(historical_gradient)) * update_step  # +0.000001
        error_iter_array[i] = error_sum / batch_count
        if cupy.abs(cupy.abs(error_iter_array[i]) -
                    last_iteration_error) < self.iteration_patience_threshold:
            patience_counter = patience_counter + 1
        else:
            patience_counter = 0  # RESET
        if patience_counter == self.iteration_patience:
            break
        #
        last_iteration_error = cupy.abs(error_iter_array[i])
    # return the array with the per-iteration errors as well
    return weight_matrix, error_iter_array.mean(), error_iter_array
def from_array(a):
    vl = cp.transpose(cp.tile(a, (3, 1)))
    return Vec3List(vl)
def from_vec3(v, length):
    v1 = cp.tile(v.e, (length, 1))
    return Vec3List(v1)
def get_values():
    rnd = numpy.random.RandomState(1)
    values = numpy.tile(rnd.uniform(0, 100, size=nx * ny // 10), 10)
    return values
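# Illustrative note (an assumption-laden sketch, not from the source):
# get_values tiles one random block of length nx*ny//10 ten times, so
# consecutive blocks of the result are identical.
import numpy
nx, ny = 100, 100  # stand-ins for the module-level grid size
block = nx * ny // 10
v = get_values()
assert v.shape == (nx * ny,)
assert (v[:block] == v[block:2 * block]).all()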
def sgd_subset(train_X, train_Y, iterations, alpha, regularization,
               weight_matrix):
    N = train_X.shape[0]  # N = 6928 and 6928/866 = 8
    M = weight_matrix.shape[1]
    # note: the original referenced an undefined `trainX`; use train_X
    tensor_of_x_features = cupy.tile(0.0, (N, 1, train_X.shape[1]))
    tensor_of_x_squared = cupy.tile(0.0,
                                    (N, train_X.shape[1], train_X.shape[1]))
    matrix_set_diag_to_zero = cupy.tile(
        1.0, (train_X.shape[1], train_X.shape[1]))
    cupy.fill_diagonal(matrix_set_diag_to_zero, 0.0)
    for i in range(N):
        tensor_of_x_features[i] = train_X[i]
        tensor_of_x_squared[i] = train_X[i].dot(train_X[i])
    historical_gradient = cupy.tile(0.0, (weight_matrix.shape))
    tensor_of_x_squared = tensor_of_x_squared * matrix_set_diag_to_zero
    tensor_of_x_features_squared = tensor_of_x_features * tensor_of_x_features
    tensor_of_proto_vx = cupy.tile(0.0, (N, 1, M))
    tensor_of_proto_square = cupy.tile(0.0, (N, 1, M))
    vector_of_prediction = cupy.tile(0.0, N)
    vector_of_sum = cupy.tile(1.0, (M, 1))
    vector_of_gradient = cupy.tile(0.0, N)
    weight_matrix_square = cupy.tile(0.0, (weight_matrix.shape))
    update_step = cupy.tile(0.0, (weight_matrix.shape))
    splits = 3  # 9*2  # 720
    splits_minus_one = splits - 1
    n_minus_one = N - 1
    # print(numpy.floor(N/splits))
    taker = numpy.floor(N / splits).astype(numpy.int32)
    seed = 0
    # print(taker)
    idxs = cupy.linspace(start=0, stop=taker, num=taker)  # , dtype=cupy.int32)
    for i in range(iterations):
        seed = seed + 1
        cupy.random.seed(seed)
        numpy_rand_idx_list = numpy.random.permutation(N)
        random_idx_list = cupy.array(numpy_rand_idx_list)
        # skiper = 0
        # idxs = 0
        init = 0
        ending = 0
        for j in range(splits):
            init = j * taker
            ending = (j + 1) * taker
            if j == splits_minus_one:
                ending = n_minus_one
            idxs = random_idx_list[init:ending]
            weight_matrix[cupy.abs(weight_matrix) < 0.0000001] = 0
            weight_matrix_square = weight_matrix * weight_matrix
            tensor_of_proto_vx = cupy.tensordot(
                tensor_of_x_features[idxs], weight_matrix, axes=1)
            tensor_of_proto_square = cupy.tensordot(
                tensor_of_x_features_squared[idxs], weight_matrix_square,
                axes=1)
            vector_of_prediction = cupy.tensordot(
                ((tensor_of_proto_vx * tensor_of_proto_vx) -
                 tensor_of_proto_square),
                vector_of_sum, axes=1).sum(axis=1) * 0.5
            b = train_Y[idxs] - vector_of_prediction
            # print(cupy.abs(b.mean()))
            vector_of_gradient = -2 * b
            vrau = cupy.tensordot(tensor_of_x_squared[idxs], weight_matrix,
                                  axes=1)
            update_step = ((vector_of_gradient.T * vrau.T).T).sum(axis=0) + \
                weight_matrix_square * regularization
            # ADAGRAD UPDATE
            historical_gradient += update_step * update_step
            weight_matrix -= alpha / (
                cupy.sqrt(historical_gradient)) * update_step  # +0.000001
    return weight_matrix
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]
    '''
    Original implementation:
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')
        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl
        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'
    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    '''
    # modified backward calculation
    # calculate ge1
    gy_cube = gy.reshape(len(gy), time_span, -1).astype(dtype=gy.dtype,
                                                        copy=False)
    gy_tile = cupy.tile(gy_cube, (1, 1, len(e1[0]))).astype(dtype=gy.dtype,
                                                            copy=False)
    e2_cube = e2.reshape(len(e2), time_span, -1).astype(dtype=gy.dtype,
                                                        copy=False)
    ge1_cube = gy_tile * e2_cube
    ge1_sum = cupy.sum(ge1_cube, axis=1).astype(dtype=gy.dtype, copy=False)
    ge1 = ge1_sum.reshape(len(gy), -1).astype(dtype=gy.dtype, copy=False)
    # calculate ge2
    e1_cube = e1.reshape(len(e1), 1, -1).astype(dtype=gy.dtype, copy=False)
    e1_tile = cupy.tile(e1_cube, (1, time_span, 1)).astype(dtype=gy.dtype,
                                                           copy=False)
    ge2_cube = e1_tile * gy_tile
    ge2 = ge2_cube.reshape(len(gy), -1).astype(dtype=gy.dtype, copy=False)
    gW = cupy.zeros((len(W), len(W[0]), len(W[0][0])), dtype=gy.dtype)
    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    return ret
def adj_all(self, nearplane, scan, probe, psi, overwrite=False, rpie=False):
    """Perform adj and adj_probe at the same time."""
    assert probe.shape[:-4] == scan.shape[:-2]
    assert psi.shape[:-2] == scan.shape[:-2], (psi.shape, scan.shape)
    assert probe.shape[-4] == 1 or probe.shape[-4] == scan.shape[-2]
    assert nearplane.shape[:-3] == scan.shape[:-1], (nearplane.shape,
                                                     scan.shape)

    patches = self.patch.fwd(
        # Could be xp.empty if scan positions are all in bounds
        patches=self.xp.zeros(
            (*scan.shape[:-2], scan.shape[-2] * nearplane.shape[-3],
             self.probe_shape, self.probe_shape),
            dtype='complex64',
        ),
        images=psi,
        positions=scan,
        patch_width=self.probe_shape,
        nrepeat=nearplane.shape[-3],
    )
    patches = patches.reshape((*scan.shape[:-1], nearplane.shape[-3],
                               self.probe_shape, self.probe_shape))
    if rpie:
        patches_amp = self.xp.sum(
            patches * patches.conj(),
            axis=-4,
            keepdims=True,
        )
    patches = patches.conj()
    patches *= nearplane[..., self.pad:self.end, self.pad:self.end]

    if not overwrite:
        nearplane = nearplane.copy()
    nearplane[..., self.pad:self.end, self.pad:self.end] *= probe.conj()

    if rpie:
        probe_amp = probe * probe.conj()
        # TODO: Allow this kind of broadcasting inside the patch operator
        probe_amp = cp.tile(probe_amp, (scan.shape[-2], 1, 1, 1))
        probe_amp = self.patch.adj(
            patches=probe_amp.reshape(
                (*scan.shape[:-2], scan.shape[-2] * nearplane.shape[-3],
                 *nearplane.shape[-2:])),
            images=self.xp.zeros((*scan.shape[:-2], self.nz, self.n),
                                 dtype='complex64'),
            positions=scan,
            patch_width=self.probe_shape,
            nrepeat=nearplane.shape[-3],
        )

    apsi = self.patch.adj(
        patches=nearplane.reshape(
            (*scan.shape[:-2], scan.shape[-2] * nearplane.shape[-3],
             *nearplane.shape[-2:])),
        images=self.xp.zeros((*scan.shape[:-2], self.nz, self.n),
                             dtype='complex64'),
        positions=scan,
        patch_width=self.probe_shape,
        nrepeat=nearplane.shape[-3],
    )

    if rpie:
        return apsi, patches, patches_amp, probe_amp
    else:
        return apsi, patches
def backward(self, inputs, grad_outputs):
    e1 = array.as_mat(inputs[0])
    e2 = array.as_mat(inputs[1])
    W = inputs[2]
    gy = grad_outputs[0]
    print('cupy.max(gy) = ', cupy.max(gy))
    print('cupy.min(gy) = ', cupy.min(gy))
    '''
    Original implementation:
    xp = cuda.get_array_module(*inputs)
    if xp is numpy:
        gW = numpy.einsum('ij,ik,il->jkl', e1, e2, gy)
        ge1 = numpy.einsum('ik,jkl,il->ij', e2, W, gy)
        ge2 = numpy.einsum('ij,jkl,il->ik', e1, W, gy)
    else:
        kern = cuda.reduce('T in0, T in1, T in2', 'T out',
                           'in0 * in1 * in2', 'a + b', 'out = a', 0,
                           'bilinear_product')
        e1_b = e1[:, :, None, None]  # ij
        e2_b = e2[:, None, :, None]  # ik
        gy_b = gy[:, None, None, :]  # il
        W_b = W[None, :, :, :]  # jkl
        gW = kern(e1_b, e2_b, gy_b, axis=0)  # 'ij,ik,il->jkl'
        ge1 = kern(e2_b, W_b, gy_b, axis=(2, 3))  # 'ik,jkl,il->ij'
        ge2 = kern(e1_b, W_b, gy_b, axis=(1, 3))  # 'ij,jkl,il->ik'
    '''
    ge1 = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(dtype=gy.dtype,
                                                          copy=False)
    print('cupy.max(ge1) = ', cupy.max(ge1))
    print('cupy.min(ge1) = ', cupy.min(ge1))
    gy_sum = cupy.sum(gy, axis=1).reshape(len(gy), 1).astype(dtype=gy.dtype,
                                                             copy=False)
    gy_tile = cupy.tile(gy_sum, len(gy[0])).astype(dtype=gy.dtype,
                                                   copy=False)
    ge2 = (gy - gy_tile / len(gy[0])).astype(dtype=gy.dtype, copy=False)
    print('cupy.max(ge2) = ', cupy.max(ge2))
    print('cupy.min(ge2) = ', cupy.min(ge2))
    gW = cupy.zeros(len(e1[0]) * len(e2[0]) * len(e2[0])).reshape(
        len(e1[0]), len(e2[0]), len(e2[0])).astype(dtype=gy.dtype,
                                                   copy=False)
    ret = ge1.reshape(inputs[0].shape), ge2.reshape(inputs[1].shape), gW
    if len(inputs) == 6:
        V1, V2, b = inputs[3:]
        gV1 = e1.T.dot(gy)
        gV2 = e2.T.dot(gy)
        gb = gy.sum(0)
        ge1 += gy.dot(V1.T)
        ge2 += gy.dot(V2.T)
        ret += gV1, gV2, gb
    return ret
def test_spacing(self):
    f = cp.array([0, 2.0, 3.0, 4.0, 5.0, 5.0])
    f = cp.tile(f, (6, 1)) + f.reshape(-1, 1)
    x_uneven = cp.array([0.0, 0.5, 1.0, 3.0, 5.0, 7.0])
    x_even = cp.arange(6.0)

    fdx_even_ord1 = cp.tile([2.0, 1.5, 1.0, 1.0, 0.5, 0.0], (6, 1))
    fdx_even_ord2 = cp.tile([2.5, 1.5, 1.0, 1.0, 0.5, -0.5], (6, 1))
    fdx_uneven_ord1 = cp.tile([4.0, 3.0, 1.7, 0.5, 0.25, 0.0], (6, 1))
    fdx_uneven_ord2 = cp.tile([5.0, 3.0, 1.7, 0.5, 0.25, -0.25], (6, 1))

    # evenly spaced
    for edge_order, exp_res in [(1, fdx_even_ord1), (2, fdx_even_ord2)]:
        res1 = gradient(f, 1.0, axis=(0, 1), edge_order=edge_order)
        res2 = gradient(f, x_even, x_even, axis=(0, 1),
                        edge_order=edge_order)
        res3 = gradient(f, x_even, x_even, axis=None,
                        edge_order=edge_order)
        for g1, g2 in zip(res1, res2):
            assert_array_equal(g1, g2)
        for g1, g2 in zip(res2, res3):
            assert_array_equal(g1, g2)
        assert_array_almost_equal(res1[0], exp_res.T)
        assert_array_almost_equal(res1[1], exp_res)

        res1 = gradient(f, 1.0, axis=0, edge_order=edge_order)
        res2 = gradient(f, x_even, axis=0, edge_order=edge_order)
        assert res1.shape == res2.shape
        assert_array_almost_equal(res2, exp_res.T)

        res1 = gradient(f, 1.0, axis=1, edge_order=edge_order)
        res2 = gradient(f, x_even, axis=1, edge_order=edge_order)
        assert res1.shape == res2.shape
        assert_array_equal(res2, exp_res)

    # unevenly spaced
    for edge_order, exp_res in [(1, fdx_uneven_ord1),
                                (2, fdx_uneven_ord2)]:
        res1 = gradient(f, x_uneven, x_uneven, axis=(0, 1),
                        edge_order=edge_order)
        res2 = gradient(f, x_uneven, x_uneven, axis=None,
                        edge_order=edge_order)
        for g1, g2 in zip(res1, res2):
            assert_array_equal(g1, g2)
        assert_array_almost_equal(res1[0], exp_res.T)
        assert_array_almost_equal(res1[1], exp_res)

        res1 = gradient(f, x_uneven, axis=0, edge_order=edge_order)
        assert_array_almost_equal(res1, exp_res.T)

        res1 = gradient(f, x_uneven, axis=1, edge_order=edge_order)
        assert_array_almost_equal(res1, exp_res)

    # mixed
    res1 = gradient(f, x_even, x_uneven, axis=(0, 1), edge_order=1)
    res2 = gradient(f, x_uneven, x_even, axis=(1, 0), edge_order=1)
    assert_array_equal(res1[0], res2[1])
    assert_array_equal(res1[1], res2[0])
    assert_array_almost_equal(res1[0], fdx_even_ord1.T)
    assert_array_almost_equal(res1[1], fdx_uneven_ord1)

    res1 = gradient(f, x_even, x_uneven, axis=(0, 1), edge_order=2)
    res2 = gradient(f, x_uneven, x_even, axis=(1, 0), edge_order=2)
    assert_array_equal(res1[0], res2[1])
    assert_array_equal(res1[1], res2[0])
    assert_array_almost_equal(res1[0], fdx_even_ord2.T)
    assert_array_almost_equal(res1[1], fdx_uneven_ord2)
def affine_position_regularization(
    op,
    psi,
    probe,
    original,
    updated,
    max_error=None,
):
    """Regularize position updates with an affine deformation constraint.

    Assume that the true position updates are a global affine transformation
    plus some random error. The regularized positions are then a weighted
    average of the affine deformation applied to the original positions and
    the updated positions.

    The affine deformation, X, is represented as a (..., 2, 2) array such
    that updated = original @ X. X may be decomposed into scale, rotation,
    and shear operations.

    Parameters
    ----------
    original (..., N, 2)
        The original scanning positions.
    updated (..., N, 2)
        The updated scanning positions.

    Returns
    -------
    regularized (..., N, 2)
        The updated scanning positions regularized with affine deformation.
    transformation (..., 2, 2)
        The global affine transformation.

    References
    ----------
    This algorithm copied from ptychoshelves.
    """
    # Estimate the reliability of each updated position based on the content
    # of the patch of the object at that position; smooth patches are less
    # reliable than patches with interesting features. This position
    # reliability is an empirical formula based on weighting the local image
    # gradient of the object by the amount of illumination it received.
    obj_proj = op.diffraction.patch.fwd(
        images=psi / cp.max(cp.abs(psi), axis=(-1, -2), keepdims=True),
        positions=updated,
        patch_width=probe.shape[-1],
    )
    nx, ny = obj_proj.shape[-2:]
    X, Y = cp.mgrid[-ny // 2:ny // 2, -nx // 2:nx // 2]
    spatial_filter = cp.exp(-(X**16 + Y**16) / (min(nx, ny) / 2.2)**16)
    obj_proj *= spatial_filter
    dX, dY = _image_grad(op, obj_proj)

    illum = probe[..., :, 0, 0, :, :]
    illum = illum * illum.conj()
    illum = cp.tile(illum, (1, updated.shape[-2], 1, 1))
    sigma = probe.shape[-1] / 10
    total_illumination = op.diffraction.patch.adj(
        patches=illum,
        images=cp.zeros(psi.shape, dtype='complex64'),
        positions=updated,
    )
    total_illumination = op.propagation._fft2(total_illumination)
    total_illumination *= _gaussian_frequency(
        sigma=sigma,
        size=total_illumination.shape[-1],
    )
    total_illumination *= _gaussian_frequency(
        sigma=sigma,
        size=total_illumination.shape[-2],
    )[..., None]
    total_illumination = op.propagation._ifft2(total_illumination)
    illum_proj = op.diffraction.patch.fwd(
        images=total_illumination,
        positions=updated,
        patch_width=probe.shape[-1],
    )

    dX = abs(dX) * illum_proj.real * illum.real
    dY = abs(dY) * illum_proj.real * illum.real
    total_variation = np.stack(
        (
            cp.sqrt(cp.mean(dX, axis=(-1, -2))),
            cp.sqrt(cp.mean(dY, axis=(-1, -2))),
        ),
        axis=-1,
    )
    position_reliability = total_variation**4 / cp.mean(
        total_variation**4, axis=-2, keepdims=True)

    # Use weighted least squares to find the global affine transformation, X.
    # The two columns of X are independent; we solve separately so we can use
    # different weights in each direction.
    # TODO: Use homogeneous coordinates to add shifts into the model
    X = cp.empty((*updated.shape[:-2], 2, 2), dtype='float32')
    X[..., 0:1] = tike.linalg.lstsq(
        b=updated[..., 0:1],
        a=original,
        weights=position_reliability[..., 0],
    )
    X[..., 1:2] = tike.linalg.lstsq(
        b=updated[..., 1:2],
        a=original,
        weights=position_reliability[..., 1],
    )
    logger.info(f'affine position error:\n{X}')

    # TODO: Decompose X into scale, rotate, shear operations.
    # Remove non-affine and unwanted transformations
    # scale, rotate, shear = _decompose_transformation()
    # X = scale @ rotate @ shear

    # Regularize the positions based on the position reliability and distance
    # from the original positions.
    relax = 0.1
    # Constrain the probes more in flat regions
    W = relax * (1 - (position_reliability / (1 + position_reliability)))
    # Penalize positions with a large random error
    if max_error is not None:
        random_error = updated - original @ X
        W = cp.minimum(
            10 * relax,
            W + cp.maximum(0, random_error - max_error)**2 / max_error**2,
        )
    return (1 - W) * updated + W * original @ X, X
def forward_conv(A_previous, Filter, Bias, pad, stride,
                 function='identity', verbose=False):
    '''
    A forward convolution step.

    Output shape: ((x - f + 2*pad) / stride) + 1

    Parameters
    ----------
    A_previous : cp.array(examples, height, width, depth)
        Input images from the previous layer.
    Filter : cp.array(f, f, depth, number of filter)
        Filter to convolve with the input image.
    Bias : cp.array(1, 1, 1, number of filter)
        Bias for each filter.
    pad : int
        Padding edge width.
    stride : int
        Stride number.

    Returns
    -------
    Z : cp.array(examples, ((h-f+2*pad)/stride)+1, ((w-f+2*pad)/stride)+1, number of filter)
        Output layer image.
    '''
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    (f, f, n_C_prev, n_C) = Filter.shape
    mu = cp.mean(Filter)
    s = cp.std(Filter)
    Filter = (Filter - mu) / (s + 1e-5)
    n_H = int(((n_H_prev - f + 2 * pad) / stride) + 1)
    n_W = int(((n_W_prev - f + 2 * pad) / stride) + 1)
    Z = cp.zeros([m, n_H, n_W, n_C])
    A_prev_pad = cp.pad(A_previous,
                        ((0, 0), (pad, pad), (pad, pad), (0, 0)),
                        mode='constant', constant_values=(0, 0))
    # Build the gather indices for every convolution window at once
    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    k = cp.reshape(cp.repeat(cp.arange(n_C_prev), f**2), (-1, 1))
    Ztest = A_prev_pad[:, i, j, :]
    weights = cp.reshape(Filter, (f**2, n_C_prev, n_C))
    conV = cp.tensordot(weights, Ztest, ((0, 1), (1, 3)))
    Z = cp.reshape(cp.transpose(conV, (1, 2, 0)), (m, n_H, n_W, n_C)) + Bias
    Z = activation('forward', function, Z)
    if verbose:
        print("Filter :")
        print(Filter)
        print("Weights :")
        print(weights)
        print("Z :")
        print(Ztest)
        print("Conv :")
        print(conV)
        print("Result :")
        print(Z)
    '''
    Naive reference implementation:
    for i in range(m):
        a_prev_pad = A_prev_pad[i, :, :, :]
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride+f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride+f
                a_slice_prev = a_prev_pad[vert_start:vert_end,
                                          horiz_start:horiz_end, :]
                for c in range(n_C):
                    Z[i, h, w, c] = cp.squeeze(
                        cp.sum(a_slice_prev*Filter[:, :, :, c])
                        + Bias[:, :, :, c])
    '''
    return Z
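# Hedged usage sketch for forward_conv (illustrative; it assumes the module's
# `activation` helper, with 'identity' passing Z through unchanged):
# convolving two 8x8x3 images with four 3x3 filters at pad=1, stride=1 keeps
# the spatial size, since ((8 - 3 + 2*1) / 1) + 1 = 8.
import cupy as cp
imgs = cp.random.rand(2, 8, 8, 3).astype(cp.float32)
filt = cp.random.rand(3, 3, 3, 4).astype(cp.float32)
bias = cp.zeros((1, 1, 1, 4), dtype=cp.float32)
Z = forward_conv(imgs, filt, bias, pad=1, stride=1)
assert Z.shape == (2, 8, 8, 4)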
def P_generator_SL(MatingPool, Pop_Gradient, Boundary, Coding, MaxOffspring):
    N, D = MatingPool.shape
    if MaxOffspring < 1 or MaxOffspring > N:
        MaxOffspring = N
    if Coding == "Real":
        ProC = 1
        ProM = 1 / D
        DisC = 20
        DisM = 20
        Out = Pop_Gradient
        Offspring = np.zeros((N, D))
        for i in range(0, N, 2):
            flag = np.random.rand(1) > 0.5  # when > 1
            miu1 = np.random.rand(D, ) / 2
            miu2 = np.random.rand(D, ) / 2 + 0.5
            miu_temp = np.random.random((D, ))
            dictor = MatingPool[i, :] > MatingPool[i + 1, :]
            MatingPool[i][dictor], MatingPool[i + 1][dictor] = \
                MatingPool[i + 1][dictor], MatingPool[i][dictor]
            Out[i][dictor], Out[i + 1][dictor] = \
                Out[i + 1][dictor], Out[i][dictor]
            G_temp = Out[i:i + 2, :].copy()
            ##
            L = G_temp[0, :].copy()
            P = miu1.copy()
            P[L > 0] = miu2[L > 0].copy()
            P[L == 0] = miu_temp[L == 0].copy()
            miu = P.copy()
            beta = np.zeros((D, ))
            beta[miu <= 0.5] = (2 * miu[miu <= 0.5])**(1 / (DisC + 1))
            beta[miu > 0.5] = (2 - 2 * miu[miu > 0.5])**(-1 / (DisC + 1))
            beta[np.random.random((D, )) > ProC] = 1
            if flag == True:
                beta[MatingPool[i] == 0] = 1
            Offspring[i, :] = (
                (MatingPool[i, :] + MatingPool[i + 1, :]) / 2) + (np.multiply(
                    beta, (MatingPool[i, :] - MatingPool[i + 1, :]) / 2))
            ##
            L = -G_temp[0, :].copy()
            P = miu1.copy()
            P[L > 0] = miu2[L > 0].copy()
            P[L == 0] = miu_temp[L == 0].copy()
            miu = P.copy()
            beta = np.zeros((D, ))
            beta[miu <= 0.5] = (2 * miu[miu <= 0.5])**(1 / (DisC + 1))
            beta[miu > 0.5] = (2 - 2 * miu[miu > 0.5])**(-1 / (DisC + 1))
            beta[np.random.random((D, )) > ProC] = 1
            if flag == True:
                beta[MatingPool[i + 1] == 0] = 1
            Offspring[i + 1, :] = (
                (MatingPool[i, :] + MatingPool[i + 1, :]) / 2) - (np.multiply(
                    beta, (MatingPool[i, :] - MatingPool[i + 1, :]) / 2))
            Out[i][dictor], Out[i + 1][dictor] = \
                Out[i + 1][dictor], Out[i][dictor]
            #
            k1 = np.random.rand(D, ) > 0.5
            L = G_temp[0, :].copy()
            k2 = Offspring[i, :] != 0
            kl1 = np.bitwise_and(k1, L < 0)
            L = -G_temp[1, :].copy()
            k2 = Offspring[i + 1, :] != 0
            # kl2 = np.bitwise_and(np.bitwise_and(k1, L < 0), k2)
            kl2 = np.bitwise_and(k1, L < 0)
            Offspring[i][kl1], Offspring[i + 1][kl2] = \
                Offspring[i + 1][kl1], Offspring[i][kl2]
            Out[i][kl1], Out[i + 1][kl2] = Out[i + 1][kl1], Out[i][kl2]
            Offspring[i][dictor], Offspring[i + 1][dictor] = \
                Offspring[i + 1][dictor], Offspring[i][dictor]
        Offspring_temp = Offspring[:MaxOffspring, :].copy()
        Offspring = Offspring_temp
        if MaxOffspring == 1:
            MaxValue = Boundary[0, :]
            MinValue = Boundary[1, :]
        else:
            MaxValue = np.tile(Boundary[0, :], (MaxOffspring, 1))
            MinValue = np.tile(Boundary[1, :], (MaxOffspring, 1))
        #
        k = np.random.random((MaxOffspring, D))
        miu = np.random.random((MaxOffspring, D))
        Temp = np.bitwise_and(k <= ProM, miu < 0.5)
        # Offspring[Temp] = Offspring[Temp] + np.multiply(
        #     (MaxValue[Temp] - MinValue[Temp]),
        #     ((2 * miu[Temp] + np.multiply(
        #         1 - 2 * miu[Temp],
        #         (1 - (Offspring[Temp] - MinValue[Temp]) /
        #          (MaxValue[Temp] - MinValue[Temp])) ** (DisM + 1))) **
        #      (1 / (DisM + 1)) - 1))
        Offspring[Temp] = 0
        Temp = np.bitwise_and(k <= ProM, miu >= 0.5)
        # Offspring[Temp] = Offspring[Temp] + np.multiply(
        #     (MaxValue[Temp] - MinValue[Temp]),
        #     (1 - ((2 * (1 - miu[Temp])) + np.multiply(
        #         2 * (miu[Temp] - 0.5),
        #         (1 - (MaxValue[Temp] - Offspring[Temp]) /
        #          (MaxValue[Temp] - MinValue[Temp])) ** (DisM + 1))) **
        #      (1 / (DisM + 1))))
        Offspring[Temp] = 0
        Offspring[Offspring > MaxValue] = MaxValue[Offspring > MaxValue]
        Offspring[Offspring < MinValue] = MinValue[Offspring < MinValue]
    elif Coding == "Binary":
        Offspring = []
    elif Coding == "DE":
        Offspring = []
    return Offspring
def backward_conv(dZ, A_previous, Filter, Bias, pad, stride,
                  function='identity'):
    '''
    A backward convolution step.

    Parameters
    ----------
    dZ : cp.array(examples, ((h-f+2*pad)/stride)+1, ((w-f+2*pad)/stride)+1, number of filter)
        Cost derivative from the l+1 layer.
    A_previous : cp.array(examples, height, width, depth)
        Output image from the l-1 layer.
    Filter : cp.array(f, f, depth, number of filter)
        Convolutional filter.
    Bias : cp.array(1, 1, 1, number of filter)
        Bias respective to each filter.
    pad : int
        Padding parameter.
    stride : int
        Stride parameter.

    Returns
    -------
    dA : cp.array(examples, height, width, depth)
        Cost derivative from the current layer.
    dFilter : cp.array(f, f, depth, number of filter)
        Cost derivative from filter.
    dBias : cp.array(1, 1, 1, number of filter)
        Cost derivative from Bias.
    '''
    dZ = activation('backward', function, 1, dZ)
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    (f, f, n_C_prev, n_C) = Filter.shape
    (m, n_H, n_W, n_C) = dZ.shape
    dA = cp.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dFilter = cp.zeros((f, f, n_C_prev, n_C))
    dBias = cp.zeros((1, 1, 1, n_C))
    dBias = cp.sum(dZ, axis=(0, 1, 2))
    A_prev_pad = cp.pad(A_previous,
                        ((0, 0), (pad, pad), (pad, pad), (0, 0)),
                        mode='constant', constant_values=(0, 0))
    dA_prev_pad = cp.pad(dA, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
                         mode='constant', constant_values=(0, 0))
    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    Ztest = A_prev_pad[:, i, j, :]
    dZtest = cp.reshape(dZ, (m, -1, n_C))
    dFiltertest = cp.tensordot(dZtest, cp.transpose(Ztest, (1, 0, 2, 3)),
                               ((0, 1), (1, 2)))
    dFilter = cp.reshape(cp.transpose(dFiltertest, (1, 2, 0)),
                         (f, f, n_C_prev, n_C))
    dZ = cp.reshape(cp.transpose(dZ, (3, 1, 2, 0)), (n_C, -1))
    weights = cp.reshape(cp.transpose(Filter, (3, 1, 2, 0)), (n_C, -1))
    dA_prev_pad = cp.dot(weights.T, dZ)
    strPad = "same"
    if pad == 0:
        strPad = "valid"
    dA = Utils.column_to_image(dA_prev_pad,
                               (m, n_C_prev, n_H_prev, n_W_prev),
                               (f, f), stride, strPad)
    '''
    Intuitive way (really not optimized):
    for i in range(m):
        a_prev_pad = A_prev_pad[i, :, :, :]
        da_prev_pad = dA_prev_pad[i, :, :, :]
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride + f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride + f
                a_slice = a_prev_pad[vert_start:vert_end,
                                     horiz_start:horiz_end, :]
                for c in range(n_C):
                    da_prev_pad[vert_start:vert_end,
                                horiz_start:horiz_end, :] += \
                        Filter[:, :, :, c] * dZ[i, h, w, c]
                    # dFilter[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    # dBias[:,:,:,c] += dZ[i, h, w, c]
        dA[i, :, :, :] = da_prev_pad[pad:da_prev_pad.shape[0]-pad,
                                     pad:da_prev_pad.shape[1]-pad, :]
    '''
    return dA, dFilter, dBias
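# Hedged shape check for backward_conv (illustrative; it assumes the module's
# `activation` helper and Utils.column_to_image are available): the gradients
# should match the shapes of their forward-pass counterparts.
import cupy as cp
imgs = cp.random.rand(2, 8, 8, 3).astype(cp.float32)
filt = cp.random.rand(3, 3, 3, 4).astype(cp.float32)
bias = cp.zeros((1, 1, 1, 4), dtype=cp.float32)
dZ = cp.random.rand(2, 8, 8, 4).astype(cp.float32)
dA, dFilter, dBias = backward_conv(dZ, imgs, filt, bias, pad=1, stride=1)
assert dA.shape == imgs.shape and dFilter.shape == filt.shape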
def rezToPhy(ctx, dat_path=None, output_dir=None): # pull out results from kilosort's rez to either return to workspace or to # save in the appropriate format for the phy GUI to run on. If you provide # a savePath it should be a folder savePath = output_dir Path(savePath).mkdir(exist_ok=True, parents=True) ctx = checkClusters(ctx) # check clusters integrity probe = ctx.probe ir = ctx.intermediate params = ctx.params nt0 = params.nt0 # spikeTimes will be in samples, not seconds W = cp.asarray(ir.Wphy).astype(np.float32) Wrot = ir.Wrot est_contam_rate = ir.est_contam_rate good = ir.good Ths = ir.Ths st3 = cp.asarray(ir.st3_c) U = cp.asarray(ir.U_s).astype(np.float32) iNeigh = ir.iNeigh_s iNeighPC = ir.iNeighPC_s simScore = ir.simScore_s if st3.shape[1] > 4: st3 = st3[:, :4] isort = cp.argsort(st3[:, 0]) st3 = st3[isort, :] # cProj = ir.cProj_c[cp.asnumpy(isort), :] # cProjPC = ir.cProjPC_c[cp.asnumpy(isort), :, :] fs = os.listdir(savePath) for file in fs: if file.endswith('.npy'): os.remove(join(savePath, file)) if os.path.isdir(join(savePath, '.phy')): shutil.rmtree(join(savePath, '.phy')) spikeTimes = st3[:, 0].astype(cp.uint64) spikeTemplates = st3[:, 1].astype(cp.uint32) # (DEV_NOTES) if statement below seems useless due to above if statement if st3.shape[1] > 4: spikeClusters = (1 + st3[:, 4]).astype(cp.uint32) # templateFeatures = cProj templateFeatureInds = iNeigh.astype(cp.uint32) # pcFeatures = cProjPC pcFeatureInds = iNeighPC.astype(cp.uint32) whiteningMatrix = cp.asarray(Wrot) / params.scaleproc whiteningMatrixInv = cp.linalg.pinv(whiteningMatrix) amplitudes = st3[:, 2] Nchan = probe.Nchan xcoords = probe.xc ycoords = probe.yc chanMap = probe.chanMap chanMap0ind = chanMap # - 1 nt0, Nfilt = W.shape[:2] # (DEV_NOTES) 2 lines below can be combined # templates = cp.einsum('ikl,jkl->ijk', U, W).astype(cp.float32) # templates = cp.zeros((Nchan, nt0, Nfilt), dtype=np.float32, order='F') tempAmpsUnscaled = cp.zeros(Nfilt, dtype=np.float32) templates_writer = NpyWriter(join(savePath, 'templates.npy'), (Nfilt, nt0, Nchan), np.float32) for iNN in tqdm(range(Nfilt), desc="Computing templates"): t = cp.dot(U[:, iNN, :], W[:, iNN, :].T).T templates_writer.append(t) t_unw = cp.dot(t, whiteningMatrixInv) assert t_unw.ndim == 2 tempChanAmps = t_unw.max(axis=0) - t_unw.min(axis=0) tempAmpsUnscaled[iNN] = tempChanAmps.max() templates_writer.close() # templates = cp.transpose(templates, (2, 1, 0)) # now it's nTemplates x nSamples x nChannels # we include all channels so this is trivial templatesInds = cp.tile(np.arange(Nfilt), (Nchan, 1)) # here we compute the amplitude of every template... 
    # unwhiten all the templates
    # tempsUnW = cp.einsum('ijk,kl->ijl', templates, whiteningMatrixinv)
    # tempsUnW = cp.zeros(templates.shape, dtype=np.float32, order='F')
    # for t in tqdm(range(templates.shape[0]), desc="Unwhitening the templates"):
    #     tempsUnW[t, :, :] = cp.dot(templates[t, :, :], whiteningMatrixInv)

    # The amplitude on each channel is the positive peak minus the negative
    # tempChanAmps = tempsUnW.max(axis=1) - tempsUnW.min(axis=1)
    # The template amplitude is the amplitude of its largest channel
    # tempAmpsUnscaled = tempChanAmps.max(axis=1)

    # assign all spikes the amplitude of their template multiplied by their
    # scaling amplitudes
    spikeAmps = tempAmpsUnscaled[spikeTemplates] * amplitudes

    # take the average of all spike amps to get actual template amps (since
    # tempScalingAmps are equal mean for all templates)
    ta = clusterAverage(spikeTemplates, spikeAmps)
    tids = cp.unique(spikeTemplates).astype(np.int64)
    tempAmps = cp.zeros_like(tempAmpsUnscaled, order='F')
    # ta only has entries for templates that had at least one spike
    tempAmps[tids] = ta
    tempAmps = params.gain * tempAmps
    # for consistency, make first dimension template number

    save_pcs(ir.spikes_to_remove, ir.cProj, ir.cProjPC, savePath, st3, isort)

    def _save(name, arr, dtype=None):
        cp.save(join(savePath, name + '.npy'), arr.astype(dtype or arr.dtype))

    if savePath is not None:
        _save('spike_times', spikeTimes)
        _save('spike_templates', spikeTemplates, cp.uint32)
        if st3.shape[1] > 4:
            _save('spike_clusters', spikeClusters, cp.uint32)
        else:
            _save('spike_clusters', spikeTemplates, cp.uint32)
        _save('amplitudes', amplitudes)
        # _save('templates', templates)
        _save('templates_ind', templatesInds)

        chanMap0ind = chanMap0ind.astype(cp.int32)
        _save('channel_map', chanMap0ind)
        _save('channel_positions', np.c_[xcoords, ycoords])

        # _save('template_features', templateFeatures)
        # with open(join(savePath, 'template_features.npy'), 'wb') as fp:
        #     save_large_array(fp, templateFeatures)
        _save('template_feature_ind', templateFeatureInds.T)

        # _save('pc_features', pcFeatures)
        # with open(join(savePath, 'pc_features.npy'), 'wb') as fp:
        #     save_large_array(fp, pcFeatures)
        _save('pc_feature_ind', pcFeatureInds.T)

        _save('whitening_mat', whiteningMatrix)
        _save('whitening_mat_inv', whiteningMatrixInv)
        _save('thresholds', Ths)

        if 'simScore' in ir:
            similarTemplates = simScore
            _save('similar_templates', similarTemplates)

        est_contam_rate[np.isnan(est_contam_rate)] = 1
        with open(join(savePath, 'cluster_group.tsv'), 'w') as f:
            f.write('cluster_id\tgroup\n')
            for j in range(len(good)):
                if good[j]:
                    f.write('%d\tgood\n' % j)
                # else:
                #     f.write('%d\tmua\n' % j)

        with open(join(savePath, 'cluster_ContamPct.tsv'), 'w') as f:
            f.write('cluster_id\tContamPct\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, 100 * est_contam_rate[j]))

        with open(join(savePath, 'cluster_Amplitude.tsv'), 'w') as f:
            f.write('cluster_id\tAmplitude\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, tempAmps[j]))

        # make params file
        if not os.path.exists(join(savePath, 'params.py')):
            with open(join(savePath, 'params.py'), 'w') as f:
                if os.path.isabs(dat_path):
                    f.write('dat_path = "%s"\n' % dat_path)
                else:
                    f.write('dat_path = "../%s"\n' % dat_path)
                f.write('n_channels_dat = %d\n' % probe.NchanTOT)
                f.write('dtype = "int16"\n')
                f.write('offset = 0\n')
f.write('hp_filtered = False\n') f.write('sample_rate = %i\n' % params.fs) f.write('template_scaling = %.1f\n' % params.templateScaling)
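# --- Reading the exported results (illustrative). The .npy files written by
# rezToPhy follow the phy layout, so they can be inspected with plain numpy;
# the directory below is a placeholder.
def _demo_read_phy_output():
    import numpy as np
    from os.path import join
    out_dir = '/path/to/phy_output'  # placeholder
    spike_times = np.load(join(out_dir, 'spike_times.npy'))
    spike_clusters = np.load(join(out_dir, 'spike_clusters.npy'))
    print(spike_times.shape, int(spike_clusters.max()) + 1)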
def from_array(a: cp.ndarray) -> Vec3List:
    # Broadcast a length-N 1-D array into an (N, 3) matrix whose three
    # columns are identical copies of `a`, then wrap it as a Vec3List.
    vl = cp.transpose(cp.tile(a, (3, 1)))
    return Vec3List(vl)
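# --- Example (assumes the Vec3List wrapper defined elsewhere in this code):
# from_array turns a length-N 1-D array into N identical-component vectors,
# i.e. row i of the wrapped (N, 3) matrix is (a_i, a_i, a_i).
def _demo_from_array():
    scales = cp.asarray([1.0, 2.0, 3.0])
    v = from_array(scales)  # wraps [[1, 1, 1], [2, 2, 2], [3, 3, 3]]
    return v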
def compute(self):

    self.log.node("VesselSegmentation compute")

    # GETTING WIDGET INFO
    compute = self.getVal('compute')
    doHanning2 = self.getVal('Hann 2D Filter')
    Hann2_taper = self.getVal('Hann 2D Taper (%)')
    Hann2_width = self.getVal('Hann 2D Width (%)')
    Flip_image = self.getVal('Flip Image')
    doHanning1 = self.getVal('Hann 1D Filter')
    Hann1_taper = self.getVal('Hann 1D Taper (%)')
    Hann1_width = self.getVal('Hann 1D Width (%)')
    Hann1_stopVal = self.getVal('Hann 1D StopVal (%)') / 100.0
    # widget value n maps to an interpolation factor of 2**n (1, 2, 4, 8, 16)
    ZeroFill_factor_value = self.getVal('Fourier interpolation factor')
    ZeroFill_factor = 2 ** ZeroFill_factor_value

    # GETTING PORT INFO (1/2)
    # data = np.abs(self.getData('data'))  # not sure why one would take the absolute value here
    data = cp.asarray(self.getData('data'))

    if (compute and (ZeroFill_factor_value > 0 or doHanning1 or doHanning2)):
        import triggeredGASSP.gridding.Kaiser2D_utils as kaiser2D
        # doHanning1 means through-plane filtering; the data is assumed to be
        # only 3D in this case
        if doHanning1:
            # FFT to k-space
            kSpace = cp.fft.fftshift(cp.fft.ifftn(cp.fft.ifftshift(
                data, axes=(-3, -2, -1)), axes=(-3, -2, -1)),
                axes=(-3, -2, -1))
            # add Hann filters 2D in-plane and 1D thru z to smooth out
            # k-space before zero-filling
            if doHanning2:
                self.log.node('Applying 2D Hann filter (in plane)')
                # gridded k-space here
                Hann2_win = cp.asarray(
                    kaiser2D.window2(kSpace.shape[-2:], Hann2_taper,
                                     Hann2_width))
                for i_slc in range(kSpace.shape[0]):
                    kSpace[i_slc, ...] *= Hann2_win
                    self.log.node("  in slice " + str(i_slc))
            if doHanning1:
                self.log.node('Applying 1D Hann filter (thru slices)')
                # gridded k-space here
                # Hann1_width should always be 100% and stopVal > 0
                Hann1_win = cp.asarray(
                    kaiser2D.window1(kSpace.shape[-3], Hann1_taper,
                                     Hann1_width, stopVal=Hann1_stopVal))
                self.log.node("Hann1 shape: " + str(Hann1_win.shape))
                Hann1_winTile = Hann1_win[:, cp.newaxis]
                Hann1_winTile = Hann1_winTile[:, :, cp.newaxis]
                Hann1_winTile = cp.tile(
                    Hann1_winTile, (1, kSpace.shape[-2], kSpace.shape[-1]))
                self.log.node("Hann1 tiled shape: " + str(Hann1_winTile.shape))
                kSpace *= Hann1_winTile

            # zero-fill every axis (Fourier interpolation); the original
            # cp.insert-based loop started from an undefined `temp`, so pad
            # the filtered k-space directly instead
            pad_widths = []
            for ax in range(data.ndim):
                zpad_length = (ZeroFill_factor - 1) * data.shape[ax]
                zpad_before = int(zpad_length / 2.0 + 0.5)
                zpad_after = int(zpad_length / 2.0)
                pad_widths.append((zpad_before, zpad_after))
            temp = cp.pad(kSpace, pad_widths, mode='constant')

            # often the image needs to be flipped to correspond to
            # radiological orientation
            if Flip_image == 0:
                out = cp.fft.fftshift(
                    cp.fft.fftn(cp.fft.ifftshift(temp), axes=(-3, -2, -1)))
            else:
                out = cp.fft.fftshift(
                    cp.fft.ifftn(cp.fft.ifftshift(temp), axes=(-3, -2, -1)))
        else:
            # data dimensions
            nx = data.shape[-1]
            ny = data.shape[-2]
            if data.ndim == 2:
                extra_dim1 = 1
                extra_dim2 = 1
            elif data.ndim == 3:
                extra_dim1 = data.shape[-3]
                extra_dim2 = 1
            elif data.ndim == 4:
                extra_dim1 = data.shape[-3]
                extra_dim2 = data.shape[-4]
            elif data.ndim > 4:
                self.log.warn("Only up to 4 dimensions implemented yet.")
            data.shape = [extra_dim2, extra_dim1, ny, nx]

            # GPU memory limitation - loop over cardiac phases, assumed to
            # be extra_dim1
            out = np.zeros([
                extra_dim2, extra_dim1, ZeroFill_factor * ny,
                ZeroFill_factor * nx
            ], dtype=data.dtype)

            if doHanning2:
                self.log.node('Pre-calculate 2D Hann filter (in plane)')
                Hann2_win = cp.asarray(
                    kaiser2D.window2(data.shape[-2:], Hann2_taper,
                                     Hann2_width))

            for extra_idx1 in range(extra_dim1):
                # inverse FFT, in-plane only
                kSpace = cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(
                    data[:, extra_idx1, :, :], axes=(-1, -2)),
                    axes=(-1, -2)), axes=(-1, -2))
                if doHanning2:
                    self.log.node('Applying 2D Hann filter (in plane)')
                    for idx_dim2 in range(extra_dim2):
                        kSpace[idx_dim2, ...] *= Hann2_win

                # zero filling based on FFT nodes
                if ZeroFill_factor_value == 0:
                    zero_filled_kSpace = kSpace
                else:
                    zero_filled_kSpace = cp.zeros([
                        extra_dim2, ZeroFill_factor * ny,
                        ZeroFill_factor * nx
                    ], dtype=kSpace.dtype)
                    # pad each in-plane axis with its own length (the
                    # original used the nx padding for both axes, which
                    # fails for non-square matrices)
                    zpad_before_y = int((ZeroFill_factor - 1) * ny / 2.0 + 0.5)
                    zpad_after_y = int((ZeroFill_factor - 1) * ny / 2.0)
                    zpad_before_x = int((ZeroFill_factor - 1) * nx / 2.0 + 0.5)
                    zpad_after_x = int((ZeroFill_factor - 1) * nx / 2.0)
                    zero_filled_kSpace[:, zpad_before_y:-zpad_after_y,
                                       zpad_before_x:-zpad_after_x] = kSpace

                # often the image needs to be flipped to correspond to
                # radiological orientation
                if Flip_image == 0:
                    out[:, extra_idx1, :, :] = cp.asnumpy(
                        cp.fft.fftshift(cp.fft.fftn(cp.fft.ifftshift(
                            zero_filled_kSpace, axes=(-2, -1)),
                            axes=(-2, -1)), axes=(-2, -1)))
                else:
                    out[:, extra_idx1, :, :] = cp.asnumpy(
                        cp.fft.fftshift(cp.fft.ifftn(cp.fft.ifftshift(
                            zero_filled_kSpace, axes=(-2, -1)),
                            axes=(-2, -1)), axes=(-2, -1)))

        self.setData('Zero filled data', out.squeeze())
        if doHanning2 and Hann2_win is not None:
            self.setData('Hann 2D window', cp.asnumpy(Hann2_win))
        if doHanning1 and Hann1_win is not None:
            self.setData('Hann 1D window', cp.asnumpy(Hann1_win))

    return 0
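# --- Standalone sketch of the Fourier interpolation used in compute().
# Zero-filling k-space by a factor ZF interpolates the image by the same
# factor; the before/after split below mirrors the node's padding logic.
def _demo_fourier_interpolation():
    img = cp.random.randn(64, 64)
    ZF = 2
    k = cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(img)))
    zpad_length = (ZF - 1) * 64
    before, after = int(zpad_length / 2.0 + 0.5), int(zpad_length / 2.0)
    k_zf = cp.pad(k, ((before, after), (before, after)), mode='constant')
    img_zf = cp.fft.fftshift(cp.fft.fft2(cp.fft.ifftshift(k_zf)))
    print(img_zf.shape)  # (128, 128)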
def chambolleProjectionGPU(f, f_ref, mi=100, tau=0.25, tol=1e-5):
    '''
    The 2D case of the Chambolle projection algorithm. This version uses a
    reference image.

    Source
    -------
    Cywińska, Maria, Maciej Trusiak, and Krzysztof Patorski. "Automatized
    fringe pattern preprocessing using unsupervised variational image
    decomposition." Optics Express 27.16 (2019): 22542-22562.

    Parameters
    ----------
    f : cupy.ndarray
        image which is input for Chambolle
    f_ref : cupy.ndarray
        reference image: the same scene as f but without the background
        function
    mi : float
        regularization parameter that defines the separation of the energy
        between the fringes and noise components
    tau : float
        Chambolle projection step value
    tol : float
        error tolerance below which the algorithm stops

    Returns
    -------
    x_best : numpy.ndarray
        image with the background function filtered out
    it_min : int
        number of iterations needed to reach the result image
    rms_min : float
        error of the result image
    '''
    n = 1
    xi = cp.array([cp.zeros(f.shape), cp.zeros(f.shape)])
    x1 = cp.zeros(f.shape)
    x2 = cp.zeros(f.shape)
    x_best = cp.zeros(f.shape)
    rms_min_A = []
    rms_min = 1.0
    it_min = 0
    for _ in iter(int, 1):
        # projection step
        gdv = cp.array(gradient2DGPU(divergence2DGPU(xi) - f / mi))
        d = cp.sqrt(cp.power(gdv[0], 2) + cp.power(gdv[1], 2))
        d = cp.tile(d, [2, 1, 1])
        xi = cp.divide(xi + tau * gdv, 1 + tau * d)

        # reconstruction
        x2 = mi * divergence2DGPU(xi)

        # RMS error (std of the difference) against the reference image
        diff = x2 - f_ref
        rms_n = cp.sqrt(cp.var(diff.flatten()))

        # keep a window of the last 100 best-error values
        if len(rms_min_A) >= 100:
            rms_min_A.pop(0)
        rms_min_A.append(rms_min)

        if rms_n < rms_min:
            rms_diff = rms_min_A[0] - rms_min_A[-1]
            rms_local_diff = rms_min - rms_n
            # stop once both the windowed and the local improvement stall
            if rms_diff < 10 * tol and rms_local_diff < tol:
                rms_min = rms_n
                it_min = n
                break
            rms_min = rms_n
            it_min = n
        x1 = x2
        n = n + 1
        # give up if there was no improvement for 100 iterations
        if n - it_min >= 100:
            break
    x_best = x2
    return [x_best, it_min, rms_min]
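# --- Usage sketch (hypothetical; gradient2DGPU and divergence2DGPU from this
# module must be importable). The random arrays stand in for a fringe pattern
# and its background-free reference.
def _demo_chambolle_reference():
    fringes = cp.random.randn(256, 256)
    reference = cp.zeros((256, 256))
    x_best, it_min, rms_min = chambolleProjectionGPU(
        fringes, reference, mi=100, tau=0.25, tol=1e-5)
    print(it_min, float(rms_min))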
def admittanceMatrixE(self):
    shapeParams = self.shapeFunctionParameters()
    whereIsZero = cp.absolute(shapeParams) < 1e-12
    indexZero = cp.where(whereIsZero)
    isConst = indexZero[2] == 2  # 1 for const x, 0 for const y
    indicesConstX = cp.where(isConst)[0]
    indicesConstY = cp.where(~isConst)[0]
    sortedElNodeIndices = cp.sort(self.nodeisElectrode[self.isValid], axis=1)
    admittanceMatrixE = cp.zeros((self.n_pts, self.ne))
    shapeMatrix = cp.zeros((shapeParams.shape[0], shapeParams.shape[1], 2))
    integratingMatrix = cp.zeros((shapeParams.shape[0], 2))
    shapeMatrix[indicesConstY, :, 0] = shapeParams[
        indicesConstY, :, 0] + shapeParams[indicesConstY, :, 2] * self.pts[
            sortedElNodeIndices, :][indicesConstY, 1, 1][:, None]
    shapeMatrix[indicesConstY, :, 1] = shapeParams[indicesConstY, :, 1]
    shapeMatrix[indicesConstX, :, 0] = shapeParams[
        indicesConstX, :, 0] + shapeParams[indicesConstX, :, 1] * self.pts[
            sortedElNodeIndices, :][indicesConstX, 1, 0][:, None]
    shapeMatrix[indicesConstX, :, 1] = shapeParams[indicesConstX, :, 2]
    integratingMatrix[indicesConstY, 0] = self.pts[sortedElNodeIndices, :][
        indicesConstY, 1, 0] - self.pts[sortedElNodeIndices, :][indicesConstY,
                                                                0, 0]
    integratingMatrix[indicesConstY, 1] = 0.5 * (
        cp.power(self.pts[sortedElNodeIndices, :][indicesConstY, 1, 0], 2) -
        cp.power(self.pts[sortedElNodeIndices, :][indicesConstY, 0, 0], 2))
    integratingMatrix[indicesConstX, 0] = self.pts[sortedElNodeIndices, :][
        indicesConstX, 1, 1] - self.pts[sortedElNodeIndices, :][indicesConstX,
                                                                0, 1]
    integratingMatrix[indicesConstX, 1] = 0.5 * (
        cp.power(self.pts[sortedElNodeIndices, :][indicesConstX, 1, 1], 2) -
        cp.power(self.pts[sortedElNodeIndices, :][indicesConstX, 0, 1], 2))
    integrals = cp.einsum('ijk, ik -> ij', shapeMatrix, integratingMatrix)
    integrals[:] = cp.absolute(integrals)

    # map each boundary element to its electrode index
    indexElectrode = sortedElNodeIndices[:, 0] // self.n_per_el
    integrals = -integrals / self.z[indexElectrode][:, None, None]
    integrals = integrals.ravel()
    indexElectrode = cp.tile(indexElectrode, (self.n_per_el, 1)).T.ravel()
    indexNode = self.tri[self.twoFromElectrode][self.isValid].ravel()

    # sort the entries by node index so they can be packed into CSR format
    indSort = cp.argsort(indexNode)
    indexNode = indexNode[indSort]
    indexElectrode = indexElectrode[indSort]
    integrals = integrals[indSort]

    unique, counts = cp.unique(indexNode, return_counts=True)
    index_pointer = cp.zeros(self.n_pts + 1)
    sum_count = cp.cumsum(counts)
    index_pointer[unique[:] + 1] = sum_count[:]

    # forward-fill the row pointer for nodes that have no entries
    nonzeroes = cp.nonzero(index_pointer)[0]
    mask = cp.zeros(index_pointer.shape[0], dtype='b1')
    mask[nonzeroes] = True
    mask[0] = True
    zeroes = cp.where(~mask)[0]
    while (index_pointer[1:] == 0).any():
        index_pointer[zeroes] = index_pointer[zeroes - 1]

    admittanceMatrixE = sp.csr_matrix(
        (integrals, indexElectrode, index_pointer),
        shape=(self.n_pts, self.ne),
        dtype=integrals.dtype)
    adm = admittanceMatrixE.toarray()
    return adm
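# --- Toy illustration of the CSR assembly used by admittanceMatrixE,
# assuming `sp` is cupyx.scipy.sparse as above. The repeated value in the
# index pointer is exactly what the forward-fill loop produces for nodes
# with no entries (row 1 below is empty).
def _demo_csr_assembly():
    vals = cp.asarray([1.0, 2.0, 3.0])
    cols = cp.asarray([0, 1, 0], dtype=cp.int32)
    indptr = cp.asarray([0, 2, 2, 3], dtype=cp.int32)
    A = sp.csr_matrix((vals, cols, indptr), shape=(3, 2))
    print(A.toarray())  # [[1. 2.], [0. 0.], [3. 0.]]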
def learn_mapping(self, seeds, loss_type, **kwargs):
    """
    Learn the mapping function. The objective is l2 loss or hinge loss.
    An orthogonality constraint is optional.

    -Input:
        seeds: A list of two lists. seeds[0][i] and seeds[1][i] specify a
               seeding pair.
        loss_type: 'l2' or 'hinge'
               'l2':    min_R \sum_i |R * x_i - y_i|^2
               'hinge': min_R \sum_i \sum_{j!=i}
                        max{0, th_i + |R * x_i - y_i|^2 - |R * x_i - y_j|^2}
               The hinge objective is optimized by SGD; each iteration
               samples some random negative examples.
        kwargs: misc parameters, mostly for the hinge-loss minimizer
               orth: (False) whether to constrain the mapping to be orthogonal
               epochs: number of epochs in the SGD optimizer if
                     loss_type='hinge'
               seed_per_batch: number of seeds per minibatch in SGD
               lr: learning rate
               dist: the distance (cosine or squared Euclidean) in the hinge
                     loss. Cosine is suggested.
               samples: number of negative samples per seeding pair
               alpha: determines the threshold `th_i` in the following way:
                     th_i = percentile(d(R * x_i, y_j) - d(R * x_i, y_i), alpha)
               constant_th: constant threshold for all pairs. If given,
                     alpha is ignored.
               src_vocab, tgt_vocab: source and target vocabs. If given,
                     P@1 is reported during the hinge-loss minimization.
               queries: for reporting test accuracy; src_vocab and tgt_vocab
                     must be given
    -Output:
        W: linear mapping
    """
    # prepare default params
    orth = kwargs.get('orth')
    epochs = kwargs.get('epochs')
    dist = kwargs.get('dist')
    lr = kwargs.get('lr')
    s_per_b = kwargs.get('seed_per_batch')
    sample_method = kwargs.get('sample_method')
    ns = kwargs.get('samples')
    alpha = kwargs.get('alpha')
    constant_th = kwargs.get('constant_th')
    src_vocab = kwargs.get('src_vocab', None)
    tgt_vocab = kwargs.get('tgt_vocab', None)
    queries = kwargs.get('queries', None)

    seed_dict = {}
    for i, j in zip(*seeds):
        if i not in seed_dict:
            seed_dict[i] = [j]
        else:
            seed_dict[i].append(j)

    if orth:
        # closed-form Procrustes solution
        C = self.src_space[seeds[0]].T.dot(self.tgt_space[seeds[1]])
        U, _, Vh = xp.linalg.svd(C)
        self.W = U.dot(Vh)
    else:
        if not gpu:
            self.W = xp.linalg.lstsq(self.src_space[seeds[0]],
                                     self.tgt_space[seeds[1]],
                                     rcond=None)[0]
        else:
            self.W = xp.linalg.pinv(self.src_space[seeds[0]]).dot(
                self.tgt_space[seeds[1]])
    if loss_type == 'l2':
        self.loss = xp.linalg.norm(
            self.src_space[seeds[0]].dot(self.W)
            - self.tgt_space[seeds[1]]) ** 2
    elif loss_type == 'hinge':
        # SGD optimizer; each iteration samples seeds and negative samples.
        # Initialized with the l2 solution.
        self.loss = []
        dim = self.src_space.shape[1]
        total_it = 0
        th = self.determine_th(seeds, alpha, dist, constant_th)
        for ep in range(epochs):
            S = [_ for _ in zip(*(seeds + [th]))]
            shuffle(S)
            for it in range(0, len(S), s_per_b):
                i_s, i_t, th_i = zip(*S[it:min(len(S), it + s_per_b)])
                i_s, i_t, th_i = list(i_s), list(i_t), xp.array(th_i)
                B = len(i_s)
                Wx = self.src_space[i_s].dot(self.W)
                D = distance_function(Wx, self.tgt_space, dist)
                if sample_method == 'random':
                    j = [xp.random.choice(
                        [_ for _ in range(self.tgt_size)
                         if _ not in seed_dict[i_s_]], ns).tolist()
                         for i_s_ in i_s]
                elif sample_method == 'top':
                    j = []
                    for ii, i_s_ in enumerate(i_s):
                        d_ii = xp.copy(D[ii])
                        # the builtin float('inf') works for both backends
                        # (xp.float was removed from recent NumPy releases)
                        d_ii[seed_dict[i_s_]] = float('inf')
                        j.append(
                            top_k(d_ii, ns, biggest=False)[0].tolist())
                delta = D[xp.tile(range(B), (ns, 1)).T, j]\
                    - D[xp.arange(B), i_t][:, None]
                # print some diagnostics
                ell = xp.sum(xp.maximum(th_i[:, None] - delta, 0)) / B
                if gpu:
                    ell = ell.get()
                self.loss.append(ell)
                if total_it % 100 == 0:
                    if all([src_vocab, tgt_vocab, queries]):
                        P_at_1 = self.report_precision(
                            src_vocab, tgt_vocab, queries)
                        p_str = ', p@1 {}%'.format(P_at_1[0])
                    else:
                        p_str = ''
                    print("Epoch {}, 
Iter {}, loss {:.2f}".format( ep, total_it, ell) + p_str, flush=True) incur_loss = delta < th_i[:, None] n_incur = [xp.sum(xp.array(_)) for _ in incur_loss] if dist == 'sqeuc': delta_y = [xp.sum(self.tgt_space[j[_]][incur_loss[_]],\ axis=0) - self.tgt_space[i_t[_]] * n_incur[_]\ for _ in range(B)] grad = self.src_space[i_s].T.dot(xp.vstack(delta_y)) elif dist == 'cos': Wx_norm = xp.linalg.norm(Wx, ord=2, axis=1, keepdims=1) delta_y = [ xp.sum(self.tgt_space[j[_]][incur_loss[_]] / (eps+\ xp.linalg.norm(self.tgt_space[j[_]][incur_loss[_]], ord=2, axis=1, keepdims=1)), axis=0) -\ self.tgt_space[i_t[_]] /\ xp.linalg.norm(self.tgt_space[i_t[_]]) * n_incur[_] for _ in range(B) ] delta_cos = [xp.sum(delta[_][incur_loss[_]])\ for _ in range(B)] grad = (self.src_space[i_s] / Wx_norm).T.dot( xp.vstack(delta_y)) +\ (self.src_space[i_s] * xp.vstack(delta_cos)).T\ .dot(Wx/Wx_norm) if orth: # Use Cayley transform to maintain orthogonality A = grad.dot(self.W.T) A = A - A.T Q = xp.linalg.inv(xp.eye(dim) + lr / 2 * A).dot(xp.eye(dim) - lr / 2 * A) self.W = Q.dot(self.W) else: self.W -= lr * grad / B total_it += 1 return self.W
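# --- Usage sketch (hypothetical; `model` stands for an instance of the class
# defining learn_mapping, with src_space/tgt_space as row-aligned embedding
# matrices). The keyword names match the ones read by learn_mapping above.
def _demo_learn_mapping(model):
    seeds = [[0, 1, 2], [0, 1, 2]]  # pair source row i with target row i
    W = model.learn_mapping(seeds, 'hinge', orth=False, epochs=5, lr=0.1,
                            dist='cos', seed_per_batch=2, samples=10,
                            sample_method='top', alpha=10, constant_th=None)
    return W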
def backward(self, error):
    # Backward pass of an op whose forward collapsed `axis` to size 1
    # (e.g. a keepdims reduction): tile the upstream error along that axis
    # to restore x's full shape, then accumulate it into x's gradient.
    x, axis = self.cache
    shape = [1] * len(x.value.shape)
    shape[axis] = x.value.shape[axis]
    x.accumulate(np.tile(error, shape))
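# --- Toy check of the tile-based broadcast in backward() (assumes np is
# numpy). If the forward op reduced x over `axis` while keeping that axis as
# size 1, tiling the upstream error by x's length on that axis restores x's
# full shape before gradient accumulation.
def _demo_tile_backward():
    import numpy as np
    error = np.ones((2, 1))       # upstream gradient of a keepdims reduction over axis=1
    shape = [1, 3]                # what backward() builds for axis=1, x of shape (2, 3)
    print(np.tile(error, shape))  # (2, 3) array of ones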