def ssb_kernel(processed4D, real_calibration, aperture, voltage):
    """Compute the left and right single-side-band images on the GPU.

    Parameters
    ----------
    processed4D : array
        4D dataset; its first two axes are used as the scan grid for the
        Fourier frequency axes.
    real_calibration : float
        Sample spacing handed to ``np.fft.fftfreq`` for both scan axes.
    aperture : float
        Divided by the wavelength to obtain the frequency cutoff.
    voltage : float
        Passed to ``e_lambda`` to obtain the wavelength.

    Returns
    -------
    left_image, right_image : numpy.ndarray
        Inverse 2D FFTs of the summed left-lobe and right-lobe data, copied
        back to host memory.
    """
    data_size = processed4D.shape
    wavelength = e_lambda(voltage)
    cutoff = aperture / wavelength
    four_y = np.fft.fftshift(np.fft.fftfreq(data_size[0], real_calibration))
    four_x = np.fft.fftshift(np.fft.fftfreq(data_size[1], real_calibration))
    Four_Y, Four_X = np.meshgrid(four_y, four_x)
    FourXY = np.sqrt((Four_Y ** 2) + (Four_X ** 2))
    Left_Lobe = np.zeros(data_size, dtype=bool)
    RightLobe = np.zeros_like(Left_Lobe)

    # convert to CuPy arrays
    Four_Y = cp.asarray(Four_Y)
    Four_X = cp.asarray(Four_X)
    FourXY = cp.asarray(FourXY)
    Left_Lobe = cp.asarray(Left_Lobe)
    RightLobe = cp.asarray(RightLobe)
    rsize = cp.asarray((data_size[0], data_size[1]), dtype=int)

    # pass to JIT kernel
    # presumably fills Left_Lobe / RightLobe in place -- confirm against lobe_calc
    lobe_calc(Left_Lobe, RightLobe, Four_Y, Four_X, FourXY, rsize, cutoff)

    data_phase = phase_cupy(processed4D)
    data_ampli = ampli_cupy(processed4D)

    # NOTE(review): boolean-mask indexing with a full-shape mask flattens the
    # selection to 1-D before the sum/ifft2 -- verify this is the intended shape.
    left_trotter = cp.multiply(data_ampli[Left_Lobe],
                               cp.exp((1j) * data_phase[Left_Lobe]))
    left_image = cp.asnumpy(cp.fft.ifft2(cp.sum(left_trotter, axis=-1)))

    right_trotter = cp.multiply(data_ampli[RightLobe],
                                cp.exp((1j) * data_phase[RightLobe]))
    # The original stored this as ``rightimage`` but returned ``right_image``,
    # which raised NameError on every call.
    right_image = cp.asnumpy(cp.fft.ifft2(cp.sum(right_trotter, axis=-1)))

    return left_image, right_image
def inverse_transform(self, array_in, array_out):
    """
    Inverse Fourier transform of `array_in`, written into `array_out`.

    Parameters
    ----------
    array_in, array_out: cuda device arrays or numpy arrays
        On the GPU path these should be cuda device arrays.
        On the CPU path `array_in` should be one of the two buffers
        returned by `get_buffers`.
    """
    if self.use_cuda:
        grid, block = self.dim_grid, self.dim_block
        flat_in, flat_out = self.buffer1d_in, self.buffer1d_out
        # Pack the 2D input into the 1D buffer used by the batched 1D FFT
        cuda_copy_2d_to_1d[grid, block](array_in, flat_in)
        # Inverse FFT between the two 1D buffers
        self.fft.fft(flat_in, flat_out, cufft.CUFFT_INVERSE)
        # Apply the inverse-FFT normalization in place
        cupy.multiply(flat_out, self.inv_Nz, out=flat_out)
        # Unpack the 1D result into the 2D output array
        cuda_copy_1d_to_2d[grid, block](flat_out, array_out)
        return

    if self.use_mkl:
        # CPU path backed by MKL
        self.mklfft.inverse_transform(array_in, array_out)
        return

    # CPU path backed by FFTW: rebind the plan's arrays, then execute it
    self.ifft.update_arrays(new_input_array=array_in,
                            new_output_array=array_out)
    self.ifft()
def back_prop(self, dloss_dy):
    """
    Backward pass of the layer.

    Parameters
    ----------
    dloss_dy : cp.array of floats,
        shape (number of examples,) + self.output_size
        Gradient of the loss with respect to the layer output.

    Returns
    -------
    cp.array of floats, shape (number of examples,) + self.input_size
        Gradient of the loss with respect to the layer input.
    """
    batch = dloss_dy.shape[0]
    in_rows, in_cols, _ = self.input_size
    out_rows, out_cols, channels = self.output_size
    pool_rows, pool_cols = self.pool_size

    # Insert singleton axes next to each spatial axis and broadcast them
    # against a block of ones so every output gradient is repeated over
    # its pooling window.
    window_of_ones = cp.ones((1, 1, pool_rows, 1, pool_cols, 1), dtype=cp.int8)
    spread = cp.multiply(
        dloss_dy.reshape((batch, out_rows, 1, out_cols, 1, channels)),
        window_of_ones)
    spread = spread.reshape((batch, in_rows, in_cols, channels))

    # Mask with the values cached in self.i_cache.
    return cp.multiply(spread, self.i_cache)
def _derivativenorm(self):
    """Compute the derivative of the norm

    Returns
    -------
    derivative : array, shape (m_parameters,)
        Built with ``cp.zeros`` and returned after a reshape.
    """
    n, d, D = self.n_features, self.d, self.D
    last = n - 1
    tensors = cp.reshape(self.w, (n, d, D, D))
    grad = cp.zeros((n, d, D, D))
    left = cp.zeros((n, D))
    right = cp.zeros((n, D))

    # Left-to-right accumulation of the squared tensors.
    left[0, :] = cp.sum(cp.square(tensors[0, :, 0, :]), 0)
    for site in range(1, last):
        left[site, :] = cp.dot(left[site - 1, :],
                               cp.sum(cp.square(tensors[site, :, :, :]), 0))
    left[last, :] = cp.inner(left[last - 1, :],
                             cp.sum(cp.square(tensors[last, :, :, 0]), 0))

    # Right-to-left accumulation; row 0 is overwritten after the loop.
    right[last, :] = cp.sum(cp.square(tensors[last, :, :, 0]), 0)
    for site in range(last - 1, -1, -1):
        right[site, :] = cp.dot(cp.sum(cp.square(tensors[site, :, :, :]), 0),
                                right[site + 1, :])
    right[0, :] = cp.inner(cp.sum(cp.square(tensors[0, :, 0, :]), 0),
                           right[1, :])

    # Boundary sites: the per-j assignments are written as one broadcast
    # over the physical index.
    grad[0, :, 0, :] = cp.multiply(right[1, :], 2 * (tensors[0, :, 0, :]))
    grad[last, :, :, 0] = cp.multiply(left[last - 1, :],
                                      2 * (tensors[last, :, :, 0]))

    # Interior sites.
    for site in range(1, last):
        environment = cp.outer(left[site - 1, :], right[site + 1, :])
        grad[site, :, :, :] = cp.multiply(environment,
                                          2 * (tensors[site, :, :, :]))

    return grad.reshape(self.m_parameters)
def get_flat_dpc(data4D_flat, chunks=8, centered=True):
    """Centre-of-mass shift of every pattern in a flattened 4D dataset.

    The patterns are pushed to the GPU in ``chunks`` slices along the first
    axis and the results are collected in host arrays.

    Parameters
    ----------
    data4D_flat : numpy.ndarray
        Array indexed as (pattern, y, x).
    chunks : int, optional
        Number of slices the first axis is split into for GPU processing.
    centered : bool, optional
        If True the reference centre is half the pattern size on each axis.
        Otherwise it is taken from ``st.util.sobel_circle`` applied to the
        median pattern.

    Returns
    -------
    YCom_CPU, XCom_CPU : numpy.ndarray
        Centre of mass minus the reference centre along y and x, one value
        per pattern.
    """
    n_patterns = data4D_flat.shape[0]

    # Integer arithmetic always yields exactly ``chunks + 1`` boundaries;
    # the previous float-step ``np.arange`` could produce one element too
    # many, and relied on ``np.int`` which NumPy 1.24 removed.
    stops = (np.arange(chunks + 1) * n_patterns) // chunks

    if centered:
        cent_x = cp.asarray(data4D_flat.shape[2]) / 2
        cent_y = cp.asarray(data4D_flat.shape[1]) / 2
    else:
        CentralDisk = np.median(data4D_flat, axis=0)
        cent_x, cent_y, _ = st.util.sobel_circle(CentralDisk)
        cent_x = cp.asarray(cent_x)
        cent_y = cp.asarray(cent_y)

    yy, xx = cp.mgrid[0:data4D_flat.shape[1], 0:data4D_flat.shape[2]]
    FlatSum = cp.asarray(np.sum(data4D_flat, axis=(-1, -2)))

    # Shifts are fractional and may be negative, so an integer input dtype
    # must not be reused for the output (it would truncate or wrap).
    if np.issubdtype(data4D_flat.dtype, np.inexact):
        out_dtype = data4D_flat.dtype
    else:
        out_dtype = np.float64
    YCom_CPU = np.zeros(n_patterns, dtype=out_dtype)
    XCom_CPU = np.zeros(n_patterns, dtype=out_dtype)

    for ii in range(chunks):
        startval = int(stops[ii])
        stop_val = int(stops[ii + 1])
        gpu_4Dchunk = cp.asarray(data4D_flat[startval:stop_val, :, :])
        FlatY = cp.multiply(gpu_4Dchunk, yy)
        FlatX = cp.multiply(gpu_4Dchunk, xx)
        YCom = (cp.sum(FlatY, axis=(-1, -2)) / FlatSum[startval:stop_val]) - cent_y
        XCom = (cp.sum(FlatX, axis=(-1, -2)) / FlatSum[startval:stop_val]) - cent_x
        YCom_CPU[startval:stop_val] = cp.asnumpy(YCom)
        XCom_CPU[startval:stop_val] = cp.asnumpy(XCom)

    return YCom_CPU, XCom_CPU
def backward_pass(x, y, output, hidden_output, W_output):
    """Backpropagate through a two-layer sigmoid network.

    Returns the tuple ``(W_output_c, W_hidden_c, B_hidden_c, B_output_c)``:
    the weight changes for the output and hidden layers followed by the
    bias changes for the hidden and output layers.
    """
    # Output layer: error scaled by the sigmoid derivative.
    delta_out = cp.multiply(-(y - output), output * (1 - output))

    # Hidden layer: push the output delta back through the output weights,
    # then scale by the hidden sigmoid derivative.
    back_error = cp.dot(delta_out, cp.transpose(W_output))
    delta_hidden = cp.multiply(back_error,
                               hidden_output * (1 - hidden_output))

    # Promote to 2D / row vectors so the products below yield matrices.
    inputs_2d = cp.atleast_2d(x)
    hidden_2d = cp.atleast_2d(hidden_output)
    delta_hidden = delta_hidden.reshape(1, delta_hidden.size)
    delta_out = delta_out.reshape(1, delta_out.size)

    # Weight changes.
    W_hidden_c = cp.dot(cp.transpose(inputs_2d), delta_hidden)
    W_output_c = cp.dot(cp.transpose(hidden_2d), delta_out)

    # The bias changes are the deltas themselves.
    return W_output_c, W_hidden_c, delta_hidden, delta_out
def normal(loc=0.0, scale=1.0, size=None, dtype=float):
    """Returns an array of normally distributed samples.

    Args:
        loc (float or array_like of floats): Mean of the normal distribution.
        scale (float or array_like of floats):
            Standard deviation of the normal distribution.
        size (int or tuple of ints): The shape of the array. If ``None``, a
            zero-dimensional array is generated.
        dtype: Data type specifier. Only :class:`numpy.float32` and
            :class:`numpy.float64` types are allowed.

    Returns:
        cupy.ndarray: Normally distributed samples.

    .. seealso:: :func:`numpy.random.normal`

    """
    rs = _generator.get_random_state()
    if size is None:
        # With a device-array parameter, take the shape from broadcasting
        # ``loc`` against ``scale``.
        has_array_arg = (isinstance(scale, cupy.ndarray)
                         or isinstance(loc, cupy.ndarray))
        if has_array_arg:
            size = cupy.broadcast_arrays(loc, scale)[0].shape
    # Draw standard-normal samples, then scale and shift them in place.
    samples = rs.normal(0, 1, size, dtype)
    cupy.multiply(samples, scale, out=samples)
    cupy.add(samples, loc, out=samples)
    return samples
def normal(self, loc=0.0, scale=1.0, size=None, dtype=float):
    """Returns an array of normally distributed samples.

    .. seealso::
        - :func:`cupy.random.normal` for full documentation
        - :meth:`numpy.random.RandomState.normal`

    """
    dtype = _check_and_get_dtype(dtype)
    if size is None:
        size = cupy.broadcast(loc, scale).shape
    # Single- vs double-precision cuRAND generator.
    func = (curand.generateNormal if dtype.char == 'f'
            else curand.generateNormalDouble)

    if isinstance(scale, cupy.ndarray):
        # Array scale: draw N(0, 1), then scale and shift on the device.
        samples = self._generate_normal(func, size, dtype, 0.0, 1.0)
        cupy.multiply(samples, scale, out=samples)
        cupy.add(samples, loc, out=samples)
        return samples

    if isinstance(loc, cupy.ndarray):
        # Scalar scale, array loc: draw N(0, scale), then shift.
        samples = self._generate_normal(func, size, dtype, 0.0, scale)
        cupy.add(samples, loc, out=samples)
        return samples

    # Both scalar: the generator handles loc and scale directly.
    return self._generate_normal(func, size, dtype, loc, scale)
def calculate_loss_modified(prediction, y):
    """Binary cross-entropy summed over all elements, guarded against log(0).

    Exact zeros in ``prediction``, in ``y`` and in ``1 - prediction`` are
    replaced by ``1e-8`` before the logarithms are taken. The replacement is
    done on copies, so the caller's arrays are left untouched.

    Args:
        prediction (cupy.ndarray): Predicted values.
        y (cupy.ndarray): Target values.

    Returns:
        cupy.ndarray: Zero-dimensional array holding the negated sum.
    """
    eps = 0.00000001

    # Work on copies: the original wrote the epsilon into the caller's
    # ``prediction`` and ``y`` arrays as a hidden side effect.
    prediction = prediction.copy()
    prediction[prediction == 0] = eps
    y = y.copy()
    y[y == 0] = eps

    ones = cp.ones(y.shape)
    complement = ones - prediction
    # A prediction of exactly 1 used to give log(0) = -inf, and with y == 1
    # the product 0 * -inf turned the whole loss into NaN.
    complement[complement == 0] = eps

    return -cp.sum(
        cp.multiply(y, cp.log(prediction))
        + cp.multiply(ones - y, cp.log(complement)))
def __call__(self, params, g_params):
    """Return the updated parameters as a tuple and remember ``params``.

    Each new parameter is
    ``param + momentum * (param - v) - rate * g_param`` where ``v`` is the
    matching entry of ``self.v``. Afterwards ``self.v`` is set to the
    ``params`` that were passed in.
    """
    updated = []
    for param, g_param, v in zip(params, g_params, self.v):
        inertia = cp.multiply(self.momentum, cp.subtract(param, v))
        descent = cp.multiply(self.rate, g_param)
        updated.append(cp.add(param, cp.subtract(inertia, descent)))
    self.v = params
    return tuple(updated)
def __iteration(self, gamma: cp.float64):
    """
    One inner-loop iteration of the (iterated) Tikhonov method.

    Solves ``(KHK + gamma * identity) x = q_estimator + gamma * previous``
    through an LU factorization and stores the solution in ``self.current``.

    :param gamma: Regularization parameter of the Tikhonov algorithm.
    :type gamma: float
    """
    system = cp.add(self.KHK, cp.multiply(gamma, self.identity))
    lu, pivots = linalg.lu_factor(system)
    rhs = cp.add(self.q_estimator, cp.multiply(gamma, self.previous))
    self.current = linalg.lu_solve((lu, pivots), rhs)
def gpu_gaussian(self, a, b, s):
    """Gaussian kernel matrix between the rows of ``a`` and ``b``.

    Builds ``|a_i|^2 - 2 a_i . b_j + |b_j|^2`` in a single buffer of shape
    ``(a.shape[0], b.shape[0])`` and returns
    ``exp(-that / (2 * s * s))``, computed in place.
    """
    n_a, n_b = a.shape[0], b.shape[0]
    kernel = cp.empty(shape=(n_a, n_b), dtype=a.dtype)

    # Cross term: -2 * a @ b.T, written straight into the buffer.
    cp.dot(a, b.T, out=kernel)
    cp.multiply(kernel, -2, out=kernel)

    # Add the squared row norms (column vector for a, row vector for b).
    a_sq = cp.power(a, 2).sum(axis=1).reshape(-1, 1)
    kernel += a_sq
    b_sq = cp.power(b, 2).sum(axis=1)
    kernel += b_sq

    # Scale and exponentiate in place.
    factor = -1 / (2 * s * s)
    cp.multiply(kernel, factor, out=kernel)
    cp.exp(kernel, out=kernel)
    return kernel
def pulse_compression(x, template, normalize=False, window=None, nfft=None):
    """
    Matched-filter the received signal (x) with the transmitted pulse
    (template) to increase range resolution and SNR.

    Parameters
    ----------
    x : ndarray
        Received signal, assume 2D array with [num_pulses, sample_per_pulse]

    template : ndarray
        Transmitted signal, assume 1D array

    normalize : bool
        Normalize transmitted signal

    window : array_like, callable, string, float, or tuple, optional
        Specifies the window applied to the signal in the Fourier
        domain.

    nfft : int, size of FFT for pulse compression. Default is number of
        samples per pulse

    Returns
    -------
    compressedIQ : ndarray
        Pulse compressed output
    """
    num_pulses, samples_per_pulse = x.shape

    if nfft is None:
        nfft = samples_per_pulse

    if window is not None:
        n_taps = len(template)
        if callable(window):
            weights = window(cp.fft.fftfreq(n_taps))
        elif not isinstance(window, cp.ndarray):
            # Anything else is handed to get_window as a window spec.
            weights = get_window(window, n_taps, False)
        elif window.shape == (n_taps, ):
            weights = window
        else:
            raise ValueError("window must have the same length as data")
        template = cp.multiply(template, weights)

    if normalize is True:
        template = cp.divide(template, cp.linalg.norm(template))

    signal_spectrum = cp.fft.fft(x, nfft)
    template_spectrum = cp.fft.fft(template, nfft)
    # Conjugated template spectrum, repeated once per pulse.
    matched_filter = cp.conj(cp.tile(template_spectrum, (num_pulses, 1)))
    return cp.fft.ifft(cp.multiply(signal_spectrum, matched_filter), nfft)
def feedforward(self, input):
    """Run one LSTM cell step and return ``(self.ct, self.ht)``.

    Stores the input and every gate activation on ``self``. Each gate
    combines its input weights (``W*``), its recurrent weights (``U*``)
    applied to ``self.prev_ht``, and its bias (``b*``).

    The three gate lines had been corrupted into ``[email protected]_ht``
    (an e-mail obfuscation artefact, not valid Python); they are restored
    to ``self.U? @ self.prev_ht`` to match the intact candidate-state line.
    """
    self.input = input
    self.ft = sigmoid(self.Wf @ input + self.Uf @ self.prev_ht + self.bf)  # forget gate
    self.it = sigmoid(self.Wi @ input + self.Ui @ self.prev_ht + self.bi)  # update gate
    self.ot = sigmoid(self.Wo @ input + self.Uo @ self.prev_ht + self.bo)  # output gate
    self.ct_bar = tanh(self.Wc @ input + self.Uc @ self.prev_ht + self.bc)

    # outputs
    self.ct = cp.multiply(self.ft, self.prev_ct) + cp.multiply(self.it, self.ct_bar)
    self.ht = cp.multiply(self.ot, tanh(self.ct))
    return self.ct, self.ht
def update_reservoir(self, u, n, Y):
    """Advance the reservoir state from step ``n`` to step ``n + 1``.

    ``u`` is the input at this time step (shape (N_u,), 3 for L63) and ``Y``
    is multiplied by the feedback weights ``self.W_fb``.
    See page 16 eqtn 18 of Lukosevicius PracticalESN for feedback info.
    """
    # Input vector with a leading constant 1.
    biased_input = sp.hstack((sp.array([1]), u))

    recurrent_drive = cp.matmul(self.W, self.x[n])
    input_drive = cp.array(sp.matmul(self.W_in, biased_input))
    feedback_drive = cp.array(sp.matmul(self.W_fb, Y))
    x_n_tilde = cp.tanh(recurrent_drive + input_drive + feedback_drive)

    # Blend the previous state with the candidate using alpha_matrix.
    leak = cp.array(self.alpha_matrix)
    retained = cp.multiply((1 - leak), cp.array(self.x[n]))
    self.x[n+1] = retained + cp.multiply(leak, x_n_tilde)
def diffuse_slime_trail():
    """Scale ``SlimeWorld.cells`` by the reduction factor, then convolve it.

    Both steps write their result back into ``SlimeWorld.cells``.
    """
    trail = SlimeWorld.cells
    # In-place scaling; "unsafe" casting lets the product be stored in the
    # array's own dtype.
    cp.multiply(trail, SlimeWorld.trail_reduction_factor,
                out=trail, casting="unsafe")
    convolve(trail, SlimeWorld.trail_kernel, output=trail)
def error_minimization(W, b, zeta, a, prev_layer, activation_func, den_activation, y, w=None, d=None, y_pred=None):
    """Compute weight/bias gradients and deltas for one layer.

    The ``'s'`` entries are derived either from the target ``y`` (when both
    ``w`` and ``d`` are None, i.e. the last layer) or from the following
    layer's weights ``w`` and deltas ``d``. The ``'d'`` entries are then
    derived from the ``'s'`` delta.

    Returns:
        list: ``[dW, dB, delta]``, three dicts keyed by ``'s'`` and ``'d'``.
    """
    dW, dB, delta = {}, {}, {}

    # A 1-D target is treated as a single-example batch.
    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
    y = cp.reshape(y, (y.shape[0], batch_size))

    if w is None and d is None:
        # Last layer: delta is activation minus target.
        error = cp.subtract(a['s'], y)
        bias_grad = (1 / batch_size) * cp.sum(error, axis=1)
        delta['s'] = cp.reshape(error, (error.shape[0], 1, error.shape[1]))
    else:
        # Hidden layer: pull the next layer's delta back through its weights.
        w = cp.array(w)
        pulled = cp.einsum('nik,kij->nj', w.T, d)
        pulled = cp.reshape(pulled, (pulled.shape[0], 1, pulled.shape[1]))
        slope = activation(str(activation_func) + '_der', zeta['s'])
        delta['s'] = cp.multiply(pulled, slope)
        bias_grad = (1 / batch_size) * cp.sum(delta['s'].squeeze(), axis=1)

    dB['s'] = cp.reshape(bias_grad, (bias_grad.shape[0], 1, 1))
    dW['s'] = (1 / batch_size) * cp.einsum('nik,kjn->nij', delta['s'], a['d'].T)

    # 'd' part: propagate the 's' delta through W['s'].
    pulled = cp.einsum('nik,kij->knj', W['s'].T, delta['s'])
    slope = activation(den_activation + '_der', zeta['d'])
    delta['d'] = cp.multiply(pulled, slope)
    bias_grad_d = (1 / batch_size) * cp.sum(delta['d'], axis=2)
    dB['d'] = cp.reshape(bias_grad_d,
                         (bias_grad_d.shape[0], bias_grad_d.shape[1], 1))
    dW['d'] = (1 / batch_size) * cp.dot(delta['d'], prev_layer.T)

    return [dW, dB, delta]
def func(a, t, params, A, function, bT, x, division):
    """Evaluate ``-(a . params[1][i] + (bT * params[0][i] * f(x[i] . A^T)) . A)``.

    ``i`` is ``int(t * (division - 1))`` and ``f`` is the ``function``
    callable.
    """
    step = int(t * (division - 1))
    linear_part = cp.dot(a, params[1][step])
    activated = function(cp.dot(x[step], A.T))
    weighted = cp.multiply(bT, cp.multiply(params[0][step], activated))
    total = cp.add(linear_part, cp.dot(weighted, A))
    return cp.multiply(-1., total)
def dyad_transform(self, grids):
    """ Experimental: compute spectrum of the dyad v_i * v_j

    The result is stored in ``self.dyad_spectrum`` as a symmetric 2x2
    arrangement of the xx, xy and yy transforms.
    """
    # Components 0 and 1 with the first and last entries of the second and
    # fourth axes dropped.
    v_x = self.arr[0, 1:-1, :, 1:-1, :]
    v_y = self.arr[1, 1:-1, :, 1:-1, :]

    k_xx = grids.fourier_transform(function=cp.multiply(v_x, v_x))
    k_xy = grids.fourier_transform(function=cp.multiply(v_x, v_y))
    k_yy = grids.fourier_transform(function=cp.multiply(v_y, v_y))

    self.dyad_spectrum = cp.array([[k_xx, k_xy], [k_xy, k_yy]])
def calcDistField(point_file, h5name, save_location):
    """Compute a minimum-distance field on the GPU and save it to HDF5.

    Reads ``'data'`` from ``h5name`` and ``'points'`` from ``point_file``.
    For every instance in ``data``, the minimum over that instance's points
    of a squared-distance expression is computed in blocks, and the result
    is written to the ``'distances'`` dataset of ``save_location``.

    Args:
        point_file (str): HDF5 file holding the ``'points'`` dataset.
        h5name (str): HDF5 file holding the ``'data'`` dataset.
        save_location (str): Output HDF5 path (overwritten).
    """
    # Context managers close the files even if a read fails; the explicit
    # read-only mode avoids depending on the h5py default.
    with h5py.File(h5name, 'r') as data_file:
        data = data_file['data'][:]
    data_dim = data.shape[0]

    with h5py.File(point_file, 'r') as ptfile:
        sample_points = ptfile['points'][:]
    sample_size = sample_points.shape[0]

    # gpu parallelization
    block_len = 8192  # number of rows handled per distance-matrix block
    memory_pool = cupy.get_default_memory_pool()
    pinned_memory_pool = cupy.get_default_pinned_memory_pool()
    distancesgpu = numpy.zeros((data_dim, data.shape[1], sample_size))
    x = cupy.asarray(sample_points)
    allpts = cupy.tile(x, (data.shape[1], 1))
    blocks = int(numpy.ceil(sample_size * data.shape[1] / block_len))
    del x
    print(blocks)

    yy = cupy.asarray(data)
    for inst in range(data_dim):
        if inst % 200 == 0:
            print(inst)
        y = yy[inst]
        xx = allpts + cupy.tile(y, (1, sample_size)).reshape(-1, 3)
        xdot = cupy.sum(cupy.multiply(xx, xx), axis=1)
        # Both depend only on the instance, so compute them once per
        # instance instead of once per block.
        ydot = cupy.sum(cupy.multiply(y, y), axis=1)
        y_t = y.transpose()
        dt = cupy.zeros((sample_size * data.shape[1], ))
        for blk in range(blocks):
            idstart = blk * block_len
            idend = (blk + 1) * block_len
            # |xx|^2 - 2 xx.y + |y|^2, using broadcasting in place of the
            # explicit tiles (same values, no temporary copies).
            dists = (xdot[idstart:idend, None]
                     - 2 * cupy.matmul(xx[idstart:idend], y_t)
                     + ydot[None, :])
            dt[idstart:idend] = cupy.amin(dists, axis=1)
            del dists
        dt = cupy.reshape(dt, (-1, sample_size))
        distancesgpu[inst] = cupy.asnumpy(dt)
        del dt
        del xx
        del xdot
        # Return cached device and pinned memory before the next instance.
        memory_pool.free_all_blocks()
        pinned_memory_pool.free_all_blocks()

    # save file
    with h5py.File(save_location, 'w') as saveh5:
        saveh5.create_dataset('distances', data=distancesgpu)
def _preprocess(self, frame):
    """Resize, channel-flip, transpose and scale ``frame`` into ``self.inp_handle``.

    The result is written in place into ``self.inp_handle``; nothing is
    returned.
    """
    frame_dev = cp.asarray(frame)

    # resize: rotate the input-handle shape by one position so it lines up
    # with the frame's axis order, then take per-axis zoom factors
    target_shape = np.roll(self.inp_handle.shape, -1)
    zoom = target_shape / frame_dev.shape
    resized = cupyx.scipy.ndimage.zoom(frame_dev, zoom, order=1,
                                       mode='opencv', grid_mode=True)

    # BGR to RGB (reverse the last axis), then HWC -> CHW
    channels_first = resized[..., ::-1].transpose(2, 0, 1)

    # normalize to [0, 1] interval
    cp.multiply(channels_first, 1 / 255., out=self.inp_handle)
def do_rmsprop(self, X, Y, update, learning_rate, **kwargs):
    """Apply one RMSprop step to ``self.params`` and return the running averages.

    Args:
        X, Y: Passed to ``self.calculate_grads``.
        update (dict): Running averages of squared gradients, keyed
            ``"w<i>"`` / ``"b<i>"``. Missing keys start from 0. Updated in
            place.
        learning_rate: Step size.
        **kwargs: Must provide ``l2_reg_param``, ``beta`` (decay rate) and
            ``epsilon`` (added inside the square root).

    Returns:
        dict: The same ``update`` dict.
    """
    layers = len(self.structure) - 1
    grads = self.calculate_grads(X, Y, kwargs["l2_reg_param"])

    # A host-side ``range`` replaces iteration over ``cp.arange``: looping
    # over a device array only to build string keys yields 0-d device
    # arrays; the resulting keys are identical.
    for ii in range(1, layers + 1):
        w_key = "w" + str(ii)
        b_key = "b" + str(ii)
        beta = kwargs["beta"]

        # Reduce each gradient once and reuse it for both the running
        # average and the parameter step.
        w_grad = cp.sum(grads[w_key], axis=0)
        b_grad = cp.sum(grads[b_key], axis=1).reshape(-1, 1)

        update[w_key] = (beta * update.get(w_key, 0)
                         + (1 - beta) * cp.square(w_grad))
        update[b_key] = (beta * update.get(b_key, 0)
                         + (1 - beta) * cp.square(b_grad))

        epsilon = kwargs["epsilon"]
        self.params[w_key] -= cp.multiply(
            learning_rate / cp.sqrt(epsilon + update[w_key]), w_grad)
        self.params[b_key] -= cp.multiply(
            learning_rate / cp.sqrt(epsilon + update[b_key]), b_grad)

    return update
def calcMSeries(t_axis, h, t_0, m_0, noiseFlag = False):
    """Integrate the magnetisation over ``t_axis`` and return it as nested lists.

    With ``noiseFlag`` set, each effective field gets three
    ``cp.random.normal(0, 1)`` draws scaled by ``thermalConst`` and the step
    is taken with ``llgsRK4Heun``; otherwise ``llgsRK`` is used without
    noise. Every step prints the time, the new magnetisation and the new
    effective field.

    Returns a list with one list of floats per entry of ``t_axis``.
    """
    def effective_field(m):
        # Field built from H_k * m[2] on the third component plus
        # demag_const * m, optionally with thermal noise added.
        field = cp.array([0, 0, float(H_k*m[2])]) + cp.multiply(demag_const, m)
        if noiseFlag:
            field = field + cp.array([thermalConst*cp.random.normal(0,1),
                                      thermalConst*cp.random.normal(0,1),
                                      thermalConst*cp.random.normal(0,1)])
        return field

    # RK4 solution with thermal noise / without thermal noise
    stepper = llgsRK4Heun if noiseFlag else llgsRK

    trajectory = []
    m_prev, t_prev = m_0, t_0
    H_eff = effective_field(m_0)
    for t in t_axis:
        print("#################################################################")
        print("step t:"+str(t))
        new_m = stepper(t_prev, m_prev, t, h, H_eff)
        print("new m:"+str(new_m))
        trajectory.append(new_m)
        m_prev, t_prev = new_m, t
        H_eff = effective_field(new_m)
        print("new H_eff:"+str(H_eff))

    # change all elements to list and float
    return [[float(component) for component in m.tolist()]
            for m in trajectory]
def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Returns the variance along an axis.

    Args:
        a (cupy.ndarray): Array to compute variance.
        axis (int): Along which axis to compute variance. The flattened array
            is used by default.
        dtype: Data type specifier.
        out (cupy.ndarray): Output array.
        ddof (int): Delta degrees of freedom; the divisor is ``N - ddof``
            clamped at zero, where ``N`` is the number of reduced items.
        keepdims (bool): If True, the axis is remained as an axis of size one.

    Returns:
        cupy.ndarray: The variance of the input array along the axis. When
        ``N - ddof <= 0`` the entries are ``inf`` (or ``nan`` where the sum
        of squares is zero), as with :func:`numpy.var`.

    .. seealso:: :func:`numpy.var`

    """
    if axis is None:
        axis = tuple(range(a.ndim))
    if not isinstance(axis, tuple):
        axis = (axis,)

    # Integer and boolean inputs are accumulated in float64.
    if dtype is None and issubclass(a.dtype.type,
                                    (numpy.integer, numpy.bool_)):
        dtype = numpy.dtype(numpy.float64)

    arrmean = mean(a, axis=axis, dtype=dtype, keepdims=True)

    x = cupy.subtract(a, arrmean, dtype=dtype)
    cupy.square(x, x)
    ret = cupy.sum(x, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
    rcount = max(_count_reduce_items(a, axis) - ddof, 0)

    # ``1.0 / 0`` raises ZeroDivisionError in Python; use an infinite scale
    # instead so the degenerate case yields inf/nan rather than an exception.
    scale = 1.0 / rcount if rcount else float('inf')
    return cupy.multiply(ret, ret.dtype.type(scale), out=ret)
def GlobalReg(X, T, sigma2, outliers):
    """Posterior sums for one Gaussian-mixture point-registration step.

    :params:
        X: (N, D) array of points.
        T: (M, D) array of points used as Gaussian centres.
        sigma2: variance of the Gaussians.
        outliers: outlier weight, expected in [0, 1).
    :return
        P1: (M,) row sums of the posterior matrix P.
        Pt1: (N,) column sums of P.
        Px: (M, D) product of the Gaussian numerators with X scaled row-wise
            by ``1 / (K1 + c)``.
    """
    N, D = X.shape
    M = T.shape[0]

    # Calculate P matrix
    # Nominator of P: Gaussian of the squared distance between every (T, X) pair
    P_num = cp.sum((X[None, :, :] - T[:, None, :])**2, axis=2)
    P_num = cp.exp(-P_num / (2 * sigma2))

    # Denominator of P
    P_den = cp.sum(P_num, axis=0)
    P_den = cp.tile(P_den, (M, 1))
    P_den[P_den == 0] = 2.220446049250313e-16  # double-precision machine epsilon

    # Outlier constant (2*pi*sigma2)^(D/2) * w/(1-w) * M/N.
    # The original wrote ``**D / 2``, which parses as ((...)**D) / 2 because
    # ``**`` binds tighter than ``/``.
    c = (((2 * cp.pi * sigma2) ** (D / 2.0))
         * (outliers / (1 - outliers)) * (M / N))
    P_den += c

    P = cp.divide(P_num, P_den)
    P1 = cp.sum(P, axis=1)
    Pt1 = cp.sum(P, axis=0)

    c1 = c * cp.ones(N)
    K1 = cp.dot(cp.transpose(P_num), cp.ones(M))
    a = cp.tile(cp.divide(1, K1 + c1).reshape(N, 1), D)
    Px = cp.dot(P_num, (cp.multiply(a, X)))

    return P1, Pt1, Px
def step(self, model, solution, old_solution, *args, **kwargs):
    """Correlation coefficient at current step.

    The coefficient between ``solution`` and ``old_solution`` is computed,
    appended once to ``self.data`` and returned.

    Args:
        model (tomomak.Model): used model. Only the length of
            ``model.detector_signal`` is read.
        solution (ndarray): supposed solution.
        old_solution (ndarray): supposed_solution at a previous iteration.
        *args: not used, but needed to be here in order to work with Solver
            properly.
        **kwargs: not used, but needed to be here in order to work with
            Solver properly.

    Returns:
        correlation coefficient, or ``np.nan`` when the normalisation term
        of ``old_solution`` is zero.
    """
    det_num = model.detector_signal.shape[0]
    det_num2 = det_num**2
    f_s = cp.sum(old_solution)
    f_new_s = cp.sum(solution)

    # Use cp.multiply throughout; the first two products previously went
    # through np.multiply while the third used cp.multiply.
    corr = det_num2 * cp.sum(cp.multiply(solution, old_solution))
    corr = corr - f_s * f_new_s

    divider = det_num2 * cp.sum(cp.multiply(solution, solution))
    tmp = f_new_s**2
    divider = cp.sqrt(divider - tmp)
    corr = corr / divider

    divider = det_num2 * cp.sum(cp.multiply(old_solution, old_solution))
    tmp = f_s**2
    divider = cp.sqrt(divider - tmp)
    if divider:
        res = corr / divider
    else:
        res = np.nan

    # Record the value once; it used to be appended twice per call, which
    # doubled the history length relative to the number of steps.
    self.data.append(res)
    return res
def kron(a, b):
    """Returns the kronecker product of two arrays.

    Args:
        a (~cupy.ndarray): The first argument.
        b (~cupy.ndarray): The second argument.

    Returns:
        ~cupy.ndarray: Output array.

    .. seealso:: :func:`numpy.kron`

    """
    if a.ndim == 0 or b.ndim == 0:
        return cupy.multiply(a, b)

    # Left-pad the lower-rank shape with ones so both shapes have the
    # larger of the two ranks.
    ndim = max(a.ndim, b.ndim)
    a_shape = (1,) * (ndim - a.ndim) + a.shape
    b_shape = (1,) * (ndim - b.ndim) + b.shape

    # Outer product laid out as a_shape + b_shape, then merged one axis per
    # pass.
    out = core.tensordot_core(
        a, b, None, a.size, b.size, 1, a_shape + b_shape)
    last_axis = ndim - 1
    for _ in range(ndim):
        out = core.concatenate_method(out, axis=last_axis)

    return out
def backward(self, dA):
    """Backward max-pooling implemented with stride tricks.

    Args:
        dA (np.array): derivative of output values

    Returns:
        np.array: derivative of input values (``self.dX``, overwritten in
        place)
    """
    if dA.ndim == 2:
        # Two-dimensional gradients are reshaped to the pooled layout.
        dA = dA.reshape(dA.shape[1], *self.dim_out[1:])

    self.dX[:, :, :, :] = 0

    out_h, out_w = self.dim_out[1], self.dim_out[2]
    step = self.stride
    base = self.X.strides

    # Windowed layout: (m, out_h, out_w, f, f, n_c)
    window_shape = (self.X.shape[0], out_h, out_w,
                    self.f, self.f, self.X.shape[-1])
    window_strides = (base[0], base[1] * step, base[2] * step,
                      base[1], base[2], base[3])

    windows = np.lib.stride_tricks.as_strided(
        self.X, shape=window_shape, strides=window_strides)

    # dangerous: this view writes straight into self.dX's memory, so the
    # shape and strides above must be right.
    grad_windows = np.lib.stride_tricks.as_strided(
        self.dX, shape=window_shape, strides=window_strides)

    # Route each output gradient to the position(s) holding the window max.
    is_max = windows == np.max(windows, axis=(-3, -2), keepdims=True)
    grad_windows += is_max * dA[:, :, :, None, None]

    return self.dX
def fit_dropout(self, epochs=1, batch_size=1, p=0.5, gamma=0.9, **args):
    """Train with momentum SGD, applying a Bernoulli mask to each input batch.

    Args:
        epochs: Number of passes over the data (converted with ``int``).
        batch_size: Minibatch size passed to ``self.iterate_minibatches``.
        p: Success probability of the binomial mask multiplied into each
            batch.
        gamma: Momentum coefficient.
        **args: Must contain ``X_train`` and ``y_train``; an optional truthy
            ``verbose`` prints the loss every ``epochs / 10`` epochs.

    Returns:
        tuple: ``(par_gpu, loss_val)`` -- the trained parameter dict and the
        per-epoch loss, evaluated on the last (unmasked) minibatch of each
        epoch.
    """
    X = args['X_train']
    y = args['y_train']
    verbose = args.get('verbose')

    # Builtin ``int`` replaces the ``cp.int`` alias, which is no longer
    # available in current releases; the epoch loop already used ``int``.
    n_epochs = int(epochs)
    loss_val = cp.zeros(n_epochs)
    par_gpu = deepcopy(self.start)
    momemtum = {var: cp.zeros_like(par_gpu[var]) for var in par_gpu.keys()}

    for i in range(n_epochs):
        for batch in self.iterate_minibatches(X, y, batch_size):
            X_batch, y_batch = batch
            Z = cp.random.binomial(1, p, size=X_batch.shape)
            X_batch_dropout = cp.multiply(X_batch, Z)
            grad_p = self.model.grad(par_gpu, X_train=X_batch_dropout,
                                     y_train=y_batch)
            for var in par_gpu.keys():
                momemtum[var] = gamma * momemtum[var] + -self.step_size * grad_p[var]
                par_gpu[var] += momemtum[var]
        loss_val[i] = self.model.negative_log_posterior(
            par_gpu, X_train=X_batch, y_train=y_batch)
        if verbose and (i % (epochs / 10) == 0):
            print('loss: {0:.4f}'.format(cp.asnumpy(loss_val[i])))

    return par_gpu, loss_val
def get_square_sampling_probas(attractivity_cells, square_ids_cells, coords_squares, dscale=1):
    """Row-normalised sampling probabilities from every square to each eligible square.

    A square is eligible when the summed attractivity of its cells is
    positive. Each entry is ``exp(-dscale * distance) * attractivity``,
    normalised so every row sums to one in L1 norm, returned as float32.
    """
    # Sum the cell attractivities per square.
    square_totals, square_ids = sum_by_group(
        values=attractivity_cells, groups=square_ids_cells)

    # Keep only squares with positive total attractivity.
    is_eligible = square_totals > 0
    eligible_ids = square_ids[is_eligible]
    eligible_totals = square_totals[is_eligible]

    # Distances from all squares to the eligible ones.
    distances = cdist(coords_squares).astype(cp.float32)[:, eligible_ids]

    probas = cp.exp(cp.multiply(distances, -dscale))
    probas *= eligible_totals[None, :]  # weight each column by its attractivity
    probas /= cp.linalg.norm(probas, ord=1, axis=1, keepdims=True)
    return probas.astype(cp.float32)
def inner(a, b):
    """Returns the inner product of two arrays.

    The sum product is taken over the last axis of each argument.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.

    Returns:
        cupy.ndarray: The inner product of ``a`` and ``b``.

    .. seealso:: :func:`numpy.inner`

    """
    if a.ndim == 0 or b.ndim == 0:
        # A zero-dimensional operand reduces to elementwise multiplication.
        return cupy.multiply(a, b)

    if a.shape[-1] != b.shape[-1]:
        raise ValueError('Axis dimension mismatch')

    # Bring the contracted (last) axis to the front of each operand.
    last_a = a.ndim - 1
    last_b = b.ndim - 1
    if last_a:
        a = cupy.rollaxis(a, last_a, 0)
    if last_b:
        b = cupy.rollaxis(b, last_b, 0)

    k = a.shape[0]
    return _tensordot_core(
        a, b, None, a.size // k, b.size // k, k, a.shape[1:] + b.shape[1:])
def train(self):
    """Run one training step on a batch pulled from the replay pool.

    Gradients are written by hand into each head's output, weighted by
    ``t.P``-derived weights, clipped to [-1, 1] and backpropagated; the
    optimizer is then updated and the per-tuple errors are merged back into
    the replay pool.

    Returns:
        None when the replay pool yields an empty batch, otherwise the mean
        of the per-tuple ``err`` values.
    """
    # clear grads
    self.q_func.zerograds()

    # pull tuples from memory pool
    batch_tuples = self.replay.pull(Config.batch_size)
    if not len(batch_tuples):
        return

    # stack inputs
    cur_x = [self.env.getX(t.state) for t in batch_tuples]
    next_x = [self.env.getX(t.next_state) for t in batch_tuples]

    # merge inputs into one array
    if Config.gpu:
        cur_x = [cupy.expand_dims(t, 0) for t in cur_x]
        cur_x = cupy.concatenate(cur_x, 0)
        next_x = [cupy.expand_dims(t, 0) for t in next_x]
        next_x = cupy.concatenate(next_x, 0)
    else:
        cur_x = np.stack(cur_x)
        next_x = np.stack(next_x)

    # get cur outputs
    cur_output = self.QFunc(self.q_func, cur_x)

    # get next outputs, NOT target
    next_output = self.QFunc(self.q_func, next_x)

    # choose next action for each output
    # (actions come from the online network; their values are read from the
    # target network output computed just below)
    next_action = [
        self.env.getBestAction(
            o.data,
            [t.next_state for t in batch_tuples]
        )
        for o in next_output  # for each head in Model
    ]

    # get next outputs, target
    next_output = self.QFunc(self.target_q_func, next_x)

    # clear err of tuples
    for t in batch_tuples:
        t.err = 0.

    # store err count
    err_count_list = [0.] * len(batch_tuples)

    # compute grad's weights from each tuple's P attribute:
    # (pool_size * P) ** -beta, normalised by the maximum
    weights = np.array([t.P for t in batch_tuples], np.float32)
    if Config.gpu:
        weights = cuda.to_gpu(weights)
    if self.replay.getPoolSize():
        weights *= self.replay.getPoolSize()
    weights = weights ** -Config.beta
    weights /= weights.max()
    # add a trailing axis so the weights broadcast across the action axis
    if Config.gpu:
        weights = cupy.expand_dims(weights, 1)
    else:
        weights = np.expand_dims(weights, 1)

    # update beta (capped at 1)
    Config.beta = min(1, Config.beta + Config.beta_add)

    # compute grad for each head
    for k in range(Config.K):
        if Config.gpu:
            cur_output[k].grad = cupy.zeros_like(cur_output[k].data)
        else:
            cur_output[k].grad = np.zeros_like(cur_output[k].data)
        # compute grad from each tuples
        for i in range(len(batch_tuples)):
            # only tuples whose mask enables head k contribute to it
            if batch_tuples[i].mask[k]:
                cur_action_value = \
                    cur_output[k].data[i][batch_tuples[i].action].tolist()
                reward = batch_tuples[i].reward
                next_action_value = \
                    next_output[k].data[i][next_action[k][i]].tolist()
                target_value = reward
                # if not empty position, not terminal state
                if batch_tuples[i].next_state.in_game:
                    target_value += Config.gamma * next_action_value
                loss = cur_action_value - target_value
                # only the taken action's entry receives a gradient
                cur_output[k].grad[i][batch_tuples[i].action] = 2 * loss
                # count err (relative error; skipped when the current value is 0)
                if cur_action_value:
                    batch_tuples[i].err += abs(loss / cur_action_value)
                    err_count_list[i] += 1
        # multiply weights with grad and clip
        if Config.gpu:
            cur_output[k].grad = cupy.multiply(
                cur_output[k].grad, weights)
            cur_output[k].grad = cupy.clip(cur_output[k].grad, -1, 1)
        else:
            cur_output[k].grad = np.multiply(
                cur_output[k].grad, weights)
            cur_output[k].grad = np.clip(cur_output[k].grad, -1, 1)
        # backward
        cur_output[k].backward()

    # adjust grads of shared (divide by the number of heads)
    for param in self.q_func.shared.params():
        param.grad /= Config.K

    # update params
    self.optimizer.update()

    # avg err over the heads that contributed to each tuple
    for i in range(len(batch_tuples)):
        if err_count_list[i] > 0:
            batch_tuples[i].err /= err_count_list[i]

    self.replay.merge(Config.alpha)

    return np.mean([t.err for t in batch_tuples])
def tensordot(a, b, axes=2):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a`` and
              the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    Raises:
        ValueError: If an input is zero-dimensional while ``axes`` selects
            dimensions, if ``axes`` is a sequence whose length is not 2, or
            if the selected axes differ in count or in size.

    .. seealso:: :func:`numpy.tensordot`

    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Fall back to the old location on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    a_ndim = a.ndim
    b_ndim = b.ndim
    if a_ndim == 0 or b_ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b)

    if isinstance(axes, Sequence):
        if len(axes) != 2:
            raise ValueError('Axes must consist of two arrays.')
        a_axes, b_axes = axes
        if numpy.isscalar(a_axes):
            a_axes = a_axes,
        if numpy.isscalar(b_axes):
            b_axes = b_axes,
    else:
        # Integer form: last ``axes`` axes of a against first ``axes`` of b.
        a_axes = tuple(six.moves.range(a_ndim - axes, a_ndim))
        b_axes = tuple(six.moves.range(axes))

    sum_ndim = len(a_axes)
    if sum_ndim != len(b_axes):
        raise ValueError('Axes length mismatch')

    for a_axis, b_axis in zip(a_axes, b_axes):
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # Make the axes non-negative
    a = _move_axes_to_head(a, [axis % a_ndim for axis in a_axes])
    b = _move_axes_to_head(b, [axis % b_ndim for axis in b_axes])

    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]

    k = internal.prod(a.shape[:sum_ndim])
    n = a.size // k
    m = b.size // k

    return _tensordot_core(a, b, None, n, m, k, ret_shape)
def _tensordot_core(a, b, out, n, m, k, ret_shape):
    """Computes ``A^T * B`` for operands viewed as 2-D matrices.

    ``a`` is reshaped to ``(k, n)`` and ``b`` to ``(k, m)``; the product is
    written through an ``(n, m)`` view of the work buffer.  Depending on
    which of ``k``, ``n`` and ``m`` equal 1, the work is done by an
    elementwise multiply or by the cuBLAS ``ger``, ``dot``, ``gemv`` or
    ``gemm`` routine.

    Args:
        a: First operand, with ``k * n`` elements.
        b: Second operand, with ``k * m`` elements.
        out: Array that receives the result, or ``None`` to allocate one.
        n (int): Number of columns of ``a`` in its ``(k, n)`` view.
        m (int): Number of columns of ``b`` in its ``(k, m)`` view.
        k (int): Length of the contracted dimension.
        ret_shape (tuple of ints): Shape used when allocating the result.

    Returns:
        ``out`` if it was given, otherwise a newly allocated array of shape
        ``ret_shape`` whose dtype is the common dtype of ``a`` and ``b``.

    Raises:
        ValueError: If exactly one of ``a`` and ``b`` is zero-sized.

    """
    ret_dtype = a.dtype.char
    if ret_dtype != b.dtype.char:
        # NOTE(review): numpy.find_common_type is deprecated since NumPy 1.25
        # and removed in NumPy 2.0 -- confirm the supported NumPy range.
        ret_dtype = numpy.find_common_type((ret_dtype, b.dtype), ()).char

    # Cast to float32 or float64
    if ret_dtype == 'f' or ret_dtype == 'd':
        dtype = ret_dtype
    else:
        dtype = numpy.find_common_type((ret_dtype, 'f'), ()).char

    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if not a.size or not b.size:
        if a.size or b.size:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    # ``out`` is the buffer the computation writes to (compute dtype);
    # ``ret`` is the array handed back to the caller.  They are the same
    # object unless a temporary of the compute dtype is required.
    if out is None:
        out = cupy.empty(ret_shape, dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype)

    # It copies the operands if needed
    if a.shape != (k, n):
        a = cupy.reshape(a, (k, n))
    if b.shape != (k, m):
        b = cupy.reshape(b, (k, m))
    c = out
    if c.shape != (n, m):
        c = c.view()
        c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    if k == 1:
        if n == 1:
            # Scalar-vector product
            cupy.multiply(a, b, c)
        elif m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays, so
            # we compute C^T = B^T * A here.
            handle = cuda.Device().cublas_handle
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            if dtype == 'f':
                ger = cublas.sger
            elif dtype == 'd':
                ger = cublas.dger
            ger(handle, m, n, 1, b.data.ptr, incb, a.data.ptr, inca,
                c.data.ptr, m)

        # Copy whenever a temporary work buffer was used.  Testing
        # ``dtype != ret_dtype`` here would drop the result when the caller's
        # ``out`` has a dtype other than the compute dtype while the compute
        # dtype equals the result dtype.
        if out is not ret:
            elementwise.copy(out, ret)
        return ret

    handle = cuda.Device().cublas_handle
    if n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            # Pick the routine before touching the handle state so that a
            # failure here cannot leave the pointer mode modified.
            if dtype == 'f':
                dot = cublas.sdot
            elif dtype == 'd':
                dot = cublas.ddot
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dot(handle, k, a.data.ptr, inca, b.data.ptr, incb,
                    c.data.ptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 0)
            b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            if dtype == 'f':
                gemv = cublas.sgemv
            elif dtype == 'd':
                gemv = cublas.dgemv
            gemv(handle, transb, m, k, 1, b.data.ptr, ldb, a.data.ptr, inca,
                 0, c.data.ptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a, 1)
        b, incb = _to_cublas_vector(b, 0)
        if transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        if dtype == 'f':
            gemv = cublas.sgemv
        elif dtype == 'd':
            gemv = cublas.dgemv
        gemv(handle, transa, n, k, 1, a.data.ptr, lda, b.data.ptr, incb, 0,
             c.data.ptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a, 0)
        b, transb, ldb = _mat_to_cublas_contiguous(b, 1)
        if dtype == 'f':
            gemm = cublas.sgemm
        elif dtype == 'd':
            gemm = cublas.dgemm
        gemm(handle, transb, transa, m, n, k, 1, b.data.ptr, ldb, a.data.ptr,
             lda, 0, c.data.ptr, m)

    # See the k == 1 exit above: copy whenever a temporary was used.
    if out is not ret:
        elementwise.copy(out, ret)
    return ret
def tensordot(a, b, axes=2, out=None):
    """Returns the tensor dot product of two arrays along specified axes.

    This is equivalent to compute dot product along the specified axes which
    are treated as one axis by reshaping.

    Args:
        a (cupy.ndarray): The first argument.
        b (cupy.ndarray): The second argument.
        axes:
            - If it is an integer, then ``axes`` axes at the last of ``a`` and
              the first of ``b`` are used.
            - If it is a pair of sequences of integers, then these two
              sequences specify the list of axes for ``a`` and ``b``. The
              corresponding axes are paired for sum-product.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: The tensor dot product of ``a`` and ``b`` along the
        axes specified by ``axes``.

    Raises:
        ValueError: If an input is zero-dimensional while ``axes`` is
            non-empty, if ``axes`` does not consist of two entries, if the
            two axis lists differ in length, if a pair of contracted axes
            differ in size, if ``out`` has the wrong size or is not
            C-contiguous, or if exactly one operand is zero-sized.
        IndexError: If an axis is out of range for its array.

    .. seealso:: :func:`numpy.tensordot`

    """
    if a.ndim == 0 or b.ndim == 0:
        if axes != 0 and axes != ((), ()):
            raise ValueError('An input is zero-dim while axes has dimensions')
        return cupy.multiply(a, b, out=out)

    # Normalize and validate ``axes`` first so that bad input is rejected
    # before any cast is performed.
    if numpy.isscalar(axes):
        axes = [list(range(a.ndim - axes, a.ndim)),
                list(range(axes))]
    else:
        axes = list(axes)
        # Check the length before indexing; otherwise a one-element ``axes``
        # surfaces as a bare IndexError from ``axes[1]``.
        if len(axes) != 2:
            raise ValueError('Axes must consist of two arrays.')
    if numpy.isscalar(axes[0]):
        axes[0] = (axes[0],)
    if numpy.isscalar(axes[1]):
        axes[1] = (axes[1],)

    if len(axes[0]) != len(axes[1]):
        raise ValueError('Axes length mismatch')
    for a_axis, b_axis in zip(*axes):
        if not (-a.ndim <= a_axis < a.ndim and
                -b.ndim <= b_axis < b.ndim):
            raise IndexError('Axis overrun')
        if a.shape[a_axis] != b.shape[b_axis]:
            raise ValueError('Axis dimension mismatch')

    # NOTE(review): numpy.find_common_type is deprecated since NumPy 1.25 and
    # removed in NumPy 2.0 -- confirm the supported NumPy range.
    ret_dtype = numpy.find_common_type([a.dtype, b.dtype], [])

    # Cast to float32 or float64
    dtype = numpy.find_common_type([a.dtype, b.dtype, 'f'], [])

    a = a.astype(dtype, copy=False)
    b = b.astype(dtype, copy=False)

    if a.dtype.type == numpy.float32:
        dot = cublas.sdot
        gemv = cublas.sgemv
        ger = cublas.sger
        gemm = cublas.sgemm
    elif a.dtype.type == numpy.float64:
        dot = cublas.ddot
        gemv = cublas.dgemv
        ger = cublas.dger
        gemm = cublas.dgemm

    # Make the axes non-negative
    axes = (tuple(axis % a.ndim for axis in axes[0]),
            tuple(axis % b.ndim for axis in axes[1]))

    sum_ndim = len(axes[0])
    a = _move_axes_to_head(a, axes[0])
    b = _move_axes_to_head(b, axes[1])

    m = internal.prod(b.shape[sum_ndim:])
    n = internal.prod(a.shape[sum_ndim:])
    ret_shape = a.shape[sum_ndim:] + b.shape[sum_ndim:]

    if out is not None:
        if out.size != internal.prod(ret_shape):
            raise ValueError('Output array has an invalid size')
        if not out.flags.c_contiguous:
            raise ValueError('Output array must be C-contiguous')

    if 0 in a.shape or 0 in b.shape:
        if 0 not in a.shape or 0 not in b.shape:
            raise ValueError('cannot dot zero-sized and non-zero-sized arrays')
        if out is None:
            return cupy.zeros(ret_shape, dtype=ret_dtype)
        else:
            out.fill(0)
            return out

    # ``out`` is the buffer the computation writes to (compute dtype);
    # ``ret`` is the array handed back to the caller.  They are the same
    # object unless a temporary of the compute dtype is required.
    if out is None:
        out = cupy.empty(ret_shape, dtype=dtype)
        if dtype == ret_dtype:
            ret = out
        else:
            ret = cupy.empty(ret_shape, dtype=ret_dtype)
    else:
        ret = out
        if out.dtype != dtype:
            out = cupy.empty(ret_shape, dtype=dtype)

    k = a.size // n

    # It copies the operands if needed
    a = a.reshape(k, n)
    b = b.reshape(k, m)
    c = out.view()
    c.shape = (n, m)

    # Be careful that cuBLAS uses the FORTRAN-order matrix representation.
    handle = cuda.Device().cublas_handle
    if k == 1:
        if n == 1 or m == 1:
            # Scalar-vector product
            cupy.multiply(a.T, b, c)
        else:
            # Outer product A^T * B
            # c is C-contiguous while cuBLAS requires F-contiguous arrays, so
            # we compute C^T = B^T * A here.
            c.fill(0)
            a, inca = _to_cublas_vector(a, 1)
            b, incb = _to_cublas_vector(b, 1)
            ger(handle, m, n, 1, b._fptr, incb, a._fptr, inca, c._fptr, m)
    elif n == 1:
        if m == 1:
            # Inner product
            a, inca = _to_cublas_vector(a, 0)
            b, incb = _to_cublas_vector(b, 0)
            mode = cublas.getPointerMode(handle)
            cublas.setPointerMode(handle,
                                  cublas.CUBLAS_POINTER_MODE_DEVICE)
            try:
                dot(handle, k, a._fptr, inca, b._fptr, incb, c._fptr)
            finally:
                cublas.setPointerMode(handle, mode)
        else:
            # Matrix-vector product B^T * A
            a, inca = _to_cublas_vector(a, 1)
            b, transb, ldb = _mat_to_cublas_contiguous(b.T)
            if transb:
                # gemv requires (m, k) as the original matrix dimensions
                # rather than the transposed dimensions.
                m, k = k, m
            gemv(handle, transb, m, k, 1, b._fptr, ldb, a._fptr, inca,
                 0, c._fptr, 1)
    elif m == 1:
        # Matrix-vector product A^T * B
        a, transa, lda = _mat_to_cublas_contiguous(a.T)
        b, incb = _to_cublas_vector(b, 1)
        # Mirror of the B^T * A branch above: the dimensions are swapped
        # when the matrix is passed to gemv as transposed, not when it is
        # passed as-is.
        if transa:
            # gemv requires (n, k) as the original matrix dimensions rather
            # than the transposed dimensions.
            n, k = k, n
        gemv(handle, transa, n, k, 1, a._fptr, lda, b._fptr, incb, 0,
             c._fptr, 1)
    else:
        # Matrix-Matrix product A^T * B
        # c is C-contiguous while cuBLAS assumes F-contiguous inputs, so we
        # compute C^T = B^T * A here.
        a, transa, lda = _mat_to_cublas_contiguous(a)
        b, transb, ldb = _mat_to_cublas_contiguous(b.T)
        gemm(handle, transb, transa, m, n, k, 1, b._fptr, ldb, a._fptr,
             lda, 0, c._fptr, m)

    # Copy whenever a temporary work buffer was used.  Testing
    # ``dtype != ret_dtype`` here would drop the result when the caller's
    # ``out`` has a dtype other than the compute dtype while the compute
    # dtype equals the result dtype.
    if out is not ret:
        elementwise.copy(out, ret)
    return ret