def back_prop(self, dloss_dy):
    """ Do backpropagation.

    Parameters
    ----------
    dloss_dy : cp.array of floats, shape (nr_examples,) + self.output_size
        Derivative of the loss with respect to output values.

    Returns
    -------
    dloss_dx : cp.array of floats, shape (nr_examples,) + self.input_size
        Derivative of the loss with respect to input values.
    """
    nr_examples = dloss_dy.shape[0]

    # Calculate derivatives.
    dloss_dz = self.ac_func_d(self.z_cache, dloss_dy)
    self.dloss_dw = (1 / nr_examples) * cp.tensordot(
        self.x_cache, dloss_dz, axes=((0, 3, 5), (0, 1, 2)))
    d_y_times_filters = cp.tensordot(self.d_y, self.weights,
                                     axes=((1, ), (2, )))
    dz_dx = cp.tensordot(self.d_x, d_y_times_filters, axes=((1, ), (3, )))
    dloss_dx = cp.tensordot(dloss_dz, dz_dx, axes=((1, 2, 3), (1, 3, 5)))
    dloss_dx = dloss_dx[:,
                        self.padding[1][0]:self.padded_size[0] - self.padding[1][1],
                        self.padding[2][0]:self.padded_size[1] - self.padding[2][1],
                        :]
    self.dloss_db = cp.average(self.dloss_dw, axis=(0, 1, 2))

    # Return derivative of loss with respect to inputs x.
    return dloss_dx

def backward(self, w, grad):
    # See /resource/cnn_bp.md for the derivation of this part.
    if w != -1:
        grad = grad @ w
    self.dz = grad * self.activation.backward()
    self.dw = np.tensordot(
        self.dz, self.input_split, axes=[(0, 2, 3), (0, 2, 3)]) / self.m
    self.db = np.mean(self.dz, axis=(0, 2, 3))
    pad_diff = 2 * (self.shape[2] - self.dz.shape[2]) * self.strides
    self.padding_layer_bp = ZeroPadding2d(pad_diff // 2, pad_diff // 2)
    self.dz = self.padding_layer_bp.forward(self.dz)
    self.dz = self.split_by_strides(self.dz,
                                    kh=self.kernel_size[0],
                                    kw=self.kernel_size[1],
                                    s=self.strides)
    # Rotate the kernel window by 180 degrees (flip both spatial axes).
    self.dz = np.flip(self.dz, axis=4)
    self.dz = np.flip(self.dz, axis=5)
    # Showing off a little here: this is the same as the tensordot used for dw,
    # but cupy does not support einsum.
    # grad = np.einsum('mcab,nmwhab->ncwh', self.w, self.dz)
    grad = np.tensordot(self.w, self.dz,
                        axes=[(0, 2, 3), (1, 4, 5)]).transpose([1, 0, 2, 3])
    if self.padding_layer is not None:
        return self.padding_layer.backward(w=-1, grad=grad)
    return -1, grad

def forward_prop(self, x):
    """ Forward propagation.

    Parameters
    ----------
    x : cp.array of floats, shape (nr_examples,) + self.input_size
        Inputs.

    Returns
    -------
    cp.array of floats, shape (nr_examples,) + self.output_size
        Outputs.
    """
    # Add padding.
    x_pad = cp.pad(x, self.padding, 'constant', constant_values=0)

    # Keep track of dimensions.
    nr_examples, _, _, k = x.shape
    m, n, c = self.output_size
    p, q = self.filter_size

    # Create x_cache and z_cache.
    x_pad_times_d_x = cp.tensordot(x_pad, self.d_x, axes=((1, ), (0, )))
    self.x_cache = cp.tensordot(x_pad_times_d_x, self.d_y,
                                axes=((1, ), (0, )))
    self.z_cache = cp.tensordot(
        self.x_cache, self.weights, axes=((1, 2, 4), (0, 1, 2))) + self.bias
    return self.ac_func(self.z_cache)

def tensordot_adjoint_0(B, G, axes, A_ndim, B_ndim):
    # The adjoint of the operator
    # A |--> np.tensordot(A, B, axes)
    if B_ndim == 0:
        return G * B

    G_axes = ocp.arange(ocp.ndim(G))
    if type(axes) is int:
        axes = max(axes, 0)
        B_axes = ocp.arange(B_ndim)
        return ocp.tensordot(G, B, [G_axes[A_ndim - axes:], B_axes[axes:]])
    elif type(axes[0]) is int:
        axes = [axes[0] % A_ndim, axes[1] % B_ndim]
        B_axes = ocp.arange(B_ndim)
        return ocp.tensordot(
            G, B, [G_axes[A_ndim - 1:], ocp.delete(B_axes, axes[1])])  # noqa: E501
    else:
        A_axes = ocp.arange(A_ndim)
        B_axes = ocp.arange(B_ndim)
        summed_axes = [
            ocp.asarray(axes[0]) % A_ndim,
            ocp.asarray(axes[1]) % B_ndim,
        ]  # noqa: E501
        other_axes = [
            ocp.delete(A_axes, summed_axes[0]),
            ocp.delete(B_axes, summed_axes[1]),  # noqa: E501
        ]
        out = ocp.tensordot(G, B, [G_axes[len(other_axes[0]):], other_axes[1]])
        perm = ocp.argsort(
            ocp.concatenate(
                (other_axes[0], summed_axes[0][ocp.argsort(summed_axes[1])])))
        return ocp.transpose(out, perm)

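# A minimal sketch (numpy used as a stand-in for ocp; shapes are illustrative)
# of the integer-axes branch above: for C = tensordot(A, B, 1), the cotangent G
# (shaped like C) is pulled back to A's shape by contracting the trailing axes
# of G with the trailing axes of B.
import numpy as np

A = np.random.rand(2, 3, 4)
B = np.random.rand(4, 5)
G = np.ones((2, 3, 5))                   # same shape as np.tensordot(A, B, 1)
dA = np.tensordot(G, B, [[2], [1]])      # G_axes[A_ndim - axes:], B_axes[axes:]
assert dA.shape == A.shape               # (2, 3, 4)
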
def softmax_d(z, dloss_dy):
    """ Derivatives with respect to inputs of softmax function, only for 1D layers.

    Parameters
    ----------
    z : cp.array of floats, shape (number of examples, number of nodes)
        z-cache.
    dloss_dy : cp.array of floats, shape (number of examples, number of nodes)
        Derivatives of loss with respect to outputs of the softmax function.

    Returns
    -------
    dloss_dz : cp.array of floats, shape (number of examples, number of nodes)
        Derivatives of loss with respect to inputs of the softmax function.
    """
    # Prepare.
    y = softmax(z)
    eye = cp.eye(y.shape[1], dtype=cp.int8)
    dloss_dz = cp.zeros(y.shape, dtype=cp.float32)

    # Iterate over training examples.
    for n in range(y.shape[0]):
        y_tdot_y = -1 * cp.tensordot(y[n], y[n], axes=0)
        y_d_kron = cp.multiply(y[n].reshape(y.shape[1], 1), eye)
        dy_dz = cp.add(y_tdot_y, y_d_kron)
        dloss_dz[n] = cp.tensordot(dloss_dy[n], dy_dz, axes=1)
    return dloss_dz

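# A small numpy sketch (hypothetical; numpy stands in for cp) of the Jacobian
# built per example above: dy_i/dz_j = y_i * (delta_ij - y_j), i.e.
# dy_dz = diag(y) - outer(y, y), and dloss_dz = dloss_dy @ dy_dz.
import numpy as np

def _softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

z = np.array([0.2, -1.0, 0.5])
y = _softmax(z)
J = np.diag(y) - np.outer(y, y)          # same as y_d_kron + y_tdot_y
dloss_dy = np.array([1.0, 0.0, 0.0])
dloss_dz = dloss_dy @ J
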
def evol(s, B, U, chi, d):
    for i_bond in [0, 1]:
        ia = np.mod(i_bond - 1, 2)
        ib = np.mod(i_bond, 2)
        ic = np.mod(i_bond + 1, 2)
        chia = B[ib].shape[1]
        chic = B[ic].shape[2]

        # Construct theta matrix and time evolution #
        theta = cp.tensordot(B[ib], B[ic], axes=(2, 1))  # i a j b
        theta = cp.tensordot(U, theta, axes=([2, 3], [0, 2]))  # ip jp a b
        theta = cp.tensordot(cp.diag(s[ia]), theta, axes=([1, 2]))  # a ip jp b
        theta = cp.reshape(cp.transpose(theta, (1, 0, 2, 3)),
                           (d * chia, d * chic))  # ip a jp b

        # Schmidt decomposition #
        X, Y, Z = cp.linalg.svd(theta, full_matrices=0)
        chi2 = np.min([cp.sum(Y > 10.**(-10)).get(), chi])

        piv = cp.zeros(len(Y), cp.bool)
        piv[(cp.argsort(Y)[::-1])[:chi2]] = True

        Y = Y[piv]
        invsq = cp.sqrt(sum(Y**2))
        X = X[:, piv]
        Z = Z[piv, :]

        # Obtain the new values for B and s #
        s[ib] = Y / invsq
        X = cp.reshape(X, (d, chia, chi2))
        X = cp.transpose(cp.tensordot(cp.diag(s[ia]**(-1)), X, axes=(1, 1)),
                         (1, 0, 2))
        B[ib] = cp.tensordot(X, cp.diag(s[ib]), axes=(2, 0))
        B[ic] = cp.transpose(cp.reshape(Z, (chi2, d, chic)), (1, 0, 2))
    return s, B

def back_prop(self, dloss_dy):
    """ Do backward propagation through the network and update the weights accordingly.

    Parameters
    ----------
    dloss_dy : cp.array of floats, shape (number of examples, number of nodes)
        Derivative of loss with respect to output values.

    Returns
    -------
    dloss_dx : cp.array of floats, shape (number of examples, number of input values)
        Derivative of loss with respect to input values.
    """
    nr_examples = dloss_dy.shape[0]

    # Calculate derivatives.
    dloss_dz = self.ac_func_d(self.z_cache, dloss_dy)
    self.dloss_dw = cp.tensordot(
        self.x_cache, dloss_dz, axes=((0, ), (0, ))) / nr_examples
    dloss_dx = cp.tensordot(dloss_dz, self.weights, axes=((1, ), (1, )))
    self.dloss_db = cp.sum(self.dloss_dw, axis=0,
                           keepdims=True) / self.input_size

    # Return derivative of loss with respect to inputs x.
    return dloss_dx

def backward(self, dA):
    """Using numpy stride tricks for the backward propagation implementation.

    Args:
        dA (np.array): gradient of output values

    Returns:
        np.array: dX, gradient of input values
    """
    if len(dA.shape) == 2:
        dZ = dA.reshape(dA.shape[1], *self.dim_out[1:]) * self._deriv_relu(self.Z)
    else:
        dZ = dA * self._deriv_relu(self.Z)
    if dZ.shape[0] != self.dZ_pad.shape[0]:
        self.dZ_pad = self._allocate_dZ_pad(dZ.shape[0])
    self.dW[:, :, :, :] = 0
    self.db[:, :, :, :] = 0
    (m, n_H_prev, n_W_prev, n_C_prev) = self.dim_in
    (f, f, n_C_prev, n_C) = self.W.shape
    stride = self.stride
    (m, n_H, n_W, n_C) = dZ.shape

    W_rot = np.rot90(self.W, 2)
    pad_dZ = self.W.shape[0] - (self.pad + 1)
    if pad_dZ == 0:
        self.dZ_pad[:, 0::stride, 0::stride] = dZ
    else:
        self.dZ_pad[:, pad_dZ:-pad_dZ:stride, pad_dZ:-pad_dZ:stride, :] = dZ

    shape = (
        self.dZ_pad.shape[0],                       # m
        self.dZ_pad.shape[1] - W_rot.shape[0] + 1,  # X_nx
        self.dZ_pad.shape[2] - W_rot.shape[1] + 1,  # X_ny
        self.dZ_pad.shape[3],                       # dZ_nc
        W_rot.shape[0],                             # f
        W_rot.shape[1])                             # f
    strides = (self.dZ_pad.strides[0], self.dZ_pad.strides[1],
               self.dZ_pad.strides[2], self.dZ_pad.strides[3],
               self.dZ_pad.strides[1], self.dZ_pad.strides[2])
    M = np.lib.stride_tricks.as_strided(
        self.dZ_pad, shape=shape, strides=strides)  # , writeable=False)
    self.dX = np.tensordot(M, W_rot, axes=((4, 5, 3), (0, 1, 3)))
    # self.dX = np.einsum('pqrs,bmnspq->bmnr', W_rot, M)

    shape_Z = (f, f, n_C_prev, m, n_H, n_W)
    strides_Z = (self.X_pad.strides)[1:] + (self.X_pad.strides)[0:3]
    strides_Z = (*strides_Z[:-2], strides_Z[-2] * stride,
                 strides_Z[-1] * stride)
    M = np.lib.stride_tricks.as_strided(
        self.X_pad, shape=shape_Z, strides=strides_Z)  # , writeable=False)
    # self.dW = np.einsum('abcd,pqsabc->pqsd', dZ, M)
    self.dW = np.tensordot(M, dZ, axes=((3, 4, 5), (0, 1, 2)))
    self.dW += self.lamb / self.dim_in[0] * self.W
    # self.db = np.einsum('abcd->d', dZ).reshape(1, 1, 1, n_C)
    self.db = np.sum(dZ, axis=(0, 1, 2)).reshape(1, 1, 1, n_C)
    return self.dX

def dot_adjoint_0(B, G, A_ndim, B_ndim):
    # The adjoint of the operator
    # A |--> np.dot(A, B)
    if B_ndim == 0 or B_ndim == 1 or A_ndim == 0:
        contract_num = max(0, B_ndim - (A_ndim != 0))
        return ocp.tensordot(G, B, contract_num)
    else:
        return ocp.tensordot(G, ocp.swapaxes(B, -1, -2), B_ndim - 1)

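# A small numpy sketch (hypothetical; numpy stands in for ocp) of what the
# adjoint computes for 2-D operands: the VJP of A |--> A @ B maps a cotangent
# G back to G @ B.T, which is the contraction taken in the else branch above.
import numpy as np

A = np.random.rand(3, 4)
B = np.random.rand(4, 5)
G = np.ones((3, 5))                               # cotangent of np.dot(A, B)
dA = np.tensordot(G, np.swapaxes(B, -1, -2), 1)   # same contraction as above
assert dA.shape == A.shape and np.allclose(dA, G @ B.T)
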
def magnetization(s, B, d):
    sz = cp.diag([Sz(conf, 0) for conf in range(0, d)])
    # sz = cp.array([[0, 1], [1, 0]])
    mag = cp.array(0., dtype=np.float32)
    for i_bond in range(2):
        sB = cp.tensordot(cp.diag(s[np.mod(i_bond - 1, 2)]), B[i_bond],
                          axes=(1, 1))
        C = cp.tensordot(sB, cp.conj(sB), axes=([0, 2], [0, 2]))
        mag += cp.real(cp.tensordot(C, sz, axes=([0, 1], [0, 1])).get())
    return mag * 0.5

def apply_unitary_transformation_to_density_matrices_cupy(
    unitary: cupy.ndarray, density_matrices: cupy.ndarray
):
    dim1, dim2 = unitary.shape
    num_states, dim3, dim4 = density_matrices.shape
    assert dim1 == dim2 == dim3 == dim4
    # unitary U[i,k] D[nwf,k,l] -> B[i,nwf,l]
    mat_b = tensordot(unitary, density_matrices, axes=[[1], [1]])
    # B[i,nwf,k] U'[k,l] -> ret[i,nwf,l]
    ret = tensordot(mat_b, transpose(conj(unitary)), axes=[[2], [0]])
    return swapaxes(ret, axis1=0, axis2=1)

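# A minimal numpy sketch (hypothetical; numpy stands in for the cupy names
# tensordot/conj/transpose/swapaxes) of the intended result: for each density
# matrix rho, the routine computes U @ rho @ U^dagger.
import numpy as np

U = np.array([[0, 1], [1, 0]], dtype=complex)      # a 2x2 unitary (Pauli-X)
rhos = np.stack([np.diag([1.0, 0.0]), np.eye(2) / 2]).astype(complex)
expected = np.stack([U @ r @ U.conj().T for r in rhos])
# equivalent contraction: U[i,k] rho[n,k,l] conj(U)[m,l] -> out[n,i,m]
out = np.einsum('ik,nkl,ml->nim', U, rhos, U.conj())
assert np.allclose(out, expected)
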
def apply_isometry_to_density_matrices_cupy(
    isometry: cupy.ndarray, density_matrices: cupy.ndarray
):
    outdim, dim1 = isometry.shape
    num_states, dim2, dim3 = density_matrices.shape
    assert dim1 == dim2 == dim3
    # isometry V[i,k] D[nwf,k,l] -> B[i,nwf,l]
    mat_b = tensordot(isometry, density_matrices, axes=[[1], [1]])
    # B[i,nwf,k] V'[k,l] -> ret[i,nwf,l]
    ret = tensordot(mat_b, transpose(conj(isometry)), axes=[[2], [0]])
    return swapaxes(ret, axis1=0, axis2=1)

def initialize(self, grids, numbers=None):
    # default argument
    if numbers is None:
        numbers = [2, 3, 4]

    # temp grid variables
    x2 = cp.tensordot(grids.x.arr_cp, cp.ones((self.y_res, self.y_ord)), axes=0)
    y2 = cp.tensordot(cp.ones((self.x_res, self.x_ord)), grids.y.arr_cp, axes=0)

    # 2D ABC flow superposition
    p = np.pi * np.random.randn(len(numbers))  # phases
    arr_x = sum([cp.cos(number * y2 + p[idx])
                 for idx, number in enumerate(numbers)])
    arr_y = sum([cp.sin(number * x2 + p[idx])
                 for idx, number in enumerate(numbers)])

    self.arr = cp.array([arr_x, arr_y])

def binary_to_decimal(X):
    """
    | This function takes :code:`X` of shape (n_images, L2, y, x) as an argument.
    | Suppose that :code:`X[k]` (0 <= k < n_images) can be represented as

    .. code-block:: none

        X[k] = [map_k[0], map_k[1], ..., map_k[L2-1]]

    where the shape of each map_k is (y, x). Then we calculate

    .. code-block:: none

        a[0] * map_k[0] + a[1] * map_k[1] + ... + a[L2-1] * map_k[L2-1]

    for each :code:`X[k]`, where :math:`a = [2^{L2-1}, 2^{L2-2}, ..., 2^{0}]`

    Therefore, the output shape must be (n_images, y, x)

    Parameters
    ----------
    X: xp.ndarray
        Feature maps
    """
    a = xp.arange(X.shape[1])[::-1]
    a = xp.power(2, a)
    return xp.tensordot(X, a, axes=([1], [0]))

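# A tiny sketch (assuming xp is numpy or cupy; here numpy is used as a
# stand-in) of the weighted sum above: with L2 = 2, the two binary maps of
# each image are combined as 2 * map_k[0] + 1 * map_k[1].
import numpy as xp

X = xp.array([[[[1, 0]],        # map_k[0], shape (1, 2)
               [[1, 1]]]])      # map_k[1], shape (1, 2); X has shape (1, 2, 1, 2)
out = binary_to_decimal(X)
# out == [[[3, 1]]]: 2*1 + 1*1 = 3 and 2*0 + 1*1 = 1
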
def forward_prop(x, local_time, sequence, isFirst, timestamp, satellite_name):
    s = cp.empty([local_time, distance_forward, channels_hidden, M, N])
    e = cp.empty([local_time, distance_forward])
    alpha = cp.empty([local_time, distance_forward])
    p = cp.empty([local_time, channels_p, M, N])

    # Hidden unit
    h = cp.empty([local_time + 1, channels_hidden, M, N])
    h[-1] = cp.zeros([channels_hidden, M, N])

    # LSTM forward propagation
    for t in np.arange(local_time):
        # Attention network
        for z in range(timestamp + t - (distance + learning_window),
                       timestamp + distance_forward + t - (distance + learning_window)):
            temp = cp.concatenate(
                (cp.asarray(satellite_images[sequence][z]), h[t - 1]), axis=0)
            s[t][z - (timestamp + t - (distance + learning_window))] = tanh(
                cp.asarray(
                    F.convolution_2d(temp.reshape(
                        1, channels_img + channels_hidden, M, N),
                        e_kernel, b=None, pad=pad_constant)[0].data) + bias_e)
            s_temp = s[t][z - (timestamp + t - (distance + learning_window))].reshape(
                M * N * channels_hidden)
            e[t][z - (timestamp + t - (distance + learning_window))] = cp.dot(
                v_connected_weights, s_temp) + bias_v[
                    z - (timestamp + t - (distance + learning_window))]

        xtemp = satellite_images[sequence][timestamp - distance:
                                           timestamp - distance + distance_forward, 0]
        alpha[t] = softmax(e[t])
        # Sum all x arrays up, weighted array
        p[t] = cp.tensordot(alpha[t], cp.asarray(xtemp), axes=1).reshape(1, M, N)

        temporary = cp.concatenate((x[t], p[t], h[t - 1]), axis=0)
        temporary = temporary.reshape(
            1, channels_img + channels_p + channels_hidden, M, N)
        h[t] = tanh(
            cp.asarray(
                F.convolution_2d(temporary, main_kernel, b=None, pad=2)[0].data)
            + bias_h)

    # 1 x 1 convolution
    output = cp.matmul(connected_weights, h[local_time - 1].reshape(
        channels_hidden, M * N)).reshape(M, N) + bias_y[0]
    true_output = rect_linear(output)

    return true_output, output, cp.reshape(
        h[local_time - 1], (channels_hidden, M * N)), p, h, s, e, alpha, xtemp

def convolve2d(in1, in2, mode='full'):
    """
    Note: only supports H * W * N * 1 2-D convolution.
    """
    in1 = in1.transpose(2, 3, 0, 1)  # to N * C * H * W
    in2 = in2.transpose(2, 3, 0, 1)
    out_c, _, kh, kw = in2.shape
    n, _, h, w = in1.shape

    if mode == 'full':
        ph, pw = kh - 1, kw - 1
        out_h, out_w = h - kh + 1 + ph * 2, w - kw + 1 + pw * 2  # TODO
    elif mode == 'valid':
        ph, pw = 0, 0
        out_h, out_w = h - kh + 1, w - kw + 1  # TODO
    else:
        raise NotImplementedError

    y = cp.empty((n, out_c, out_h, out_w), dtype=in1.dtype)

    col = im2col_gpu(in1, kh, kw, 1, 1, ph, pw)
    y = cp.tensordot(col, in2, ((1, 2, 3), (1, 2, 3))).astype(in1.dtype,
                                                              copy=False)
    y = cp.rollaxis(y, 3, 1)
    return y.transpose(2, 3, 0, 1)

def fourier_basis(self, function, idx):
    """ On GPU, compute Fourier coefficients on the LGL grid of the given grid function """
    # print(function.shape)
    # print(self.spectral_transform.shape)
    # quit()
    return cp.tensordot(function, self.spectral_transform,
                        axes=(idx, [0, 1])) * self.dx / self.length

def outer2(a, b):
    """
    Compute outer tensor product of vectors a, b

    :param a: vector a_i
    :param b: vector b_j
    :return: tensor a_i b_j
    """
    return cp.tensordot(a, b, axes=0)

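# A one-line numpy check (assuming cp.tensordot behaves like np.tensordot) of
# the a_i b_j formula above: axes=0 is the outer product.
import numpy as np

a = np.array([1.0, 2.0])
b = np.array([3.0, 4.0, 5.0])
t = np.tensordot(a, b, axes=0)    # t[i, j] == a[i] * b[j], shape (2, 3)
assert np.allclose(t, np.outer(a, b))
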
def _upsampled_dft(self, array, region_sz, offsets=None):
    """
    Upsampled DFT by matrix multiplication.

    This code is intended to provide the same result as if the following
    operations are performed:
        - Embed the array to a larger one of size `upsample_factor` times
          larger in each dimension.
        - ifftshift to bring the center of the image to (1, 1).
        - Take the FFT of the larger array.
        - Extract a region of size [region_sz] from the result, starting with
          offsets.

    It achieves this result by computing the DFT in the output array without
    the need to zeropad. Much faster and more memory efficient than the
    zero-padded FFT approach if region_sz is much smaller than
    array.size * upsample_factor.

    Args:
        array (cp.ndarray): DFT of the data to be upsampled
        region_sz (int or tuple of int): size of the region to be sampled
        offsets (int or tuple of int): offsets to the sampling region

    Returns:
        (cp.ndarray): upsampled DFT of the specified region
    """
    try:
        if len(region_sz) != array.ndim:
            raise ValueError("upsampled region size must match array dimension")
    except TypeError:
        # expand integer to list
        region_sz = (region_sz,) * array.ndim

    if offsets is None:
        offsets = (0,) * array.ndim
    else:
        if len(offsets) != array.ndim:
            raise ValueError("axis offsets must match array dimension")

    dim_props = zip(reversed(array.shape), reversed(region_sz), reversed(offsets))
    for ax_sz, up_ax_sz, ax_offset in dim_props:
        # float32 sample frequencies
        fftfreq = (
            cp.hstack(
                (
                    cp.arange(0, (ax_sz - 1) // 2 + 1, dtype=cp.float32),
                    cp.arange(-(ax_sz // 2), 0, dtype=cp.float32),
                )
            )
            / ax_sz
            / self.upsample_factor
        )
        # upsampling kernel
        kernel = cp.exp(
            (1j * 2 * np.pi)
            * (cp.arange(up_ax_sz, dtype=np.float32) - ax_offset)[:, None]
            * fftfreq
        )
        # convolve
        array = cp.tensordot(kernel, array, axes=(1, -1))
    return array

def dot_adjoint_1(A, G, A_ndim, B_ndim):
    # The adjoint of the operator
    # B |--> np.dot(A, B)
    needs_transpose = B_ndim > 1 and A_ndim != 0
    swap = ((lambda x: ocp.swapaxes(x, -1, -2)) if needs_transpose
            else (lambda x: x))  # noqa: E501
    if A_ndim == 0 or A_ndim == 1 or B_ndim == 0:
        contract_num = max(0, A_ndim - (B_ndim != 0))
        return swap(ocp.tensordot(G, A, contract_num))
    else:
        return swap(
            ocp.tensordot(
                G,
                A,
                [
                    range(-A_ndim - B_ndim + 2, -B_ndim + 1),
                    range(A_ndim - 1),
                ],  # noqa: E501
            ))

def __init__(self, low, high, res, basis, spectrum=False, fine=False, linspace=False):
    self.low = low
    self.high = high
    self.res = int(res)  # somehow gets non-int...
    self.res_ghosts = int(res + 2)  # resolution including ghosts
    self.order = basis.order

    # domain and element widths
    self.length = self.high - self.low
    self.dx = self.length / self.res

    # element Jacobian
    self.J = 2.0 / self.dx

    # The grid does not have a basis but does have quad weights
    self.quad_weights = cp.tensordot(cp.ones(self.res),
                                     cp.asarray(basis.weights), axes=0)

    # arrays
    self.arr = np.zeros((self.res_ghosts, self.order))
    self.create_grid(basis.nodes)
    self.arr_cp = cp.asarray(self.arr)
    self.midpoints = np.array([(self.arr[i, -1] + self.arr[i, 0]) / 2.0
                               for i in range(1, self.res_ghosts - 1)])
    self.arr_max = np.amax(abs(self.arr))

    # velocity axis gets a positive/negative indexing slice
    self.one_negatives = cp.where(condition=self.arr_cp < 0, x=1, y=0)
    self.one_positives = cp.where(condition=self.arr_cp >= 0, x=1, y=0)

    # fine array
    if fine:
        fine_num = 25  # 200 for 1D poisson study
        self.arr_fine = np.array([np.linspace(self.arr[i, 0], self.arr[i, -1],
                                              num=fine_num)
                                  for i in range(self.res_ghosts)])

    if linspace:
        lin_num = 400
        self.arr_lin = np.linspace(self.low, self.high, num=lin_num)

    # spectral coefficients
    if spectrum:
        self.nyquist_number = 2.0 * self.res  # 2.5 *  # mode number of nyquist frequency
        # print(self.nyquist_number)
        self.k1 = 2.0 * np.pi / self.length  # fundamental mode
        self.wave_numbers = self.k1 * np.arange(1 - self.nyquist_number,
                                                self.nyquist_number)
        self.d_wave_numbers = cp.asarray(self.wave_numbers)
        self.grid_phases = cp.asarray(
            np.exp(1j * np.tensordot(self.wave_numbers, self.arr[1:-1, :], axes=0)))
        if linspace:
            self.lin_phases = cp.asarray(
                np.exp(1j * np.tensordot(self.wave_numbers, self.arr_lin, axes=0)))

        # Spectral matrices
        self.spectral_transform = basis.fourier_transform_array(
            self.midpoints, self.J, self.wave_numbers)
        self.inverse_transform = basis.inverse_transform_array(
            self.midpoints, self.J, self.wave_numbers)

def calc_combined_view(ioperands, subscripts):
    """Calculates 'i,j->ij' by cupy.tensordot.

    Args:
        ioperands (sequence of arrays): Arrays to be combined.
        subscripts (sequence of str): Specifies the subscripts.
    """
    result = ioperands[0]
    for ioperand in ioperands[1:]:
        # TODO(fukatani): add up at here if enable.
        result = cupy.tensordot(result, ioperand, axes=0)
    return result, ''.join(subscripts)

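# A short numpy illustration (cupy.tensordot behaves the same) of how repeated
# axes=0 contractions build the combined view, e.g. 'i,j,k->ijk':
import numpy as np

ops = [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])]
result = ops[0]
for op in ops[1:]:
    result = np.tensordot(result, op, axes=0)
assert np.allclose(result, np.einsum('i,j,k->ijk', *ops))
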
def tensordot_adjoint_1(A, G, axes, A_ndim, B_ndim):
    # The adjoint of the operator
    # B |--> np.tensordot(A, B, axes)
    if A_ndim == 0:
        return G * A

    G_axes = ocp.arange(ocp.ndim(G))
    if type(axes) is int:
        axes = max(axes, 0)
        A_axes = ocp.arange(A_ndim)
        return ocp.tensordot(
            A, G, [A_axes[:A_ndim - axes], G_axes[:A_ndim - axes]])  # noqa: E501
    elif type(axes[0]) is int:
        axes = [axes[0] % A_ndim, axes[1] % B_ndim]
        A_axes = ocp.arange(A_ndim)
        return ocp.tensordot(
            A, G, [ocp.delete(A_axes, axes[0]), G_axes[:A_ndim - 1]])  # noqa: E501
    else:
        A_axes = ocp.arange(A_ndim)
        B_axes = ocp.arange(B_ndim)
        summed_axes = [
            ocp.asarray(axes[0]) % A_ndim,
            ocp.asarray(axes[1]) % B_ndim,
        ]  # noqa: E501
        other_axes = [
            ocp.delete(A_axes, summed_axes[0]),
            ocp.delete(B_axes, summed_axes[1]),  # noqa: E501
        ]
        out = ocp.tensordot(A, G, [other_axes[0], G_axes[:len(other_axes[0])]])
        perm = ocp.argsort(
            ocp.concatenate(
                (summed_axes[1][ocp.argsort(summed_axes[0])], other_axes[1])))
        return ocp.transpose(out, perm)

def __imul__(self, rhs: Any) -> "Tensor":
    if isinstance(rhs, Number) or isinstance(rhs, xp.ndarray):
        self._data *= rhs
    elif isinstance(rhs, Tensor):
        axes = getEinsumRule(self._indices, rhs._indices)
        res_indices = ([
            idx for i, idx in enumerate(self._indices) if i not in axes[0]
        ] + [
            idx for j, idx in enumerate(rhs._indices) if j not in axes[1]
        ])
        if not self.use_cutensor:
            self._data = xp.tensordot(self._data, rhs._data, axes=axes)
        else:
            a = xp.ascontiguousarray(self._data)
            b = xp.ascontiguousarray(rhs._data)
            c = xp.zeros([idx.size for idx in res_indices])
            desc_a = cutensor.create_tensor_descriptor(a)
            desc_b = cutensor.create_tensor_descriptor(b)
            desc_c = cutensor.create_tensor_descriptor(c)
            mode_a = [chr(97 + i) for i in range(self._rank)]
            mode_b = [
                chr(97 + i)
                for i in range(self._rank, self._rank + rhs._rank)
            ]
            for i, j in zip(axes[0], axes[1]):
                mode_b[j] = mode_a[i]
            mode_c = (
                [mode_a[i] for i in range(self._rank) if i not in axes[0]]
                + [mode_b[j] for j in range(rhs._rank) if j not in axes[1]])
            mode_a = cutensor.create_mode(*mode_a)
            mode_b = cutensor.create_mode(*mode_b)
            mode_c = cutensor.create_mode(*mode_c)
            cutensor.contraction(1.0, a, desc_a, mode_a, b, desc_b, mode_b,
                                 0.0, c, desc_c, mode_c)
            self._data = c
        self._indices = res_indices
        self._rank = len(self._indices)
    else:
        msg = f"Unsupported __imul__ with rhs of type {type(rhs)}"
        logger.error(msg)
        raise RuntimeError(msg)
    return self

def forward(self, x_input):
    """
    :param x_input: n, c, w, h
    """
    assert x_input.dtype == 'float16', ValueError(
        "Input images must be normalized to 0-1 and cast to float16")
    assert len(x_input.shape) == 4, ValueError(
        "Input data must be 4-dimensional: NCWH")
    assert x_input.shape[2] == x_input.shape[3], ValueError(
        "Input images must be square; the dimensions are NCWH")

    # Apply padding first.
    if self.padding == 'same':
        padding_size = (x_input.shape[2] - 1) * self.strides + \
            self.kernel_size[0] - x_input.shape[2]
        if padding_size % 2 == 0:
            self.padding_layer = ZeroPadding2d(padding_size // 2,
                                               padding_size // 2)
        else:
            self.padding_layer = ZeroPadding2d(padding_size // 2,
                                               padding_size // 2 + 1)
        x_input = self.padding_layer.forward(x_input)

    n, c, w, h = x_input.shape
    self.shape = x_input.shape
    self.m = n
    if self.w is None:
        self.w = np.random.normal(
            size=[self.units, c, self.kernel_size[0], self.kernel_size[1]]) * 0.01

    # Then split the input into the dot-product windows.
    x_input = self.split_by_strides(x_input,
                                    kh=self.kernel_size[0],
                                    kw=self.kernel_size[1],
                                    s=self.strides)
    self.input_split = x_input
    return self.activation.forward(
        np.tensordot(x_input, self.w,
                     axes=[(1, 4, 5), (1, 2, 3)]).transpose([0, 3, 1, 2])
        + self.b.reshape(-1, 1, 1)).astype('float16')

def apply_kraus_ops_to_density_matrices_cupy(
    kraus_ops: cupy.ndarray, density_matrices: cupy.ndarray
):
    num_kraus_ops, matrix_dim, matrix_dim2 = kraus_ops.shape
    if matrix_dim != matrix_dim2:
        raise ValueError(kraus_ops.shape)
    num_wfs, den_mat_dim, den_mat_dim2 = density_matrices.shape
    if den_mat_dim != den_mat_dim2:
        raise ValueError(density_matrices.shape)
    if matrix_dim != den_mat_dim:
        raise ValueError("{0:d}, {1:d}".format(int(matrix_dim), int(den_mat_dim)))
    del matrix_dim2, den_mat_dim2, den_mat_dim

    mat_b = einsum("aij,bjk->abik", kraus_ops, density_matrices)
    assert mat_b.shape == (num_kraus_ops, num_wfs, matrix_dim, matrix_dim)

    adjoint = empty_like(kraus_ops)
    for idx in range(num_kraus_ops):
        adjoint[idx, :, :] = transpose(conj(kraus_ops[idx]))

    mat_c = tensordot(mat_b, adjoint, axes=[[0, 3], [0, 1]])
    assert mat_c.shape == (num_wfs, matrix_dim, matrix_dim)
    return mat_c

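# A brief numpy sketch (hypothetical; numpy stands in for the cupy names
# einsum/tensordot/conj/transpose) of what the routine computes: the Kraus map
# rho -> sum_a K_a rho K_a^dagger applied to each density matrix.
import numpy as np

K = np.stack([np.sqrt(0.9) * np.eye(2),
              np.sqrt(0.1) * np.array([[0, 1], [0, 0]])]).astype(complex)
rhos = np.stack([np.diag([0.0, 1.0])]).astype(complex)
expected = sum(K[a] @ rhos[0] @ K[a].conj().T for a in range(K.shape[0]))
mat_b = np.einsum("aij,bjk->abik", K, rhos)
adjoint = np.conj(np.transpose(K, (0, 2, 1)))
out = np.tensordot(mat_b, adjoint, axes=[[0, 3], [0, 1]])  # contract a and k
assert np.allclose(out[0], expected)
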
def forward(self, X):
    """Forward propagation.

    Args:
        X (np.array): array of dimension dim_in (m, n_h_p, n_w_p, n_c_p)

    Returns:
        np.array: output.
    """
    n_h = int((X.shape[1] - self.f + 2 * self.pad) / self.stride) + 1
    n_w = int((X.shape[2] - self.f + 2 * self.pad) / self.stride) + 1

    if X.shape != self.dim_in:
        self.X_pad = np.pad(np.zeros(X.shape),
                            ((0, 0), (self.pad, self.pad),
                             (self.pad, self.pad), (0, 0)),
                            mode='constant', constant_values=(0, 0))
    if self.pad != 0:
        self.X_pad[:, self.pad:-self.pad, self.pad:-self.pad, :] = X
    else:
        self.X_pad = X

    # compute Z for multiple input images and multiple filters
    shape = (self.f, self.f, self.dim_in[-1], X.shape[0], n_h, n_w, 1)
    strides = (self.X_pad.strides * 2)[1:]
    strides = (*strides[:-3], strides[-3] * self.stride,
               strides[-2] * self.stride, strides[-1])
    M = np.lib.stride_tricks.as_strided(
        self.X_pad, shape=shape, strides=strides)  # , writeable=False)
    self.Z = np.tensordot(M, self.W, axes=([0, 1, 2], [0, 1, 2]))[:, :, :, 0, :]
    # self.Z = np.einsum('pqrs,pqrtbmn->tbms', self.W, M, optimize='optimal')
    self.Z = self.Z + self.b

    if self.activation == 'relu':
        return self._relu(self.Z)
    elif self.activation == 'none':
        return self.Z

# Note: tensordot is in the numpy top-level namespace but not in np.linalg

# Note: axes must be a tuple, unlike np.tensordot where it can be an array or
# array-like.
def tensordot(
    x1: Array,
    x2: Array,
    /,
    *,
    axes: Union[int, Tuple[Sequence[int], Sequence[int]]] = 2,
) -> Array:
    # Note: the restriction to numeric dtypes only is different from
    # np.tensordot.
    if x1.dtype not in _numeric_dtypes or x2.dtype not in _numeric_dtypes:
        raise TypeError('Only numeric dtypes are allowed in tensordot')

    return Array._new(np.tensordot(x1._array, x2._array, axes=axes))


# Note: trace is in the numpy top-level namespace, not np.linalg
def trace(x: Array, /, *, offset: int = 0) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.trace <numpy.trace>`.

    See its docstring for more information.
    """
    if x.dtype not in _numeric_dtypes:
        raise TypeError('Only numeric dtypes are allowed in trace')
    # Note: trace always operates on the last two axes, whereas np.trace
    # operates on the first two axes by default
    return Array._new(
        np.asarray(np.trace(x._array, offset=offset, axis1=-2, axis2=-1)))

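# A small numpy illustration of the axis-convention note above: the Array API
# trace sums over the last two axes, while plain np.trace defaults to the
# first two (values here are only illustrative).
import numpy as np

x = np.arange(24).reshape(2, 3, 4)
api_style = np.trace(x, axis1=-2, axis2=-1)   # shape (2,), per-matrix traces
default = np.trace(x)                         # shape (4,), traces over axes 0 and 1
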
def forward_conv(A_previous, Filter, Bias, pad, stride,
                 function='identity', verbose=False):
    '''
    A forward convolution step.
    Output shape: ((x - f + 2*pad) / stride) + 1

    Parameters
    ----------
    A_previous : cp.array(examples, height, width, depth)
        Input images from the previous layer.
    Filter : cp.array(f, f, depth, number of filters)
        Filter to convolve with the input image.
    Bias : cp.array(1, 1, 1, number of filters)
        Bias for each filter.
    pad : int
        Padding edge width.
    stride : int
        Stride number.

    Returns
    -------
    Z : cp.array(examples, ((h-f+2*pad)/stride)+1, ((w-f+2*pad)/stride)+1, number of filters)
        Output layer image.
    '''
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    (f, f, n_C_prev, n_C) = Filter.shape
    mu = cp.mean(Filter)
    s = cp.std(Filter)
    Filter = (Filter - mu) / (s + 1e-5)
    n_H = int(((n_H_prev - f + 2 * pad) / stride) + 1)
    n_W = int(((n_W_prev - f + 2 * pad) / stride) + 1)
    Z = cp.zeros([m, n_H, n_W, n_C])
    A_prev_pad = cp.pad(A_previous, ((0, 0), (pad, pad), (pad, pad), (0, 0),),
                        mode='constant', constant_values=(0, 0))
    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    k = cp.reshape(cp.repeat(cp.arange(n_C_prev), f**2), (-1, 1))
    Ztest = A_prev_pad[:, i, j, :]
    weights = cp.reshape(Filter, (f**2, n_C_prev, n_C))
    conV = cp.tensordot(weights, Ztest, ((0, 1), (1, 3)))
    Z = cp.reshape(cp.transpose(conV, (1, 2, 0)), (m, n_H, n_W, n_C)) + Bias
    Z = activation('forward', function, Z)
    if verbose:
        print("Filter :")
        print(Filter)
        print("Weights :")
        print(weights)
        print("Z :")
        print(Ztest)
        print("Conv :")
        print(conV)
        print("Result :")
        print(Z)
    '''
    # Intuitive way (not optimized):
    for i in range(m):
        a_prev_pad = A_prev_pad[i, :, :, :]
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride+f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride+f
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):
                    Z[i, h, w, c] = cp.squeeze(cp.sum(a_slice_prev*Filter[:, :, :, c])+Bias[:, :, :, c])
    '''
    return Z

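# A quick worked check of the output-shape formula in the docstring above
# (illustrative numbers, not taken from the original):
h_prev, f, pad, stride = 28, 5, 2, 1
n_h = int(((h_prev - f + 2 * pad) / stride) + 1)
assert n_h == 28   # 'same'-style padding preserves the spatial size
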
def backward_conv(dZ, A_previous, Filter, Bias, pad, stride, function='identity'):
    '''
    A backward convolution step.

    Parameters
    ----------
    dZ : cp.array(examples, ((h-f+2*pad)/stride)+1, ((w-f+2*pad)/stride)+1, number of filters)
        Cost derivative from the l+1 layer.
    A_previous : cp.array(examples, height, width, depth)
        Output image from the l-1 layer.
    Filter : cp.array(f, f, depth, number of filters)
        Convolutional filter.
    Bias : cp.array(1, 1, 1, number of filters)
        Bias respective to each filter.
    pad : int
        Padding parameter.
    stride : int
        Stride parameter.

    Returns
    -------
    dA : cp.array(examples, height, width, depth)
        Cost derivative from the current layer.
    dFilter : cp.array(f, f, depth, number of filters)
        Cost derivative for the filter.
    dBias : cp.array(1, 1, 1, number of filters)
        Cost derivative for the bias.
    '''
    dZ = activation('backward', function, 1, dZ)
    (m, n_H_prev, n_W_prev, n_C_prev) = A_previous.shape
    (f, f, n_C_prev, n_C) = Filter.shape
    (m, n_H, n_W, n_C) = dZ.shape
    dA = cp.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dFilter = cp.zeros((f, f, n_C_prev, n_C))
    dBias = cp.zeros((1, 1, 1, n_C))
    dBias = cp.sum(dZ, axis=(0, 1, 2))
    A_prev_pad = cp.pad(A_previous, ((0, 0), (pad, pad), (pad, pad), (0, 0),),
                        mode='constant', constant_values=(0, 0))
    dA_prev_pad = cp.pad(dA, ((0, 0), (pad, pad), (pad, pad), (0, 0),),
                         mode='constant', constant_values=(0, 0))
    i0 = cp.repeat(cp.arange(f), f)
    i1 = stride * cp.repeat(cp.arange(n_W), n_H)
    j0 = cp.tile(cp.arange(f), f)
    j1 = stride * cp.tile(cp.arange(n_H), n_W)
    i = cp.reshape(i0, (-1, 1)) + cp.reshape(i1, (1, -1))
    j = cp.reshape(j0, (-1, 1)) + cp.reshape(j1, (1, -1))
    Ztest = A_prev_pad[:, i, j, :]
    dZtest = cp.reshape(dZ, (m, -1, n_C))
    dFiltertest = cp.tensordot(dZtest, cp.transpose(Ztest, (1, 0, 2, 3)),
                               ((0, 1), (1, 2)))
    dFilter = cp.reshape(cp.transpose(dFiltertest, (1, 2, 0)),
                         (f, f, n_C_prev, n_C))
    dZ = cp.reshape(cp.transpose(dZ, (3, 1, 2, 0)), (n_C, -1))
    weights = cp.reshape(cp.transpose(Filter, (3, 1, 2, 0)), (n_C, -1))
    dA_prev_pad = cp.dot(weights.T, dZ)
    strPad = "same"
    if pad == 0:
        strPad = "valid"
    dA = Utils.column_to_image(dA_prev_pad, (m, n_C_prev, n_H_prev, n_W_prev),
                               (f, f), stride, strPad)
    '''
    # Intuitive way (really not optimized):
    for i in range(m):
        a_prev_pad = A_prev_pad[i, :, :, :]
        da_prev_pad = dA_prev_pad[i, :, :, :]
        for h in range(n_H):
            vert_start = h*stride
            vert_end = h*stride + f
            for w in range(n_W):
                horiz_start = w*stride
                horiz_end = w*stride + f
                a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += Filter[:, :, :, c] * dZ[i, h, w, c]
                    #dFilter[:,:,:,c] += a_slice * dZ[i, h, w, c]
                    #dBias[:,:,:,c] += dZ[i, h, w, c]
        dA[i, :, :, :] = da_prev_pad[pad:da_prev_pad.shape[0]-pad, pad:da_prev_pad.shape[1]-pad, :]
    '''
    return dA, dFilter, dBias
