def test_einsum(backendopt):
    """Verify forward value and gradients of an einsum matmul followed by sum.

    For y = sum(x2 @ x3): dy/dx2 = ones @ x3^T and dy/dx3 = x2^T @ ones,
    computed here as backend reference values via T.dot.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        y = ad.sum(ad.einsum('ik,kj->ij', x2, x3))

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
        executor = ad.Executor([y, grad_x2, grad_x3])

        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3
        feeds = {x2: x2_val, x3: x3_val}
        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict=feeds)

        # Reference values computed directly with the backend.
        product = T.dot(x2_val, x3_val)
        grad_sum = T.ones_like(product)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(product))
        assert T.array_equal(grad_x2_val, T.dot(grad_sum, T.transpose(x3_val)))
        assert T.array_equal(grad_x3_val, T.dot(T.transpose(x2_val), grad_sum))
def call(self, inputs, **kwargs):
    """Resize `source` to `target`'s spatial shape with nearest-neighbour sampling.

    inputs: a (source, target) pair of tensors; only target's dynamic shape
    is read. In channels_first mode the resize is done in channels_last
    layout and transposed back afterwards.
    """
    source, target = inputs
    target_shape = keras.backend.shape(target)

    if keras.backend.image_data_format() != 'channels_first':
        # Channels-last: spatial dimensions are axes 1 and 2.
        return backend.resize_images(
            source, (target_shape[1], target_shape[2]), method='nearest')

    # Channels-first: move channels to the last axis, resize, move back.
    source = backend.transpose(source, (0, 2, 3, 1))
    resized = backend.resize_images(
        source, (target_shape[2], target_shape[3]), method='nearest')
    return backend.transpose(resized, (0, 3, 1, 2))
def test_hvp2(backendopt):
    """Check the Hessian-vector product of y = x^T H x with H = 2I."""
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 1])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(
            ad.einsum("ab,bc->ac", ad.einsum("ab,bc->ac", ad.transpose(x), H),
                      x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])
        executor = ad.Executor([y, grad_x, Hv])

        x_val = T.tensor([[1.], [2.], [3]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3]])  # 3x1
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3
        y_val, grad_x_val, Hv_val = executor.run(
            feed_dict={x: x_val, H: H_val, v: v_val})

        assert isinstance(y, ad.Node)
        # y = x^T H x; its gradient is (H + H^T) x = 2 H x for symmetric H.
        assert T.array_equal(y_val, T.sum(T.transpose(x_val) @ H_val @ x_val))
        assert T.array_equal(grad_x_val, 2 * H_val @ x_val)
        # With H = 2I the Hessian is 4I, so Hv = 4 * v.
        assert T.array_equal(Hv_val, T.tensor([[4.], [8.], [12.]]))
def test_jtjvps(backendopt):
    """Validate J^T J v products for the linear map y = A x (Jacobian is A)."""
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab, b->a', A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])
        executor = ad.Executor([y, jtjvp_x])

        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5, 6]])
        v_val = T.tensor([3., 4.])
        y_val, jtjvp_x_val = executor.run(
            feed_dict={x: x_val, A: A_val, v: v_val})

        # y is linear in x with Jacobian A, so J^T J v = (A^T A) v.
        gram = T.einsum('ba, ac->bc', T.transpose(A_val), A_val)
        expected_jtjvp_x_val = T.einsum('ab, b->a', gram, v_val)

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, T.einsum('ab, b->a', A_val, x_val))
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)
def test_executor_debug_symmetry(backendopt):
    """Smoke-test executor debug mode with a symmetry-annotated variable.

    Feeds a symmetrized random matrix so the fed value satisfies the
    declared [[0, 1]] symmetry; only checks that run() completes.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[3, 3], symmetry=[[0, 1]])
        out = ad.einsum("ab,bc->ac", A, A)
        executor = ad.Executor([out])

        # Symmetrize the random input: A + A^T is symmetric by construction.
        A_val = T.random((3, 3))
        A_val = A_val + T.transpose(A_val)
        executor.run(feed_dict={A: A_val}, debug=True)
def call(self, x, **kwargs):
    """Build the SincNet band-pass filter bank and convolve it with `x`.

    Each of the N_filt filters is the difference of two windowed sinc
    low-pass filters, parameterized by the learnable cutoff weights
    self.filt_b1 (band start) and self.filt_band (bandwidth).

    Args:
        x: input tensor to K.conv1d — presumably (batch, in_width, 1);
           TODO confirm against the caller.
    Returns:
        The conv1d output of `x` with the constructed filter bank.
    """
    debug_print("call")

    min_freq = 50.0
    min_band = 50.0

    # Learnable low cutoff and bandwidth, clamped away from zero.
    filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
    filt_end_freq = filt_beg_freq + (K.abs(self.filt_band) +
                                     min_band / self.freq_scale)

    # Hamming window over the filter length.
    n = np.linspace(0, self.Filt_dim, self.Filt_dim)
    window = 0.54 - 0.46 * K.cos(2 * math.pi * n / self.Filt_dim)
    window = K.cast(window, "float32")
    window = K.variable(window)

    # Right half of the symmetric time axis, scaled by the sampling rate.
    t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2,
                                   int((self.Filt_dim - 1) / 2))
    t_right = K.variable(t_right_linspace / self.fs)

    # Compute the filters: each band-pass is the difference of two sincs.
    # BUG FIX: the loop previously read self.filt_beg_freq, self.filt_end_freq,
    # self.t_right and self.window, which are never assigned on the layer —
    # the locals computed above were dead. Use the locals instead.
    output_list = []
    for i in range(self.N_filt):
        low_pass1 = (2 * filt_beg_freq[i] *
                     sinc(filt_beg_freq[i] * self.freq_scale, t_right))
        low_pass2 = (2 * filt_end_freq[i] *
                     sinc(filt_end_freq[i] * self.freq_scale, t_right))
        band_pass = low_pass2 - low_pass1
        band_pass = band_pass / K.max(band_pass)
        output_list.append(band_pass * window)

    filters = K.stack(output_list)  # (N_filt, Filt_dim), e.g. (80, 251)
    filters = K.transpose(filters)  # (Filt_dim, N_filt), e.g. (251, 80)
    # TF conv1d expects kernels shaped (filter_width, in_channels, out_channels);
    # PyTorch would use (out_channels, in_channels, filter_width).
    filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))

    # NOTE: tf.nn.conv1d internally reshapes to conv2d: for data_format not
    # starting with "NC", input [batch, in_width, in_channels] becomes
    # [batch, 1, in_width, in_channels] and the kernel becomes
    # [1, filter_width, in_channels, out_channels]; the result is reshaped
    # back to [batch, out_width, out_channels].

    # Do the convolution.
    debug_print("call")
    debug_print(" x", x)
    debug_print(" filters", filters)
    out = K.conv1d(x, kernel=filters)
    debug_print(" out", out)
    return out
def step(self, hess_fn, grads, regularization):
    """Run one preconditioned-CG update step over the three factor matrices.

    Builds the gamma matrices (Hadamard products of the other modes' Gram
    matrices), solves the regularized system approximately with
    preconditioned CG, tightens the CG tolerance from the step size, and
    applies the delta to each factor in place.

    Returns (self.A, self.total_cg_time).
    """
    A = self.A

    # gamma[i] is the elementwise product of Gram matrices of modes != i.
    grams = [T.transpose(factor) @ factor for factor in A]
    self.gamma = [
        grams[1] * grams[2],
        grams[0] * grams[2],
        grams[0] * grams[1],
    ]

    P = self.compute_block_diag_preconditioner(regularization)
    delta, counter = self.fast_precond_conjugate_gradient(
        hess_fn, grads, P, regularization)
    self.total_iters += counter
    # Next-step CG tolerance scales with the size of this update.
    self.atol = self.num * group_vecnorm(delta)

    print(f"cg iterations: {counter}")
    print(f"total cg iterations: {self.total_iters}")
    print(f"total cg time: {self.total_cg_time}")

    for mode in range(3):
        self.A[mode] += delta[mode]
    return self.A, self.total_cg_time
def test_cpd_shared_exec(backendopt):
    """Compare one shared-execution CP-ALS sweep with hand-computed updates.

    The reference sweep deliberately reuses the freshly updated factors
    (new A when updating B, new A and B when updating C), matching ALS.
    """
    dim = 3

    for datatype in backendopt:
        T.set_backend(datatype)

        input_val = init_rand_cp(dim, size, rank)
        A_list, input_tensor_val = input_val
        A_val, B_val, C_val = A_list

        outputs = cpd_als_shared_exec(dim, size, rank, 1, input_val)

        # Reference ALS updates, one normal-equation solve per mode.
        A_val = T.einsum("abc,bk,ck->ak", input_tensor_val, B_val,
                         C_val) @ T.inv((T.transpose(B_val) @ B_val) *
                                        (T.transpose(C_val) @ C_val))
        B_val = T.einsum("abc,ak,ck->bk", input_tensor_val, A_val,
                         C_val) @ T.inv((T.transpose(A_val) @ A_val) *
                                        (T.transpose(C_val) @ C_val))
        C_val = T.einsum("abc,ak,bk->ck", input_tensor_val, A_val,
                         B_val) @ T.inv((T.transpose(A_val) @ A_val) *
                                        (T.transpose(B_val) @ B_val))

        for actual, expected in zip(outputs, (A_val, B_val, C_val)):
            assert T.norm(actual - expected) < 1e-8
def test_hessian_quadratic(backendopt):
    """The Hessian of the quadratic form x^T H x must equal H + H^T."""
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        y = ad.einsum("i,ij,j->", x, H, x)

        hessian = ad.hessian(y, [x])
        executor = ad.Executor([hessian[0][0]])

        x_val = T.random([3])
        H_val = T.random((3, 3))
        hessian_val, = executor.run(feed_dict={x: x_val, H: H_val})

        assert T.array_equal(hessian_val, H_val + T.transpose(H_val))
def test_HinverseG(backendopt):
    """Solve A x = b with conjugate gradient for a well-conditioned SPD A."""
    for datatype in backendopt:
        T.set_backend(datatype)
        N = 10
        T.seed(1224)

        # A^T A + I is symmetric positive definite, so CG is applicable.
        A = T.random([N, N])
        A = T.transpose(A) @ A
        A = A + T.identity(N)
        b = T.random([N])

        def hess_fn(x):
            # CG only needs matrix-vector products with A.
            return [T.einsum("ab,b->a", A, x[0])]

        error_tol = 1e-9
        x, = conjugate_gradient(hess_fn, [b], error_tol)

        # Residual of the solve should be small.
        assert T.norm(T.abs(T.einsum("ab,b->a", A, x) - b)) <= 1e-4
def test_transpose_einsum(backendopt):
    """Gradient of sum(transpose(x)) via einsum should be all ones."""
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 2])
        y = ad.sum(ad.einsum("ij->ji", x))

        grad_x, = ad.gradients(y, [x])
        executor = ad.Executor([y, grad_x])

        x_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        y_val, grad_x_val = executor.run(feed_dict={x: x_val})

        assert isinstance(y, ad.Node)
        # Transposition permutes entries without changing the sum, and every
        # entry contributes with weight 1, so the gradient is all ones.
        assert T.array_equal(y_val, T.sum(T.transpose(x_val)))
        assert T.array_equal(grad_x_val, T.ones_like(x_val))
def dmrg_local_update(intermediate, eigvec, max_mps_rank):
    """ Perform local update for DMRG.

    Parameters
    ----------
    intermediate: the input einsum node. Its inputs are two mps sites.
    eigvec: the eigenvector to get the low rank decomposition.
    max_mps_rank: maximum mps tensor rank.
    """
    # parse intermediate strings
    inputs = intermediate.inputs
    assert len(inputs) == 2
    # Here input names are formatted as A{i}; the digits give the site order.
    index_input_0 = int(inputs[0].name[1:])
    index_input_1 = int(inputs[1].name[1:])
    in_subs, out_subs, _ = _parse_einsum_input(
        (intermediate.einsum_subscripts, *intermediate.inputs))
    if index_input_0 > index_input_1:
        # right site appears first
        right_subs, left_subs = in_subs.split(',')
    else:
        left_subs, right_subs = in_subs.split(',')
    # Map each output subscript character to its axis position in eigvec.
    map_subs_indices = dict(zip(out_subs,
                                list(range(len(intermediate.shape)))))
    # The single character shared by the two sites but absent from the
    # output: the bond (contracted) index between the sites.
    contract_char, = list(set(left_subs) - set(out_subs))
    # NOTE(review): set iteration order of these character sets is only
    # stable within one process run; the same ordering is threaded
    # consistently through the transpose/reshape/einsum below, so the final
    # result does not depend on it.
    left_uncontract_chars = list(set(left_subs) - set(contract_char))
    right_uncontract_chars = list(set(right_subs) - set(contract_char))
    left_indices = [map_subs_indices[char] for char in left_uncontract_chars]
    right_indices = [map_subs_indices[char] for char in right_uncontract_chars]
    left_uncontract_str = "".join(left_uncontract_chars)
    right_uncontract_str = "".join(right_uncontract_chars)
    #############################################################
    # svd decomposition to get updated sites
    eigvec_shape = intermediate.shape
    # Matricize eigvec: group left-site axes as rows, right-site axes as cols.
    eigvec_mat = T.transpose(eigvec, left_indices + right_indices)
    eigvec_mat = T.reshape(eigvec_mat,
                           (np.prod([eigvec_shape[i]
                                     for i in left_indices]), -1))
    U, s, VT = T.svd(eigvec_mat)
    # Truncate to the allowed bond dimension (never above the matrix rank).
    rank = min([max_mps_rank, eigvec_mat.shape[0], eigvec_mat.shape[1]])
    U, s, VT = U[:, :rank], s[:rank], VT[:rank, :]
    # Absorb the singular values into the right factor.
    VT = T.diag(s) @ VT
    U = T.reshape(U, [eigvec_shape[i] for i in left_indices] + [rank])
    VT = T.reshape(VT, ([rank] + [eigvec_shape[i] for i in right_indices]))
    # Permute axes back to the sites' original subscript orders.
    left = T.einsum(f"{left_uncontract_str}{contract_char}->{left_subs}", U)
    right = T.einsum(f"{contract_char}{right_uncontract_str}->{right_subs}",
                     VT)
    return left, right