def cpd_graph(dim, size, rank):
    cg = CharacterGetter()

    input_tensor = ad.Variable(name='input_tensor',
                               shape=[size for _ in range(dim)])
    input_tensor_subs = "".join([cg.getchar() for _ in range(dim)])
    rank_char = cg.getchar()

    A_list = []
    A_list_subs = []
    for i in range(dim):
        node = ad.Variable(name=f'A{i}', shape=[size, rank])
        A_list.append(node)
        A_list_subs.append(f"{input_tensor_subs[i]}{rank_char}")

    # CP reconstruction: every factor matrix is contracted over the shared
    # rank index, and the free indices form the modes of the input tensor.
    input_subs = ','.join(A_list_subs)
    einsum_subscripts = input_subs + '->' + input_tensor_subs
    output = ad.einsum(einsum_subscripts, *A_list)

    # The loss is the squared Frobenius norm of the residual.
    residual = output - input_tensor
    residual_shape = list(range(len(residual.shape)))
    loss = ad.tensordot(residual,
                        residual,
                        axes=[residual_shape, residual_shape])
    return A_list, input_tensor, loss, residual
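# Hedged usage sketch for cpd_graph; the helper name and the problem sizes
# below are illustrative placeholders, not part of the original code.
def _example_cpd_graph():
    # For dim=3 the reconstruction einsum contracts the three factor matrices
    # over one shared rank index, leaving the tensor's three modes as output.
    A_list, input_tensor, loss, residual = cpd_graph(dim=3, size=10, rank=5)
    # loss is the squared Frobenius norm of (reconstruction - input_tensor).
    return A_list, input_tensor, loss, residual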
def test_tensordot(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 3, 3, 3])
        b = ad.Variable(name="b", shape=[3, 3, 3, 3])

        result = ad.tensordot(a, b, axes=[[1, 3], [0, 1]])
        result2 = ad.einsum("abcd,bdef->acef", a, b)

        assert tree_eq(result, result2, [a, b])
def inner_product(vector_list, gradient_list):
    assert len(vector_list) == len(gradient_list)
    assert len(vector_list) >= 1

    # Contract each (vector, gradient) pair over all of its axes to get a
    # scalar, then sum the scalars.
    inner_product_nodes = [
        ad.tensordot(v, g,
                     [list(range(len(v.shape))),
                      list(range(len(v.shape)))])
        for v, g in zip(vector_list, gradient_list)
    ]
    sum_node = sum_node_list(inner_product_nodes)
    return sum_node
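# Small sketch of what inner_product computes; the helper name and the shapes
# are illustrative placeholders. A single pair is contracted over all of its
# axes, yielding one scalar node.
def _example_inner_product():
    v = ad.Variable(name="v", shape=[2, 3])
    g = ad.Variable(name="g", shape=[2, 3])
    return inner_product([v], [g])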
def _get_sub_hessian(cls, index, mpo_graph, mps_graph):
    # rebuild mps graph
    intermediate_set = {
        mps_graph.inputs[index], mps_graph.inputs[index + 1]
    }
    split_input_nodes = list(set(mps_graph.inputs) - intermediate_set)
    mps = split_einsum(mps_graph.output, split_input_nodes)

    # get the intermediate node
    intermediate, = [
        node for node in mps.inputs if isinstance(node, ad.EinsumNode)
    ]

    mps_outer_product = ad.tensordot(mps, mps, axes=[[], []])
    mpo_axes = list(range(len(mpo_graph.output.shape)))

    # The 0.5 factor makes sure that the Hessian can be written as an einsum
    objective = 0.5 * ad.tensordot(mps_outer_product,
                                   mpo_graph.output,
                                   axes=[mpo_axes, mpo_axes])
    hes = ad.hessian(objective, [intermediate])
    return intermediate, hes[0][0]
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=[]):
    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    # Newton-style update per factor: the block Hessian w.r.t. a factor matrix
    # has four modes, so its inverse is contracted with the gradient over the
    # last two modes.
    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)
    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        t0 = time.time()

        # als iterations
        for i in range(len(A_list)):
            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                # Reuse cached intermediates across the sweep; only the factor
                # updated in the previous step is evicted and recomputed.
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list
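# Hedged driver sketch for the shared-execution CP ALS routine above; the
# helper name, problem sizes, and iteration count are placeholders. Factors
# are initialized randomly via init_rand_cp inside the routine.
def _example_cpd_als_shared_exec():
    return cpd_als_shared_exec(dim=3, size=10, rank=5, num_iter=1)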
def tucker_als_graph_shared_exec(dim, size, rank):
    """ Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dim of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executor_updates: a shared executor for the factor updates
    executor_loss: an executor for the Tucker loss
    loss: the optimized graph for the Tucker loss
    updates: a list containing the update graph for each dimension
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimizes
    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):
        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates
def _build_graph_w_intermediate(self, index):
    """
    Rebuild the graph so that the intermediate node becomes an input of the output.
    """
    intermediate_set = {self.core, self.A_list[index]}
    split_input_nodes = list(set(self.output.inputs) - intermediate_set)
    output = split_einsum(self.output, split_input_nodes)

    # get the intermediate node
    intermediate, = [
        node for node in output.inputs if isinstance(node, ad.EinsumNode)
    ]

    residual = output - self.X
    residual_shape = list(range(len(residual.shape)))
    loss = ad.tensordot(residual,
                        residual,
                        axes=[residual_shape, residual_shape])

    return intermediate, loss
def tucker_als_graph(dim, size, rank):
    """ Build the graph used for Tucker ALS.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dim of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executors_update: list of executors. Each executor is used for
        one step of Tucker ALS
    executor_loss: an executor for the Tucker loss
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimizes
    """
    tg = TuckerGraph(dim, size, rank)

    executors_update = []
    for i in range(dim):
        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        executor = ad.Executor([simplify(new_core_A)])
        executors_update.append(executor)

    executor_loss = ad.Executor([simplify(tg.losses[0])])

    return tg, executors_update, executor_loss, tg.intermediates
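# Hedged sketch contrasting the two Tucker graph builders above (helper name
# and sizes are placeholders): tucker_als_graph returns one update executor
# per dimension, while tucker_als_graph_shared_exec fuses all updates into a
# single shared executor.
def _example_tucker_graph_builders():
    tg_a, executors_update, executor_loss_a, intermediates_a = \
        tucker_als_graph(dim=3, size=10, rank=5)
    tg_b, executor_updates, executor_loss_b, loss, updates, intermediates_b = \
        tucker_als_graph_shared_exec(dim=3, size=10, rank=5)
    return executors_update, executor_updates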
def cpd_als(dim, size, rank, num_iter, input_val=[]):
    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    # Newton-style update per factor: contract the inverted block Hessian
    # with the gradient.
    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        # als iterations
        for i in range(len(A_list)):
            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            A_val_list[i], = executor.run(feed_dict=feed_dict,
                                          out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')

    return A_val_list
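# Hedged driver sketch for the plain CP ALS routine above; helper name and
# sizes are placeholders. Unlike cpd_als_shared_exec, this variant builds no
# shared execution tree and does not reuse cached intermediates across steps.
def _example_cpd_als():
    return cpd_als(dim=3, size=10, rank=5, num_iter=1)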