Example #1
def cpd_graph(dim, size, rank):
    """Build the computation graph for the CP decomposition loss."""
    cg = CharacterGetter()

    input_tensor = ad.Variable(name='input_tensor',
                               shape=[size for _ in range(dim)])
    input_tensor_subs = "".join([cg.getchar() for _ in range(dim)])

    rank_char = cg.getchar()

    A_list = []
    A_list_subs = []
    for i in range(dim):
        node = ad.Variable(name=f'A{i}', shape=[size, rank])
        A_list.append(node)
        A_list_subs.append(f"{input_tensor_subs[i]}{rank_char}")

    input_subs = ','.join(A_list_subs)
    einsum_subscripts = input_subs + '->' + input_tensor_subs
    output = ad.einsum(einsum_subscripts, *A_list)

    residual = output - input_tensor
    residual_shape = list(range(len(residual.shape)))

    loss = ad.tensordot(residual,
                        residual,
                        axes=[residual_shape, residual_shape])

    return A_list, input_tensor, loss, residual
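
For dim = 3 (and sequential characters from CharacterGetter) the spec built above takes the form "ad,bd,cd->abc": every factor shares one rank subscript, and the final tensordot of the residual with itself is its squared Frobenius norm. A minimal NumPy sketch of the same computation, with letters chosen for readability rather than taken from CharacterGetter:

import numpy as np

size, rank = 4, 2
A = [np.random.rand(size, rank) for _ in range(3)]
T_hat = np.einsum("ar,br,cr->abc", *A)        # CP reconstruction
T = np.random.rand(size, size, size)
residual = T_hat - T
axes = list(range(residual.ndim))
loss = np.tensordot(residual, residual, axes=[axes, axes])  # squared Frobenius norm
assert np.isclose(loss, np.sum(residual ** 2))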
Example #2
def test_tensordot(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 3, 3, 3])
        b = ad.Variable(name="b", shape=[3, 3, 3, 3])
        result = ad.tensordot(a, b, axes=[[1, 3], [0, 1]])
        result2 = ad.einsum("abcd,bdef->acef", a, b)

        assert tree_eq(result, result2, [a, b])
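
The test asserts that this tensordot equals the einsum "abcd,bdef->acef": contracting a's axes 1 and 3 with b's axes 0 and 1 leaves a's free axes first in the result, then b's. The same identity can be checked in plain NumPy, independent of the ad wrapper:

import numpy as np

a = np.random.rand(3, 3, 3, 3)
b = np.random.rand(3, 3, 3, 3)
r1 = np.tensordot(a, b, axes=[[1, 3], [0, 1]])
r2 = np.einsum("abcd,bdef->acef", a, b)
assert np.allclose(r1, r2)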
Example #3
def inner_product(vector_list, gradient_list):
    """Return the sum of the full-contraction inner products <v, g> over the pairs."""
    assert len(vector_list) == len(gradient_list)
    assert len(vector_list) >= 1
    inner_product_nodes = [
        ad.tensordot(v, g,
                     [list(range(len(v.shape))),
                      list(range(len(v.shape)))])
        for v, g in zip(vector_list, gradient_list)
    ]
    sum_node = sum_node_list(inner_product_nodes)
    return sum_node
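
Each term contracts every axis of v against the matching axis of g, which is just the elementwise inner product of the two tensors. A NumPy sketch of a single term:

import numpy as np

v = np.random.rand(2, 3, 4)
g = np.random.rand(2, 3, 4)
axes = list(range(v.ndim))
ip = np.tensordot(v, g, axes=[axes, axes])   # full contraction -> scalar
assert np.isclose(ip, np.sum(v * g))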
Example #4
    def _get_sub_hessian(cls, index, mpo_graph, mps_graph):

        # Rebuild the MPS graph so the two neighboring sites form one intermediate.
        intermediate_set = {
            mps_graph.inputs[index], mps_graph.inputs[index + 1]
        }
        split_input_nodes = list(set(mps_graph.inputs) - intermediate_set)
        mps = split_einsum(mps_graph.output, split_input_nodes)

        # get the intermediate node
        intermediate, = [
            node for node in mps.inputs if isinstance(node, ad.EinsumNode)
        ]
        mps_outer_product = ad.tensordot(mps, mps, axes=[[], []])
        mpo_axes = list(range(len(mpo_graph.output.shape)))

        # The 0.5 factor ensures the Hessian of this quadratic objective
        # can still be written as a single einsum.
        objective = 0.5 * ad.tensordot(
            mps_outer_product, mpo_graph.output, axes=[mpo_axes, mpo_axes])
        hes = ad.hessian(objective, [intermediate])

        return intermediate, hes[0][0]
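
The 0.5 factor is the usual quadratic-form convention: for f(x) = 0.5 * x^T M x the Hessian is 0.5 * (M + M^T), which is M itself once M is symmetric, so the Hessian stays a single einsum over the MPO. A NumPy finite-difference check of that identity (the names here are illustrative, not from the library):

import numpy as np

n = 4
M = np.random.rand(n, n)
x = np.random.rand(n)

def f(x):
    return 0.5 * x @ M @ x

# Second differences of a quadratic recover its Hessian exactly
# (up to floating-point error).
eps = 1e-4
hess = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        e_i, e_j = np.eye(n)[i], np.eye(n)[j]
        hess[i, j] = (f(x + eps * (e_i + e_j)) - f(x + eps * e_i)
                      - f(x + eps * e_j) + f(x)) / eps**2

assert np.allclose(hess, 0.5 * (M + M.T), atol=1e-5)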
Example #5
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=None):
    """Run CP ALS, sharing cached intermediates across the per-factor updates."""

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val is None:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for it in range(num_iter):
        t0 = time.time()
        # als iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {it} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list
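
A hedged usage sketch. The point of evicted_inputs above is that after the first factor update of a sweep, the executor keeps its cached intermediates and only recomputes the ones that depend on the factor that just changed; the caller only picks the problem sizes:

# Illustrative sizes; a random initialization is drawn internally
# when input_val is not supplied.
A_val_list = cpd_als_shared_exec(dim=3, size=10, rank=5, num_iter=3)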
Example #6
def tucker_als_graph_shared_exec(dim, size, rank):
    """
    Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of input tensor's each dim
    rank: the rank of the decomposition

    Returns
    -------
    tg: an TuckerGraph object
    executor: An shared executor
    loss: the optimized graph for tucker loss
    updates: an list containing updates graphs for each dimension
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimized for
    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [list(range(dim, 2 * dim)), list(range(dim))])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates
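
A hedged sketch of consuming the return values; the feed dict for a full sweep depends on TuckerGraph's inputs, so only the unpacking is shown (this assumes generate_sequential_optimal_tree preserves the one-update-graph-per-dimension structure):

tg, executor_updates, executor_loss, loss, updates, intermediates = \
    tucker_als_graph_shared_exec(dim=3, size=10, rank=5)
assert len(updates) == 3   # one update graph per dimension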
Example #7
    def _build_graph_w_intermediate(self, index):
        """
        rebuild the graph so that intermediate will be an input of output.
        """
        intermediate_set = {self.core, self.A_list[index]}
        split_input_nodes = list(set(self.output.inputs) - intermediate_set)
        output = split_einsum(self.output, split_input_nodes)

        # get the intermediate node
        intermediate, = [
            node for node in output.inputs if isinstance(node, ad.EinsumNode)
        ]

        residual = output - self.X

        residual_shape = list(range(len(residual.shape)))
        loss = ad.tensordot(residual,
                            residual,
                            axes=[residual_shape, residual_shape])

        return intermediate, loss
Example #8
def tucker_als_graph(dim, size, rank):
    """
    Build the graph used for Tucker ALS.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of input tensor's each dim
    rank: the rank of the decomposition

    Returns
    -------
    tg: an TuckerGraph object
    executors: list of executors. Each executor is used for
        one step of Tucker ALS
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimized for
    """
    tg = TuckerGraph(dim, size, rank)

    executors_update = []

    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [list(range(dim, 2 * dim)), list(range(dim))])

        executor = ad.Executor([simplify(new_core_A)])
        executors_update.append(executor)

    executor_loss = ad.Executor([simplify(tg.losses[0])])

    return tg, executors_update, executor_loss, tg.intermediates
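
The axes pattern [[dim, ..., 2*dim-1], [0, ..., dim-1]] applies the inverse Hessian to the gradient as a multilinear solve: the Hessian of a dim-way core has shape core.shape + core.shape, and contracting its trailing half against the gradient is the tensor analogue of solve(H, g). A NumPy sketch of that identity (shapes are illustrative):

import numpy as np

dim, rank = 2, 3
core_shape = (rank,) * dim
n = rank ** dim

# A well-conditioned test "Hessian" of shape core_shape + core_shape.
H_flat = np.random.rand(n, n) + n * np.eye(n)
H = H_flat.reshape(core_shape + core_shape)
g = np.random.rand(*core_shape)

step = np.tensordot(np.linalg.tensorinv(H, ind=dim), g,
                    axes=[list(range(dim, 2 * dim)), list(range(dim))])
assert np.allclose(step.reshape(n), np.linalg.solve(H_flat, g.reshape(n)))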
Example #9
def cpd_als(dim, size, rank, num_iter, input_val=None):
    """Run plain CP ALS; each factor update re-runs its full graph."""

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val is None:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for it in range(num_iter):
        # als iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})
            A_val_list[i], = executor.run(feed_dict=feed_dict,
                                          out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)
        print(f'At iteration {it} the loss is: {loss_val}')

    return A_val_list
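
A hedged usage sketch, assuming the helpers referenced above (cpd_graph, init_rand_cp, simplify) are importable from this repository. Unlike cpd_als_shared_exec, every factor update here re-runs its full graph instead of reusing cached intermediates:

# Illustrative sizes; returns the factor matrices after num_iter sweeps.
factors = cpd_als(dim=3, size=10, rank=5, num_iter=10)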