Example 1
def tensordot(x, y, axes=2):
    """Simple translation of tensordot syntax to einsum.
    """
    torch, _ = _get_torch_and_device()

    if _TORCH_HAS_TENSORDOT:
        return torch.tensordot(x, y, dims=axes)

    xnd = x.ndimension()
    ynd = y.ndimension()

    # convert int argument to (list[int], list[int])
    if isinstance(axes, int):
        axes = range(xnd - axes, xnd), range(axes)

    # convert (int, int) to (list[int], list[int])
    if isinstance(axes[0], int):
        axes = (axes[0],), axes[1]
    if isinstance(axes[1], int):
        axes = axes[0], (axes[1],)

    # initialize empty indices
    x_ix = [None] * xnd
    y_ix = [None] * ynd
    out_ix = []

    # fill in repeated indices
    available_ix = iter(_torch_symbols_base)
    for ax1, ax2 in zip(*axes):
        repeat = next(available_ix)
        x_ix[ax1] = repeat
        y_ix[ax2] = repeat

    # fill in the rest, and maintain output order
    for i in range(xnd):
        if x_ix[i] is None:
            leave = next(available_ix)
            x_ix[i] = leave
            out_ix.append(leave)
    for i in range(ynd):
        if y_ix[i] is None:
            leave = next(available_ix)
            y_ix[i] = leave
            out_ix.append(leave)

    # form full string and contract!
    einsum_str = "{},{}->{}".format(*map("".join, (x_ix, y_ix, out_ix)))
    return einsum(einsum_str, x, y)
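
A minimal sanity check of the int-axes branch above (a sketch assuming plain torch rather than this module's einsum wrapper): dims=2 contracts the trailing two dimensions of x with the leading two of y, which is exactly what the generated einsum string spells out.

import torch

x = torch.randn(2, 3, 4)
y = torch.randn(3, 4, 5)
ref = torch.tensordot(x, y, dims=2)             # contract last two dims of x with first two of y
via_einsum = torch.einsum("abc,bcd->ad", x, y)  # the contraction the int branch would build
assert ref.shape == (2, 5)
assert torch.allclose(ref, via_einsum, atol=1e-6)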
Example 2
def apply_TM_1sO(state, env, edge, op=None, verbosity=0):
    r"""
    :param state: underlying 1-site C4v symmetric wavefunction
    :param env: C4v symmetric environment corresponding to ``state``
    :param edge: tensor of dimensions :math:`\chi \times D^2 \times \chi`
    :param op: operator to be inserted into transfer matrix
    :param verbosity: logging verbosity
    :type state: IPEPS_C4V
    :type env: ENV_C4V
    :type edge: torch.tensor
    :type op: torch.tensor
    :type verbosity: int
    :return: ``edge`` with a single instance of the transfer matrix applied.
             The resulting tensor has an index structure identical to the
             original ``edge``
    :rtype: torch.tensor
    
    Applies a single instance of the "transfer matrix" to the ``edge`` tensor  
    by contracting the following network::

         -----T----------
        |     |     
       edge--(a^+ op a)--
        |     |     
         -----T----------

    where the physical indices `s` and `s'` of the on-site tensor :math:`a` 
    and its hermitian conjugate :math:`a^\dagger` are contracted with 
    identity :math:`\delta_{s,s'}` or ``op`` (if supplied).
    """
    # TODO stronger verification
    if op is not None:
        assert (len(op.size()) == 2)

    T = env.T[env.keyT]
    # Assume index structure of ``edge`` tensor to be as follows
    #
    #       -- 0
    # edge |-- 1
    #       -- 2
    #
    #   --0 0--T--1->2
    #  |       2->3
    # edge--1->0
    #  |
    #   --2->1
    E = torch.tensordot(edge, T, ([0], [0]))
    if verbosity > 0: print("E=edgeT " + str(E.size()))

    # TODO - more efficient contraction with uncontracted-double-layer on-site tensor
    #        Possibly reshape indices 1,2 of E, which are to be contracted with
    #        on-site tensor and contract bra,ket in two steps instead of creating
    #        double layer tensor
    #    /
    # --A--
    #  /|s
    #   X
    # s'|/
    # --A--
    #  /
    #
    # where X is Id or op
    a = next(iter(state.sites.values()))
    dims_a = a.size()
    X = torch.eye(dims_a[0], dtype=a.dtype,
                  device=a.device) if op is None else op
    A= torch.einsum('mefgh,mn,nabcd->eafbgchd',a,X,a).contiguous()\
        .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2)

    #   ---------T--2->1
    #  |         3
    #  |         0
    # edge--0 1--A--3
    #  |         2
    #   ----1->0
    E = torch.tensordot(E, A, ([0, 3], [1, 0]))
    if verbosity > 0: print("E=EA " + str(E.size()))

    #   -------T--1->0
    #  |       |
    #  |       |
    # edge-----A--3->1
    #  |       2
    #  |       2
    #   --0 0--T--1->2
    E = torch.tensordot(E, T, ([0, 2], [0, 2]))
    if verbosity > 0: print("E=ET " + str(E.size()))

    return E
Example 3
 def _dot_simililarity(x, y):
     v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)
     # x shape: (N, 1, C)
     # y shape: (1, C, 2N)
     # v shape: (N, 2N)
     return v
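
A short shape check for the comments above (N and C are assumed names for the batch and feature sizes): the dims=2 contraction collapses the shared (1, C) block, leaving the (N, 2N) matrix of pairwise dot products.

import torch

N, C = 4, 6
x = torch.randn(N, C)
y = torch.randn(2 * N, C)
v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)  # contracts the shared (1, C) block
assert v.shape == (N, 2 * N)
assert torch.allclose(v, x @ y.T, atol=1e-6)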
Example 4
def apply_TM_1sO_2(state, env, edge, op=None, verbosity=0):
    r"""
    :param state: underlying 1-site C4v symmetric wavefunction
    :param env: C4v symmetric environment corresponding to ``state``
    :param edge: tensor of dimensions :math:`\chi \times (D^2)^2 \times \chi`
    :param op: two-site operator to be inserted within the two-site transfer matrix
    :param verbosity: logging verbosity
    :type state: IPEPS_C4V
    :type env: ENV_C4V
    :type edge: torch.tensor
    :type op: torch.tensor
    :type verbosity: int
    :return: ``edge`` with a single instance of the transfer matrix applied.
             The resulting tensor has an index structure identical to the
             original ``edge``
    :rtype: torch.tensor
    
    Applies a single instance of the two-site "transfer matrix" to 
    the ``edge`` tensor by contracting the following network, or its corresponding 
    rotation depending on the ``direction``::

                 -----T----------
                |     |          
               edge--(a^+ o1 a)--
                |     |   |      
                |----(a^+ o2 a)--
                |     |          
                 -----T----------

    The two-site operator is first decomposed into a simple MPO o1--o2
    (TODO case where op comes with an extra MPO index)::
        
         s1'  s2'    s1'      s2'
        |  op   | = |o1|-----|o2|
         s1   s2     s1       s2  

    where the physical indices `s` and `s'` of the on-site tensor :math:`a` 
    and its hermitian conjugate :math:`a^\dagger` are contracted with 
    identity :math:`\delta_{s,s'}` or ``o1``, ``o2``.
    """

    # TODO stronger verification
    op_1, op_2 = None, None
    if op is not None:
        if len(op.size()) == 4:
            # pre-process ``op``
            # TODO possibly truncate/compress according to the vanishingly small singular values
            dims_op = op.size()
            op_mat = op.permute(0, 2, 1, 3).contiguous().reshape(
                dims_op[0]**2, dims_op[0]**2)
            op_1, s, op_2 = torch.svd(op_mat)
            op_1 = op_1.reshape(dims_op[0], dims_op[0], s.size()[0])
            op_2 = torch.einsum('i,ij->ij', s,
                                op_2.t()).reshape(s.size()[0], dims_op[0],
                                                  dims_op[0])
            op_2 = op_2.permute(1, 2, 0).contiguous()
        else:
            raise ValueError(f"Invalid op: rank {op.size()}")

    # Four basic cases of passed op
    def get_aXa(a, op):
        # a - on-site tensor
        # op - operator
        dims_a = a.size()
        dims_op = None if op is None else op.size()
        if op is None:
            # identity
            A= torch.einsum('nefgh,nabcd->eafbgchd',a,a).contiguous()\
                .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2)
        elif len(dims_op) == 2:
            # one-site operator
            A= torch.einsum('mefgh,mn,nabcd->eafbgchd',a,op,a).contiguous()\
                .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2)
        elif len(dims_op) == 3:
            # edge operators of some MPO within the transfer matrix
            #
            # 0                   0
            # |                   |
            # op--2 ... or ... 2--op
            # |                   |
            # 1                   1
            #
            # assume the last index of the op is the MPO dimension.
            # It will become the last index of the resulting edge
            A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,op,a).contiguous()\
                .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1)
        if verbosity > 0: print(f"aXa {A.size()}")
        return A

    a = next(iter(state.sites.values()))
    T = env.T[env.keyT]
    # Assume index structure of ``edge`` tensor to be as follows
    #
    #       -- 0
    # edge |-- 1
    #      |---2
    #       -- 3
    #
    #   ----0 0--T--1->0
    #  |         2->1
    # edge--1->2
    #  |
    #   ----2->3
    #  |
    #   ----3->4
    E = torch.tensordot(T, edge, ([0], [0]))
    if verbosity > 0: print("E=edgeT " + str(E.size()))

    # TODO - more efficient contraction with uncontracted-double-layer on-site tensor
    #        Possibly reshape indices 1,2 of E, which are to be contracted with
    #        on-site tensor and contract bra,ket in two steps instead of creating
    #        double layer tensor
    #    /
    # --A--
    #  /|s
    #   X
    # s'|/
    # --A--
    #  /
    #
    # where X is Id or op
    A = get_aXa(a, op_1)

    #   ---------T--0
    #  |         1
    #  |         0
    # edge--2 1--A--3->4
    #  |      3<-2 \
    #   ----3->1   (4->5)
    #  |
    #   ----4->2
    E = torch.tensordot(E, A, ([1, 2], [0, 1]))
    if verbosity > 0: print("E=edgeTA " + str(E.size()))

    A = get_aXa(a, op_2)
    #   ---------T--0
    #  |         |
    # edge-------A--4->2
    #  |         | \
    #  |         3 (5)
    #  |         0 (4)
    #  |         | /
    #   ----1 1--A--2->3
    #  |         3->4
    #   ----2->1
    E = torch.tensordot(E,A,([1,3],[1,0])) if op is None else \
        torch.tensordot(E,A,([1,3,5],[1,0,4]))
    if verbosity > 0: print("E=edgeTAA " + str(E.size()))

    #   ---------T--0
    #  |         |
    # edge-------A--2->1
    #  |         |
    #   ---------A--3->2
    #  |         3
    #  |         2
    #   ----1 0--T2--1->3
    E = torch.tensordot(E, T, ([1, 3], [0, 2]))
    if verbosity > 0: print("E=edgeTAAT " + str(E.size()))

    return E
Example 5
 def vr(self):
     return torch.tensordot(self.positions, self.velocities) / self.r
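
A sketch of what the default dims=2 does here (the (5, 3) shapes and the meaning of self.r are assumptions): both tensors are fully contracted, so the numerator is the scalar sum of the per-particle r_i . v_i products.

import torch

positions = torch.randn(5, 3)
velocities = torch.randn(5, 3)
r = positions.norm()
vr = torch.tensordot(positions, velocities) / r   # default dims=2: full contraction
assert torch.allclose(vr, (positions * velocities).sum() / r, atol=1e-6)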
Example 6
    def loss(self, samples):
        """
        Computes the Distributional Q-learning loss, based on projecting the
        discounted rewards + target Q-distribution into the current Q-domain,
        with cross-entropy loss.  

        Returns loss and KL-divergence-errors for use in prioritization.
        """

        delta_z = (self.V_max - self.V_min) / (self.agent.n_atoms - 1)
        z = torch.linspace(self.V_min, self.V_max, self.agent.n_atoms)
        # Make a 2-D tensor of the contracted z_domain for each data point,
        # with zeros where next value should not be added.
        next_z = z * (self.discount**self.n_step_return)  # [P']
        next_z = torch.ger(1 - samples.done_n.float(), next_z)  # [B,P']
        ret = samples.return_.unsqueeze(1)  # [B,1]
        next_z = torch.clamp(ret + next_z, self.V_min, self.V_max)  # [B,P']

        z_bc = z.view(1, -1, 1)  # [1,P,1]
        next_z_bc = next_z.unsqueeze(1)  # [B,1,P']
        abs_diff_on_delta = abs(next_z_bc - z_bc) / delta_z
        projection_coeffs = torch.clamp(1 - abs_diff_on_delta, 0, 1)  # Mostly 0.
        # projection_coeffs is a 3-D tensor: [B,P,P']
        # dim-0: independent data entries
        # dim-1: base_z atoms (remains after projection)
        # dim-2: next_z atoms (summed in projection)

        with torch.no_grad():
            target_ps = self.agent.target(*samples.target_inputs)  # [B,A,P']
            if self.double_dqn:
                next_ps = self.agent(*samples.target_inputs)  # [B,A,P']
                next_qs = torch.tensordot(next_ps, z, dims=1)  # [B,A]
                next_a = torch.argmax(next_qs, dim=-1)  # [B]
            else:
                target_qs = torch.tensordot(target_ps, z, dims=1)  # [B,A]
                next_a = torch.argmax(target_qs, dim=-1)  # [B]
            target_p_unproj = select_at_indexes(next_a, target_ps)  # [B,P']
            target_p_unproj = target_p_unproj.unsqueeze(1)  # [B,1,P']
            target_p = (target_p_unproj * projection_coeffs).sum(-1)  # [B,P]
        ps = self.agent(*samples.agent_inputs)  # [B,A,P]
        p = select_at_indexes(samples.action, ps)  # [B,P]
        p = torch.clamp(p, EPS, 1)  # NaN-guard.
        losses = -torch.sum(target_p * torch.log(p), dim=1)  # Cross-entropy.

        if self.prioritized_replay:
            losses *= samples.is_weights

        target_p = torch.clamp(target_p, EPS, 1)
        KL_div = torch.sum(target_p *
                           (torch.log(target_p) - torch.log(p.detach())),
                           dim=1)
        KL_div = torch.clamp(KL_div, EPS, 1 / EPS)  # Avoid <0 from NaN-guard.

        if not self.mid_batch_reset:
            valid = valid_from_done(samples.done)
            loss = valid_mean(losses, valid)
            KL_div *= valid
        else:
            loss = torch.mean(losses)

        return loss, KL_div
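
A small sketch of the dims=1 contractions used for next_qs/target_qs above (B, A, P are assumed sizes): contracting the atom dimension of a probability tensor with the support z is just the per-(batch, action) expectation of the return distribution.

import torch

B, A, P = 2, 3, 51
z = torch.linspace(-10.0, 10.0, P)                # atom support
ps = torch.softmax(torch.randn(B, A, P), dim=-1)  # categorical distribution over atoms
qs = torch.tensordot(ps, z, dims=1)               # [B, A] expected returns
assert qs.shape == (B, A)
assert torch.allclose(qs, (ps * z).sum(-1), atol=1e-6)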
Example 7
    y_torch = torch.from_numpy(y)

    C_matrix = torch.from_numpy(
        np.array([[1, 0, 0, 0], [0, 0, 1, 0]], dtype=np.float32))

    optimizer = optim.RMSprop(func.parameters(), lr=1e-3)
    end = time.time()

    time_meter = RunningAverageMeter(0.97)
    loss_meter = RunningAverageMeter(0.97)

    ii = 0
    for itr in range(1, args.niters + 1):
        optimizer.zero_grad()
        pred_x = odeint(func, x0_torch, time_torch)
        pred_y = torch.tensordot(pred_x, C_matrix, ((-1, ), (1, )))
        loss = torch.mean(torch.abs(pred_y - y_torch))
        loss.backward()
        optimizer.step()

        time_meter.update(time.time() - end)
        loss_meter.update(loss.item())

        if itr % args.test_freq == 0:
            with torch.no_grad():
                pred_x = odeint(func, x0_torch, time_torch)
                pred_y = torch.tensordot(pred_x, C_matrix, ((-1, ), (1, )))
                loss = torch.mean(torch.abs(pred_y - y_torch))
                print('Iter {:04d} | Total Loss {:.6f}'.format(
                    itr, loss.item()))
                ii += 1
Example 8
 def sample(self, p, z=None):
     q = torch.tensordot(p, z or self.z, dims=1)
     return super().sample(q)
Example 9
    def forward(self, x, offset):
        if x.numel() == 0:
            # When input is empty, we want to return an empty tensor with "correct" shape,
            # so that the following operations will not panic
            # if they check for the shape of the tensor.
            # This computes the height and width of the output tensor
            output_shape = [
                (i + 2 * p - (di * (k - 1) + 1)) // s + 1
                for i, p, di, k, s in zip(
                    x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride
                )
            ]
            output_shape = [x.shape[0], self.weight.shape[0]] + output_shape
            return _NewEmptyTensorOp.apply(x, output_shape)

        # Allocate the output tensor; it is filled below, one output pixel at a time.
        out = x.new_zeros(self._output_size(x))

        # x.shape == [batch, in_channels, height, width]
        # offset.shape == [batch, 2 * in_channels * kernel_height * kernel_width, height, width]

        batch_size = x.shape[0]

        # pad input, left, right, top, bottom
        x = F.pad(x, pad=[self.padding, self.padding, self.padding, self.padding], value=0)

        xh = x.shape[2]
        xw = x.shape[3]

        h_start = 0 + (self.kernel_size // 2) + self.dilation - 1
        w_start = 0 + (self.kernel_size // 2) + self.dilation - 1
        h_end = xh - (self.kernel_size // 2) - self.dilation + 1
        w_end = xw - (self.kernel_size // 2) - self.dilation + 1

        # Every step is centered on the kernel center; for even-sized kernels it is at the "bottom right" pixel of
        # the most central 4 pixels. Strides which do not line up with the input maps will cut out pixel columns.

        base_kernel_offsets = x.new_zeros(size=(self.kernel_size, self.kernel_size, 2)).long()
        for h in range(self.kernel_size):
            base_kernel_offsets[h, :, 0] = (h - (self.kernel_size // 2)) * self.dilation
        for w in range(self.kernel_size):
            base_kernel_offsets[:, w, 1] = (w - (self.kernel_size // 2)) * self.dilation

        deform_offsets = offset.new_zeros(size=(batch_size, self.kernel_size, self.kernel_size, 2))

        for h in range(h_start, h_end, self.stride):
            for w in range(w_start, w_end, self.stride):
                # Construct input feature map pixel column
                # Pixel column shape = [batch, in_channels, kernel_height, kernel_width]
                out_h = (h - h_start) // self.stride
                out_w = (w - w_start) // self.stride
                for i in range(self.kernel_size):
                    for j in range(self.kernel_size):
                        deform_offsets[:, i, j, 0] = offset[:, 2*(i*self.kernel_size+j), out_h, out_w]
                        deform_offsets[:, i, j, 1] = offset[:, 2*(i*self.kernel_size+j)+1, out_h, out_w]

                sample_idx = torch.Tensor([[[[h, w]]]]) \
                    + base_kernel_offsets.unsqueeze(dim=0) \
                    + deform_offsets \
                    + 0.5

                sampled_points = batch_kernel_interpolate(x, sample_idx)
                # sampled_points shape = (batch, in_channels, kernel_size, kernel_size)

                # weight = (out_channels, in_channels, kernel_size, kernel_size)
                out[:, :, out_h, out_w] = torch.tensordot(sampled_points, self.weight.permute(1, 2, 3, 0), dims=3)

        if self.norm is not None:
            out = self.norm(out)
        if self.activation is not None:
            out = self.activation(out)
        return out
Example 10
    def diffusion_dynamics(self,
                           beliefs: torch.Tensor,
                           actions=None,
                           compute_drift=True,
                           compute_dispersion=True,
                           approx_state_exp_sampling=False):
        """
        Computes the Wonham diffusion dynamics
        :param beliefs: Tensor of beliefs. Must be of shape [Batch x States]
        :param actions: List of actions to consider of shape [Batch x Actions]. If None, all actions will be considered.
        :param compute_drift: Bool whether the drift terms should be computed
        :param compute_dispersion: Bool whether the dispersion terms should be computed
        :param approx_state_exp_sampling: If True, expectation over next state is approximated by sampling one state
        :return: Drift vector of shape [Batch x Actions x States] if compute_drift is True,
                 Dispersion matrix of shape [Batch, Actions, States, Noise] if compute_dispersion is True
        """
        if actions is None:
            actions = torch.arange(self.drift_tensor.shape[0])[None]
        elif actions.ndim == 1:
            actions = actions.reshape(-1, 1)

        # g_bar is of shape [N=NumSamples, ADim, ODim]
        da = self.drift_tensor[
            actions]  # drift tensor of selected actions: B A S O
        # noinspection PyArgumentList
        g_bar = torch.sum(beliefs[:, None, :, None] * da, axis=2)

        # g minus g_bar is of shape [N, A, S, O]
        gmgb = da - g_bar[:, :, None, :]

        # inv(hh) * (g-g_bar) is of shape [NumSamples, A, S, O]
        hhg = gmgb @ self.outer_dispersion_inv

        result = []

        if compute_drift:
            # GHG is of shape B, A, S
            # noinspection PyArgumentList
            ghg = torch.sum(gmgb * hhg, axis=-1)

            # BGHG is of shape B, A, S
            bghg = beliefs[:, None, :] * ghg

            # sumtb is of shape B, A, S
            if not approx_state_exp_sampling:
                t = self.transition_matrix[...,
                                           actions].permute(2, 3, 0,
                                                            1)  # B A S' S'
                # noinspection PyArgumentList
                tb = torch.sum(beliefs[:, None, :, None] * t, axis=2)
            else:
                ss = Categorical(probs=beliefs).sample()[:, None]  # B x 1
                tb = self.transition_matrix[ss, :, actions]

            # mu is of shape N, A, S
            mu_vec = bghg + tb

            result += [mu_vec]

        if compute_dispersion:
            # of shape N, A, S, NoiseDim
            sigma = beliefs[:, None, :, None] * torch.tensordot(
                self.dispersion_matrix, hhg, dims=([0], [-1])).permute(
                    1, 2, 3, 0)

            result += [sigma]

        if len(result) == 1:
            return result[0]
        else:
            return result
Example 11
def computeSurprisals(linearized):
    assert len(linearized) == MAX_BOUNDARY

    chart = [[
        torch.cuda.FloatTensor([[float("-Inf") for _ in range(BATCHSIZE)]
                                for _ in itos_setOfNonterminals])
        for _ in linearized
    ] for _ in linearized]

    for length in range(
            1,
            len(linearized) + 1
    ):  # the NUMBER of words spanned. start+length is the first word OUTSIDE the constituent
        for start in range(
                len(linearized
                    )):  # the index of the first word taking part in the thing
            if start + length - 1 >= len(linearized):
                continue
            if length == 1:  # TODO for words at the boundary, immediately add prefix and suffix counts
                if start < LEFT_CONTEXT:
                    for preterminal in terminals:
                        chart[start][start][
                            stoi_setOfNonterminals[preterminal]].fill_(0)
                else:
                    if wordCounts.get(linearized[start],
                                      0) < OOV_THRESHOLD:  # OOV
                        for preterminal in terminals:
                            chart[start][start][
                                stoi_setOfNonterminals[preterminal]].fill_(
                                    log(OOV_COUNT) -
                                    log(nonAndPreterminals[preterminal] +
                                        OOV_COUNT + OTHER_WORDS_SMOOTHING *
                                        len(wordCounts)))
                    else:
                        for preterminal in terminals:
                            count = terminals[preterminal].get(
                                linearized[start], 0) + OTHER_WORDS_SMOOTHING
                            chart[start][start][
                                stoi_setOfNonterminals[preterminal]].fill_(
                                    log(count) -
                                    log(nonAndPreterminals[preterminal] +
                                        OOV_COUNT + OTHER_WORDS_SMOOTHING *
                                        len(wordCounts)))
                    assert start == start + length - 1
            else:
                for start2 in range(start + 1, len(linearized)):
                    left = chart[start][start2 - 1].view(-1)
                    right = chart[start2][start + length - 1].view(-1)
                    maxLeft = torch.max(left)
                    maxRight = torch.max(right)
                    if float(maxLeft) == float("-inf") or float(
                            maxRight) == float("-inf"):  # everything will be 0
                        continue

                    # VERSION WITH TENSORDOT
#                  resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([0], [1]))
#                 resultTotal = torch.tensordot(resultLeft, torch.exp(right-maxRight), dims=([1], [0]))

# VERSION WITH BILINEAR
#               print(left.size(), right.size())
                    resultTotal = torch.nn.functional.bilinear(
                        torch.exp(left - maxLeft), torch.exp(right - maxRight),
                        binary_rules_matrix)
                    #print(resultTotal)
                    #print(resultTotal2)
                    #assert (resultTotal-resultTotal2).abs().max() < 1e-5, (resultTotal-resultTotal2).abs().max()
                    #                  quit()

                    resultTotalLog = torch.log(resultTotal) + (maxLeft +
                                                               maxRight)
                    resultTotalLog[resultTotal <= 0].fill_(float("-inf"))
                    entry = chart[start][start + length - 1]
                    #assert "nan" not in str(entry.max())
                    #assert "nan" not in str(resultTotalLog.max())
                    chart[start][start + length - 1] = logSumExp(
                        resultTotalLog.view(-1, BATCHSIZE), entry)
                    #assert "nan" not in str(chart[start][start+length-1].max())
    #############################
    # Now consider different endpoints
    valuesPerBoundary = [0]
    for BOUNDARY in range(LEFT_CONTEXT + 1, len(linearized) + 1):
        chartFromStart = [
            torch.cuda.FloatTensor([[float("-Inf") for _ in range(BATCHSIZE)]
                                    for _ in itos_setOfNonterminals])
            for _ in range(BOUNDARY)
        ]

        if True:
            right = chart[BOUNDARY - 1][BOUNDARY - 1].view(-1)
            right_max = torch.max(right)

            result = torch.tensordot(invertedLeft,
                                     torch.exp(right - right_max),
                                     dims=([1], [0]))
            resultLog = (torch.log(result) + right_max).view(-1, BATCHSIZE)
            chartFromStart[BOUNDARY - 1] = resultLog

        for start in range(
                BOUNDARY
        )[::
          -1]:  # now construct potential constituents that start at `start', but end outside of the portion
            for start2 in range(start + 1, BOUNDARY):

                left = chart[start][start2 - 1].view(-1)
                right = chartFromStart[start2].view(-1)
                maxLeft = torch.max(left)
                maxRight = torch.max(right)
                if float(maxLeft) == float("-inf") or float(maxRight) == float(
                        "-inf"):  # everything will be 0
                    continue
                resultLeft = torch.tensordot(torch.exp(left - maxLeft),
                                             binary_rules_matrix,
                                             dims=([0], [1]))
                resultTotal = torch.tensordot(resultLeft,
                                              torch.exp(right - maxRight),
                                              dims=([1], [0]))
                #                  resultTotalLog = torch.log(resultTotal)+maxLeft+maxRight
                #                 resultTotalLog[resultTotal <= 0].fill_(float("-inf"))

                #                 resultTotalLog_max = torch.max(resultTotalLog)

                result = torch.tensordot(invertedLeft,
                                         resultTotal,
                                         dims=([1], [0]))
                resultLog = (torch.log(result) + (maxLeft + maxRight)).view(
                    -1, BATCHSIZE)
                resultLog[result <= 0].fill_(float("-inf"))
                chartFromStart[start] = logSumExp(chartFromStart[start],
                                                  resultLog)

#         for root in itos_setOfNonterminals:
#             count = roots.get(root, 0)
#             iroot = stoi_setOfNonterminals[root]
#             if chartFromStart[0][iroot] is not None:
#                if count == 0:
#                   chartFromStart[0][iroot] = torch.cuda.FloatTensor([float("-Inf") for _ in range(BATCHSIZE)])
#                else:
#                  chartFromStart[0][iroot] += log(count) - log(roots["__TOTAL__"])
#

        prefixProb = float(
            chartFromStart[0][stoi_setOfNonterminals["_SENTENCES_"]]
        )  #log(sum([exp(float(x[0])) if x[0] is not None else 0 for x in chartFromStart[0]])) # log P(S|root) -- the full mass comprising all possible trees (including spurious ambiguities arising from the PCFG conversion)

        surprisalTableSums[BOUNDARY - 1] += prefixProb
        surprisalTableCounts[BOUNDARY - 1] += 1
        valuesPerBoundary.append(prefixProb)
        print(BOUNDARY, prefixProb, linearized)
        assert prefixProb < valuesPerBoundary[-2], "bug or numerical problem?"
Example 12
 def forward(self, x):
   assert(x.shape[-1] == self.input_dims)
   raw_freqs = torch.tensordot(x, self.bands, dims=0)
   raw_freqs = raw_freqs.reshape(x.shape[:-1] + (-1,))
   return torch.cat([ raw_freqs.sin(), raw_freqs.cos() ], dim=-1)
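
A sketch of the dims=0 call above (the band values and input shape are assumptions): contracting zero dimensions is an outer product, so every input coordinate gets multiplied by every frequency band before the sin/cos embedding.

import torch

x = torch.randn(5, 3)                      # (..., input_dims)
bands = torch.tensor([1.0, 2.0, 4.0])      # assumed frequency bands
raw = torch.tensordot(x, bands, dims=0)    # dims=0: outer product, shape (5, 3, 3)
raw = raw.reshape(x.shape[:-1] + (-1,))    # (5, 9)
emb = torch.cat([raw.sin(), raw.cos()], dim=-1)
assert emb.shape == (5, 18)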
Example 13
File: util.py Project: ucals/pyro
    def compute_expectation(self, costs):
        """
        Returns a differentiable expected cost, summing over costs at given ordinals.

        :param dict costs: A dict mapping ordinals to lists of cost tensors
        :returns: a scalar expected cost
        :rtype: torch.Tensor or float
        """
        # Share computation across all cost terms.
        with shared_intermediates() as cache:
            ring = MarginalRing(cache=cache)
            expected_cost = 0.
            for ordinal, cost_terms in costs.items():
                log_factors = self._get_log_factors(ordinal)
                scale = math.exp(
                    sum(x for x in log_factors
                        if not isinstance(x, torch.Tensor)))
                log_factors = [
                    x for x in log_factors if isinstance(x, torch.Tensor)
                ]

                # Collect log_prob terms to query for marginal probability.
                queries = {
                    frozenset(cost._pyro_dims): None
                    for cost in cost_terms
                }
                for log_factor in log_factors:
                    key = frozenset(log_factor._pyro_dims)
                    if queries.get(key, False) is None:
                        queries[key] = log_factor
                # Ensure a query exists for each cost term.
                for cost in cost_terms:
                    key = frozenset(cost._pyro_dims)
                    if queries[key] is None:
                        query = torch.zeros_like(cost)
                        query._pyro_dims = cost._pyro_dims
                        log_factors.append(query)
                        queries[key] = query

                # Perform sum-product contraction. Note that plates never need to be
                # product-contracted due to our plate-based dependency ordering.
                sum_dims = set().union(*(x._pyro_dims
                                         for x in log_factors)) - ordinal
                for query in queries.values():
                    require_backward(query)
                root = ring.sumproduct(log_factors, sum_dims)
                root._pyro_backward()
                probs = {
                    key: query._pyro_backward_result.exp()
                    for key, query in queries.items()
                }

                # Aggregate prob * cost terms.
                for cost in cost_terms:
                    key = frozenset(cost._pyro_dims)
                    prob = probs[key]
                    prob._pyro_dims = queries[key]._pyro_dims
                    mask = prob > 0
                    if torch._C._get_tracing_state() or not mask.all():
                        mask._pyro_dims = prob._pyro_dims
                        cost, prob, mask = packed.broadcast_all(
                            cost, prob, mask)
                        prob = prob.masked_select(mask)
                        cost = cost.masked_select(mask)
                    else:
                        cost, prob = packed.broadcast_all(cost, prob)
                    expected_cost = expected_cost + scale * torch.tensordot(
                        prob, cost, prob.dim())

        LAST_CACHE_SIZE[0] = count_cached_ops(cache)
        return expected_cost
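
A sketch of the final `scale * torch.tensordot(prob, cost, prob.dim())` term (shapes are assumptions): passing `prob.dim()` as `dims` contracts every dimension, so for same-shaped packed tensors it reduces to the scalar sum(prob * cost), i.e. a probability-weighted cost.

import torch

prob = torch.rand(3, 4)
cost = torch.randn(3, 4)
expected = torch.tensordot(prob, cost, prob.dim())  # contract every dimension
assert torch.allclose(expected, (prob * cost).sum(), atol=1e-6)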
Example 14
  def forward(self, betas, pose, trans, simplify=False):
    
    """
          Construct a compute graph that takes in parameters and outputs a tensor as
          model vertices. Face indices are also returned as a numpy ndarray.
          
          20190128: Add batch support.

          Parameters:
          ---------
          pose: Also known as 'theta', an [N, 24, 3] tensor indicating child joint rotation
          relative to parent joint. For root joint it's global orientation.
          Represented in an axis-angle format.

          betas: Parameter for model shape. A tensor of shape [N, 10] as coefficients of
          PCA components. Only 10 components were released by SMPL author.

          trans: Global translation tensor of shape [N, 3].

          Return:
          ------
          A 3-D tensor of [N * 6890 * 3] for vertices,
          and the corresponding [N * 19 * 3] joint positions.

    """
    batch_num = betas.shape[0]
    id_to_col = {self.kintree_table[1, i]: i
                 for i in range(self.kintree_table.shape[1])}
    parent = {
      i: id_to_col[self.kintree_table[0, i]]
      for i in range(1, self.kintree_table.shape[1])
    }
    print("v_t",self.v_template.shape)
    temp=torch.tensordot(betas, self.shapedirs, dims=([1], [2]))
    v_shaped =self.v_template.unsqueeze(0).expand(betas.size(0), 6890, 3)#self.v_template #torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template
    print("temp",temp.shape)
    print("v_s",v_shaped.shape)
    J = torch.matmul(self.J_regressor, v_shaped)
    R_cube_big = self.rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3)

    if simplify:
      v_posed = v_shaped
    else:
      R_cube = R_cube_big[:, 1:, :, :]
      I_cube = (torch.eye(3, dtype=torch.float32).unsqueeze(dim=0) + \
        torch.zeros((batch_num, R_cube.shape[1], 3, 3), dtype=torch.float32)).to(self.device)
      lrotmin = (R_cube - I_cube).reshape(batch_num, -1, 1).squeeze(dim=2)
      v_posed = v_shaped + torch.tensordot(lrotmin, self.posedirs, dims=([1], [2]))

    results = []
    results.append(
      self.with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2))
    )
    for i in range(1, self.kintree_table.shape[1]):
      results.append(
        torch.matmul(
          results[parent[i]],
          self.with_zeros(
            torch.cat(
              (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))),
              dim=2
            )
          )
        )
      )
    
    stacked = torch.stack(results, dim=1)
    results = stacked - \
      self.pack(
        torch.matmul(
          stacked,
          torch.reshape(
            torch.cat((J, torch.zeros((batch_num, 24, 1), dtype=torch.float32).to(self.device)), dim=2),
            (batch_num, 24, 4, 1)
          )
        )
      )
    # Restart from here
    T = torch.tensordot(results, self.weights, dims=([1], [1])).permute(0, 3, 1, 2)
    rest_shape_h = torch.cat(
      (v_posed, torch.ones((batch_num, v_posed.shape[1], 1), dtype=torch.float32).to(self.device)), dim=2
    )
    v = torch.matmul(T, torch.reshape(rest_shape_h, (batch_num, -1, 4, 1)))
    v = torch.reshape(v, (batch_num, -1, 4))[:, :, :3]
    result = v + torch.reshape(trans, (batch_num, 1, 3))
    # estimate 3D joint locations
    # print(result.shape)
    # print(self.joint_regressor.shape)
    joints = torch.tensordot(result, self.joint_regressor.transpose(1, 0), dims=([1], [0])).transpose(1, 2)
    return result, joints
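
A shape sketch for the blend-shape style contractions above (the shapedirs layout is an assumption consistent with the docstring): contracting the 10 PCA coefficients against the last axis of the basis yields per-batch, per-vertex 3-D offsets.

import torch

N = 2
betas = torch.randn(N, 10)              # PCA shape coefficients
shapedirs = torch.randn(6890, 3, 10)    # per-vertex shape basis (assumed layout)
offsets = torch.tensordot(betas, shapedirs, dims=([1], [2]))
assert offsets.shape == (N, 6890, 3)    # per-batch vertex displacements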
Example 15
    def forward(self, features, paths_indices, other_info):
        """
        features: n_nodes x (input_path_size) x input_size
        paths_indices: n_paths x path_size (values < n_nodes)
        output: n_nodes x ((input_path_size) x path_size) x input_size
        """
        # convolution
        self.normalize_()
        norms = features.norm(dim=-1, keepdim=True)
        # norms: n_nodes x (input_path_size) x 1
        #output = features / norms.clamp(min=EPS)
        output = torch.tensordot(features, self.weight, dims=[[-1], [-1]])
        output = output / norms.clamp(min=EPS).unsqueeze(2)
        n_nodes = output.shape[0]
        if output.ndim == 4:
            output = output.permute(0, 2, 1, 3).contiguous()
        # output: n_nodes x path_size x (input_path_size) x hidden_size

        ## prepare masks
        mask = None
        if self.aggregation:
            mask = [None for _ in range(self.path_size)]
        if 'mask' in other_info and self.path_size > 1:
            mask = other_info['mask']

        output = output.view(n_nodes, self.path_size, -1)
        # output: n_nodes x path_size x (input_path_size x hidden_size)
        if self.aggregation:
            outputs = []
            for i in range(self.path_size):
                embeded = path_conv_agg(output, paths_indices[i],
                                        other_info['n_paths'][i], self.pooling,
                                        self.kappa, self.d_kappa, mask[i])
                outputs.append(embeded)
            output = torch.stack(outputs, dim=0)
            output = output.view(self.path_size, -1, self.hidden_size)
            # output: path_size x (n_nodes x (input_path_size)) x hidden_size
            output = norms.view(1, -1, 1) * output
        else:
            output = path_conv_agg(output, paths_indices[self.path_size - 1],
                                   other_info['n_paths'][self.path_size - 1],
                                   self.pooling, self.kappa, self.d_kappa,
                                   mask)
            # output: n_nodes x ((input_path_size) x hidden_size)
            output = output.view(n_nodes, -1, self.hidden_size)
            output = norms.view(n_nodes, -1, 1) * output
            # output: n_nodes x (input_path_size) x hidden_size

        lintrans = self._compute_lintrans()
        # linear transformation
        if self.aggregation:
            output = output.bmm(lintrans)
            # output = output.view(self.path_size, n_nodes, -1, self.hidden_size)
            output = output.permute(1, 0, 2)
            output = output.reshape(n_nodes, -1, self.hidden_size)
            output = output.contiguous()
        else:
            output = torch.tensordot(output, lintrans, dims=[[-1], [-1]])
        # output: n_nodes x ((input_path_size) x path_size) x hidden_size

        return output
Example 16
def rdm1x1(coord, state, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) for which reduced density matrix is constructed
    :param state: underlying wavefunction
    :param env: environment corresponding to ``state``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int) 
    :type state: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 1-site reduced density matrix with indices :math:`s;s'`
    :rtype: torch.tensor

    Computes 1-site reduced density matrix :math:`\rho_{1x1}` centered on vertex ``coord`` by 
    contracting the following tensor network::

        C--T-----C
        |  |     |
        T--A^+A--T
        |  |     |
        C--T-----C

    where the physical indices `s` and `s'` of on-site tensor :math:`A` at vertex ``coord`` 
    and its hermitian conjugate :math:`A^\dagger` are left uncontracted
    """
    # C(-1,-1)--1->0
    # 0
    # 0
    # T(-1,0)--2
    # 1
    rdm = torch.tensordot(env.C[(coord, (-1, -1))], env.T[(coord, (-1, 0))],
                          ([0], [0]))
    if verbosity > 0:
        print("rdm=CT " + str(rdm.size()))
    # C(-1,-1)--0
    # |
    # T(-1,0)--2->1
    # 1
    # 0
    # C(-1,1)--1->2
    rdm = torch.tensordot(rdm, env.C[(coord, (-1, 1))], ([1], [0]))
    if verbosity > 0:
        print("rdm=CTC " + str(rdm.size()))
    # C(-1,-1)--0
    # |
    # T(-1,0)--1
    # |             0->2
    # C(-1,1)--2 1--T(0,1)--2->3
    rdm = torch.tensordot(rdm, env.T[(coord, (0, 1))], ([2], [1]))
    if verbosity > 0:
        print("rdm=CTCT " + str(rdm.size()))
    # TODO - more efficient contraction with uncontracted-double-layer on-site tensor
    #        Possibly reshape indices 1,2 of rdm, which are to be contracted with
    #        on-site tensor and contract bra,ket in two steps instead of creating
    #        double layer tensor
    #    /
    # --A--
    #  /|s
    #
    # s'|/
    # --A--
    #  /
    #
    dimsA = state.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',state.site(coord),state.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])
    # C(-1,-1)--0
    # |
    # |             0->2
    # T(-1,0)--1 1--a--3
    # |             2\45(s,s')
    # |             2
    # C(-1,1)-------T(0,1)--3->1
    rdm = torch.tensordot(rdm, a, ([1, 2], [1, 2]))
    if verbosity > 0:
        print("rdm=CTCTa " + str(rdm.size()))
    # C(-1,-1)--0 0--T(0,-1)--2->0
    # |              1
    # |              2
    # T(-1,0)--------a--3->2
    # |              |\45->34(s,s')
    # |              |
    # C(-1,1)--------T(0,1)--1
    rdm = torch.tensordot(env.T[(coord, (0, -1))], rdm, ([0, 1], [0, 2]))
    if verbosity > 0:
        print("rdm=CTCTaT " + str(rdm.size()))
    # C(-1,-1)--T(0,-1)--0 0--C(1,-1)
    # |         |             1->0
    # |         |
    # T(-1,0)---a--2
    # |         |\34(s,s')
    # |         |
    # C(-1,1)---T(0,1)--0->1
    rdm = torch.tensordot(env.C[(coord, (1, -1))], rdm, ([0], [0]))
    if verbosity > 0:
        print("rdm=CTCTaTC " + str(rdm.size()))
    # C(-1,-1)--T(0,-1)-----C(1,-1)
    # |         |           0
    # |         |           0
    # T(-1,0)---a--2 1------T(1,0)
    # |         |\34->23(s,s')  2->0
    # |         |
    # C(-1,1)---T(0,1)--1
    rdm = torch.tensordot(env.T[(coord, (1, 0))], rdm, ([0, 1], [0, 2]))
    if verbosity > 0:
        print("rdm=CTCTaTCT " + str(rdm.size()))
    # C(-1,-1)--T(0,-1)--------C(1,-1)
    # |         |              |
    # |         |              |
    # T(-1,0)---a--------------T(1,0)
    # |         |\23->12(s,s') 0
    # |         |              0
    # C(-1,1)---T(0,1)--1 1----C(1,1)
    rdm = torch.tensordot(rdm, env.C[(coord, (1, 1))], ([0, 1], [0, 1]))
    if verbosity > 0:
        print("rdm=CTCTaTCTC " + str(rdm.size()))

    # normalize
    rdm = rdm / torch.trace(rdm)

    return rdm
Example 17
def rgbtogray(image):
    # image1 = torch.rand(1,3,2,2)
    rgb_weights = torch.Tensor([0.2989, 0.5870, 0.1140]).to(image.get_device())
    input_r = torch.tensordot(image, rgb_weights, dims=([-3], [-1]))
    input_r = input_r.unsqueeze(-3)
    return input_r
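
A usage sketch (the [B, 3, H, W] layout is an assumption; the original additionally moves the weights to image.get_device(), which requires a GPU tensor): the dims=([-3], [-1]) contraction collapses the RGB channel against the three luminance weights, and unsqueeze(-3) restores a single-channel axis.

import torch

image = torch.rand(2, 3, 4, 4)  # assumed [B, 3, H, W] layout
rgb_weights = torch.tensor([0.2989, 0.5870, 0.1140])
gray = torch.tensordot(image, rgb_weights, dims=([-3], [-1])).unsqueeze(-3)
assert gray.shape == (2, 1, 4, 4)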
Example 18
def objective_with_mat(D, F, P):
    A = P @ D @ P.T
    if has_torch and isinstance(D, torch.Tensor):
        return torch.tensordot(F, A, dims=2).cpu()
    else:
        return np.tensordot(F, A, axes=2)
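
A sketch of the dims=2 (and axes=2) call above: for two matrices the full contraction is the Frobenius inner product sum(F * A) = trace(F^T A), which is the quadratic-assignment objective evaluated at the permuted distance matrix P D P^T.

import torch

F = torch.randn(4, 4)
A = torch.randn(4, 4)
val = torch.tensordot(F, A, dims=2)  # full contraction of two matrices
assert torch.allclose(val, (F * A).sum(), atol=1e-6)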
Example 19
 def forward(self, inputs, embed=True):
     if embed:
         return torch.nn.functional.embedding(inputs, self.w)
     else:
         return torch.tensordot(inputs, self.w.t(), 1) + self.b
Example 20
 def tensordot():
     a = torch.arange(60.).reshape(3, 4, 5)
     b = torch.arange(24.).reshape(4, 3, 2)
     # contracting dims (1, 0) of a with dims (0, 1) of b leaves a (5, 2) result
     return torch.tensordot(a, b, dims=([1, 0], [0, 1]))
Example 21
def one_eucl_rnn_transform(W, h, U, x, b):
    W_otimes_h = torch.tensordot(h, W, dims=([-1], [1]))
    U_otimes_x = torch.tensordot(x, U, dims=([-1], [1]))
    return W_otimes_h + U_otimes_x + b
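
A sketch of the contraction pattern above (shapes are assumptions): contracting the last dimension of h with dim 1 of W is h @ W.t() for 2-D inputs, and the same dims spec broadcasts over any leading batch dimensions of h (likewise for U and x).

import torch

W = torch.randn(6, 5)
h = torch.randn(3, 5)
out = torch.tensordot(h, W, dims=([-1], [1]))  # apply W along the feature axis
assert out.shape == (3, 6)
assert torch.allclose(out, h @ W.t(), atol=1e-6)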
Example 22
 def forward(self, input):
     x = input
     y = torch.tensordot(x.permute(0, 2, 3, 1), self.weight,
                         dims=1) + self.bias
     return y.permute(0, 3, 1, 2)
Example 23
class DefaultQubitTorch(DefaultQubit):
    """Simulator plugin based on ``"default.qubit"``, written using PyTorch.

    **Short name:** ``default.qubit.torch``

    This device provides a pure-state qubit simulator written using PyTorch.
    As a result, it supports classical backpropagation as a means to compute the Jacobian. This can
    be faster than the parameter-shift rule for analytic quantum gradients
    when the number of parameters to be optimized is large.

    To use this device, you will need to install PyTorch:

    .. code-block:: console

        pip install torch>=1.8.0

    **Example**

    The ``default.qubit.torch`` is designed to be used with end-to-end classical backpropagation
    (``diff_method="backprop"``) and the PyTorch interface. This is the default method
    of differentiation when creating a QNode with this device.

    Using this method, the created QNode is a 'white-box', and is
    tightly integrated with your PyTorch computation:

    .. code-block:: python

        dev = qml.device("default.qubit.torch", wires=1)

        @qml.qnode(dev, interface="torch", diff_method="backprop")
        def circuit(x):
            qml.RX(x[1], wires=0)
            qml.Rot(x[0], x[1], x[2], wires=0)
            return qml.expval(qml.PauliZ(0))

    >>> weights = torch.tensor([0.2, 0.5, 0.1], requires_grad=True)
    >>> res = circuit(weights)
    >>> res.backward()
    >>> print(weights.grad)
    tensor([-2.2527e-01, -1.0086e+00,  1.3878e-17])

    Autograd mode will also work when using classical backpropagation:

    >>> def cost(weights):
    ...    return torch.sum(circuit(weights)**3) - 1
    >>> res = circuit(weights)
    >>> res.backward()
    >>> print(weights.grad)
    tensor([-4.5053e-01, -2.0173e+00,  5.9837e-17])

    Executing the pipeline in PyTorch will allow the whole computation to be run on the GPU,
    and therefore provide an acceleration. Your parameters need to be instantiated on the same
    device as the backend device.

    .. code-block:: python

        dev = qml.device("default.qubit.torch", wires=1, torch_device='cuda')

        @qml.qnode(dev, interface="torch", diff_method="backprop")
        def circuit(x):
            qml.RX(x[1], wires=0)
            qml.Rot(x[0], x[1], x[2], wires=0)
            return qml.expval(qml.PauliZ(0))

    >>> weights = torch.tensor([0.2, 0.5, 0.1], requires_grad=True, device='cuda')
    >>> res = circuit(weights)
    >>> res.backward()
    >>> print(weights.grad)
    tensor([-2.2527e-01, -1.0086e+00,  2.9919e-17], device='cuda:0')


    There are a couple of things to keep in mind when using the ``"backprop"``
    differentiation method for QNodes:

    * You must use the ``"torch"`` interface for classical backpropagation, as PyTorch is
      used as the device backend.

    * Only exact expectation values, variances, and probabilities are differentiable.
      When instantiating the device with ``shots!=None``, differentiating QNode
      outputs will result in an error.

    If you wish to use a different machine-learning interface, or prefer to calculate quantum
    gradients using the ``parameter-shift`` or ``finite-diff`` differentiation methods,
    consider using the ``default.qubit`` device instead.

    Args:
        wires (int, Iterable): Number of subsystems represented by the device,
            or iterable that contains unique labels for the subsystems. Default 1 if not specified.
        shots (None, int): How many times the circuit should be evaluated (or sampled) to estimate
            the expectation values. Defaults to ``None`` if not specified, which means
            that the device returns analytical results.
            If ``shots > 0`` is used, the ``diff_method="backprop"``
            QNode differentiation method is not supported and it is recommended to consider
            switching device to ``default.qubit`` and using ``diff_method="parameter-shift"``.
        torch_device='cpu' (str): the device on which the computation will be
            run, e.g., ``'cpu'`` or ``'cuda'``
    """

    name = "Default qubit (Torch) PennyLane plugin"
    short_name = "default.qubit.torch"

    C_DTYPE = torch.complex128
    R_DTYPE = torch.float64

    _abs = staticmethod(torch.abs)
    _einsum = staticmethod(torch.einsum)
    _flatten = staticmethod(torch.flatten)
    _reshape = staticmethod(torch.reshape)
    _roll = staticmethod(torch.roll)
    _stack = staticmethod(lambda arrs, axis=0, out=None: torch.stack(arrs, axis=axis, out=out))
    _tensordot = staticmethod(
        lambda a, b, axes: torch.tensordot(
            a, b, axes if isinstance(axes, int) else tuple(map(list, axes))
        )
    )
    _transpose = staticmethod(lambda a, axes=None: a.permute(*axes))
    _asnumpy = staticmethod(lambda x: x.cpu().numpy())
    _conj = staticmethod(torch.conj)
    _real = staticmethod(torch.real)
    _imag = staticmethod(torch.imag)
    _norm = staticmethod(torch.norm)

    def __init__(self, wires, *, shots=None, analytic=None, torch_device=None):

        # Store if the user specified a Torch device. Otherwise the execute
        # method attempts to infer the Torch device from the gate parameters.
        self._torch_device_specified = torch_device is not None
        self._torch_device = torch_device

        super().__init__(wires, shots=shots, cache=0, analytic=analytic)

        # Move state to torch device (e.g. CPU, GPU, XLA, ...)
        self._state.requires_grad = True
        self._state = self._state.to(self._torch_device)
        self._pre_rotated_state = self._state

    @staticmethod
    def _get_parameter_torch_device(ops):
        """An auxiliary function to determine the Torch device specified for
        the gate parameters of the input operations.

        Returns the first CUDA Torch device found (if any) using a string
        format. Does not handle tensors put on multiple CUDA Torch devices.
        Such a case raises an error with Torch.

        If CUDA is not used with any of the parameters, then specifies the CPU
        if the parameters are on the CPU or None if there were no parametric
        operations.

        Args:
            ops (list[Operator]): list of operations to check

        Returns:
            str or None: The string of the Torch device determined or None if
            there is no data for any operations.
        """
        par_torch_device = None
        for op in ops:
            for data in op.data:

                # Using hasattr in case we don't have a Torch tensor as input
                if hasattr(data, "is_cuda"):
                    if data.is_cuda:  # pragma: no cover
                        return ":".join([data.device.type, str(data.device.index)])

                    par_torch_device = "cpu"

        return par_torch_device

    def execute(self, circuit, **kwargs):
        ops_and_obs = circuit.operations + circuit.observables

        par_torch_device = self._get_parameter_torch_device(ops_and_obs)

        if not self._torch_device_specified:
            self._torch_device = par_torch_device

            # If we've changed the device of the parameters between device
            # executions, need to move the state to the correct Torch device
            if self._state.device != self._torch_device:
                self._state = self._state.to(self._torch_device)
        else:
            if par_torch_device is not None:  # pragma: no cover
                params_cuda_device = "cuda" in par_torch_device
                specified_device_cuda = "cuda" in self._torch_device

                # Raise a warning if there's a mismatch between the specified and
                # used Torch devices
                if params_cuda_device != specified_device_cuda:

                    warnings.warn(
                        f"Torch device {self._torch_device} specified "
                        "upon PennyLane device creation does not match the "
                        "Torch device of the gate parameters; "
                        f"{self._torch_device} will be used."
                    )

        return super().execute(circuit, **kwargs)

    def _asarray(self, a, dtype=None):
        if isinstance(a, list):
            # Handle unexpected cases where we don't have a list of tensors
            if not isinstance(a[0], torch.Tensor):
                res = np.asarray(a)
                res = torch.from_numpy(res)
            else:
                res = torch.cat([torch.reshape(i, (-1,)) for i in a], dim=0)
            res = torch.cat([torch.reshape(i, (-1,)) for i in res], dim=0)
        else:
            res = torch.as_tensor(a, dtype=dtype)

        res = torch.as_tensor(res, device=self._torch_device)
        return res

    _cast = _asarray

    @staticmethod
    def _dot(x, y):
        if x.device != y.device:
            if x.device != "cpu":
                return torch.tensordot(x, y.to(x.device), dims=1)
            if y.device != "cpu":
                return torch.tensordot(x.to(y.device), y, dims=1)

        return torch.tensordot(x, y, dims=1)

    @staticmethod
    def _reduce_sum(array, axes):
        if not axes:
            return array
        return torch.sum(array, dim=axes)

    @staticmethod
    def _conj(array):
        if isinstance(array, torch.Tensor):
            return torch.conj(array)
        return np.conj(array)

    @staticmethod
    def _scatter(indices, array, new_dimensions):

        # `array` is now a torch tensor
        tensor = array
        new_tensor = torch.zeros(new_dimensions, dtype=tensor.dtype, device=tensor.device)
        new_tensor[indices] = tensor
        return new_tensor

    @classmethod
    def capabilities(cls):
        capabilities = super().capabilities().copy()
        capabilities.update(passthru_interface="torch", supports_reversible_diff=False)
        return capabilities

    def _get_unitary_matrix(self, unitary):
        """Return the matrix representing a unitary operation.

        Args:
            unitary (~.Operation): a PennyLane unitary operation

        Returns:
            torch.Tensor[complex]: Returns a 2D matrix representation of
            the unitary in the computational basis, or, in the case of a diagonal unitary,
            a 1D array representing the matrix diagonal.
        """
        if unitary in diagonal_in_z_basis:
            return self._asarray(unitary.eigvals, dtype=self.C_DTYPE)
        return self._asarray(unitary.matrix, dtype=self.C_DTYPE)

    def sample_basis_states(self, number_of_states, state_probability):
        """Sample from the computational basis states based on the state
        probability.

        This is an auxiliary method to the ``generate_samples`` method.

        Args:
            number_of_states (int): the number of basis states to sample from
            state_probability (torch.Tensor[float]): the computational basis probability vector

        Returns:
            List[int]: the sampled basis states
        """
        return super().sample_basis_states(
            number_of_states, state_probability.cpu().detach().numpy()
        )
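
A sketch of the `_tensordot` wrapper defined in the class above: NumPy-style `axes` (either an integer or a pair of index sequences) are coerced into the tuple-of-lists form that `torch.tensordot` accepts.

import torch

# assumed re-statement of the class's axes-normalizing lambda
_tensordot = lambda a, b, axes: torch.tensordot(
    a, b, axes if isinstance(axes, int) else tuple(map(list, axes))
)

a = torch.randn(2, 3, 4)
b = torch.randn(4, 3)
assert _tensordot(a, b, ((2, 1), (0, 1))).shape == (2,)    # NumPy-style axes pairs
assert _tensordot(a, b.reshape(3, 4), 2).shape == (2,)     # plain integer axes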
Example 24
 def my_tensordort_perm(a, b, dims=None, perm=None):
     return torch.tensordot(a, b, dims=dims).sum(3).permute(perm)
Example 25
def computeSurprisals(linearized):
      assert len(linearized[0]) == args.MAX_BOUNDARY
      assert len(linearized) == args.BATCHSIZE

      # Presumably unnecessary
      for x in chart:     
          for y in x:
               y.fill_(float("-Inf"))

      for length in range(1, args.MAX_BOUNDARY+1): # the NUMBER of words spanned. start+length is the first word OUTSIDE the constituent
         for start in range(args.MAX_BOUNDARY): # the index of the first word taking part in the thing
            if start+length-1 >= args.MAX_BOUNDARY:
               continue
            if length == 1: 
               if start < args.LEFT_CONTEXT:
                 for preterminal in terminals:
                    chart[start][start][:,stoi_setOfNonterminals[preterminal]].fill_(0)
               else:
                 lexical_tensor = torch.LongTensor([0 for _ in range(args.BATCHSIZE)])
             
                 for batch in range(args.BATCHSIZE): 
                    if wordCounts.get(linearized[batch][start],0) < args.OOV_THRESHOLD: # OOV
                       lexical_tensor[batch] = stoi["_OOV_"]
                    else:
                       lexical_tensor[batch] = stoi[linearized[batch][start]]
                 lexical_tensor = lexical_tensor.cuda()
                 chart[start][start] = torch.nn.functional.embedding(input=lexical_tensor, weight=lexicalProbabilities_matrix)
                 assert start == start+length-1
            else:
                entries = []
                for start2 in range(start+1, args.MAX_BOUNDARY):
                  left = chart[start][start2-1]
                  right = chart[start2][start+length-1]
                  maxLeft = torch.max(left) #, dim=1, keepdim=True)[0]
                  maxRight = torch.max(right) #, dim=1, keepdim=True)[0]
                  if float(maxLeft) == float("-inf") or float(maxRight) == float("-inf"): # everything will be 0
                     continue
                  resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([1], [1]))
                  resultTotal = torch.bmm(resultLeft, torch.exp(right-maxRight).view(args.BATCHSIZE, -1, 1)).squeeze(2)
                  resultTotal = torch.nn.functional.relu(resultTotal) # because some values end up being slightly negative in result
                  resultTotalLog = torch.log(resultTotal)+(maxLeft+maxRight)
                  entries.append(resultTotalLog)
                chart[start][start+length-1] = logSumExpList(entries)
      #############################
      # Now consider different endpoints
      valuesPerBoundary = [0]
      for BOUNDARY in range(1, args.MAX_BOUNDARY+1):
         chartFromStart = [torch.cuda.FloatTensor([[float("-Inf") for _ in itos_setOfNonterminals] for _ in range(args.BATCHSIZE)]) for _ in range(BOUNDARY)]

         if True:      
             right = chart[BOUNDARY-1][BOUNDARY-1]
             right_max = torch.max(right)
             result = torch.tensordot(torch.exp(right-right_max), invertedLeft, dims=([1], [1]))
             resultLog = (torch.log(result) + right_max)
             chartFromStart[BOUNDARY-1] = resultLog
      
         for start in range(BOUNDARY-1)[::-1]: # now construct potential constituents that start at `start', but end outside of the portion
               entries = []
               for start2 in range(start+1, BOUNDARY):
                  left = chart[start][start2-1]
                  right = chartFromStart[start2]
                  maxLeft = torch.max(left)
                  maxRight = torch.max(right)
                  if float(maxLeft) == float("-inf") or float(maxRight) == float("-inf"): # everything will be 0
                     continue
                  resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([1], [1]))
                  resultTotal = torch.bmm(resultLeft, torch.exp(right-maxRight).view(args.BATCHSIZE, -1, 1)).squeeze(2)
                  result = torch.tensordot(resultTotal, invertedLeft, dims=([1], [1]))
                  result = torch.nn.functional.relu(result) # because some values end up being slightly negative in result
                  resultLog = (torch.log(result) + (maxLeft+maxRight))
                  entries.append(resultLog)
               chartFromStart[start] = logSumExpList(entries)
         prefixProb = float(chartFromStart[0][:,stoi_setOfNonterminals["_SENTENCES_"]].sum()) #log(sum([exp(float(x[0])) if x[0] is not None else 0 for x in chartFromStart[0]])) # log P(S|root) -- the full mass comprising all possible trees (including spurious ambiguities arising from the PCFG conversion)

         surprisalTableSums[BOUNDARY-1] += prefixProb
         surprisalTableCounts[BOUNDARY-1] += args.BATCHSIZE
         valuesPerBoundary.append(prefixProb)
         print(BOUNDARY, prefixProb/args.BATCHSIZE, linearized[0])
         assert prefixProb/args.BATCHSIZE - 0.01 < valuesPerBoundary[-2]/args.BATCHSIZE, ("bug or numerical problem?", (prefixProb/args.BATCHSIZE, valuesPerBoundary[-2]/args.BATCHSIZE))
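
The inner-span loop above avoids underflow with a standard max-shift: subtract the maximum log value, exponentiate, contract with ``torch.tensordot``, then return to log space and add the shift back. A minimal standalone sketch of that identity (tensor names and shapes are illustrative, not taken from the script):

import torch

log_left = torch.log(torch.rand(4, 10))           # (batch, nonterminal) log-probabilities
rules = torch.rand(10, 10)                        # (parent, left-child) rule weights

max_left = torch.max(log_left)                    # global shift, as in the chart code
stable = torch.tensordot(torch.exp(log_left - max_left), rules, dims=([1], [1]))
log_result = torch.log(stable) + max_left         # back to log space

# same contraction without the shift, for comparison
reference = torch.log(torch.tensordot(torch.exp(log_left), rules, dims=([1], [1])))
assert torch.allclose(log_result, reference, atol=1e-5)
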
Esempio n. 26
0
def apply_TM_2sO(state, env, edge, op=None, verbosity=0):
    r"""
    :param state: underlying 1-site C4v symmetric wavefunction
    :param env: C4v symmetric environment corresponding to ``state``
    :param edge: tensor of dimensions :math:`\chi \times D^2 \times \chi`
    :param op: two-site operator to be inserted into the two consecutive
               transfer matrices
    :param verbosity: logging verbosity
    :type state: IPEPS_C4V
    :type env: ENV_C4V
    :type edge: torch.tensor
    :type op: torch.tensor
    :type verbosity: int
    :return: ``edge`` with two transfer matrices (and operator ``op``, if any) applied.
             The resulting tensor has the same index structure as the
             original ``edge``
    :rtype: torch.tensor
    
    Applies two transfer matrices to the ``edge`` tensor, including the two-site operator
    ``op`` by contracting the following network::

         -----T-------------T------------
        |     |             |
       edge--(a^+ op_l a)==(a^+ op_r a)--
        |     |             |
         -----T-------------T------------

    where the physical indices `s` and `s'` of the on-site tensor :math:`a` 
    and its Hermitian conjugate :math:`a^\dagger` are contracted with
    identity :math:`\delta_{s,s'}` or ``op_l`` and ``op_r`` if ``op`` is supplied.
    The ``op_l`` and ``op_r`` are given by the SVD decomposition of two-site operator
    ``op``::

         0  1        0           1          0            1->0
         |  |  SVD   |           |          |            |
        | op |   =   |op_l|--(S--|op^~_r|) = |op_l|--2 2--|op_r|
         |  |        |           |          |            |
         2  3        2           3          2->1         3->1
    """
    # TODO stronger verification
    if op is not None:
        assert (len(op.size()) == 4)

        # pre-process ``op``
        # TODO possibly truncate/compress according to the vanishingly small singular values
        dims_op = op.size()
        op_mat = op.permute(0, 2, 1,
                            3).contiguous().reshape(dims_op[0]**2,
                                                    dims_op[0]**2)
        op_l, s, op_r = torch.svd(op_mat)
        op_l = op_l.reshape(dims_op[0], dims_op[0], s.size()[0])
        op_r = torch.einsum('i,ij->ij', s,
                            op_r.t()).reshape(s.size()[0], dims_op[0],
                                              dims_op[0])
        op_r = op_r.permute(1, 2, 0).contiguous()

    T = env.T[env.keyT]
    # Assume index structure of ``edge`` tensor to be as follows
    #
    #       -- 0
    # edge |-- 1
    #       -- 2
    #
    #   ----0 0--T--1->2
    #  |         2->3
    # edge--1->0
    #  |
    #   ----2->1
    E = torch.tensordot(edge, T, ([0], [0]))
    if verbosity > 0: print("E=edgeT " + str(E.size()))

    # TODO - more efficient contraction with uncontracted-double-layer on-site tensor
    #        Possibly reshape indices 1,2 of E, which are to be contracted with
    #        on-site tensor and contract bra,ket in two steps instead of creating
    #        double layer tensor
    #    /
    # --A--
    #  /|s
    #   X
    # s'|/
    # --A--
    #  /
    #
    # where X is Id or op
    a = next(iter(state.sites.values()))
    dims_a = a.size()
    X = torch.eye(dims_a[0], dtype=a.dtype,
                  device=a.device)[:, :, None] if op is None else op_l
    A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,X,a).contiguous()\
        .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1)

    #   ---------T--2->1
    #  |         3 4
    #  |         0/
    # edge--0 1--A--3
    #  |         2
    #   ----1->0
    E = torch.tensordot(E, A, ([0, 3], [1, 0]))
    if verbosity > 0: print("E=EA " + str(E.size()))

    #   -------T--1->0
    #  |       | 4->2
    #  |       |/
    # edge-----A--3->1
    #  |       2
    #  |       2
    #   --0 0--T--1->3
    E = torch.tensordot(E, T, ([0, 2], [0, 2]))
    if verbosity > 0: print("E=ET " + str(E.size()))

    #   ----0 0----T--1->3
    #  |----2->1   2->4
    # edge--1->0
    #  |
    #   ----3->2
    E = torch.tensordot(E, T, ([0], [0]))
    if verbosity > 0: print("E=ET " + str(E.size()))

    # TODO - more efficient contraction with uncontracted-double-layer on-site tensor
    #        Possibly reshape indices 1,2 of E, which are to be contracted with
    #        on-site tensor and contract bra,ket in two steps instead of creating
    #        double layer tensor
    #    /
    # --A--
    #  /|s
    #   X
    # s'|/
    # --A--
    #  /
    #
    # where X is Id or op
    X = torch.eye(dims_a[0], dtype=a.dtype,
                  device=a.device)[:, :, None] if op is None else op_r
    A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,X,a).contiguous()\
        .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1)

    #   ---------T--3->1
    #  |         4
    #  |----1 4-\0
    # edge--0 1--A--3
    #  |         2
    #   ----2->0
    E = torch.tensordot(E, A, ([0, 1, 4], [1, 4, 0]))
    if verbosity > 0: print("E=EA " + str(E.size()))

    #   -------T--1->0
    #  |       |
    #  |       |
    # edge-----A--3->1
    #  |       2
    #  |       2
    #   --0 0--T--1->2
    E = torch.tensordot(E, T, ([0, 2], [0, 2]))
    if verbosity > 0: print("E=ET " + str(E.size()))

    return E
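
The ``op_l``/``op_r`` factorisation described in the docstring can be checked in isolation: contracting the two factors over the SVD index should reproduce the original two-site operator. A minimal sketch using a random operator (the reshapes mirror the pre-processing block above; ``d`` is an assumed physical dimension):

import torch

d = 2
op = torch.rand(d, d, d, d)                       # indices s0, s1, s0', s1'

op_mat = op.permute(0, 2, 1, 3).contiguous().reshape(d**2, d**2)
u, s, v = torch.svd(op_mat)
op_l = u.reshape(d, d, s.size()[0])
op_r = torch.einsum('i,ij->ij', s, v.t()).reshape(s.size()[0], d, d).permute(1, 2, 0)

# contracting op_l and op_r over the SVD index recovers the original operator
op_rec = torch.einsum('ikl,jml->ijkm', op_l, op_r)
assert torch.allclose(op, op_rec, atol=1e-5)
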
Esempio n. 27
0
def rdm2x1(coord, ipeps, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) specifies position of 2x1 subsystem
    :param ipeps: underlying wavefunction
    :param env: environment corresponding to ``ipeps``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int)
    :type ipeps: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 2-site reduced density matrix with indices :math:`s_0s_1;s'_0s'_1`
    :rtype: torch.tensor

    Computes 2-site reduced density matrix :math:`\rho_{2x1}` of a horizontal 
    2x1 subsystem using following strategy:
    
        1. compute four individual corners 
        2. construct right and left half of the network
        3. contract right and left halves to obtain the final reduced density matrix

    ::

        C--T------------T------------------C = C2x2_LU(coord)--C2x2(coord+(1,0))
        |  |            |                  |   |               |  
        T--A^+A(coord)--A^+A(coord+(1,0))--T   C2x1_LD(coord)--C2x1(coord+(1,0))
        |  |            |                  |
        C--T------------T------------------C 

    The physical indices `s` and `s'` of on-site tensors :math:`A` (and :math:`A^\dagger`) 
    at vertices ``coord``, ``coord+(1,0)`` are left uncontracted
    """
    #----- building C2x2_LU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))]
    dimsA = ipeps.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # C--10--T1--2
    # 0      1
    C2x2_LU = torch.tensordot(C, T1, ([1], [0]))

    # C------T1--2->1
    # 0      1->0
    # 0
    # T2--2->3
    # 1->2
    C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0]))

    # C-------T1--1->0
    # |       0
    # |       0
    # T2--3 1 a--3
    # 2->1    2\45
    C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # C2x2--1
    # |\23
    # 0
    C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\
        T1.size()[2]*a.size()[3],T2.size()[1]*a.size()[2],dimsA[0],dimsA[0])
    if verbosity > 0:
        print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) +
              " (-1,-1): " + str(C2x2_LU.size()))

    #----- building C2x1_LD ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, 1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (0, 1))]

    # 0       0->1
    # C--1 1--T2--2
    C2x1_LD = torch.tensordot(C, T2, ([1], [1]))

    # reshape (01)2->(0)1
    # 0
    # |
    # C2x1--1
    C2x1_LD = C2x1_LD.view(C.size()[0] * T2.size()[0],
                           T2.size()[2]).contiguous()
    if verbosity > 0:
        print("C2X1 LD " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) +
              " (-1,1): " + str(C2x1_LD.size()))

    #----- build left part C2x2_LU--C2x1_LD ------------------------------------
    # C2x2_LU--1
    # |\23
    # 0
    # 0
    # C2x1_LD--1->0
    # TODO is it worthy(performance-wise) to instead overwrite one of C2x2_LU,C2x2_RU ?
    left_half = torch.tensordot(C2x1_LD, C2x2_LU, ([0], [0]))

    #----- building C2x2_RU ----------------------------------------------------
    vec = (1, 0)
    shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shitf_coord, (1, -1))]
    T1 = env.T[(shitf_coord, (1, 0))]
    T2 = env.T[(shitf_coord, (0, -1))]
    dimsA = ipeps.site(shitf_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0--C
    #    1
    #    0
    # 1--T1
    #    2
    C2x2_RU = torch.tensordot(C, T1, ([1], [0]))

    # 2<-0--T2--2 0--C
    #    3<-1        |
    #          0<-1--T1
    #             1<-2
    C2x2_RU = torch.tensordot(C2x2_RU, T2, ([0], [2]))

    # 1<-2--T2------C
    #       3       |
    #    45\0       |
    # 2<-1--a--3 0--T1
    #    3<-2    0<-1
    C2x2_RU = torch.tensordot(C2x2_RU, a, ([0, 3], [3, 0]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # 0--C2x2
    # 23/|
    #    1
    C2x2_RU = C2x2_RU.permute(1,2,0,3,4,5).contiguous().view(\
        T2.size()[0]*a.size()[1],T1.size()[2]*a.size()[2], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 RU " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (1,-1): " + str(C2x2_RU.size()))

    #----- building C2x1_RD ----------------------------------------------------
    C = env.C[(shitf_coord, (1, 1))]
    T1 = env.T[(shitf_coord, (0, 1))]

    #    1<-0        0
    # 2<-1--T1--2 1--C
    C2x1_RD = torch.tensordot(C, T1, ([1], [2]))

    # reshape (01)2->(0)1
    C2x1_RD = C2x1_RD.view(C.size()[0] * T1.size()[0],
                           T1.size()[1]).contiguous()

    #    0
    #    |
    # 1--C2x1
    if verbosity > 0:
        print("C2X1 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (1,1): " + str(C2x1_RD.size()))

    #----- build right part C2x2_RU--C2x1_RD -----------------------------------
    # 1<-0--C2x2_RU
    #       |\23
    #       1
    #       0
    # 0<-1--C2x1_RD
    right_half = torch.tensordot(C2x1_RD, C2x2_RU, ([0], [1]))

    # construct reduced density matrix by contracting left and right halves
    # C2x2_LU--1 1----C2x2_RU
    # |\23->01        |\23
    # |               |
    # C2x1_LD--0 0----C2x1_RD
    rdm = torch.tensordot(left_half, right_half, ([0, 1], [0, 1]))

    # permute into order of s0,s1;s0',s1' where primed indices
    # represent "ket"
    # 0123->0213
    # and normalize
    rdm = rdm.permute(0, 2, 1, 3)
    rdm = rdm / torch.einsum('ijij', rdm)

    return rdm
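
Two quick consistency checks one might run on the tensor returned above; a sketch assuming a real-valued 2-site rdm with index order s0, s1, s0', s1' as constructed here (the helper name and tolerances are ours):

import torch

def check_two_site_rdm(rdm, tol=1e-6):
    """Basic sanity checks for a 2-site rdm indexed s0, s1, s0', s1'."""
    d = rdm.size(0)
    rho = rdm.reshape(d * d, d * d)               # rows: (s0, s1), cols: (s0', s1')
    # the normalisation above fixes the trace to one
    assert abs(float(torch.einsum('ijij', rdm)) - 1.0) < tol
    # for real tensors a well-formed rdm should be (approximately) symmetric
    assert torch.allclose(rho, rho.t(), atol=tol)
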
Esempio n. 28
0
def forward(self, x, dims):
    a = torch.tensordot(x, self.weight, dims=dims) + self.bias
    return a
Esempio n. 29
0
def rdm1x2(coord, ipeps, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) specifies position of 1x2 subsystem
    :param ipeps: underlying wavefunction
    :param env: environment corresponding to ``ipeps``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int)
    :type ipeps: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 2-site reduced density matrix with indices :math:`s_0s_1;s'_0s'_1`
    :rtype: torch.tensor

    Computes 2-site reduced density matrix :math:`\rho_{1x2}` of a vertical 
    1x2 subsystem using following strategy:
    
        1. compute four individual corners 
        2. construct upper and lower half of the network
        3. contract upper and lower halves to obtain the final reduced density matrix

    ::

        C--T------------------C = C2x2_LU(coord)--------C1x2(coord)
        |  |                  |   |                     |
        T--A^+A(coord)--------T   C2x2_LD(coord+(0,1))--C1x2(coord+(0,1))
        |  |                  |
        T--A^+A(coord+(0,1))--T
        |  |                  |
        C--T------------------C

    The physical indices `s` and `s'` of on-site tensors :math:`A` (and :math:`A^\dagger`) 
    at vertices ``coord``, ``coord+(0,1)`` are left uncontracted
    """
    #----- building C2x2_LU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))]
    dimsA = ipeps.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # C--10--T1--2
    # 0      1
    C2x2_LU = torch.tensordot(C, T1, ([1], [0]))

    # C------T1--2->1
    # 0      1->0
    # 0
    # T2--2->3
    # 1->2
    C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0]))

    # C-------T1--1->0
    # |       0
    # |       0
    # T2--3 1 a--3
    # 2->1    2\45
    C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # C2x2--1
    # |\23
    # 0
    C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\
        T1.size()[2]*a.size()[3],T2.size()[1]*a.size()[2],dimsA[0],dimsA[0])
    if verbosity > 0:
        print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) +
              " (-1,-1): " + str(C2x2_LU.size()))

    #----- building C1x2_RU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (1, 0))]

    # 0--C
    #    1
    #    0
    # 1--T1
    #    2
    C1x2_RU = torch.tensordot(C, T1, ([1], [0]))

    # reshape (01)2->(0)1
    # 0--C1x2
    # 23/|
    #    1
    C1x2_RU = C1x2_RU.view(C.size()[0] * T1.size()[1],
                           T1.size()[2]).contiguous()
    if verbosity > 0:
        print("C1X2 RU " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) +
              " (1,-1): " + str(C1x2_RU.size()))

    #----- build upper part C2x2_LU--C1x2_RU -----------------------------------
    # C2x2_LU--1 0--C1x2_RU
    # |\23          |
    # 0->1          1->0
    upper_half = torch.tensordot(C1x2_RU, C2x2_LU, ([0], [1]))

    #----- building C2x2_LD ----------------------------------------------------
    vec = (0, 1)
    shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shitf_coord, (-1, 1))]
    T1 = env.T[(shitf_coord, (-1, 0))]
    T2 = env.T[(shitf_coord, (0, 1))]
    dimsA = ipeps.site(shitf_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0->1
    # T1--2
    # 1
    # 0
    # C--1->0
    C2x2_LD = torch.tensordot(C, T1, ([0], [1]))

    # 1->0
    # T1--2->1
    # |
    # |       0->2
    # C--0 1--T2--2->3
    C2x2_LD = torch.tensordot(C2x2_LD, T2, ([0], [1]))

    # 0       0->2
    # T1--1 1--a--3
    # |        2\45
    # |        2
    # C--------T2--3->1
    C2x2_LD = torch.tensordot(C2x2_LD, a, ([1, 2], [1, 2]))

    # permute 012345->021345
    # reshape (02)(13)45->0123
    # 0
    # |/23
    # C2x2--1
    C2x2_LD = C2x2_LD.permute(0,2,1,3,4,5).contiguous().view(\
        T1.size()[0]*a.size()[0],T2.size()[2]*a.size()[3], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (-1,1): " + str(C2x2_LD.size()))

    #----- building C2x2_RD ----------------------------------------------------
    C = env.C[(shitf_coord, (1, 1))]
    T2 = env.T[(shitf_coord, (1, 0))]

    #       0
    #    1--T2
    #       2
    #       0
    # 2<-1--C
    C1x2_RD = torch.tensordot(T2, C, ([2], [0]))

    # permute 012->021
    # reshape 0(12)->0(1)
    C1x2_RD = C1x2_RD.permute(0, 2,
                              1).contiguous().view(T2.size()[0],
                                                   C.size()[1] * T2.size()[1])

    #    0
    #    |
    # 1--C1x2
    if verbosity > 0:
        print("C1X2 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (1,1): " + str(C1x2_RD.size()))

    #----- build lower part C2x2_LD--C1x2_RD -----------------------------------
    # 0->1          0
    # |/23          |
    # C2x2_LD--1 1--C1x2_RD
    lower_half = torch.tensordot(C1x2_RD, C2x2_LD, ([1], [1]))

    # construct reduced density matrix by contracting lower and upper halves
    # C2x2_LU------C1x2_RU
    # |\23->01     |
    # 1            0
    # 1            0
    # |/23         |
    # C2x2_LD------C1x2_RD
    rdm = torch.tensordot(upper_half, lower_half, ([0, 1], [0, 1]))

    # permute into order of s0,s1;s0',s1' where primed indices
    # represent "ket"
    # 0123->0213
    # and normalize
    rdm = rdm.permute(0, 2, 1, 3)
    rdm = rdm / torch.einsum('ijij', rdm)

    return rdm
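
Because the returned tensor carries indices s0, s1, s0', s1', a 1-site density matrix follows from a partial trace over the second site. A small sketch (the helper name is ours, not part of the original module):

import torch

def partial_trace_second_site(rdm):
    """Trace out the second site of a 2-site rdm indexed s0, s1, s0', s1'."""
    rdm1 = torch.einsum('ijkj->ik', rdm)          # sum over s1 = s1'
    return rdm1 / torch.einsum('ii', rdm1)        # re-normalise (trace is already ~1)
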
Esempio n. 30
0
def rdm2x2(coord, ipeps, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) specifies upper left site of 2x2 subsystem 
    :param ipeps: underlying wavefunction
    :param env: environment corresponding to ``ipeps``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int)
    :type ipeps: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 4-site reduced density matrix with indices :math:`s_0s_1s_2s_3;s'_0s'_1s'_2s'_3`
    :rtype: torch.tensor

    Computes 4-site reduced density matrix :math:`\rho_{2x2}` of 2x2 subsystem specified
    by the vertex ``coord`` of its upper left corner using strategy:

        1. compute four individual corners
        2. construct upper and lower half of the network
        3. contract upper and lower halves to obtain the final reduced density matrix

    ::

        C--T------------------T------------------C = C2x2_LU(coord)--------C2x2(coord+(1,0))
        |  |                  |                  |   |                     |
        T--A^+A(coord)--------A^+A(coord+(1,0))--T   C2x2_LD(coord+(0,1))--C2x2(coord+(1,1))
        |  |                  |                  |
        T--A^+A(coord+(0,1))--A^+A(coord+(1,1))--T
        |  |                  |                  |
        C--T------------------T------------------C
        
    The physical indices `s` and `s'` of on-site tensors :math:`A` (and :math:`A^\dagger`) 
    at vertices ``coord``, ``coord+(1,0)``, ``coord+(0,1)``, and ``coord+(1,1)`` are 
    left uncontracted and given in the same order::
        
        s0 s1
        s2 s3

    """
    #----- building C2x2_LU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))]
    dimsA = ipeps.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # C--10--T1--2
    # 0      1
    C2x2_LU = torch.tensordot(C, T1, ([1], [0]))

    # C------T1--2->1
    # 0      1->0
    # 0
    # T2--2->3
    # 1->2
    C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0]))

    # C-------T1--1->0
    # |       0
    # |       0
    # T2--3 1 a--3
    # 2->1    2\45
    C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # C2x2--1
    # |\23
    # 0
    C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\
        T1.size()[2]*a.size()[3],T2.size()[1]*a.size()[2],dimsA[0],dimsA[0])
    if verbosity > 0:
        print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) +
              " (-1,-1): " + str(C2x2_LU.size()))

    #----- building C2x2_RU ----------------------------------------------------
    vec = (1, 0)
    shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shitf_coord, (1, -1))]
    T1 = env.T[(shitf_coord, (1, 0))]
    T2 = env.T[(shitf_coord, (0, -1))]
    dimsA = ipeps.site(shitf_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0--C
    #    1
    #    0
    # 1--T1
    #    2
    C2x2_RU = torch.tensordot(C, T1, ([1], [0]))

    # 2<-0--T2--2 0--C
    #    3<-1        |
    #          0<-1--T1
    #             1<-2
    C2x2_RU = torch.tensordot(C2x2_RU, T2, ([0], [2]))

    # 1<-2--T2------C
    #       3       |
    #    45\0       |
    # 2<-1--a--3 0--T1
    #    3<-2    0<-1
    C2x2_RU = torch.tensordot(C2x2_RU, a, ([0, 3], [3, 0]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # 0--C2x2
    # 23/|
    #    1
    C2x2_RU = C2x2_RU.permute(1,2,0,3,4,5).contiguous().view(\
        T2.size()[0]*a.size()[1],T1.size()[2]*a.size()[2], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 RU " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (1,-1): " + str(C2x2_RU.size()))

    #----- build upper part C2x2_LU--C2x2_RU -----------------------------------
    # C2x2_LU--1 0--C2x2_RU              C2x2_LU------C2x2_RU
    # |\23->12      |\23->45   & permute |\12->23      |\45
    # 0             1->3                 0             3->1
    # TODO is it worthy(performance-wise) to instead overwrite one of C2x2_LU,C2x2_RU ?
    upper_half = torch.tensordot(C2x2_LU, C2x2_RU, ([1], [0]))
    upper_half = upper_half.permute(0, 3, 1, 2, 4, 5)

    #----- building C2x2_RD ----------------------------------------------------
    vec = (1, 1)
    shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shitf_coord, (1, 1))]
    T1 = env.T[(shitf_coord, (0, 1))]
    T2 = env.T[(shitf_coord, (1, 0))]
    dimsA = ipeps.site(shitf_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    #    1<-0        0
    # 2<-1--T1--2 1--C
    C2x2_RD = torch.tensordot(C, T1, ([1], [2]))

    #         2<-0
    #      3<-1--T2
    #            2
    #    0<-1    0
    # 1<-2--T1---C
    C2x2_RD = torch.tensordot(C2x2_RD, T2, ([0], [2]))

    #    2<-0    1<-2
    # 3<-1--a--3 3--T2
    #       2\45    |
    #       0       |
    # 0<-1--T1------C
    C2x2_RD = torch.tensordot(C2x2_RD, a, ([0, 3], [2, 3]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    C2x2_RD = C2x2_RD.permute(1,2,0,3,4,5).contiguous().view(\
        T2.size()[0]*a.size()[0],T1.size()[1]*a.size()[1], dimsA[0], dimsA[0])

    #    0
    #    |/23
    # 1--C2x2
    if verbosity > 0:
        print("C2X2 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (1,1): " + str(C2x2_RD.size()))

    #----- building C2x2_LD ----------------------------------------------------
    vec = (0, 1)
    shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shitf_coord, (-1, 1))]
    T1 = env.T[(shitf_coord, (-1, 0))]
    T2 = env.T[(shitf_coord, (0, 1))]
    dimsA = ipeps.site(shitf_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0->1
    # T1--2
    # 1
    # 0
    # C--1->0
    C2x2_LD = torch.tensordot(C, T1, ([0], [1]))

    # 1->0
    # T1--2->1
    # |
    # |       0->2
    # C--0 1--T2--2->3
    C2x2_LD = torch.tensordot(C2x2_LD, T2, ([0], [1]))

    # 0        0->2
    # T1--1 1--a--3
    # |        2\45
    # |        2
    # C--------T2--3->1
    C2x2_LD = torch.tensordot(C2x2_LD, a, ([1, 2], [1, 2]))

    # permute 012345->021345
    # reshape (02)(13)45->0123
    # 0
    # |/23
    # C2x2--1
    C2x2_LD = C2x2_LD.permute(0,2,1,3,4,5).contiguous().view(\
        T1.size()[0]*a.size()[0],T2.size()[2]*a.size()[3], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" +
              str(shitf_coord) + " (-1,1): " + str(C2x2_LD.size()))

    #----- build lower part C2x2_LD--C2x2_RD -----------------------------------
    # 0             0->3                 0             3->1
    # |/23->12      |/23->45   & permute |/12->23      |/45
    # C2x2_LD--1 1--C2x2_RD              C2x2_LD------C2x2_RD
    # TODO is it worthy(performance-wise) to instead overwrite one of C2x2_LD,C2x2_RD ?
    lower_half = torch.tensordot(C2x2_LD, C2x2_RD, ([1], [1]))
    lower_half = lower_half.permute(0, 3, 1, 2, 4, 5)

    # construct reduced density matrix by contracting lower and upper halves
    # C2x2_LU------C2x2_RU
    # |\23->01     |\45->23
    # 0            1
    # 0            1
    # |/23->45     |/45->67
    # C2x2_LD------C2x2_RD
    rdm = torch.tensordot(upper_half, lower_half, ([0, 1], [0, 1]))

    # permute into order of s0,s1,s2,s3;s0',s1',s2',s3' where primed indices
    # represent "ket"
    # 01234567->02461357
    # and normalize
    rdm = rdm.permute(0, 2, 4, 6, 1, 3, 5, 7)
    rdm = rdm / torch.einsum('ijklijkl', rdm)

    return rdm
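
The 4-site result can be flattened into a d^4 x d^4 matrix whose trace equals one after the normalisation above. A brief sketch, assuming a real-valued tensor with the s0..s3, s0'..s3' index order returned by this function (the helper name is ours):

import torch

def rdm2x2_as_matrix(rdm):
    """Flatten a 4-site rdm (s0..s3, s0'..s3') into a d^4 x d^4 matrix."""
    d = rdm.size(0)
    rho = rdm.reshape(d**4, d**4)
    # trace over all four sites; equivalent to einsum('ijklijkl', rdm)
    assert abs(float(torch.trace(rho)) - 1.0) < 1e-6
    return rho
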
def evaluate_vae(model,
                 data_loader,
                 epoch: int,
                 device: str,
                 criterion,
                 mu_force_beta_param,
                 eval_type: str = 'valid',
                 iteration: int = 0):
    model.eval()
    total_loss: float = 0
    total_kl_loss: float = 0
    total_nll: float = 0
    total_perp: float = 0
    total_mu_loss: float = 0

    for batch, sent_lengths in data_loader:
        with torch.no_grad():
            inp = batch[:, 0:-1].to(device)

            # Create both the prediction of the next word and the posterior from which we sample Z.
            # Number of MC samples to draw
            # nr_MC_sample = 10 if eval_type == 'test' else 1 # Did not work out unfortunately
            nr_MC_sample = 1 if eval_type == 'test' else 1
            preds, posterior = model(inp, nr_MC_sample)

            # If we use multiple MC samples, average the likelihoods over the 0th dimension
            is_using_multi_samples = nr_MC_sample > 1

            if is_using_multi_samples:
                preds = preds.reshape(nr_MC_sample, batch.shape[0], -1).mean(0)

            # Define target as the next word to predict
            target = batch[:, 1:].to(device)

            # Calc loss by using the ELBO-criterion
            loss, kl_loss, nll = criterion(preds, target, posterior)

            # Perplexity
            perp = calc_batch_perplexity(nll.detach(), sent_lengths)

            # Take the mean over the mini-batch
            loss = loss.mean()
            kl_loss = kl_loss.mean()
            nll = nll.mean()

            # Add the mu-force loss to the total loss
            batch_mean_vectors = posterior.loc
            avg_batch_mean_vector = batch_mean_vectors.mean(0)
            mu_force_loss_var = torch.tensordot(
                batch_mean_vectors - avg_batch_mean_vector, batch_mean_vectors
                - avg_batch_mean_vector, 2) / batch.shape[0] / 2
            mu_force_loss = torch.max(torch.tensor(
                [0.0]), mu_force_beta_param - mu_force_loss_var).to(device)

            loss = loss + mu_force_loss

            total_loss += loss.item()
            total_kl_loss += kl_loss.item()
            total_nll += nll.item()
            total_perp += perp
            total_mu_loss += mu_force_loss_var.item()

    total_loss = total_loss / len(data_loader)
    total_kl_loss = total_kl_loss / len(data_loader)
    total_nll = total_nll / len(data_loader)
    total_perp = total_perp / len(data_loader)
    total_mu_loss = total_mu_loss / len(data_loader)

    return (total_loss, total_kl_loss, total_nll, total_mu_loss), total_perp
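
The ``mu_force_loss_var`` term above relies on ``torch.tensordot(x, x, 2)`` for 2-D tensors, which contracts both axes and therefore equals the sum of squared entries (the squared Frobenius norm of the centred mean vectors). A minimal check of that identity with illustrative shapes:

import torch

x = torch.randn(8, 16)                            # e.g. (batch, latent_dim)
centred = x - x.mean(0)

# tensordot with dims=2 contracts both axes of both operands
full_contraction = torch.tensordot(centred, centred, 2)
assert torch.allclose(full_contraction, (centred ** 2).sum())
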