def tensordot(x, y, axes=2):
    """Simple translation of tensordot syntax to einsum.
    """
    torch, _ = _get_torch_and_device()

    if _TORCH_HAS_TENSORDOT:
        return torch.tensordot(x, y, dims=axes)

    xnd = x.ndimension()
    ynd = y.ndimension()

    # convert int argument to (list[int], list[int])
    if isinstance(axes, int):
        axes = range(xnd - axes, xnd), range(axes)

    # convert (int, int) to (list[int], list[int])
    if isinstance(axes[0], int):
        axes = (axes[0],), axes[1]
    if isinstance(axes[1], int):
        axes = axes[0], (axes[1],)

    # initialize empty indices
    x_ix = [None] * xnd
    y_ix = [None] * ynd
    out_ix = []

    # fill in repeated indices
    available_ix = iter(_torch_symbols_base)
    for ax1, ax2 in zip(*axes):
        repeat = next(available_ix)
        x_ix[ax1] = repeat
        y_ix[ax2] = repeat

    # fill in the rest, and maintain output order
    for i in range(xnd):
        if x_ix[i] is None:
            leave = next(available_ix)
            x_ix[i] = leave
            out_ix.append(leave)
    for i in range(ynd):
        if y_ix[i] is None:
            leave = next(available_ix)
            y_ix[i] = leave
            out_ix.append(leave)

    # form full string and contract!
    einsum_str = "{},{}->{}".format(*map("".join, (x_ix, y_ix, out_ix)))
    return einsum(einsum_str, x, y)
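# A minimal self-contained sanity check of the tensordot -> einsum translation
# above, assuming plain torch.einsum and string.ascii_letters as stand-ins for
# the module's `einsum` helper and `_torch_symbols_base` symbol pool. The
# helper name `_tensordot_via_einsum` is illustrative only.
import string
import torch

def _tensordot_via_einsum(x, y, axes=2):
    xnd, ynd = x.ndimension(), y.ndimension()
    if isinstance(axes, int):
        axes = range(xnd - axes, xnd), range(axes)
    x_ix, y_ix, out_ix = [None] * xnd, [None] * ynd, []
    symbols = iter(string.ascii_letters)
    # contracted axes share a symbol; uncontracted axes get fresh symbols
    for ax1, ax2 in zip(*axes):
        x_ix[ax1] = y_ix[ax2] = next(symbols)
    for ix in (x_ix, y_ix):
        for i, s in enumerate(ix):
            if s is None:
                ix[i] = next(symbols)
                out_ix.append(ix[i])
    eq = "{},{}->{}".format("".join(x_ix), "".join(y_ix), "".join(out_ix))
    return torch.einsum(eq, x, y)

x, y = torch.randn(3, 4, 5), torch.randn(4, 5, 6)
assert torch.allclose(_tensordot_via_einsum(x, y), torch.tensordot(x, y, dims=2), atol=1e-5)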
def apply_TM_1sO(state, env, edge, op=None, verbosity=0): r""" :param state: underlying 1-site C4v symmetric wavefunction :param env: C4v symmetric environment corresponding to ``state`` :param edge: tensor of dimensions :math:`\chi \times D^2 \times \chi` :param op: operator to be inserted into transfer matrix :param verbosity: logging verbosity :type state: IPEPS_C4V :type env: ENV_C4V :type edge: torch.tensor :type op: torch.tensor :type verbosity: int :return: ``edge`` with a single instance of the transfer matrix applied. The resulting tensor has an identical index structure as the original ``edge`` :rtype: torch.tensor Applies a single instance of the "transfer matrix" to the ``edge`` tensor by contracting the following network:: -----T---------- | | edge--(a^+ op a)-- | | -----T---------- where the physical indices `s` and `s'` of the on-site tensor :math:`a` and it's hermitian conjugate :math:`a^\dagger` are contracted with identity :math:`\delta_{s,s'}` or ``op`` (if supplied). """ # TODO stronger verification if op is not None: assert (len(op.size()) == 2) T = env.T[env.keyT] # Assume index structure of ``edge`` tensor to be as follows # # -- 0 # edge |-- 1 # -- 2 # # --0 0--T--1->2 # | 2->3 # edge--1->0 # | # --2->1 E = torch.tensordot(edge, T, ([0], [0])) if verbosity > 0: print("E=edgeT " + str(E.size())) # TODO - more efficent contraction with uncontracted-double-layer on-site tensor # Possibly reshape indices 1,2 of E, which are to be contracted with # on-site tensor and contract bra,ket in two steps instead of creating # double layer tensor # / # --A-- # /|s # X # s'|/ # --A-- # / # # where X is Id or op a = next(iter(state.sites.values())) dims_a = a.size() X = torch.eye(dims_a[0], dtype=a.dtype, device=a.device) if op is None else op A= torch.einsum('mefgh,mn,nabcd->eafbgchd',a,X,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2) # ---------T--2->1 # | 3 # | 0 # edge--0 1--A--3 # | 2 # ----1->0 E = torch.tensordot(E, A, ([0, 3], [1, 0])) if verbosity > 0: print("E=EA " + str(E.size())) # -------T--1->0 # | | # | | # edge-----A--3->1 # | 2 # | 2 # --0 0--T--1->2 E = torch.tensordot(E, T, ([0, 2], [0, 2])) if verbosity > 0: print("E=ET " + str(E.size())) return E
def _dot_simililarity(x, y):
    v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)
    # x shape: (N, 1, C)
    # y shape: (1, C, 2N)
    # v shape: (N, 2N)
    return v
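# Hedged usage sketch (shapes assumed: N=4 queries, 2N=8 keys, C=16): with
# dims=2 the call above contracts the trailing (1, C) axes of x.unsqueeze(1)
# against the leading (1, C) axes of y.T.unsqueeze(0), which is exactly the
# dot-product similarity matrix x @ y.T.
import torch

x = torch.randn(4, 16)   # (N, C)
y = torch.randn(8, 16)   # (2N, C)
v = torch.tensordot(x.unsqueeze(1), y.T.unsqueeze(0), dims=2)  # (N, 2N)
assert torch.allclose(v, x @ y.T, atol=1e-5)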
def apply_TM_1sO_2(state, env, edge, op=None, verbosity=0): r""" :param state: underlying 1-site C4v symmetric wavefunction :param env: C4v symmetric environment corresponding to ``state`` :param edge: tensor of dimensions :math:`\chi \times (D^2)^2 \times \chi` :param op: two-site operator to be inserted within the two-site transfer matrix :param verbosity: logging verbosity :type state: IPEPS_C4V :type env: ENV_C4V :type edge: torch.tensor :type op: torch.tensor :type verbosity: int :return: ``edge`` with a single instance of the transfer matrix applied The resulting tensor has an identical index structure as the original ``edge`` :rtype: torch.tensor Applies a single instance of the two-site "transfer matrix" to the ``edge`` tensor by contracting the following network, or its corresponding rotation depending on the ``direction``:: -----T---------- | | edge--(a^+ o1 a)-- | | | |----(a^+ o2 a)-- | | -----T---------- The two-site operator is first decomposed into a simple MPO o1--o2 (TODO case where op comes with an extra MPO index):: s1' s2' s1' s2' | op | = |o1|-----|o2| s1 s2 s1 s2 where the physical indices `s` and `s'` of the on-site tensor :math:`a` and it's hermitian conjugate :math:`a^\dagger` are contracted with identity :math:`\delta_{s,s'}` or ``o1``, ``o2``. """ # TODO stronger verification op_1, op_2 = None, None if op is not None: if len(op.size()) == 4: # pre-process ``op`` # TODO possibly truncate/compress according to the vanishingly small singular values dims_op = op.size() op_mat = op.permute(0, 2, 1, 3).contiguous().reshape( dims_op[0]**2, dims_op[0]**2) op_1, s, op_2 = torch.svd(op_mat) op_1 = op_1.reshape(dims_op[0], dims_op[0], s.size()[0]) op_2 = torch.einsum('i,ij->ij', s, op_2.t()).reshape(s.size()[0], dims_op[0], dims_op[0]) op_2 = op_2.permute(1, 2, 0).contiguous() else: raise ValueError(f"Invalid op: rank {op.size()}") # Four basic cases of passed op def get_aXa(a, op): # a - on-site tensor # op - operator dims_a = a.size() dims_op = None if op is None else op.size() if op is None: # identity A= torch.einsum('nefgh,nabcd->eafbgchd',a,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2) elif len(dims_op) == 2: # one-site operator A= torch.einsum('mefgh,mn,nabcd->eafbgchd',a,op,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2) elif len(dims_op) == 3: # edge operators of some MPO within the transfer matrix # # 0 0 # | | # op--2 ... or ... 2--op # | | # 1 1 # # assume the last index of the op is the MPO dimension. 
# It will become the last index of the resulting edge A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,op,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1) if verbosity > 0: print(f"aXa {A.size()}") return A a = next(iter(state.sites.values())) T = env.T[env.keyT] # Assume index structure of ``edge`` tensor to be as follows # # -- 0 # edge |-- 1 # |---2 # -- 3 # # ----0 0--T--1->0 # | 2->1 # edge--1->2 # | # ----2->3 # | # ----3->4 E = torch.tensordot(T, edge, ([0], [0])) if verbosity > 0: print("E=edgeT " + str(E.size())) # TODO - more efficent contraction with uncontracted-double-layer on-site tensor # Possibly reshape indices 1,2 of E, which are to be contracted with # on-site tensor and contract bra,ket in two steps instead of creating # double layer tensor # / # --A-- # /|s # X # s'|/ # --A-- # / # # where X is Id or op A = get_aXa(a, op_1) # ---------T--0 # | 1 # | 0 # edge--2 1--A--3->4 # | 3<-2 \ # ----3->1 (4->5) # | # ----4->2 E = torch.tensordot(E, A, ([1, 2], [0, 1])) if verbosity > 0: print("E=edgeTA " + str(E.size())) A = get_aXa(a, op_2) # ---------T--0 # | | # edge-------A--4->2 # | | \ # | 3 (5) # | 0 (4) # | | / # ----1 1--A--2->3 # | 3->4 # ----2->1 E = torch.tensordot(E,A,([1,3],[1,0])) if op is None else \ torch.tensordot(E,A,([1,3,5],[1,0,4])) if verbosity > 0: print("E=edgeTAA " + str(E.size())) # ---------T--0 # | | # edge-------A--2->1 # | | # ---------A--3->2 # | 3 # | 2 # ----1 0--T2--1->3 E = torch.tensordot(E, T, ([1, 3], [0, 2])) if verbosity > 0: print("E=edgeTAAT " + str(E.size())) return E
def vr(self):
    return torch.tensordot(self.positions, self.velocities) / self.r
def loss(self, samples):
    """
    Computes the Distributional Q-learning loss, based on projecting the
    discounted rewards + target Q-distribution into the current Q-domain,
    with cross-entropy loss.

    Returns loss and KL-divergence-errors for use in prioritization.
    """
    delta_z = (self.V_max - self.V_min) / (self.agent.n_atoms - 1)
    z = torch.linspace(self.V_min, self.V_max, self.agent.n_atoms)
    # Make 2-D tensor of contracted z_domain for each data point,
    # with zeros where next value should not be added.
    next_z = z * (self.discount ** self.n_step_return)  # [P']
    next_z = torch.ger(1 - samples.done_n.float(), next_z)  # [B,P']
    ret = samples.return_.unsqueeze(1)  # [B,1]
    next_z = torch.clamp(ret + next_z, self.V_min, self.V_max)  # [B,P']

    z_bc = z.view(1, -1, 1)  # [1,P,1]
    next_z_bc = next_z.unsqueeze(1)  # [B,1,P']
    abs_diff_on_delta = abs(next_z_bc - z_bc) / delta_z
    projection_coeffs = torch.clamp(1 - abs_diff_on_delta, 0, 1)  # Most 0.
    # projection_coeffs is a 3-D tensor: [B,P,P']
    # dim-0: independent data entries
    # dim-1: base_z atoms (remains after projection)
    # dim-2: next_z atoms (summed in projection)

    with torch.no_grad():
        target_ps = self.agent.target(*samples.target_inputs)  # [B,A,P']
        if self.double_dqn:
            next_ps = self.agent(*samples.target_inputs)  # [B,A,P']
            next_qs = torch.tensordot(next_ps, z, dims=1)  # [B,A]
            next_a = torch.argmax(next_qs, dim=-1)  # [B]
        else:
            target_qs = torch.tensordot(target_ps, z, dims=1)  # [B,A]
            next_a = torch.argmax(target_qs, dim=-1)  # [B]
        target_p_unproj = select_at_indexes(next_a, target_ps)  # [B,P']
        target_p_unproj = target_p_unproj.unsqueeze(1)  # [B,1,P']
        target_p = (target_p_unproj * projection_coeffs).sum(-1)  # [B,P]
    ps = self.agent(*samples.agent_inputs)  # [B,A,P]
    p = select_at_indexes(samples.action, ps)  # [B,P]
    p = torch.clamp(p, EPS, 1)  # NaN-guard.
    losses = -torch.sum(target_p * torch.log(p), dim=1)  # Cross-entropy.

    if self.prioritized_replay:
        losses *= samples.is_weights

    target_p = torch.clamp(target_p, EPS, 1)
    KL_div = torch.sum(target_p * (torch.log(target_p) - torch.log(p.detach())), dim=1)
    KL_div = torch.clamp(KL_div, EPS, 1 / EPS)  # Avoid <0 from NaN-guard.

    if not self.mid_batch_reset:
        valid = valid_from_done(samples.done)
        loss = valid_mean(losses, valid)
        KL_div *= valid
    else:
        loss = torch.mean(losses)

    return loss, KL_div
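# Minimal illustration (assumed shapes B=2, A=3, P=51) of the tensordot calls
# in the loss above: contracting the atom dimension of a probability tensor
# with the support z yields per-action Q-values, Q[b, a] = sum_p ps[b, a, p] * z[p].
import torch

B, A, P = 2, 3, 51
V_min, V_max = -10.0, 10.0
z = torch.linspace(V_min, V_max, P)                # [P]
ps = torch.softmax(torch.randn(B, A, P), dim=-1)   # [B,A,P]
qs = torch.tensordot(ps, z, dims=1)                # [B,A]
assert torch.allclose(qs, (ps * z).sum(-1), atol=1e-5)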
y_torch = torch.from_numpy(y) C_matrix = torch.from_numpy( np.array([[1, 0, 0, 0], [0, 0, 1, 0]], dtype=np.float32)) optimizer = optim.RMSprop(func.parameters(), lr=1e-3) end = time.time() time_meter = RunningAverageMeter(0.97) loss_meter = RunningAverageMeter(0.97) ii = 0 for itr in range(1, args.niters + 1): optimizer.zero_grad() pred_x = odeint(func, x0_torch, time_torch) pred_y = torch.tensordot(pred_x, C_matrix, ((-1, ), (1, ))) loss = torch.mean(torch.abs(pred_y - y_torch)) loss.backward() optimizer.step() time_meter.update(time.time() - end) loss_meter.update(loss.item()) if itr % args.test_freq == 0: with torch.no_grad(): pred_x = odeint(func, x0_torch, time_torch) pred_y = torch.tensordot(pred_x, C_matrix, ((-1, ), (1, ))) loss = torch.mean(torch.abs(pred_y - y_torch)) print('Iter {:04d} | Total Loss {:.6f}'.format( itr, loss.item())) ii += 1
def sample(self, p, z=None):
    # fall back to the module's stored z when none is given; `z or self.z`
    # would raise for tensors with more than one element
    q = torch.tensordot(p, self.z if z is None else z, dims=1)
    return super().sample(q)
def forward(self, x, offset): if x.numel() == 0: # When input is empty, we want to return a empty tensor with "correct" shape, # So that the following operations will not panic # if they check for the shape of the tensor. # This computes the height and width of the output tensor output_shape = [ (i + 2 * p - (di * (k - 1) + 1)) // s + 1 for i, p, di, k, s in zip( x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride ) ] output_shape = [x.shape[0], self.weight.shape[0]] + output_shape return _NewEmptyTensorOp.apply(x, output_shape) out = x.new_tensor(size=self._output_size(x)) # x.shape == [batch, in_channels, height, width] # offset.shape == [batch, 2 * in_channels * kernel_height * kernel_width, height, width] batch_size = x.shape[0] # pad input, left, right, top, bottom x = F.pad(x, pad=[self.padding, self.padding, self.padding, self.padding], value=0) xh = x.shape[2] xw = x.shape[3] h_start = 0 + (self.kernel_size // 2) + self.dilation - 1 w_start = 0 + (self.kernel_size // 2) + self.dilation - 1 h_end = xh - (self.kernel_size // 2) - self.dilation + 1 w_end = xw - (self.kernel_size // 2) - self.dilation + 1 # Every step a centered on the kernel center, for even sized kernels it is at the "bottom right" pixel of the # most centered 4 pixels. Strides which do not line up with the input maps will cut out pixel columns. base_kernel_offsets = x.new_zeros(size=(self.kernel_size, self.kernel_size, 2)).long() for h in range(self.kernel_size): base_kernel_offsets[h, :, 0] = (h - (self.kernel_size // 2)) * self.dilation for w in range(self.kernel_size): base_kernel_offsets[:, w, 1] = (w - (self.kernel_size // 2)) * self.dilation deform_offsets = offset.new_zeros(size=(batch_size, self.kernel_size, self.kernel_size, 2)) for h in range(h_start, h_end, self.stride): for w in range(w_start, w_end, self.stride): # Construct input feature map pixel column # Pixel column shape = [batch, in_channels, kernel_height, kernel_width] out_h = (h - h_start) // self.stride out_w = (w - w_start) // self.stride for i in range(self.kernel_size): for j in range(self.kernel_size): deform_offsets[:, i, j, 0] = offset[:, 2*(i*self.kernel_size+j), out_h, out_w] deform_offsets[:, i, j, 1] = offset[:, 2*(i*self.kernel_size+j)+1, out_h, out_w] sample_idx = torch.Tensor([[[[h, w]]]]) \ + base_kernel_offsets.unsqueeze(dim=0) \ + deform_offsets \ + 0.5 sampled_points = batch_kernel_interpolate(x, sample_idx) # sampled_points shape = (batch, in_channels, kernel_size, kernel_size) # weight = (out_channels, in_channels, kernel_size, kernel_size) out[:, :, out_h, out_w] = torch.tensordot(sampled_points, self.weight.permute(1, 2, 3, 0), dims=3) if self.norm is not None: out = self.norm(out) if self.activation is not None: out = self.activation(out) return out
def diffusion_dynamics(self, beliefs: torch.Tensor, actions=None, compute_drift=True, compute_dispersion=True, approx_state_exp_sampling=False): """ Computes the Wonham diffusion dynamics :param beliefs: Tensor of beliefs. Must be of shape [Batch x States] :param actions: List of actions to consider of shape [Batch x Actions]. If None, all actions will be considered. :param compute_drift: Bool whether the drift terms should be computed :param compute_dispersion: Bool whether the drift terms should be computed :param approx_state_exp_sampling: If True, expectation over next state is approximated by sampling one state :return: Drift vector of shape [Batch x Actions x States] if compute_drift is True, Dispersion matrix of shape [Batch, Actions, States, Noise] if compute_dispersion is True """ if actions is None: actions = torch.arange(self.drift_tensor.shape[0])[None] elif actions.ndim == 1: actions = actions.reshape(-1, 1) # g_bar is of shape [N=NumSamples, ADim, ODim] da = self.drift_tensor[ actions] # drift tensor of selected actions: B A S O # noinspection PyArgumentList g_bar = torch.sum(beliefs[:, None, :, None] * da, axis=2) # g minus g_bar is of shape [N, A, S, O] gmgb = da - g_bar[:, :, None, :] # inv(hh) * (g-g_bar) is of shape [NumSamples, A, S, O] hhg = gmgb @ self.outer_dispersion_inv result = [] if compute_drift: # GHG is of shape B, A, S # noinspection PyArgumentList ghg = torch.sum(gmgb * hhg, axis=-1) # BGHG is of shape B, A, S bghg = beliefs[:, None, :] * ghg # sumtb is of shape B, A, S if not approx_state_exp_sampling: t = self.transition_matrix[..., actions].permute(2, 3, 0, 1) # B A S' S' # noinspection PyArgumentList tb = torch.sum(beliefs[:, None, :, None] * t, axis=2) else: ss = Categorical(probs=beliefs).sample()[:, None] # B x 1 tb = self.transition_matrix[ss, :, actions] # mu is of shape N, A, S mu_vec = bghg + tb result += [mu_vec] if compute_dispersion: # of shape N, A, S, NoiseDim sigma = beliefs[:, None, :, None] * torch.tensordot( self.dispersion_matrix, hhg, dims=([0], [-1])).permute( 1, 2, 3, 0) result += [sigma] if len(result) == 1: return result[0] else: return result
def computeSurprisals(linearized): assert len(linearized) == MAX_BOUNDARY chart = [[ torch.cuda.FloatTensor([[float("-Inf") for _ in range(BATCHSIZE)] for _ in itos_setOfNonterminals]) for _ in linearized ] for _ in linearized] for length in range( 1, len(linearized) + 1 ): # the NUMBER of words spanned. start+length is the first word OUTSIDE the constituent for start in range( len(linearized )): # the index of the first word taking part in the thing if start + length - 1 >= len(linearized): continue if length == 1: # TODO for words at the boundary, immediately add prefix and suffix counts if start < LEFT_CONTEXT: for preterminal in terminals: chart[start][start][ stoi_setOfNonterminals[preterminal]].fill_(0) else: if wordCounts.get(linearized[start], 0) < OOV_THRESHOLD: # OOV for preterminal in terminals: chart[start][start][ stoi_setOfNonterminals[preterminal]].fill_( log(OOV_COUNT) - log(nonAndPreterminals[preterminal] + OOV_COUNT + OTHER_WORDS_SMOOTHING * len(wordCounts))) else: for preterminal in terminals: count = terminals[preterminal].get( linearized[start], 0) + OTHER_WORDS_SMOOTHING chart[start][start][ stoi_setOfNonterminals[preterminal]].fill_( log(count) - log(nonAndPreterminals[preterminal] + OOV_COUNT + OTHER_WORDS_SMOOTHING * len(wordCounts))) assert start == start + length - 1 else: for start2 in range(start + 1, len(linearized)): left = chart[start][start2 - 1].view(-1) right = chart[start2][start + length - 1].view(-1) maxLeft = torch.max(left) maxRight = torch.max(right) if float(maxLeft) == float("-inf") or float( maxRight) == float("-inf"): # everything will be 0 continue # VERSION WITH TENSORDOT # resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([0], [1])) # resultTotal = torch.tensordot(resultLeft, torch.exp(right-maxRight), dims=([1], [0])) # VERSION WITH BILINEAR # print(left.size(), right.size()) resultTotal = torch.nn.functional.bilinear( torch.exp(left - maxLeft), torch.exp(right - maxRight), binary_rules_matrix) #print(resultTotal) #print(resultTotal2) #assert (resultTotal-resultTotal2).abs().max() < 1e-5, (resultTotal-resultTotal2).abs().max() # quit() resultTotalLog = torch.log(resultTotal) + (maxLeft + maxRight) resultTotalLog[resultTotal <= 0].fill_(float("-inf")) entry = chart[start][start + length - 1] #assert "nan" not in str(entry.max()) #assert "nan" not in str(resultTotalLog.max()) chart[start][start + length - 1] = logSumExp( resultTotalLog.view(-1, BATCHSIZE), entry) #assert "nan" not in str(chart[start][start+length-1].max()) ############################# # Now consider different endpoints valuesPerBoundary = [0] for BOUNDARY in range(LEFT_CONTEXT + 1, len(linearized) + 1): chartFromStart = [ torch.cuda.FloatTensor([[float("-Inf") for _ in range(BATCHSIZE)] for _ in itos_setOfNonterminals]) for _ in range(BOUNDARY) ] if True: right = chart[BOUNDARY - 1][BOUNDARY - 1].view(-1) right_max = torch.max(right) result = torch.tensordot(invertedLeft, torch.exp(right - right_max), dims=([1], [0])) resultLog = (torch.log(result) + right_max).view(-1, BATCHSIZE) chartFromStart[BOUNDARY - 1] = resultLog for start in range( BOUNDARY )[:: -1]: # now construct potential constituents that start at `start', but end outside of the portion for start2 in range(start + 1, BOUNDARY): left = chart[start][start2 - 1].view(-1) right = chartFromStart[start2].view(-1) maxLeft = torch.max(left) maxRight = torch.max(right) if float(maxLeft) == float("-inf") or float(maxRight) == float( "-inf"): # everything will be 0 continue resultLeft = 
torch.tensordot(torch.exp(left - maxLeft), binary_rules_matrix, dims=([0], [1])) resultTotal = torch.tensordot(resultLeft, torch.exp(right - maxRight), dims=([1], [0])) # resultTotalLog = torch.log(resultTotal)+maxLeft+maxRight # resultTotalLog[resultTotal <= 0].fill_(float("-inf")) # resultTotalLog_max = torch.max(resultTotalLog) result = torch.tensordot(invertedLeft, resultTotal, dims=([1], [0])) resultLog = (torch.log(result) + (maxLeft + maxRight)).view( -1, BATCHSIZE) resultLog[result <= 0].fill_(float("-inf")) chartFromStart[start] = logSumExp(chartFromStart[start], resultLog) # for root in itos_setOfNonterminals: # count = roots.get(root, 0) # iroot = stoi_setOfNonterminals[root] # if chartFromStart[0][iroot] is not None: # if count == 0: # chartFromStart[0][iroot] = torch.cuda.FloatTensor([float("-Inf") for _ in range(BATCHSIZE)]) # else: # chartFromStart[0][iroot] += log(count) - log(roots["__TOTAL__"]) # prefixProb = float( chartFromStart[0][stoi_setOfNonterminals["_SENTENCES_"]] ) #log(sum([exp(float(x[0])) if x[0] is not None else 0 for x in chartFromStart[0]])) # log P(S|root) -- the full mass comprising all possible trees (including spurious ambiguities arising from the PCFG conversion) surprisalTableSums[BOUNDARY - 1] += prefixProb surprisalTableCounts[BOUNDARY - 1] += 1 valuesPerBoundary.append(prefixProb) print(BOUNDARY, prefixProb, linearized) assert prefixProb < valuesPerBoundary[-2], "bug or numerical problem?"
def forward(self, x):
    assert(x.shape[-1] == self.input_dims)
    raw_freqs = torch.tensordot(x, self.bands, dims=0)
    raw_freqs = raw_freqs.reshape(x.shape[:-1] + (-1,))
    return torch.cat([raw_freqs.sin(), raw_freqs.cos()], dim=-1)
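# Standalone usage sketch of the Fourier-feature encoding above (the module's
# self.bands and self.input_dims are replaced by assumed local values).
# tensordot with dims=0 is an outer product, so every input coordinate is
# multiplied by every frequency band before taking sin and cos.
import torch

x = torch.randn(5, 3)                            # (..., input_dims)
bands = 2.0 ** torch.arange(4)                   # assumed frequency bands
raw = torch.tensordot(x, bands, dims=0)          # (5, 3, 4)
raw = raw.reshape(x.shape[:-1] + (-1,))          # (5, 12)
enc = torch.cat([raw.sin(), raw.cos()], dim=-1)  # (5, 24)
assert enc.shape == (5, 24)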
def compute_expectation(self, costs): """ Returns a differentiable expected cost, summing over costs at given ordinals. :param dict costs: A dict mapping ordinals to lists of cost tensors :returns: a scalar expected cost :rtype: torch.Tensor or float """ # Share computation across all cost terms. with shared_intermediates() as cache: ring = MarginalRing(cache=cache) expected_cost = 0. for ordinal, cost_terms in costs.items(): log_factors = self._get_log_factors(ordinal) scale = math.exp( sum(x for x in log_factors if not isinstance(x, torch.Tensor))) log_factors = [ x for x in log_factors if isinstance(x, torch.Tensor) ] # Collect log_prob terms to query for marginal probability. queries = { frozenset(cost._pyro_dims): None for cost in cost_terms } for log_factor in log_factors: key = frozenset(log_factor._pyro_dims) if queries.get(key, False) is None: queries[key] = log_factor # Ensure a query exists for each cost term. for cost in cost_terms: key = frozenset(cost._pyro_dims) if queries[key] is None: query = torch.zeros_like(cost) query._pyro_dims = cost._pyro_dims log_factors.append(query) queries[key] = query # Perform sum-product contraction. Note that plates never need to be # product-contracted due to our plate-based dependency ordering. sum_dims = set().union(*(x._pyro_dims for x in log_factors)) - ordinal for query in queries.values(): require_backward(query) root = ring.sumproduct(log_factors, sum_dims) root._pyro_backward() probs = { key: query._pyro_backward_result.exp() for key, query in queries.items() } # Aggregate prob * cost terms. for cost in cost_terms: key = frozenset(cost._pyro_dims) prob = probs[key] prob._pyro_dims = queries[key]._pyro_dims mask = prob > 0 if torch._C._get_tracing_state() or not mask.all(): mask._pyro_dims = prob._pyro_dims cost, prob, mask = packed.broadcast_all( cost, prob, mask) prob = prob.masked_select(mask) cost = cost.masked_select(mask) else: cost, prob = packed.broadcast_all(cost, prob) expected_cost = expected_cost + scale * torch.tensordot( prob, cost, prob.dim()) LAST_CACHE_SIZE[0] = count_cached_ops(cache) return expected_cost
def forward(self, betas, pose, trans, simplify=False): """ Construct a compute graph that takes in parameters and outputs a tensor as model vertices. Face indices are also returned as a numpy ndarray. 20190128: Add batch support. Parameters: --------- pose: Also known as 'theta', an [N, 24, 3] tensor indicating child joint rotation relative to parent joint. For root joint it's global orientation. Represented in a axis-angle format. betas: Parameter for model shape. A tensor of shape [N, 10] as coefficients of PCA components. Only 10 components were released by SMPL author. trans: Global translation tensor of shape [N, 3]. Return: ------ A 3-D tensor of [N * 6890 * 3] for vertices, and the corresponding [N * 19 * 3] joint positions. """ batch_num = betas.shape[0] id_to_col = {self.kintree_table[1, i]: i for i in range(self.kintree_table.shape[1])} parent = { i: id_to_col[self.kintree_table[0, i]] for i in range(1, self.kintree_table.shape[1]) } print("v_t",self.v_template.shape) temp=torch.tensordot(betas, self.shapedirs, dims=([1], [2])) v_shaped =self.v_template.unsqueeze(0).expand(betas.size(0), 6890, 3)#self.v_template #torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template print("temp",temp.shape) print("v_s",v_shaped.shape) J = torch.matmul(self.J_regressor, v_shaped) R_cube_big = self.rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) if simplify: v_posed = v_shaped else: R_cube = R_cube_big[:, 1:, :, :] I_cube = (torch.eye(3, dtype=torch.float32).unsqueeze(dim=0) + \ torch.zeros((batch_num, R_cube.shape[1], 3, 3), dtype=torch.float32)).to(self.device) lrotmin = (R_cube - I_cube).reshape(batch_num, -1, 1).squeeze(dim=2) v_posed = v_shaped + torch.tensordot(lrotmin, self.posedirs, dims=([1], [2])) results = [] results.append( self.with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) ) for i in range(1, self.kintree_table.shape[1]): results.append( torch.matmul( results[parent[i]], self.with_zeros( torch.cat( (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), dim=2 ) ) ) ) stacked = torch.stack(results, dim=1) results = stacked - \ self.pack( torch.matmul( stacked, torch.reshape( torch.cat((J, torch.zeros((batch_num, 24, 1), dtype=torch.float32).to(self.device)), dim=2), (batch_num, 24, 4, 1) ) ) ) # Restart from here T = torch.tensordot(results, self.weights, dims=([1], [1])).permute(0, 3, 1, 2) rest_shape_h = torch.cat( (v_posed, torch.ones((batch_num, v_posed.shape[1], 1), dtype=torch.float32).to(self.device)), dim=2 ) v = torch.matmul(T, torch.reshape(rest_shape_h, (batch_num, -1, 4, 1))) v = torch.reshape(v, (batch_num, -1, 4))[:, :, :3] result = v + torch.reshape(trans, (batch_num, 1, 3)) # estimate 3D joint locations # print(result.shape) # print(self.joint_regressor.shape) joints = torch.tensordot(result, self.joint_regressor.transpose(1, 0), dims=([1], [0])).transpose(1, 2) return result, joints
def forward(self, features, paths_indices, other_info): """ features: n_nodes x (input_path_size) x input_size paths_indices: n_paths x path_size (values < n_nodes) output: n_nodes x ((input_path_size) x path_size) x input_size """ # convolution self.normalize_() norms = features.norm(dim=-1, keepdim=True) # norms: n_nodes x (input_path_size) x 1 #output = features / norms.clamp(min=EPS) output = torch.tensordot(features, self.weight, dims=[[-1], [-1]]) output = output / norms.clamp(min=EPS).unsqueeze(2) n_nodes = output.shape[0] if output.ndim == 4: output = output.permute(0, 2, 1, 3).contiguous() # output: n_nodes x path_size x (input_path_size) x hidden_size ## prepare masks mask = None if self.aggregation: mask = [None for _ in range(self.path_size)] if 'mask' in other_info and self.path_size > 1: mask = other_info['mask'] output = output.view(n_nodes, self.path_size, -1) # output: n_nodes x path_size x (input_path_size x hidden_size) if self.aggregation: outputs = [] for i in range(self.path_size): embeded = path_conv_agg(output, paths_indices[i], other_info['n_paths'][i], self.pooling, self.kappa, self.d_kappa, mask[i]) outputs.append(embeded) output = torch.stack(outputs, dim=0) output = output.view(self.path_size, -1, self.hidden_size) # output: path_size x (n_nodes x (input_path_size)) x hidden_size output = norms.view(1, -1, 1) * output else: output = path_conv_agg(output, paths_indices[self.path_size - 1], other_info['n_paths'][self.path_size - 1], self.pooling, self.kappa, self.d_kappa, mask) # output: n_nodes x ((input_path_size) x hidden_size) output = output.view(n_nodes, -1, self.hidden_size) output = norms.view(n_nodes, -1, 1) * output # output: n_nodes x (input_path_size) x hidden_size lintrans = self._compute_lintrans() # linear transformation if self.aggregation: output = output.bmm(lintrans) # output = output.view(self.path_size, n_nodes, -1, self.hidden_size) output = output.permute(1, 0, 2) output = output.reshape(n_nodes, -1, self.hidden_size) output = output.contiguous() else: output = torch.tensordot(output, lintrans, dims=[[-1], [-1]]) # output: n_nodes x ((input_path_size) x path_size) x hidden_size return output
def rdm1x1(coord, state, env, verbosity=0): r""" :param coord: vertex (x,y) for which reduced density matrix is constructed :param state: underlying wavefunction :param env: environment corresponding to ``state`` :param verbosity: logging verbosity :type coord: tuple(int,int) :type state: IPEPS :type env: ENV :type verbosity: int :return: 1-site reduced density matrix with indices :math:`s;s'` :rtype: torch.tensor Computes 1-site reduced density matrix :math:`\rho_{1x1}` centered on vertex ``coord`` by contracting the following tensor network:: C--T-----C | | | T--A^+A--T | | | C--T-----C where the physical indices `s` and `s'` of on-site tensor :math:`A` at vertex ``coord`` and it's hermitian conjugate :math:`A^\dagger` are left uncontracted """ # C(-1,-1)--1->0 # 0 # 0 # T(-1,0)--2 # 1 rdm = torch.tensordot(env.C[(coord, (-1, -1))], env.T[(coord, (-1, 0))], ([0], [0])) if verbosity > 0: print("rdm=CT " + str(rdm.size())) # C(-1,-1)--0 # | # T(-1,0)--2->1 # 1 # 0 # C(-1,1)--1->2 rdm = torch.tensordot(rdm, env.C[(coord, (-1, 1))], ([1], [0])) if verbosity > 0: print("rdm=CTC " + str(rdm.size())) # C(-1,-1)--0 # | # T(-1,0)--1 # | 0->2 # C(-1,1)--2 1--T(0,1)--2->3 rdm = torch.tensordot(rdm, env.T[(coord, (0, 1))], ([2], [1])) if verbosity > 0: print("rdm=CTCT " + str(rdm.size())) # TODO - more efficent contraction with uncontracted-double-layer on-site tensor # Possibly reshape indices 1,2 of rdm, which are to be contracted with # on-site tensor and contract bra,ket in two steps instead of creating # double layer tensor # / # --A-- # /|s # # s'|/ # --A-- # / # dimsA = state.site(coord).size() a = torch.einsum('mefgh,nabcd->eafbgchdmn',state.site(coord),state.site(coord)).contiguous()\ .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0]) # C(-1,-1)--0 # | # | 0->2 # T(-1,0)--1 1--a--3 # | 2\45(s,s') # | 2 # C(-1,1)-------T(0,1)--3->1 rdm = torch.tensordot(rdm, a, ([1, 2], [1, 2])) if verbosity > 0: print("rdm=CTCTa " + str(rdm.size())) # C(-1,-1)--0 0--T(0,-1)--2->0 # | 1 # | 2 # T(-1,0)--------a--3->2 # | |\45->34(s,s') # | | # C(-1,1)--------T(0,1)--1 rdm = torch.tensordot(env.T[(coord, (0, -1))], rdm, ([0, 1], [0, 2])) if verbosity > 0: print("rdm=CTCTaT " + str(rdm.size())) # C(-1,-1)--T(0,-1)--0 0--C(1,-1) # | | 1->0 # | | # T(-1,0)---a--2 # | |\34(s,s') # | | # C(-1,1)---T(0,1)--0->1 rdm = torch.tensordot(env.C[(coord, (1, -1))], rdm, ([0], [0])) if verbosity > 0: print("rdm=CTCTaTC " + str(rdm.size())) # C(-1,-1)--T(0,-1)-----C(1,-1) # | | 0 # | | 0 # T(-1,0)---a--2 1------T(1,0) # | |\34->23(s,s') 2->0 # | | # C(-1,1)---T(0,1)--1 rdm = torch.tensordot(env.T[(coord, (1, 0))], rdm, ([0, 1], [0, 2])) if verbosity > 0: print("rdm=CTCTaTCT " + str(rdm.size())) # C(-1,-1)--T(0,-1)--------C(1,-1) # | | | # | | | # T(-1,0)---a--------------T(1,0) # | |\23->12(s,s') 0 # | | 0 # C(-1,1)---T(0,1)--1 1----C(1,1) rdm = torch.tensordot(rdm, env.C[(coord, (1, 1))], ([0, 1], [0, 1])) if verbosity > 0: print("rdm=CTCTaTCTC " + str(rdm.size())) # normalize rdm = rdm / torch.trace(rdm) return rdm
def rgbtogray(image):
    # contract the RGB channel axis with luminance weights; use image.device
    # so this also works for CPU tensors (get_device() is CUDA-only)
    rgb_weights = torch.Tensor([0.2989, 0.5870, 0.1140]).to(image.device)
    input_r = torch.tensordot(image, rgb_weights, dims=([-3], [-1]))
    input_r = input_r.unsqueeze(-3)
    return input_r
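# Hedged usage sketch, assuming the rgbtogray above and a small random batch:
# dims=([-3], [-1]) collapses the channel axis against the 3-vector of
# weights, so a (B, 3, H, W) image becomes (B, H, W) before the channel axis
# is re-inserted by unsqueeze(-3).
import torch

image = torch.rand(2, 3, 8, 8)
gray = rgbtogray(image)
assert gray.shape == (2, 1, 8, 8)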
def objective_with_mat(D, F, P):
    A = P @ D @ P.T
    if has_torch and isinstance(D, torch.Tensor):
        return torch.tensordot(F, A, dims=2).cpu()
    else:
        return np.tensordot(F, A, axes=2)
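# Hedged numeric check of the objective above (assumed small sizes):
# tensordot(F, A, dims=2) is the Frobenius inner product sum_ij F[i, j] * A[i, j],
# i.e. trace(F^T A), which is the usual QAP objective <F, P D P^T>.
import torch

n = 5
D, F = torch.randn(n, n), torch.randn(n, n)
P = torch.eye(n)[torch.randperm(n)]             # a random permutation matrix
A = P @ D @ P.T
assert torch.allclose(torch.tensordot(F, A, dims=2), (F * A).sum(), atol=1e-5)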
def forward(self, inputs, embed=True):
    if embed:
        return torch.nn.functional.embedding(inputs, self.w)
    else:
        return torch.tensordot(inputs, self.w.t(), 1) + self.b
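# Sketch of the tied-weight usage above (vocab and hidden sizes assumed):
# with embed=True the module looks rows up in w; with embed=False it projects
# hidden vectors back onto the vocabulary, i.e. inputs @ w.t() + b.
import torch

vocab, dim = 10, 4
w, b = torch.randn(vocab, dim), torch.randn(vocab)
ids = torch.tensor([1, 3, 5])
emb = torch.nn.functional.embedding(ids, w)     # (3, dim)
logits = torch.tensordot(emb, w.t(), 1) + b     # (3, vocab)
assert torch.allclose(logits, emb @ w.t() + b, atol=1e-5)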
def tensordot():
    a = torch.arange(60.).reshape(3, 4, 5)
    b = torch.arange(24.).reshape(4, 3, 2)
    # return the contraction so the demo produces a usable value
    return torch.tensordot(a, b, dims=([1, 0], [0, 1]))
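# Equivalence sketch for the call above: contracting a's axes (1, 0) with b's
# axes (0, 1) is the same as einsum('ijk,jil->kl', a, b) and yields a 5x2 result.
import torch

a = torch.arange(60.).reshape(3, 4, 5)
b = torch.arange(24.).reshape(4, 3, 2)
out = torch.tensordot(a, b, dims=([1, 0], [0, 1]))
assert out.shape == (5, 2)
assert torch.allclose(out, torch.einsum('ijk,jil->kl', a, b))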
def one_eucl_rnn_transform(W, h, U, x, b):
    W_otimes_h = torch.tensordot(h, W, dims=([-1], [1]))
    U_otimes_x = torch.tensordot(x, U, dims=([-1], [1]))
    return W_otimes_h + U_otimes_x + b
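# Minimal usage sketch (shapes assumed): contracting the last axis of h and x
# against the second axis of W and U applies the weights batch-wise, matching
# h @ W.T + x @ U.T + b for 2-D weight matrices.
import torch

hidden, inp = 6, 4
W, U, b = torch.randn(hidden, hidden), torch.randn(hidden, inp), torch.randn(hidden)
h, x = torch.randn(2, hidden), torch.randn(2, inp)
out = one_eucl_rnn_transform(W, h, U, x, b)
assert torch.allclose(out, h @ W.T + x @ U.T + b, atol=1e-5)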
def forward(self, input):
    x = input
    y = torch.tensordot(x.permute(0, 2, 3, 1), self.weight, dims=1) + self.bias
    return y.permute(0, 3, 1, 2)
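# Hedged check that the channels-last tensordot above is a 1x1 convolution,
# assuming a weight of shape (C_in, C_out) and bias of shape (C_out,): it
# matches F.conv2d with the corresponding (C_out, C_in, 1, 1) kernel.
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8, 8)
weight, bias = torch.randn(3, 5), torch.randn(5)     # (C_in, C_out), (C_out,)
y = torch.tensordot(x.permute(0, 2, 3, 1), weight, dims=1) + bias
y = y.permute(0, 3, 1, 2)
ref = F.conv2d(x, weight.t().reshape(5, 3, 1, 1), bias)
assert torch.allclose(y, ref, atol=1e-4)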
class DefaultQubitTorch(DefaultQubit): """Simulator plugin based on ``"default.qubit"``, written using PyTorch. **Short name:** ``default.qubit.torch`` This device provides a pure-state qubit simulator written using PyTorch. As a result, it supports classical backpropagation as a means to compute the Jacobian. This can be faster than the parameter-shift rule for analytic quantum gradients when the number of parameters to be optimized is large. To use this device, you will need to install PyTorch: .. code-block:: console pip install torch>=1.8.0 **Example** The ``default.qubit.torch`` is designed to be used with end-to-end classical backpropagation (``diff_method="backprop"``) and the PyTorch interface. This is the default method of differentiation when creating a QNode with this device. Using this method, the created QNode is a 'white-box', and is tightly integrated with your PyTorch computation: .. code-block:: python dev = qml.device("default.qubit.torch", wires=1) @qml.qnode(dev, interface="torch", diff_method="backprop") def circuit(x): qml.RX(x[1], wires=0) qml.Rot(x[0], x[1], x[2], wires=0) return qml.expval(qml.PauliZ(0)) >>> weights = torch.tensor([0.2, 0.5, 0.1], requires_grad=True) >>> res = circuit(weights) >>> res.backward() >>> print(weights.grad) tensor([-2.2527e-01, -1.0086e+00, 1.3878e-17]) Autograd mode will also work when using classical backpropagation: >>> def cost(weights): ... return torch.sum(circuit(weights)**3) - 1 >>> res = circuit(weights) >>> res.backward() >>> print(weights.grad) tensor([-4.5053e-01, -2.0173e+00, 5.9837e-17]) Executing the pipeline in PyTorch will allow the whole computation to be run on the GPU, and therefore providing an acceleration. Your parameters need to be instantiated on the same device as the backend device. .. code-block:: python dev = qml.device("default.qubit.torch", wires=1, torch_device='cuda') @qml.qnode(dev, interface="torch", diff_method="backprop") def circuit(x): qml.RX(x[1], wires=0) qml.Rot(x[0], x[1], x[2], wires=0) return qml.expval(qml.PauliZ(0)) >>> weights = torch.tensor([0.2, 0.5, 0.1], requires_grad=True, device='cuda') >>> res = circuit(weights) >>> res.backward() >>> print(weights.grad) tensor([-2.2527e-01, -1.0086e+00, 2.9919e-17], device='cuda:0') There are a couple of things to keep in mind when using the ``"backprop"`` differentiation method for QNodes: * You must use the ``"torch"`` interface for classical backpropagation, as PyTorch is used as the device backend. * Only exact expectation values, variances, and probabilities are differentiable. When instantiating the device with ``shots!=None``, differentiating QNode outputs will result in an error. If you wish to use a different machine-learning interface, or prefer to calculate quantum gradients using the ``parameter-shift`` or ``finite-diff`` differentiation methods, consider using the ``default.qubit`` device instead. Args: wires (int, Iterable): Number of subsystems represented by the device, or iterable that contains unique labels for the subsystems. Default 1 if not specified. shots (None, int): How many times the circuit should be evaluated (or sampled) to estimate the expectation values. Defaults to ``None`` if not specified, which means that the device returns analytical results. If ``shots > 0`` is used, the ``diff_method="backprop"`` QNode differentiation method is not supported and it is recommended to consider switching device to ``default.qubit`` and using ``diff_method="parameter-shift"``. 
torch_device='cpu' (str): the device on which the computation will be run, e.g., ``'cpu'`` or ``'cuda'`` """ name = "Default qubit (Torch) PennyLane plugin" short_name = "default.qubit.torch" C_DTYPE = torch.complex128 R_DTYPE = torch.float64 _abs = staticmethod(torch.abs) _einsum = staticmethod(torch.einsum) _flatten = staticmethod(torch.flatten) _reshape = staticmethod(torch.reshape) _roll = staticmethod(torch.roll) _stack = staticmethod(lambda arrs, axis=0, out=None: torch.stack(arrs, axis=axis, out=out)) _tensordot = staticmethod( lambda a, b, axes: torch.tensordot( a, b, axes if isinstance(axes, int) else tuple(map(list, axes)) ) ) _transpose = staticmethod(lambda a, axes=None: a.permute(*axes)) _asnumpy = staticmethod(lambda x: x.cpu().numpy()) _conj = staticmethod(torch.conj) _real = staticmethod(torch.real) _imag = staticmethod(torch.imag) _norm = staticmethod(torch.norm) _flatten = staticmethod(torch.flatten) def __init__(self, wires, *, shots=None, analytic=None, torch_device=None): # Store if the user specified a Torch device. Otherwise the execute # method attempts to infer the Torch device from the gate parameters. self._torch_device_specified = torch_device is not None self._torch_device = torch_device super().__init__(wires, shots=shots, cache=0, analytic=analytic) # Move state to torch device (e.g. CPU, GPU, XLA, ...) self._state.requires_grad = True self._state = self._state.to(self._torch_device) self._pre_rotated_state = self._state @staticmethod def _get_parameter_torch_device(ops): """An auxiliary function to determine the Torch device specified for the gate parameters of the input operations. Returns the first CUDA Torch device found (if any) using a string format. Does not handle tensors put on multiple CUDA Torch devices. Such a case raises an error with Torch. If CUDA is not used with any of the parameters, then specifies the CPU if the parameters are on the CPU or None if there were no parametric operations. Args: ops (list[Operator]): list of operations to check Returns: str or None: The string of the Torch device determined or None if there is no data for any operations. """ par_torch_device = None for op in ops: for data in op.data: # Using hasattr in case we don't have a Torch tensor as input if hasattr(data, "is_cuda"): if data.is_cuda: # pragma: no cover return ":".join([data.device.type, str(data.device.index)]) par_torch_device = "cpu" return par_torch_device def execute(self, circuit, **kwargs): ops_and_obs = circuit.operations + circuit.observables par_torch_device = self._get_parameter_torch_device(ops_and_obs) if not self._torch_device_specified: self._torch_device = par_torch_device # If we've changed the device of the parameters between device # executions, need to move the state to the correct Torch device if self._state.device != self._torch_device: self._state = self._state.to(self._torch_device) else: if par_torch_device is not None: # pragma: no cover params_cuda_device = "cuda" in par_torch_device specified_device_cuda = "cuda" in self._torch_device # Raise a warning if there's a mismatch between the specified and # used Torch devices if params_cuda_device != specified_device_cuda: warnings.warn( f"Torch device {self._torch_device} specified " "upon PennyLane device creation does not match the " "Torch device of the gate parameters; " f"{self._torch_device} will be used." 
) return super().execute(circuit, **kwargs) def _asarray(self, a, dtype=None): if isinstance(a, list): # Handle unexpected cases where we don't have a list of tensors if not isinstance(a[0], torch.Tensor): res = np.asarray(a) res = torch.from_numpy(res) else: res = torch.cat([torch.reshape(i, (-1,)) for i in a], dim=0) res = torch.cat([torch.reshape(i, (-1,)) for i in res], dim=0) else: res = torch.as_tensor(a, dtype=dtype) res = torch.as_tensor(res, device=self._torch_device) return res _cast = _asarray @staticmethod def _dot(x, y): if x.device != y.device: if x.device != "cpu": return torch.tensordot(x, y.to(x.device), dims=1) if y.device != "cpu": return torch.tensordot(x.to(y.device), y, dims=1) return torch.tensordot(x, y, dims=1) @staticmethod def _reduce_sum(array, axes): if not axes: return array return torch.sum(array, dim=axes) @staticmethod def _conj(array): if isinstance(array, torch.Tensor): return torch.conj(array) return np.conj(array) @staticmethod def _scatter(indices, array, new_dimensions): # `array` is now a torch tensor tensor = array new_tensor = torch.zeros(new_dimensions, dtype=tensor.dtype, device=tensor.device) new_tensor[indices] = tensor return new_tensor @classmethod def capabilities(cls): capabilities = super().capabilities().copy() capabilities.update(passthru_interface="torch", supports_reversible_diff=False) return capabilities def _get_unitary_matrix(self, unitary): """Return the matrix representing a unitary operation. Args: unitary (~.Operation): a PennyLane unitary operation Returns: torch.Tensor[complex]: Returns a 2D matrix representation of the unitary in the computational basis, or, in the case of a diagonal unitary, a 1D array representing the matrix diagonal. """ if unitary in diagonal_in_z_basis: return self._asarray(unitary.eigvals, dtype=self.C_DTYPE) return self._asarray(unitary.matrix, dtype=self.C_DTYPE) def sample_basis_states(self, number_of_states, state_probability): """Sample from the computational basis states based on the state probability. This is an auxiliary method to the ``generate_samples`` method. Args: number_of_states (int): the number of basis states to sample from state_probability (torch.Tensor[float]): the computational basis probability vector Returns: List[int]: the sampled basis states """ return super().sample_basis_states( number_of_states, state_probability.cpu().detach().numpy() )
def my_tensordort_perm(a, b, dims=None, perm=None):
    return torch.tensordot(a, b, dims=dims).sum(3).permute(perm)
def computeSurprisals(linearized): assert len(linearized[0]) == args.MAX_BOUNDARY assert len(linearized) == args.BATCHSIZE # Presumably unnecessary for x in chart: for y in x: y.fill_(float("-Inf")) for length in range(1, args.MAX_BOUNDARY+1): # the NUMBER of words spanned. start+length is the first word OUTSIDE the constituent for start in range(args.MAX_BOUNDARY): # the index of the first word taking part in the thing if start+length-1 >= args.MAX_BOUNDARY: continue if length == 1: if start < args.LEFT_CONTEXT: for preterminal in terminals: chart[start][start][:,stoi_setOfNonterminals[preterminal]].fill_(0) else: lexical_tensor = torch.LongTensor([0 for _ in range(args.BATCHSIZE)]) for batch in range(args.BATCHSIZE): if wordCounts.get(linearized[batch][start],0) < args.OOV_THRESHOLD: # OOV lexical_tensor[batch] = stoi["_OOV_"] else: lexical_tensor[batch] = stoi[linearized[batch][start]] lexical_tensor = lexical_tensor.cuda() chart[start][start] = torch.nn.functional.embedding(input=lexical_tensor, weight=lexicalProbabilities_matrix) assert start == start+length-1 else: entries = [] for start2 in range(start+1, args.MAX_BOUNDARY): left = chart[start][start2-1] right = chart[start2][start+length-1] maxLeft = torch.max(left) #, dim=1, keepdim=True)[0] maxRight = torch.max(right) #, dim=1, keepdim=True)[0] if float(maxLeft) == float("-inf") or float(maxRight) == float("-inf"): # everything will be 0 continue resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([1], [1])) resultTotal = torch.bmm(resultLeft, torch.exp(right-maxRight).view(args.BATCHSIZE, -1, 1)).squeeze(2) resultTotal = torch.nn.functional.relu(resultTotal) # because some values end up being slightly negative in result resultTotalLog = torch.log(resultTotal)+(maxLeft+maxRight) entries.append(resultTotalLog) chart[start][start+length-1] = logSumExpList(entries) ############################# # Now consider different endpoints valuesPerBoundary = [0] for BOUNDARY in range(1, args.MAX_BOUNDARY+1): chartFromStart = [torch.cuda.FloatTensor([[float("-Inf") for _ in itos_setOfNonterminals] for _ in range(args.BATCHSIZE)]) for _ in range(BOUNDARY)] if True: right = chart[BOUNDARY-1][BOUNDARY-1] right_max = torch.max(right) result = torch.tensordot(torch.exp(right-right_max), invertedLeft, dims=([1], [1])) resultLog = (torch.log(result) + right_max) chartFromStart[BOUNDARY-1] = resultLog for start in range(BOUNDARY-1)[::-1]: # now construct potential constituents that start at `start', but end outside of the portion entries = [] for start2 in range(start+1, BOUNDARY): left = chart[start][start2-1] right = chartFromStart[start2] maxLeft = torch.max(left) maxRight = torch.max(right) if float(maxLeft) == float("-inf") or float(maxRight) == float("-inf"): # everything will be 0 continue resultLeft = torch.tensordot(torch.exp(left-maxLeft), binary_rules_matrix, dims=([1], [1])) resultTotal = torch.bmm(resultLeft, torch.exp(right-maxRight).view(args.BATCHSIZE, -1, 1)).squeeze(2) result = torch.tensordot(resultTotal, invertedLeft, dims=([1], [1])) result = torch.nn.functional.relu(result) # because some values end up being slightly negative in result resultLog = (torch.log(result) + (maxLeft+maxRight)) entries.append(resultLog) chartFromStart[start] = logSumExpList(entries) prefixProb = float(chartFromStart[0][:,stoi_setOfNonterminals["_SENTENCES_"]].sum()) #log(sum([exp(float(x[0])) if x[0] is not None else 0 for x in chartFromStart[0]])) # log P(S|root) -- the full mass comprising all possible trees (including 
spurious ambiguities arising from the PCFG conversion) surprisalTableSums[BOUNDARY-1] += prefixProb surprisalTableCounts[BOUNDARY-1] += args.BATCHSIZE valuesPerBoundary.append(prefixProb) print(BOUNDARY, prefixProb/args.BATCHSIZE, linearized[0]) assert prefixProb/args.BATCHSIZE - 0.01 < valuesPerBoundary[-2]/args.BATCHSIZE, ("bug or numerical problem?", (prefixProb/args.BATCHSIZE, valuesPerBoundary[-2]/args.BATCHSIZE))
def apply_TM_2sO(state, env, edge, op=None, verbosity=0): r""" :param state: underlying 1-site C4v symmetric wavefunction :param env: C4v symmetric environment corresponding to ``state`` :param edge: tensor of dimensions :math:`\chi \times D^2 \times \chi` :param op: two-site operator to be inserted into the two consecutive transfer matrices :param verbosity: logging verbosity :type state: IPEPS_C4V :type env: ENV_C4V :type edge: torch.tensor :type op: torch.tensor :type verbosity: int :return: ``edge`` with two transfer matrices (and operator ``op``, if any) applied. The resulting tensor has an identical index structure as the original ``edge`` :rtype: torch.tensor Applies two transfer matrices to the ``edge`` tensor, including the two-site operator ``op`` by contracting the following network:: -----T-------------T------------ | | | edge--(a^+ op_l a)==(a^+ op_r a)-- | | | -----T-------------T------------ where the physical indices `s` and `s'` of the on-site tensor :math:`a` and it's hermitian conjugate :math:`a^\dagger` are contracted with identity :math:`\delta_{s,s'}` or ``op_l`` and ``op_r`` if ``op`` is supplied. The ``op_l`` and ``op_r`` are given by the SVD decomposition of two-site operator ``op``:: 0 1 0 1 0 1->0 | | SVD | | | | | op | = |op_l|--(S--|op^~_r|) = |op_l|--2 2--|op_r| | | | | | | 2 3 2 3 2->1 3->1 """ # TODO stronger verification if op is not None: assert (len(op.size()) == 4) # pre-process ``op`` # TODO possibly truncate/compress according to the vanishingly small singular values dims_op = op.size() op_mat = op.permute(0, 2, 1, 3).contiguous().reshape(dims_op[0]**2, dims_op[0]**2) op_l, s, op_r = torch.svd(op_mat) op_l = op_l.reshape(dims_op[0], dims_op[0], s.size()[0]) op_r = torch.einsum('i,ij->ij', s, op_r.t()).reshape(s.size()[0], dims_op[0], dims_op[0]) op_r = op_r.permute(1, 2, 0).contiguous() T = env.T[env.keyT] # Assume index structure of ``edge`` tensor to be as follows # # -- 0 # edge |-- 1 # -- 2 # # ----0 0--T--1->2 # | 2->3 # edge--1->0 # | # ----2->1 E = torch.tensordot(edge, T, ([0], [0])) if verbosity > 0: print("E=edgeT " + str(E.size())) # TODO - more efficent contraction with uncontracted-double-layer on-site tensor # Possibly reshape indices 1,2 of E, which are to be contracted with # on-site tensor and contract bra,ket in two steps instead of creating # double layer tensor # / # --A-- # /|s # X # s'|/ # --A-- # / # # where X is Id or op a = next(iter(state.sites.values())) dims_a = a.size() X = torch.eye(dims_a[0], dtype=a.dtype, device=a.device)[:, :, None] if op is None else op_l A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,X,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1) # ---------T--2->1 # | 3 4 # | 0/ # edge--0 1--A--3 # | 2 # ----1->0 E = torch.tensordot(E, A, ([0, 3], [1, 0])) if verbosity > 0: print("E=EA " + str(E.size())) # -------T--1->0 # | | 4->2 # | |/ # edge-----A--3->1 # | 2 # | 2 # --0 0--T--1->3 E = torch.tensordot(E, T, ([0, 2], [0, 2])) if verbosity > 0: print("E=ET " + str(E.size())) # ----0 0----T--1->3 # |----2->1 2->4 # edge--1->0 # | # ----3->2 E = torch.tensordot(E, T, ([0], [0])) if verbosity > 0: print("E=ET " + str(E.size())) # TODO - more efficent contraction with uncontracted-double-layer on-site tensor # Possibly reshape indices 1,2 of E, which are to be contracted with # on-site tensor and contract bra,ket in two steps instead of creating # double layer tensor # / # --A-- # /|s # X # s'|/ # --A-- # / # # where X is Id or op X = torch.eye(dims_a[0], dtype=a.dtype, 
device=a.device)[:, :, None] if op is None else op_r A= torch.einsum('mefgh,mnl,nabcd->eafbgchdl',a,X,a).contiguous()\ .view(dims_a[1]**2, dims_a[2]**2, dims_a[3]**2, dims_a[4]**2, -1) # ---------T--3->1 # | 4 # |----1 4-\0 # edge--0 1--A--3 # | 2 # ----2->0 E = torch.tensordot(E, A, ([0, 1, 4], [1, 4, 0])) if verbosity > 0: print("E=EA " + str(E.size())) # -------T--1->0 # | | # | | # edge-----A--3->1 # | 2 # | 2 # --0 0--T--1->2 E = torch.tensordot(E, T, ([0, 2], [0, 2])) if verbosity > 0: print("E=ET " + str(E.size())) return E
def rdm2x1(coord, ipeps, env, verbosity=0): r""" :param coord: vertex (x,y) specifies position of 2x1 subsystem :param state: underlying wavefunction :param env: environment corresponding to ``state`` :param verbosity: logging verbosity :type coord: tuple(int,int) :type state: IPEPS :type env: ENV :type verbosity: int :return: 2-site reduced density matrix with indices :math:`s_0s_1;s'_0s'_1` :rtype: torch.tensor Computes 2-site reduced density matrix :math:`\rho_{2x1}` of a horizontal 2x1 subsystem using following strategy: 1. compute four individual corners 2. construct right and left half of the network 3. contract right and left halt to obtain final reduced density matrix :: C--T------------T------------------C = C2x2_LU(coord)--C2x2(coord+(1,0)) | | | | | | T--A^+A(coord)--A^+A(coord+(1,0))--T C2x1_LD(coord)--C2x1(coord+(1,0)) | | | | C--T------------T------------------C The physical indices `s` and `s'` of on-sites tensors :math:`A` (and :math:`A^\dagger`) at vertices ``coord``, ``coord+(1,0)`` are left uncontracted """ #----- building C2x2_LU ---------------------------------------------------- C = env.C[(ipeps.vertexToSite(coord), (-1, -1))] T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))] T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))] dimsA = ipeps.site(coord).size() a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\ .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0]) # C--10--T1--2 # 0 1 C2x2_LU = torch.tensordot(C, T1, ([1], [0])) # C------T1--2->1 # 0 1->0 # 0 # T2--2->3 # 1->2 C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0])) # C-------T1--1->0 # | 0 # | 0 # T2--3 1 a--3 # 2->1 2\45 C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1])) # permute 012345->120345 # reshape (12)(03)45->0123 # C2x2--1 # |\23 # 0 C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\ T1.size()[2]*a.size()[3],T2.size()[1]*a.size()[2],dimsA[0],dimsA[0]) if verbosity > 0: print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) + " (-1,-1): " + str(C2x2_LU.size())) #----- building C2x1_LD ---------------------------------------------------- C = env.C[(ipeps.vertexToSite(coord), (-1, 1))] T2 = env.T[(ipeps.vertexToSite(coord), (0, 1))] # 0 0->1 # C--1 1--T2--2 C2x1_LD = torch.tensordot(C, T2, ([1], [1])) # reshape (01)2->(0)1 # 0 # | # C2x1--1 C2x1_LD = C2x1_LD.view(C.size()[0] * T2.size()[0], T2.size()[2]).contiguous() if verbosity > 0: print("C2X1 LD " + str(coord) + "->" + str(ipeps.vertexToSite(coord)) + " (-1,1): " + str(C2x1_LD.size())) #----- build left part C2x2_LU--C2x1_LD ------------------------------------ # C2x2_LU--1 # |\23 # 0 # 0 # C2x1_LD--1->0 # TODO is it worthy(performance-wise) to instead overwrite one of C2x2_LU,C2x2_RU ? 
left_half = torch.tensordot(C2x1_LD, C2x2_LU, ([0], [0])) #----- building C2x2_RU ---------------------------------------------------- vec = (1, 0) shitf_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1])) C = env.C[(shitf_coord, (1, -1))] T1 = env.T[(shitf_coord, (1, 0))] T2 = env.T[(shitf_coord, (0, -1))] dimsA = ipeps.site(shitf_coord).size() a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shitf_coord),ipeps.site(shitf_coord)).contiguous()\ .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0]) # 0--C # 1 # 0 # 1--T1 # 2 C2x2_RU = torch.tensordot(C, T1, ([1], [0])) # 2<-0--T2--2 0--C # 3<-1 | # 0<-1--T1 # 1<-2 C2x2_RU = torch.tensordot(C2x2_RU, T2, ([0], [2])) # 1<-2--T2------C # 3 | # 45\0 | # 2<-1--a--3 0--T1 # 3<-2 0<-1 C2x2_RU = torch.tensordot(C2x2_RU, a, ([0, 3], [3, 0])) # permute 012334->120345 # reshape (12)(03)45->0123 # 0--C2x2 # 23/| # 1 C2x2_RU = C2x2_RU.permute(1,2,0,3,4,5).contiguous().view(\ T2.size()[0]*a.size()[1],T1.size()[2]*a.size()[2], dimsA[0], dimsA[0]) if verbosity > 0: print("C2X2 RU " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" + str(shitf_coord) + " (1,-1): " + str(C2x2_RU.size())) #----- building C2x1_RD ---------------------------------------------------- C = env.C[(shitf_coord, (1, 1))] T1 = env.T[(shitf_coord, (0, 1))] # 1<-0 0 # 2<-1--T1--2 1--C C2x1_RD = torch.tensordot(C, T1, ([1], [2])) # reshape (01)2->(0)1 C2x1_RD = C2x1_RD.view(C.size()[0] * T1.size()[0], T1.size()[1]).contiguous() # 0 # | # 1--C2x1 if verbosity > 0: print("C2X1 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->" + str(shitf_coord) + " (1,1): " + str(C2x1_RD.size())) #----- build right part C2x2_RU--C2x1_RD ----------------------------------- # 1<-0--C2x2_RU # |\23 # 1 # 0 # 0<-1--C2x1_RD right_half = torch.tensordot(C2x1_RD, C2x2_RU, ([0], [1])) # construct reduced density matrix by contracting left and right halfs # C2x2_LU--1 1----C2x2_RU # |\23->01 |\23 # | | # C2x1_LD--0 0----C2x1_RD rdm = torch.tensordot(left_half, right_half, ([0, 1], [0, 1])) # permute into order of s0,s1;s0',s1' where primed indices # represent "ket" # 0123->0213 # and normalize rdm = rdm.permute(0, 2, 1, 3) rdm = rdm / torch.einsum('ijij', rdm) return rdm
def forward(self, x, dims):
    a = torch.tensordot(x, self.weight, dims=dims) + self.bias
    return a
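# Minimal usage sketch for the generic tensordot linear layer above (assumed
# attribute shapes): with dims=1, a (batch, in_features) input against an
# (in_features, out_features) weight reproduces x @ weight + bias.
import torch

x = torch.randn(2, 4)
weight, bias = torch.randn(4, 3), torch.randn(3)
out = torch.tensordot(x, weight, dims=1) + bias
assert torch.allclose(out, x @ weight + bias, atol=1e-5)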
def rdm1x2(coord, ipeps, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) specifies position of 1x2 subsystem
    :param ipeps: underlying wavefunction
    :param env: environment corresponding to ``ipeps``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int)
    :type ipeps: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 2-site reduced density matrix with indices :math:`s_0s_1;s'_0s'_1`
    :rtype: torch.tensor

    Computes the 2-site reduced density matrix :math:`\rho_{1x2}` of a vertical
    1x2 subsystem using the following strategy:

        1. compute four individual corners
        2. construct upper and lower half of the network
        3. contract upper and lower half to obtain the final reduced density matrix

    ::

        C--T------------------C = C2x2_LU(coord)--------C1x2(coord)
        |  |                  |   |                     |
        T--A^+A(coord)--------T   C2x2_LD(coord+(0,1))--C1x2(coord+(0,1))
        |  |                  |
        T--A^+A(coord+(0,1))--T
        |  |                  |
        C--T------------------C

    The physical indices `s` and `s'` of the on-site tensors :math:`A` (and :math:`A^\dagger`)
    at vertices ``coord`` and ``coord+(0,1)`` are left uncontracted.
    """
    #----- building C2x2_LU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))]
    dimsA = ipeps.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # C--10--T1--2
    # 0      1
    C2x2_LU = torch.tensordot(C, T1, ([1], [0]))

    # C------T1--2->1
    # 0      1->0
    # 0
    # T2--2->3
    # 1->2
    C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0]))

    # C-------T1--1->0
    # |       0
    # |       0
    # T2--3 1 a--3
    # 2->1    2\45
    C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # C2x2--1
    # |\23
    # 0
    C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\
        T1.size()[2]*a.size()[3], T2.size()[1]*a.size()[2], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord))
              + " (-1,-1): " + str(C2x2_LU.size()))

    #----- building C1x2_RU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (1, 0))]

    # 0--C
    #    1
    #    0
    # 1--T1
    #    2
    C1x2_RU = torch.tensordot(C, T1, ([1], [0]))

    # reshape (01)2->(0)1
    # 0--C1x2
    # 23/|
    #    1
    C1x2_RU = C1x2_RU.view(C.size()[0] * T1.size()[1], T1.size()[2]).contiguous()
    if verbosity > 0:
        print("C1X2 RU " + str(coord) + "->" + str(ipeps.vertexToSite(coord))
              + " (1,-1): " + str(C1x2_RU.size()))

    #----- build upper part C2x2_LU--C1x2_RU -----------------------------------
    # C2x2_LU--1 0--C1x2_RU
    # |\23          |
    # 0->1          1->0
    upper_half = torch.tensordot(C1x2_RU, C2x2_LU, ([0], [1]))

    #----- building C2x2_LD ----------------------------------------------------
    vec = (0, 1)
    shift_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shift_coord, (-1, 1))]
    T1 = env.T[(shift_coord, (-1, 0))]
    T2 = env.T[(shift_coord, (0, 1))]
    dimsA = ipeps.site(shift_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shift_coord),ipeps.site(shift_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0->1
    # T1--2
    # 1
    # 0
    # C--1->0
    C2x2_LD = torch.tensordot(C, T1, ([0], [1]))

    # 1->0
    # T1--2->1
    # |
    # |       0->2
    # C--0 1--T2--2->3
    C2x2_LD = torch.tensordot(C2x2_LD, T2, ([0], [1]))

    # 0        0->2
    # T1--1 1--a--3
    # |        2\45
    # |        2
    # C--------T2--3->1
    C2x2_LD = torch.tensordot(C2x2_LD, a, ([1, 2], [1, 2]))

    # permute 012345->021345
    # reshape (02)(13)45->0123
    # 0
    # |/23
    # C2x2--1
    C2x2_LD = C2x2_LD.permute(0,2,1,3,4,5).contiguous().view(\
        T1.size()[0]*a.size()[0], T2.size()[2]*a.size()[3], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->"
              + str(shift_coord) + " (-1,1): " + str(C2x2_LD.size()))

    #----- building C1x2_RD ----------------------------------------------------
    C = env.C[(shift_coord, (1, 1))]
    T2 = env.T[(shift_coord, (1, 0))]

    #       0
    #    1--T2
    #       2
    #       0
    # 2<-1--C
    C1x2_RD = torch.tensordot(T2, C, ([2], [0]))

    # permute 012->021
    # reshape 0(12)->0(1)
    C1x2_RD = C1x2_RD.permute(0, 2, 1).contiguous().view(T2.size()[0], C.size()[1] * T2.size()[1])

    #    0
    #    |
    # 1--C1x2
    if verbosity > 0:
        print("C1X2 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->"
              + str(shift_coord) + " (1,1): " + str(C1x2_RD.size()))

    #----- build lower part C2x2_LD--C1x2_RD -----------------------------------
    # 0->1          0
    # |/23          |
    # C2x2_LD--1 1--C1x2_RD
    lower_half = torch.tensordot(C1x2_RD, C2x2_LD, ([1], [1]))

    # construct reduced density matrix by contracting lower and upper halves
    # C2x2_LU------C1x2_RU
    # |\23->01     |
    # 1            0
    # 1            0
    # |/23         |
    # C2x2_LD------C1x2_RD
    rdm = torch.tensordot(upper_half, lower_half, ([0, 1], [0, 1]))

    # permute into order of s0,s1;s0',s1' where primed indices
    # represent "ket"
    # 0123->0213
    # and normalize
    rdm = rdm.permute(0, 2, 1, 3)
    rdm = rdm / torch.einsum('ijij', rdm)

    return rdm
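# Illustrative sketch (assumption, not part of the original source): basic sanity checks on
# the 2-site density matrix returned by ``rdm1x2`` above. The rank-4 tensor (s0,s1,s0',s1')
# is reshaped to a (d*d, d*d) matrix with the bra pair as rows and the ket pair as columns;
# the trace should be ~1 after the normalization done inside ``rdm1x2``, and the deviation
# from Hermiticity should be small (it is not enforced exactly by the contraction).
import torch

def check_rdm1x2(rdm):
    d = rdm.size(0)
    rho = rdm.reshape(d * d, d * d)
    trace = torch.einsum('ii', rho)                 # expected to be close to 1
    herm_dev = (rho - rho.t().conj()).abs().max()   # deviation from Hermiticity
    return trace, herm_dev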
def rdm2x2(coord, ipeps, env, verbosity=0):
    r"""
    :param coord: vertex (x,y) specifies upper left site of 2x2 subsystem
    :param ipeps: underlying wavefunction
    :param env: environment corresponding to ``ipeps``
    :param verbosity: logging verbosity
    :type coord: tuple(int,int)
    :type ipeps: IPEPS
    :type env: ENV
    :type verbosity: int
    :return: 4-site reduced density matrix with indices :math:`s_0s_1s_2s_3;s'_0s'_1s'_2s'_3`
    :rtype: torch.tensor

    Computes the 4-site reduced density matrix :math:`\rho_{2x2}` of the 2x2 subsystem
    specified by the vertex ``coord`` of its upper left corner using the strategy:

        1. compute four individual corners
        2. construct upper and lower half of the network
        3. contract upper and lower half to obtain the final reduced density matrix

    ::

        C--T------------------T------------------C = C2x2_LU(coord)--------C2x2(coord+(1,0))
        |  |                  |                  |   |                     |
        T--A^+A(coord)--------A^+A(coord+(1,0))--T   C2x2_LD(coord+(0,1))--C2x2(coord+(1,1))
        |  |                  |                  |
        T--A^+A(coord+(0,1))--A^+A(coord+(1,1))--T
        |  |                  |                  |
        C--T------------------T------------------C

    The physical indices `s` and `s'` of the on-site tensors :math:`A` (and :math:`A^\dagger`)
    at vertices ``coord``, ``coord+(1,0)``, ``coord+(0,1)``, and ``coord+(1,1)`` are
    left uncontracted and given in the same order::

        s0 s1 s2 s3
    """
    #----- building C2x2_LU ----------------------------------------------------
    C = env.C[(ipeps.vertexToSite(coord), (-1, -1))]
    T1 = env.T[(ipeps.vertexToSite(coord), (0, -1))]
    T2 = env.T[(ipeps.vertexToSite(coord), (-1, 0))]
    dimsA = ipeps.site(coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(coord),ipeps.site(coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # C--10--T1--2
    # 0      1
    C2x2_LU = torch.tensordot(C, T1, ([1], [0]))

    # C------T1--2->1
    # 0      1->0
    # 0
    # T2--2->3
    # 1->2
    C2x2_LU = torch.tensordot(C2x2_LU, T2, ([0], [0]))

    # C-------T1--1->0
    # |       0
    # |       0
    # T2--3 1 a--3
    # 2->1    2\45
    C2x2_LU = torch.tensordot(C2x2_LU, a, ([0, 3], [0, 1]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # C2x2--1
    # |\23
    # 0
    C2x2_LU = C2x2_LU.permute(1,2,0,3,4,5).contiguous().view(\
        T1.size()[2]*a.size()[3], T2.size()[1]*a.size()[2], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LU " + str(coord) + "->" + str(ipeps.vertexToSite(coord))
              + " (-1,-1): " + str(C2x2_LU.size()))

    #----- building C2x2_RU ----------------------------------------------------
    vec = (1, 0)
    shift_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shift_coord, (1, -1))]
    T1 = env.T[(shift_coord, (1, 0))]
    T2 = env.T[(shift_coord, (0, -1))]
    dimsA = ipeps.site(shift_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shift_coord),ipeps.site(shift_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0--C
    #    1
    #    0
    # 1--T1
    #    2
    C2x2_RU = torch.tensordot(C, T1, ([1], [0]))

    # 2<-0--T2--2 0--C
    #    3<-1        |
    #          0<-1--T1
    #             1<-2
    C2x2_RU = torch.tensordot(C2x2_RU, T2, ([0], [2]))

    # 1<-2--T2------C
    #       3       |
    #    45\0       |
    # 2<-1--a--3 0--T1
    #    3<-2    0<-1
    C2x2_RU = torch.tensordot(C2x2_RU, a, ([0, 3], [3, 0]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    # 0--C2x2
    # 23/|
    #    1
    C2x2_RU = C2x2_RU.permute(1,2,0,3,4,5).contiguous().view(\
        T2.size()[0]*a.size()[1], T1.size()[2]*a.size()[2], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 RU " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->"
              + str(shift_coord) + " (1,-1): " + str(C2x2_RU.size()))

    #----- build upper part C2x2_LU--C2x2_RU -----------------------------------
    # C2x2_LU--1 0--C2x2_RU              C2x2_LU------C2x2_RU
    # |\23->12      |\23->45   & permute |\12->23     |\45
    # 0             1->3                 0            3->1
    # TODO is it worth it (performance-wise) to instead overwrite one of C2x2_LU, C2x2_RU?
    upper_half = torch.tensordot(C2x2_LU, C2x2_RU, ([1], [0]))
    upper_half = upper_half.permute(0, 3, 1, 2, 4, 5)

    #----- building C2x2_RD ----------------------------------------------------
    vec = (1, 1)
    shift_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shift_coord, (1, 1))]
    T1 = env.T[(shift_coord, (0, 1))]
    T2 = env.T[(shift_coord, (1, 0))]
    dimsA = ipeps.site(shift_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shift_coord),ipeps.site(shift_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    #    1<-0        0
    # 2<-1--T1--2 1--C
    C2x2_RD = torch.tensordot(C, T1, ([1], [2]))

    #         2<-0
    #      3<-1--T2
    #            2
    #    0<-1    0
    # 1<-2--T1---C
    C2x2_RD = torch.tensordot(C2x2_RD, T2, ([0], [2]))

    #    2<-0    1<-2
    # 3<-1--a--3 3--T2
    #       2\45    |
    #       0       |
    # 0<-1--T1------C
    C2x2_RD = torch.tensordot(C2x2_RD, a, ([0, 3], [2, 3]))

    # permute 012345->120345
    # reshape (12)(03)45->0123
    C2x2_RD = C2x2_RD.permute(1,2,0,3,4,5).contiguous().view(\
        T2.size()[0]*a.size()[0], T1.size()[1]*a.size()[1], dimsA[0], dimsA[0])

    #    0
    #    |/23
    # 1--C2x2
    if verbosity > 0:
        print("C2X2 RD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->"
              + str(shift_coord) + " (1,1): " + str(C2x2_RD.size()))

    #----- building C2x2_LD ----------------------------------------------------
    vec = (0, 1)
    shift_coord = ipeps.vertexToSite((coord[0] + vec[0], coord[1] + vec[1]))
    C = env.C[(shift_coord, (-1, 1))]
    T1 = env.T[(shift_coord, (-1, 0))]
    T2 = env.T[(shift_coord, (0, 1))]
    dimsA = ipeps.site(shift_coord).size()
    a = torch.einsum('mefgh,nabcd->eafbgchdmn',ipeps.site(shift_coord),ipeps.site(shift_coord)).contiguous()\
        .view(dimsA[1]**2, dimsA[2]**2, dimsA[3]**2, dimsA[4]**2, dimsA[0], dimsA[0])

    # 0->1
    # T1--2
    # 1
    # 0
    # C--1->0
    C2x2_LD = torch.tensordot(C, T1, ([0], [1]))

    # 1->0
    # T1--2->1
    # |
    # |       0->2
    # C--0 1--T2--2->3
    C2x2_LD = torch.tensordot(C2x2_LD, T2, ([0], [1]))

    # 0        0->2
    # T1--1 1--a--3
    # |        2\45
    # |        2
    # C--------T2--3->1
    C2x2_LD = torch.tensordot(C2x2_LD, a, ([1, 2], [1, 2]))

    # permute 012345->021345
    # reshape (02)(13)45->0123
    # 0
    # |/23
    # C2x2--1
    C2x2_LD = C2x2_LD.permute(0,2,1,3,4,5).contiguous().view(\
        T1.size()[0]*a.size()[0], T2.size()[2]*a.size()[3], dimsA[0], dimsA[0])
    if verbosity > 0:
        print("C2X2 LD " + str((coord[0] + vec[0], coord[1] + vec[1])) + "->"
              + str(shift_coord) + " (-1,1): " + str(C2x2_LD.size()))

    #----- build lower part C2x2_LD--C2x2_RD -----------------------------------
    # 0             0->3                 0            3->1
    # |/23->12      |/23->45   & permute |/12->23     |/45
    # C2x2_LD--1 1--C2x2_RD              C2x2_LD------C2x2_RD
    # TODO is it worth it (performance-wise) to instead overwrite one of C2x2_LD, C2x2_RD?
    lower_half = torch.tensordot(C2x2_LD, C2x2_RD, ([1], [1]))
    lower_half = lower_half.permute(0, 3, 1, 2, 4, 5)

    # construct reduced density matrix by contracting lower and upper halves
    # C2x2_LU------C2x2_RU
    # |\23->01     |\45->23
    # 0            1
    # 0            1
    # |/23->45     |/45->67
    # C2x2_LD------C2x2_RD
    rdm = torch.tensordot(upper_half, lower_half, ([0, 1], [0, 1]))

    # permute into order of s0,s1,s2,s3;s0',s1',s2',s3' where primed indices
    # represent "ket"
    # 01234567->02461357
    # and normalize
    rdm = rdm.permute(0, 2, 4, 6, 1, 3, 5, 7)
    rdm = rdm / torch.einsum('ijklijkl', rdm)

    return rdm
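# Illustrative sketch (assumption, not part of the original source): the 4-site density
# matrix returned by ``rdm2x2`` (index order s0,s1,s2,s3,s0',s1',s2',s3') can be reduced
# to a 2-site density matrix on the upper horizontal bond (s0,s1) by tracing out the lower
# two sites, which is convenient for evaluating nearest-neighbour observables.
import torch

def rdm2x2_to_upper_bond(rdm):
    # set s2=s2' and s3=s3' and sum over them; result has index order s0,s1,s0',s1'
    return torch.einsum('ijklabkl->ijab', rdm)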
def evaluate_vae(model, data_loader, epoch: int, device: str, criterion, mu_force_beta_param,
                 eval_type: str = 'valid', iteration: int = 0):
    model.eval()

    total_loss: float = 0
    total_kl_loss: float = 0
    total_nll: float = 0
    total_perp: float = 0
    total_mu_loss: float = 0

    for batch, sent_lengths in data_loader:
        with torch.no_grad():
            inp = batch[:, 0:-1].to(device)

            # Create both the prediction of the next word and the posterior from which Z is sampled.
            # Number of MC samples to draw
            # nr_MC_sample = 10 if eval_type == 'test' else 1  # Did not work out unfortunately
            nr_MC_sample = 1 if eval_type == 'test' else 1
            preds, posterior = model(inp, nr_MC_sample)

            # If multiple samples were drawn, average the likelihoods over the 0th dimension
            is_using_multi_samples = nr_MC_sample > 1
            if is_using_multi_samples:
                preds = preds.reshape(nr_MC_sample, batch.shape[0], -1).mean(0)

            # Define the target as the next word to predict
            target = batch[:, 1:].to(device)

            # Calculate the loss using the ELBO criterion
            loss, kl_loss, nll = criterion(preds, target, posterior)

            # Perplexity of the mini-batch
            perp = calc_batch_perplexity(nll.detach(), sent_lengths)

            # Take the mean of the mini-batch loss
            loss = loss.mean()
            kl_loss = kl_loss.mean()
            nll = nll.mean()

            # Now add the mu-force loss to the loss
            batch_mean_vectors = posterior.loc
            avg_batch_mean_vector = batch_mean_vectors.mean(0)
            mu_force_loss_var = torch.tensordot(
                batch_mean_vectors - avg_batch_mean_vector,
                batch_mean_vectors - avg_batch_mean_vector, 2) / batch.shape[0] / 2
            # zero tensor is created on the target device to avoid a CPU/GPU mismatch in torch.max
            mu_force_loss = torch.max(torch.tensor([0.0], device=device),
                                      mu_force_beta_param - mu_force_loss_var)

            loss = loss + mu_force_loss

            total_loss += loss.item()
            total_kl_loss += kl_loss.item()
            total_nll += nll.item()
            total_perp += perp
            total_mu_loss += mu_force_loss_var.item()

    total_loss = total_loss / len(data_loader)
    total_kl_loss = total_kl_loss / len(data_loader)
    total_nll = total_nll / len(data_loader)
    total_perp = total_perp / len(data_loader)
    total_mu_loss = total_mu_loss / len(data_loader)

    return (total_loss, total_kl_loss, total_nll, total_mu_loss), total_perp
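# Illustrative sketch (assumption, not part of the original source): for a 2-D tensor ``v``,
# ``torch.tensordot(v, v, 2)`` contracts both axes of ``v`` with themselves, i.e. it equals the
# sum of squared entries (the squared Frobenius norm). The mu-force term above is therefore a
# hinge against ``mu_force_beta_param`` on the spread of the posterior means around their batch
# average. The tensor names below are hypothetical stand-ins.
import torch

batch_means = torch.randn(4, 3)                 # stand-in for posterior.loc, shape (batch, latent)
centered = batch_means - batch_means.mean(0)    # deviations from the batch-average mean vector
v1 = torch.tensordot(centered, centered, 2)     # contracts both axes: sum_{i,j} centered[i,j]**2
v2 = (centered ** 2).sum()
assert torch.allclose(v1, v2)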