Example 1
def test_topk_nodes():
    # test#1: basic
    g0 = dgl.DGLGraph(nx.path_graph(14))

    feat0 = F.randn((g0.number_of_nodes(), 10))
    g0.ndata['x'] = feat0
    # to test the case where k > number of nodes.
    dgl.topk_nodes(g0, 'x', 20, idx=-1)
    # test correctness
    val, indices = dgl.topk_nodes(g0, 'x', 5, idx=-1)
    ground_truth = F.reshape(
        F.argsort(F.slice_axis(feat0, -1, 9, 10), 0, True)[:5], (5,))
    assert F.allclose(ground_truth, indices)
    g0.ndata.pop('x')

    # test#2: batched graph
    g1 = dgl.DGLGraph(nx.path_graph(12))
    feat1 = F.randn((g1.number_of_nodes(), 10))

    bg = dgl.batch([g0, g1])
    bg.ndata['x'] = F.cat([feat0, feat1], 0)
    # to test the case where k > number of nodes.
    dgl.topk_nodes(bg, 'x', 16, idx=1)
    # test correctness
    val, indices = dgl.topk_nodes(bg, 'x', 6, descending=False, idx=0)
    ground_truth_0 = F.reshape(
        F.argsort(F.slice_axis(feat0, -1, 0, 1), 0, False)[:6], (6,))
    ground_truth_1 = F.reshape(
        F.argsort(F.slice_axis(feat1, -1, 0, 1), 0, False)[:6], (6,))
    ground_truth = F.stack([ground_truth_0, ground_truth_1], 0)
    assert F.allclose(ground_truth, indices)

    # test idx=None
    val, indices = dgl.topk_nodes(bg, 'x', 6, descending=True)
    assert F.allclose(val, F.stack([F.topk(feat0, 6, 0), F.topk(feat1, 6, 0)], 0))
Example 2
def test_edge_softmax(g, norm_by, shp, idtype):
    g = g.astype(idtype).to(F.ctx())
    edata = F.tensor(np.random.rand(g.number_of_edges(), *shp))
    e1 = F.attach_grad(F.clone(edata))

    with F.record_grad():
        score1 = edge_softmax(g, e1, norm_by=norm_by)
        F.backward(F.reduce_sum(score1))
        grad_edata = F.grad(e1)

    with F.record_grad():
        e2 = F.attach_grad(F.clone(edata))
        e2_2d = F.reshape(
            e2,
            (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]))
        if norm_by == 'src':
            score2 = F.softmax(e2_2d, 1)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        if norm_by == 'dst':
            score2 = F.softmax(e2_2d, 0)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        assert F.allclose(score1, score2)
        print('forward passed')

        F.backward(F.reduce_sum(score2))
        assert F.allclose(F.grad(e2), grad_edata)
        print('backward passed')
Example 3
    def step(self, cell_p, hid_p):

        embed = T.reshape(T.dot(self.attribute[:, 0], self.params['W_ctx_3']),
                          [self.batch_size, 10])
        hidP = T.dot(hid_p, self.params['W_ctx_2'])  # (25, 10)
        embedd = T.repeat(self.params['W_ctx_1'], self.batch_size, 0) * T.tanh(
            embed + hidP +
            T.repeat(self.params['b_ctx'], self.batch_size, 0))  # (25, 10)
        alpha_base = T.reshape(T.exp(embedd),
                               [self.batch_size, 10, 1])  # (25, 10, 1)
        alpha_base = alpha_base / alpha_base.sum()
        att = T.reshape(self.attribute[:, 0],
                        [self.batch_size, 10, self.att_frame])
        ctx = (alpha_base * att /
               T.reshape(alpha_base.sum(axis=1), [self.batch_size, 1, 1])).sum(
                   axis=1)  # (25, 300)
        ctx = T.reshape(ctx, [self.batch_size, self.att_frame])
        # ctx += T.dot(hid_p, self.params['W_att']) + T.repeat(self.params['b_att'], self.batch_size, 0)

        input_to = T.dot(ctx, self.params['W_in']) + T.repeat(
            self.params['b'], self.batch_size, 0)  # (25, 2048)
        gate = input_to + T.dot(hid_p, self.params['W_hid'])

        # Apply nonlinearities
        ingate = T.nnet.sigmoid(
            self._slice(gate, 0, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][0], self.batch_size, 0))
        forgetgate = T.nnet.sigmoid(
            self._slice(gate, 1, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][1], self.batch_size, 0))
        cell_input = T.tanh(self._slice(gate, 2, self.hidden_dim))

        # Compute new cell value
        cell = forgetgate * cell_p + ingate * cell_input

        # BatchNormalization
        # brodcast_m = K.reshape(mean_p, broadcast_shape)
        # brodcast_std = K.reshape(std_p, broadcast_shape)
        # cell_normed = ((cell - brodcast_m) /
        #                 (brodcast_std + self.epsilon))
        broadcast_shape = [self.batch_size, self.hidden_dim]
        cell_bn = K.reshape(
            self.params['gamma'], broadcast_shape) * cell + K.reshape(
                self.params['beta'], broadcast_shape)  # (1, 512)

        outgate = T.nnet.sigmoid(
            self._slice(gate, 3, self.hidden_dim) +
            cell_bn * T.repeat(self.params['W_cell'][2], self.batch_size, 0))

        # Compute new hidden unit activation
        hid = outgate * T.tanh(cell_bn)
        return T.reshape(cell_bn,
                         [self.batch_size, self.hidden_dim]), T.reshape(
                             hid, [self.batch_size, self.hidden_dim])
Example 4
    def __call__(self, edges):
        sdata = edges.src[self.src_field]
        edata = edges.data[self.edge_field]
        # Due to the different broadcasting semantics of different backends,
        # we need to broadcast the sdata and edata to be of the same rank.
        rank = max(F.ndim(sdata), F.ndim(edata))
        sshape = F.shape(sdata)
        eshape = F.shape(edata)
        sdata = F.reshape(sdata, sshape + (1, ) * (rank - F.ndim(sdata)))
        edata = F.reshape(edata, eshape + (1, ) * (rank - F.ndim(edata)))
        ret = self.mul_op(sdata, edata)
        return {self.out_field: ret}
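
The comment in __call__ above refers to the trick of appending singleton dimensions until both operands have the same rank, so that elementwise broadcasting behaves identically across backends. A minimal NumPy-only sketch of the same idea (the shapes are made up for illustration):

import numpy as np

sdata = np.random.rand(6, 4)        # e.g. per-edge source features, rank 2
edata = np.random.rand(6, 4, 3)     # e.g. per-edge data features, rank 3

rank = max(sdata.ndim, edata.ndim)
# Append trailing singleton axes until both operands have the same rank.
sdata = sdata.reshape(sdata.shape + (1,) * (rank - sdata.ndim))  # (6, 4, 1)
edata = edata.reshape(edata.shape + (1,) * (rank - edata.ndim))  # (6, 4, 3)

out = sdata * edata                 # broadcasts over the trailing axis -> (6, 4, 3)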
Example 5
    def call(self, x, mask=None):
        b, xb = 0., 0.
        if self.data_format == 'channels_first':
            kernel_sum_axes = [1, 2, 3]
            if self.use_bias:
                b = K.reshape(self.b, (self.filters, 1, 1, 1))
                xb = 1.
        elif self.data_format == 'channels_last':
            kernel_sum_axes = [0, 1, 2]
            if self.use_bias:
                b = K.reshape(self.b, (1, 1, 1, self.filters))
                xb = 1.

        Wnorm = K.sqrt(
            K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True) +
            K.square(b) + K.epsilon())
        xnorm = K.sqrt(
            K.conv2d(K.square(x),
                     self.kernel_norm,
                     strides=self.strides,
                     padding=self.padding,
                     data_format=self.data_format,
                     filter_shape=self.kernel_norm_shape) + xb + K.epsilon())

        W = self.W / Wnorm

        output = K.conv2d(x,
                          W,
                          strides=self.strides,
                          padding=self.padding,
                          data_format=self.data_format,
                          filter_shape=self.kernel_shape)

        if K.backend() == 'theano':
            xnorm = K.pattern_broadcast(xnorm, [False, True, False, False])

        output /= xnorm

        if self.use_bias:
            b /= Wnorm
            if self.data_format == 'channels_first':
                b = K.reshape(b, (1, self.filters, 1, 1))
            elif self.data_format == 'channels_last':
                b = K.reshape(b, (1, 1, 1, self.filters))
            else:
                raise ValueError('Invalid data_format:', self.data_format)
            b /= xnorm
            output += b
        output = self.activation(output)
        return output
Example 6
def get_smallest_eigenpair(hes_val, eigvec_shape):
    """
    Get the smallest eigenvalue and its corresponding eigenvector of the input hes_val.
    """
    assert len(hes_val.shape) == 2 * len(eigvec_shape)
    assert np.array_equal(eigvec_shape, hes_val.shape[:len(eigvec_shape)])
    assert np.array_equal(eigvec_shape, hes_val.shape[len(eigvec_shape):])

    # get the eigenvector of the hessian matrix
    hes_val_mat = T.reshape(hes_val, (np.prod(eigvec_shape), -1))
    eigvals, eigvecs = T.eigh(hes_val_mat)
    # index for smallest eigenvalue
    idx = T.argmin(eigvals)
    eig_val = eigvals[idx]
    eigvec = T.reshape(eigvecs[:, idx], eigvec_shape)
    return eig_val, eigvec
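
For comparison, the same eigenpair extraction written against plain NumPy; this is only a sketch with an illustrative eigvec_shape, not the backend T used above:

import numpy as np

eigvec_shape = (3, 4)
n = int(np.prod(eigvec_shape))
hes = np.random.rand(n, n)
hes = (hes + hes.T) / 2                         # symmetric Hessian matrix
hes_val = hes.reshape(eigvec_shape + eigvec_shape)

hes_val_mat = hes_val.reshape(n, -1)
eigvals, eigvecs = np.linalg.eigh(hes_val_mat)  # eigenvalues in ascending order
idx = np.argmin(eigvals)                        # index of the smallest eigenvalue
eig_val = eigvals[idx]
eigvec = eigvecs[:, idx].reshape(eigvec_shape)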
Example 7
    def set_output(self, train=False):
        [X_H, X_M] = self.get_input(train=train)
        assert hasattr(self, 'input_frame')
        [cell, hid] = self.step(self.input_frame, X_M, X_H, train)
        self.output = [hid, cell]
        self.output_frame = T.dot(hid, self.lstmpar.W_output) + K.reshape(
            self.lstmpar.b_output, [1, self.output_dim])
Example 8
    def step(self, cell_previous, hid_previous, train):

        ingate = T.dot(hid_previous, self.lstmpar.W_hid_to_ingate) + K.reshape(
            self.lstmpar.b_ingate, [1, self.num_lstm])
        forgetgate = T.dot(
            hid_previous, self.lstmpar.W_hid_to_forgetgate) + K.reshape(
                self.lstmpar.b_forgetgate, [1, self.num_lstm])
        cell_input = T.dot(hid_previous, self.lstmpar.W_hid_to_cell) + K.reshape(
            self.lstmpar.b_cell, [1, self.num_lstm])
        outgate = T.dot(hid_previous, self.lstmpar.W_hid_to_outgate) + K.reshape(
            self.lstmpar.b_outgate, [1, self.num_lstm])

        # Compute peephole connections
        ingate += cell_previous * K.reshape(self.lstmpar.W_cell_to_ingate,
                                            [1, self.num_lstm])
        forgetgate += cell_previous * K.reshape(
            self.lstmpar.W_cell_to_forgetgate, [1, self.num_lstm])

        # Apply nonlinearities
        ingate = K.sigmoid(ingate)
        forgetgate = K.sigmoid(forgetgate)
        cell_input = K.tanh(cell_input)

        # Compute new cell value
        cell = forgetgate * cell_previous + ingate * cell_input
        cell_bn = self.bn.set_output(cell, train=train)

        outgate += cell_bn * K.reshape(self.lstmpar.W_cell_to_outgate,
                                       [1, self.num_lstm])
        outgate = K.sigmoid(outgate)

        # Compute new hidden unit activation
        if self.use_th:
            hid = outgate * K.tanh(cell_bn)
        else:
            hid = outgate * cell_bn
        return [cell_bn, hid]
Example 9
def test_broadcast(idtype, g):
    g = g.astype(idtype).to(F.ctx())
    gfeat = F.randn((g.batch_size, 3))

    # Test.0: broadcast_nodes
    g.ndata['h'] = dgl.broadcast_nodes(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.ndata['h'],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_nodes(), dim=0))

    # Test.1: broadcast_edges
    g.edata['h'] = dgl.broadcast_edges(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.edata['h'],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.number_of_edges(), dim=0))
Example 10
    def call(self, x, **kwargs):
        debug_print("call")
        # filters = K.zeros(shape=(N_filt, Filt_dim))
        min_freq = 50.0
        min_band = 50.0
        filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
        filt_end_freq = filt_beg_freq + (K.abs(self.filt_band) +
                                         min_band / self.freq_scale)

        n = np.linspace(0, self.Filt_dim, self.Filt_dim)
        window = 0.54 - 0.46 * K.cos(2 * math.pi * n / self.Filt_dim)
        window = K.cast(window, "float32")
        window = K.variable(window)

        t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2,
                                       int((self.Filt_dim - 1) / 2))
        t_right = K.variable(t_right_linspace / self.fs)

        # Compute the filters.
        output_list = []
        for i in range(self.N_filt):
            # Use the locally computed filter parameters from above
            # (filt_beg_freq, filt_end_freq, t_right, window), which would
            # otherwise go unused.
            low_pass1 = (
                2 * filt_beg_freq[i] *
                sinc(filt_beg_freq[i] * self.freq_scale, t_right))
            low_pass2 = (
                2 * filt_end_freq[i] *
                sinc(filt_end_freq[i] * self.freq_scale, t_right))
            band_pass = low_pass2 - low_pass1
            band_pass = band_pass / K.max(band_pass)
            output_list.append(band_pass * window)
        filters = K.stack(output_list)  # (80, 251)
        filters = K.transpose(filters)  # (251, 80)
        filters = K.reshape(
            filters, (self.Filt_dim, 1, self.N_filt)
        )  # (251, 1, 80); TF layout: (filter_width, in_channels, out_channels),
        # PyTorch layout: (out_channels, in_channels, filter_width)
        """Given an input tensor of shape [batch, in_width, in_channels] if data_format is "NWC", or [batch, 
        in_channels, in_width] if data_format is "NCW", and a filter / kernel tensor of shape [filter_width, 
        in_channels, out_channels], this op reshapes the arguments to pass them to conv2d to perform the equivalent 
        convolution operation. Internally, this op reshapes the input tensors and invokes tf.nn.conv2d. For example, 
        if data_format does not start with "NC", a tensor of shape [batch, in_width, in_channels] is reshaped to [
        batch, 1, in_width, in_channels], and the filter is reshaped to [1, filter_width, in_channels, out_channels]. 
        The result is then reshaped back to [batch, out_width, out_channels] (where out_width is a function of the 
        stride and padding as in conv2d) and returned to the caller. """

        # Do the convolution.
        debug_print("call")
        debug_print("  x", x)
        debug_print("  filters", filters)
        out = K.conv1d(x, kernel=filters)
        debug_print("  out", out)

        return out
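
The quoted docstring describes the (filter_width, in_channels, out_channels) kernel layout that the 1-D convolution expects. A minimal sketch of that layout, assuming TensorFlow is available; the shapes mirror the (251, 1, 80) filters built above:

import tensorflow as tf

x = tf.random.normal([4, 1000, 1])        # [batch, in_width, in_channels] ("NWC")
filters = tf.random.normal([251, 1, 80])  # [filter_width, in_channels, out_channels]
out = tf.nn.conv1d(x, filters, stride=1, padding='VALID')
print(out.shape)                          # (4, 750, 80): out_width = 1000 - 251 + 1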
Example 11
    def set_output(self, X, train=False):

        input_shape = (self.batch_size, self.num_lstm)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]
        if train:
            m = K.mean(X, axis=reduction_axes)
            brodcast_m = K.reshape(m, broadcast_shape)
            std = K.mean(K.square(X - brodcast_m) + self.epsilon, axis=reduction_axes)
            std = K.sqrt(std)
            brodcast_std = K.reshape(std, broadcast_shape)
            mean_update = self.momentum * self.running_mean + (1-self.momentum) * m
            std_update = self.momentum * self.running_std + (1-self.momentum) * std
            self.updates = [(self.running_mean, mean_update), (self.running_std, std_update)]
            X_normed = (X - brodcast_m) / (brodcast_std + self.epsilon)
        else:
            brodcast_m = K.reshape(self.running_mean, broadcast_shape)
            brodcast_std = K.reshape(self.running_std, broadcast_shape)
            X_normed = ((X - brodcast_m) /
                            (brodcast_std + self.epsilon))
        out = K.reshape(self.gamma, broadcast_shape) * X_normed + K.reshape(self.beta, broadcast_shape)

        return out
Example 12
    def set_output(self, X, train=False):

        input_shape = (self.batch_size, self.num_lstm)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]
        if train:
            m = K.mean(X, axis=reduction_axes)
            brodcast_m = K.reshape(m, broadcast_shape)
            std = K.mean(K.square(X - brodcast_m) + self.epsilon,
                         axis=reduction_axes)
            std = K.sqrt(std)
            brodcast_std = K.reshape(std, broadcast_shape)
            mean_update = self.momentum * self.running_mean + (
                1 - self.momentum) * m
            std_update = self.momentum * self.running_std + (
                1 - self.momentum) * std
            self.updates = [(self.running_mean, mean_update),
                            (self.running_std, std_update)]
            X_normed = (X - brodcast_m) / (brodcast_std + self.epsilon)
        else:
            brodcast_m = K.reshape(self.running_mean, broadcast_shape)
            brodcast_std = K.reshape(self.running_std, broadcast_shape)
            X_normed = ((X - brodcast_m) / (brodcast_std + self.epsilon))
        out = K.reshape(self.gamma, broadcast_shape) * X_normed + K.reshape(
            self.beta, broadcast_shape)

        return out
Example 13
    def step(self, input_n, cell_previous, hid_previous, train):
        input_to_in = T.dot(input_n, self.lstmpar.W_in_to_ingate) + K.reshape(
            self.lstmpar.b_ingate, [1, self.num_lstm])
        input_to_forget = T.dot(
            input_n, self.lstmpar.W_in_to_forgetgate) + K.reshape(
                self.lstmpar.b_forgetgate, [1, self.num_lstm])
        input_to_cell = T.dot(input_n, self.lstmpar.W_in_to_cell) + K.reshape(
            self.lstmpar.b_cell, [1, self.num_lstm])
        input_to_out = T.dot(input_n,
                             self.lstmpar.W_in_to_outgate) + K.reshape(
                                 self.lstmpar.b_outgate, [1, self.num_lstm])

        ingate = input_to_in + T.dot(hid_previous,
                                     self.lstmpar.W_hid_to_ingate)
        forgetgate = input_to_forget + T.dot(hid_previous,
                                             self.lstmpar.W_hid_to_forgetgate)
        cell_input = input_to_cell + T.dot(hid_previous,
                                           self.lstmpar.W_hid_to_cell)
        outgate = input_to_out + T.dot(hid_previous,
                                       self.lstmpar.W_hid_to_outgate)

        # Compute peephole connections
        ingate += cell_previous * K.reshape(self.lstmpar.W_cell_to_ingate,
                                            [1, self.num_lstm])
        forgetgate += cell_previous * K.reshape(
            self.lstmpar.W_cell_to_forgetgate, [1, self.num_lstm])

        # Apply nonlinearities
        ingate = K.sigmoid(ingate)
        forgetgate = K.sigmoid(forgetgate)
        cell_input = K.tanh(cell_input)

        # Compute new cell value
        cell = forgetgate * cell_previous + ingate * cell_input
        cell_bn = self.bn.set_output(cell, train=train)

        outgate += cell_bn * K.reshape(self.lstmpar.W_cell_to_outgate,
                                       [1, self.num_lstm])
        outgate = K.sigmoid(outgate)

        # Compute new hidden unit activation
        hid = outgate * cell_bn
        return [cell_bn, hid]
Example 14
    def _matvec(self, vec):
        dg = self.dmrg_graph
        in_data = T.reshape(vec, self.eigvec_shape)
        self.feed_dict.update({dg.vnodes[self.index]: in_data})

        if self.initial_matvec:
            if self.index == 0:
                reset_graph = True
                evicted_inputs = []
            else:
                reset_graph = False
                evicted_inputs = [
                    dg.mps_inputs[self.index - 1], dg.vnodes[self.index]
                ]
            self.initial_matvec = False
        else:
            reset_graph = False
            evicted_inputs = [dg.vnodes[self.index]]

        out_data, = self.executor.run(feed_dict=self.feed_dict,
                                      reset_graph=reset_graph,
                                      evicted_inputs=evicted_inputs,
                                      out_nodes=[dg.hvps[self.index]])
        return out_data.ravel()
Example 15
def batchnorm(X,
              batch_size,
              hidden_dim,
              gamma,
              beta,
              running_mean,
              running_std,
              epsilon=1e-10,
              axis=1,
              momentum=0.99,
              train=False):

    X = K.reshape(X, (batch_size, hidden_dim))
    input_shape = (batch_size, hidden_dim)  # (1, 512)
    reduction_axes = list(range(len(input_shape)))  # [0, 1]
    del reduction_axes[axis]  # [0]
    broadcast_shape = [1] * len(input_shape)  # [1, 1]
    broadcast_shape[axis] = input_shape[axis]  # [1, 512]
    if train:
        m = K.mean(
            X, axis=reduction_axes
        )  # m.shape = (1, 512), note that if matrix is 1-d then mean function will return one number even if axis=0
        brodcast_m = K.reshape(m, broadcast_shape)  # m.shape = (1, 512)
        std = K.mean(K.square(X - brodcast_m) + epsilon,
                     axis=reduction_axes)  # batchnormed m(m**2)
        std = K.sqrt(std)  # batchnormed m, (1, 512)
        brodcast_std = K.reshape(std, broadcast_shape)  # (1, 512)
        mean_update = momentum * running_mean + (1 - momentum) * m  # (1, 512)
        std_update = momentum * running_std + (1 - momentum) * std  # (1, 512)
        X_normed = (X - brodcast_m) / (brodcast_std + epsilon)  # (1, 512)
    else:
        brodcast_m = K.reshape(running_mean, broadcast_shape)
        brodcast_std = K.reshape(running_std, broadcast_shape)
        X_normed = ((X - brodcast_m) / (brodcast_std + epsilon))
        # In inference mode, return the running statistics unchanged so that
        # mean_update and std_update are always defined at the return below.
        mean_update = running_mean
        std_update = running_std
    out = K.reshape(gamma, broadcast_shape) * X_normed + K.reshape(
        beta, broadcast_shape)  # (1, 512)

    return out, mean_update, std_update
Example 16
def dmrg_local_update(intermediate, eigvec, max_mps_rank):
    """
    Perform local update for DMRG.

    Parameters
    ----------
    intermediate: the input einsum node. Its inputs are two mps sites.
    eigvec: the eigenvector to get the low rank decomposition.
    max_mps_rank: maximum mps tensor rank.
    """
    # parse intermediate strings
    inputs = intermediate.inputs
    assert len(inputs) == 2

    # Here input names are formatted as A{i}.
    index_input_0 = int(inputs[0].name[1:])
    index_input_1 = int(inputs[1].name[1:])

    in_subs, out_subs, _ = _parse_einsum_input(
        (intermediate.einsum_subscripts, *intermediate.inputs))

    if index_input_0 > index_input_1:
        # right site appears first
        right_subs, left_subs = in_subs.split(',')
    else:
        left_subs, right_subs = in_subs.split(',')

    map_subs_indices = dict(zip(out_subs,
                                list(range(len(intermediate.shape)))))

    contract_char, = list(set(left_subs) - set(out_subs))

    left_uncontract_chars = list(set(left_subs) - set(contract_char))
    right_uncontract_chars = list(set(right_subs) - set(contract_char))

    left_indices = [map_subs_indices[char] for char in left_uncontract_chars]
    right_indices = [map_subs_indices[char] for char in right_uncontract_chars]

    left_uncontract_str = "".join(left_uncontract_chars)
    right_uncontract_str = "".join(right_uncontract_chars)

    #############################################################
    # svd decomposition to get updated sites
    eigvec_shape = intermediate.shape
    eigvec_mat = T.transpose(eigvec, left_indices + right_indices)
    eigvec_mat = T.reshape(eigvec_mat,
                           (np.prod([eigvec_shape[i]
                                     for i in left_indices]), -1))

    U, s, VT = T.svd(eigvec_mat)
    rank = min([max_mps_rank, eigvec_mat.shape[0], eigvec_mat.shape[1]])
    U, s, VT = U[:, :rank], s[:rank], VT[:rank, :]
    VT = T.diag(s) @ VT

    U = T.reshape(U, [eigvec_shape[i] for i in left_indices] + [rank])
    VT = T.reshape(VT, ([rank] + [eigvec_shape[i] for i in right_indices]))

    left = T.einsum(f"{left_uncontract_str}{contract_char}->{left_subs}", U)
    right = T.einsum(f"{contract_char}{right_uncontract_str}->{right_subs}",
                     VT)

    return left, right
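
The heart of the local update is the rank-truncated SVD that splits the optimized two-site tensor back into a left and a right factor. A NumPy-only sketch of that step, with illustrative shapes:

import numpy as np

eigvec_mat = np.random.rand(12, 20)   # flattened two-site tensor (illustrative shape)
max_mps_rank = 4

U, s, VT = np.linalg.svd(eigvec_mat, full_matrices=False)
rank = min(max_mps_rank, *eigvec_mat.shape)
U, s, VT = U[:, :rank], s[:rank], VT[:rank, :]

left = U                              # (12, rank) left factor
right = np.diag(s) @ VT               # (rank, 20) right factor absorbs the singular values
approx = left @ right                 # best rank-r approximation of eigvec_mat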
Example 17
def dmrg_shared_exec_iterative_solve(mpo_tensors,
                                     init_mps_tensors,
                                     max_mps_rank,
                                     num_iter=1,
                                     sequence='R'):
    """
    Perform DMRG iterations with shared execution and iterative solve.
    """
    if sequence != "R":
        raise NotImplementedError

    num = len(mpo_tensors)
    size = mpo_tensors[0].shape[1]
    mpo_ranks = [mpo_tensors[i].shape[0] for i in range(1, len(mpo_tensors))]

    mps_tensors = copy.deepcopy(init_mps_tensors)
    mps_ranks = [mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))]

    dg = DmrgImplicitUpdateGraph.create(num, mpo_ranks, mps_ranks, size)
    for i, hvp in enumerate(dg.hvps):
        dg.hvps[i] = simplify(hvp)
        assert isinstance(hvp, ad.EinsumNode)
    dg.hvps = generate_sequential_optimal_tree(dg.hvps, dg.mps_inputs)

    executor_hvps = ad.Executor(dg.hvps)
    executor_intermediates = ad.Executor(dg.intermediates)

    # sequence is R
    for iter in range(num_iter):

        mps_tensors = gauge_transform_mps(mps_tensors, right=True)
        mps_ranks = [
            mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))
        ]

        for i in range(num - 1):

            dg.update_graph(num, mpo_ranks, mps_ranks, size)

            feed_dict = dict(zip(dg.mpo_inputs, mpo_tensors))
            feed_dict.update(dict(zip(dg.mps_inputs, mps_tensors)))

            intermediate, = executor_intermediates.run(
                feed_dict=feed_dict, out_nodes=[dg.intermediates[i]])

            # Calculate the eigenvector using the implicit solver.
            # Note: this only supports the NumPy datatype.
            # TODO: Add a general Lanczos solver that adapts to all the backends.
            operator = DMRGLinearOperator(dg, executor_hvps, i, feed_dict)
            # Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html
            eig_vals, eigvecs = spla.eigsh(operator,
                                           k=1,
                                           ncv=4,
                                           tol=1e-3,
                                           which='SA',
                                           v0=intermediate.ravel())
            eig_val, eigvec = eig_vals[0], eigvecs[:, 0]
            eigvec = T.reshape(eigvec, dg.intermediates[i].shape)

            # Update the two sites of mps
            mps_tensors[i], mps_tensors[i + 1] = dmrg_local_update(
                dg.intermediates[i], eigvec, max_mps_rank)

            # update the rank
            mps_ranks[i] = mps_tensors[i + 1].shape[0]
            print(f'At site {i}, the smallest eigenvalue is: {eig_val}')

        print(f'At iteration {iter} the smallest eigenvalue is: {eig_val}')
    return mps_tensors, eig_val
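
The implicit eigensolve above only needs a matrix-vector product, which SciPy exposes through LinearOperator. A minimal, self-contained sketch of that pattern; the dense symmetric matrix here is just a stand-in for the DMRG matrix-vector product:

import numpy as np
import scipy.sparse.linalg as spla

n = 64
A = np.random.rand(n, n)
A = (A + A.T) / 2                                          # symmetric stand-in operator

operator = spla.LinearOperator((n, n), matvec=lambda v: A @ v, dtype=A.dtype)
eig_vals, eigvecs = spla.eigsh(operator, k=1, which='SA')  # smallest algebraic eigenvalue
eig_val, eigvec = eig_vals[0], eigvecs[:, 0]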
Example 18
    def set_output(self, train=False):
        [X_H, X_M] = self.get_input(train=train)
        [cell, hid] = self.step(X_M, X_H, train)
        self.output = [hid, cell]
        self.output_frame = T.dot(hid, self.lstmpar.W_output) + K.reshape(self.lstmpar.b_output, [1, self.dim_frame])
Example 19
    def set_output(self, train=False):
        [X_H, X_M] = self.get_input(train=train)
        [cell, hid] = self.step(X_M, X_H, train)
        self.output = [hid, cell]
        self.output_frame = T.dot(hid, self.lstmpar.W_output) + K.reshape(
            self.lstmpar.b_output, [1, self.dim_frame])
Example 20
    def step(self, cell_p, hid_p, mean_p, std_p):

        embed = T.reshape(T.dot(self.attribute[:, 0], self.params['W_ctx_3']),
                          [self.batch_size, 10])
        hidP = T.dot(hid_p, self.params['W_ctx_2'])  # (25, 10)
        embedd = T.repeat(self.params['W_ctx_1'], self.batch_size, 0) * T.tanh(
            embed + hidP +
            T.repeat(self.params['b_ctx'], self.batch_size, 0))  # (25, 10)
        alpha_base = T.reshape(T.exp(embedd),
                               [self.batch_size, 10, 1])  # (25, 10, 1)
        alpha_base = alpha_base / alpha_base.sum()
        att = T.reshape(self.attribute[:, 0],
                        [self.batch_size, 10, self.att_frame])
        ctx = (alpha_base * att /
               T.reshape(alpha_base.sum(axis=1), [self.batch_size, 1, 1])).sum(
                   axis=1)  # (25, 300)
        ctx = T.reshape(ctx, [self.batch_size, self.att_frame])
        # ctx += T.dot(hid_p, self.params['W_att']) + T.repeat(self.params['b_att'], self.batch_size, 0)

        input_to = T.dot(ctx, self.params['W_in']) + T.repeat(
            self.params['b'], self.batch_size, 0)  # (25, 2048)
        # input_to_i = T.dot(ctx, self.params['W_in_i']) + T.repeat(self.params['b_i'], self.batch_size, 0)
        # input_to_f = T.dot(ctx, self.params['W_in_f']) + T.repeat(self.params['b_f'], self.batch_size, 0)
        # input_to_o = T.dot(ctx, self.params['W_in_o']) + T.repeat(self.params['b_o'], self.batch_size, 0)
        # input_to_c = T.dot(ctx, self.params['W_in_c']) + T.repeat(self.params['b_c'], self.batch_size, 0)
        gate = input_to + T.dot(hid_p, self.params['W_hid'])
        # gate_i = input_to_i + T.dot(hid_p, self.params['W_hid_i'])
        # gate_f = input_to_f + T.dot(hid_p, self.params['W_hid_f'])
        # gate_o = input_to_o + T.dot(hid_p, self.params['W_hid_o'])
        # gate_c = input_to_c + T.dot(hid_p, self.params['W_hid_c'])

        # Apply nonlinearities
        ingate = T.nnet.sigmoid(
            self._slice(gate, 0, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][0], self.batch_size, 0))
        forgetgate = T.nnet.sigmoid(
            self._slice(gate, 1, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][1], self.batch_size, 0))
        cell_input = T.tanh(self._slice(gate, 2, self.hidden_dim))

        # Compute new cell value
        cell = forgetgate * cell_p + ingate * cell_input

        # BatchNormalization
        input_shape = (self.batch_size, self.hidden_dim)  # (1, 512)
        cell = K.reshape(cell, input_shape)
        reduction_axes = list(range(len(input_shape)))  # [0, 1]
        del reduction_axes[self.axis_bn]  # [0]
        broadcast_shape = [1] * len(input_shape)  # [1, 1]
        broadcast_shape[self.axis_bn] = input_shape[self.axis_bn]  # [1, 512]
        # m = K.mean(cell, axis=reduction_axes) # m.shape = (1, 512), note that if matrix is 1-d then mean function will return one number even if axis=0
        m = K.mean(cell, axis=0)
        brodcast_m = K.reshape(m, [1, self.hidden_dim])  # m.shape = (1, 512)
        # brodcast_m = m
        std = K.mean(K.square(cell - brodcast_m) + self.epsilon,
                     axis=reduction_axes)  # batchnormed m(m**2)
        std = K.sqrt(std)  # batchnormed m, (1, 512)
        brodcast_std = K.reshape(std, broadcast_shape)  # (1, 512)
        mean_update = self.momentum * mean_p + (1 -
                                                self.momentum) * m  # (1, 512)
        std_update = self.momentum * std_p + (1 -
                                              self.momentum) * std  # (1, 512)
        cell_normed = (cell - brodcast_m) / (brodcast_std + self.epsilon
                                             )  # (1, 512)
        cell_bn = K.reshape(
            self.params['gamma'], broadcast_shape) * cell_normed + K.reshape(
                self.params['beta'], broadcast_shape)  # (1, 512)

        # cell_bn, mean, std = batchnorm(cell, self.batch_size, self.hidden_dim, self.params['gamma'], self.params['beta'], mean_p, std_p, train=True)

        outgate = T.nnet.sigmoid(
            self._slice(gate, 3, self.hidden_dim) +
            cell_bn * T.repeat(self.params['W_cell'][2], self.batch_size, 0))

        # Compute new hidden unit activation
        hid = outgate * T.tanh(cell_bn)
        return T.reshape(
            cell_bn, [self.batch_size, self.hidden_dim]), T.reshape(
                hid,
                [self.batch_size, self.hidden_dim]), mean_update, std_update