Example #1
    def forward(self, adj, feat, lambda_max=None):
        r"""

        Description
        -----------
        Compute (Dense) Chebyshev Spectral Graph Convolution layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply Graph Convolution on,
            should be of shape :math:`(N, N)`, where a row represents the destination
            and a column represents the source.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})`, where :math:`D_{in}`
            is the size of the input feature and :math:`N` is the number of nodes.
        lambda_max : float or None, optional
            The largest eigenvalue of the given graph. If None, it is computed
            from the normalized Laplacian. Default: None.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})`, where :math:`D_{out}`
            is the size of the output feature.
        """
        A = adj.astype(feat.dtype).as_in_context(feat.context)
        num_nodes = A.shape[0]

        # Symmetrically normalized Laplacian L = I - D^-1/2 A D^-1/2; degrees are
        # clipped at 1 so isolated nodes do not cause a division by zero.
        in_degree = 1. / nd.clip(A.sum(axis=1), 1, float('inf')).sqrt()
        D_invsqrt = nd.diag(in_degree)
        I = nd.eye(num_nodes, ctx=A.context)
        L = I - nd.dot(D_invsqrt, nd.dot(A, D_invsqrt))

        if lambda_max is None:
            # NOTE: syevd performs a symmetric eigendecomposition, so this only
            # works for undirected (symmetric) graphs.
            lambda_max = (nd.linalg.syevd(L)[1]).max()

        # Rescale the Laplacian to [-1, 1] and apply the Chebyshev recurrence:
        # Z_0 = I, Z_1 = L_hat, Z_k = 2 * L_hat . Z_{k-1} - Z_{k-2}
        L_hat = 2 * L / lambda_max - I
        Z = [nd.eye(num_nodes, ctx=A.context)]
        Zh = self.fc[0](feat)
        for i in range(1, self._k):
            if i == 1:
                Z.append(L_hat)
            else:
                Z.append(2 * nd.dot(L_hat, Z[-1]) - Z[-2])
            Zh = Zh + nd.dot(Z[i], self.fc[i](feat))

        if self.bias is not None:
            Zh = Zh + self.bias.data(feat.context)
        return Zh
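A minimal usage sketch for the layer above, assuming it is DGL's DenseChebConv for MXNet (dgl.nn.mxnet.conv.DenseChebConv); the graph, shapes, and hyperparameters are illustrative.

import mxnet as mx
from mxnet import nd
from dgl.nn.mxnet.conv import DenseChebConv

adj = nd.array([[0, 1, 0],
                [1, 0, 1],
                [0, 1, 0]])              # symmetric (N, N) adjacency
feat = nd.random.uniform(shape=(3, 8))   # (N, D_in) node features

conv = DenseChebConv(8, 4, 2)            # D_in=8, D_out=4, Chebyshev order k=2
conv.initialize()
out = conv(adj, feat)                    # (N, D_out) == (3, 4)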
Example #2
def calculate_loss(x, y, model, loss, loss_name, class_weight, penalization_coeff):
    """calculate loss value

    Args:
        x (NDArray): intput of model
        y (NDArray): target
        model (Block): model
        loss (gluon.loss): loss function
        loss_name (str): name of loss function
        class_weight (NDArray): weight of sample loss value for each category
        penalization_coeff (float): Attention penalty coefficient

    Returns:
        NDArray: output of model
        NDArray: loss value
    """

    pred, att = model(x)
    if loss_name == 'sce':
        l = loss(pred, y)
    elif loss_name == 'wsce':
        l = loss(pred, y, class_weight, class_weight.shape[0])

    # penalty
    diversity_penalty = nd.batch_dot(att, nd.transpose(att, axes=(0, 2, 1))
                                     ) - nd.eye(att.shape[1], ctx=att.context)
    l = l + penalization_coeff * diversity_penalty.norm(axis=(1, 2))

    return pred, l
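A small, model-independent sketch of the attention penalty term used above: for attention weights att of shape (batch, hops, seq_len), the penalty is the Frobenius norm of att.att^T - I, which pushes the attention hops toward different positions (the self-attentive embedding penalty of Lin et al., 2017). Shapes are illustrative.

from mxnet import nd

att = nd.softmax(nd.random.uniform(shape=(2, 4, 10)), axis=2)  # (batch, hops, seq_len)
P = nd.batch_dot(att, nd.transpose(att, axes=(0, 2, 1))) - nd.eye(4)
print(P.norm(axis=(1, 2)))  # one penalty value per batch element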
Example #3
    def hybrid_forward(self, F, X, *args, **kwargs):
        # Perform neural network pass
        X = self.linear_1(X)
        X = self.linear_2(X)
        X = self.linear_3(X)
        X = self.linear_4(X)

        # Extract mixture coefficients according to formula 25 in Bishop
        # (a softmax over the alpha outputs; indexing with [0] assumes batch size 1)
        z_alpha = X[:, :self.n_components]
        z_alpha_exp = nd.exp(z_alpha)
        alpha = (z_alpha_exp / nd.sum(z_alpha_exp))[0]

        # Extract variance according to formula 26 in Bishop
        z_sigma = X[:, self.n_components:2 * self.n_components]
        sigma = nd.exp(z_sigma)[0]

        # Extract mu according to formula 27 in Bishop
        mu = nd.reshape(X[:, 2 * self.n_components:],
                        (self.n_components, self.t_dim))

        # create bunch of Gaussians
        distributions = [
            MultivariateGaussian(
                mu[i], nd.linalg.potrf(sigma[i] * nd.eye(self.t_dim)))
            for i in range(self.n_components)
        ]

        # Create mixture model
        p_t_X = MixtureDistribution(alpha, distributions)

        return p_t_X
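A numeric sanity check of the coefficient extraction: formula 25 amounts to a softmax, so the mixture weights are positive and sum to one.

from mxnet import nd

z_alpha = nd.array([[0.5, -1.0, 2.0]])
alpha = nd.exp(z_alpha) / nd.sum(nd.exp(z_alpha))
print(alpha.sum())          # 1.0
print(nd.softmax(z_alpha))  # the same values via the built-in softmax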
Example #4
    def __init__(self, proto: NDArray):
        self.shape = proto.shape
        ctx = proto.context

        h = self.shape[0]
        w = self.shape[1]

        leftEye = nd.eye(h, ctx=ctx)
        rightEye = nd.eye(w, ctx=ctx)

        leftShifts = nd.zeros((h, h, h), ctx=ctx)
        for i in range(0, h):
            leftShifts[i] = leftEye
            shiftY(leftShifts[i], i)

        rightShifts = nd.zeros((w, w, w), ctx=ctx)
        for i in range(0, w):
            rightShifts[i] = rightEye
            shiftX(rightShifts[i], i)

        self.leftShifts = leftShifts
        self.rightShifts = rightShifts
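The helpers shiftY and shiftX are not shown in this snippet. A minimal sketch of one plausible reading, in which they circularly shift the rows/columns of a matrix in place (this relies on leftShifts[i] returning a view that shares memory, as the loop above assumes):

from mxnet import nd

def shiftY(mat, i):
    # Hypothetical helper: circularly shift the rows of mat down by i, in place.
    if i > 0:
        mat[:] = nd.concat(mat[-i:], mat[:-i], dim=0)

def shiftX(mat, i):
    # Hypothetical helper: circularly shift the columns of mat right by i, in place.
    if i > 0:
        mat[:] = nd.concat(mat[:, -i:], mat[:, :-i], dim=1)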
Example #5
def compute_rot(v):
    """Return the rotationnal matrix M so that M.v = ||v||e1."""
    if v[0] >= 0:
        M = nd.eye(len(v))
    else:
        M = -nd.eye(len(v))
    for i in range(1, len(v)):
        if v[i] == 0:
            continue
        rot_minus_theta = nd.eye(len(v))
        temp = nd.dot(M, v)

        theta = nd.arctan(temp[i] / temp[0])
        c = nd.cos(theta)
        s = nd.sin(theta)

        rot_minus_theta[0, 0] = c
        rot_minus_theta[i, i] = c
        rot_minus_theta[0, i] = s
        rot_minus_theta[i, 0] = -s

        M = nd.dot(rot_minus_theta, M)
    return M
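A quick check that compute_rot does what the docstring says: for a 2-D vector, the result is a Givens rotation mapping v onto the first axis.

from mxnet import nd

v = nd.array([3.0, 4.0])
M = compute_rot(v)
print(nd.dot(M, v))  # [5. 0.] up to rounding, i.e. ||v|| * e1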
Example #6
    def __init__(self, A, in_units, out_units, activation='relu', **kwargs):
        super().__init__(**kwargs)
        I = nd.eye(*A.shape)
        A_hat = A.copy() + I                 # add self-loops
        D = nd.sum(A_hat, axis=0)            # degree vector
        D_inv = D ** -0.5
        # Symmetric normalization D^-1/2 (A + I) D^-1/2: the column vector on the
        # left scales rows, the row vector on the right scales columns. (A plain
        # D_inv * A_hat * D_inv would broadcast both factors over columns only.)
        A_hat = D_inv.reshape((-1, 1)) * A_hat * D_inv
        self.in_units, self.out_units = in_units, out_units
        with self.name_scope():
            self.A_hat = self.params.get_constant('A_hat', A_hat)
            self.W = self.params.get('W', shape=(self.in_units, self.out_units))
            if activation == 'identity':
                self.activation = lambda X: X
            else:
                self.activation = nn.Activation(activation)
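A toy check of the normalization above, independent of the class: a two-node graph with a single edge normalizes to constant 0.5 entries.

from mxnet import nd

A = nd.array([[0., 1.], [1., 0.]])
A_hat = A + nd.eye(2)                    # add self-loops
D_inv = nd.sum(A_hat, axis=0) ** -0.5    # D^-1/2 as a vector
A_norm = D_inv.reshape((-1, 1)) * A_hat * D_inv
print(A_norm)  # [[0.5, 0.5], [0.5, 0.5]]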
Example #7
def mmd_loss(x, y, ctx_model, t=0.1, kernel='diffusion'):
    '''
    Computes the MMD between two batches of samples.
    :param x: (batch_size, latent_dim) sample batch
    :param y: (batch_size, latent_dim) sample batch
    :param ctx_model: mxnet context for intermediate results
    :param t: diffusion time (kernel bandwidth)
    :param kernel: 'diffusion' for the information diffusion kernel, 'tv' for total variation
    :return: scalar MMD estimate
    '''
    eps = 1e-6
    n,d = x.shape
    if kernel == 'tv':
        sum_xx = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(i+1, n):
                sum_xx = sum_xx + nd.norm(x[i] - x[j], ord=1)
        sum_xx = sum_xx / (n * (n-1))

        sum_yy = nd.zeros(1, ctx=ctx_model)
        for i in range(y.shape[0]):
            for j in range(i+1, y.shape[0]):
                sum_yy = sum_yy + nd.norm(y[i] - y[j], ord=1)
        sum_yy = sum_yy / (y.shape[0] * (y.shape[0]-1))

        sum_xy = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(y.shape[0]):
                sum_xy = sum_xy + nd.norm(x[i] - y[j], ord=1)
        sum_xy = sum_xy / (n * y.shape[0])
    else:
        qx = nd.sqrt(nd.clip(x, eps, 1))
        qy = nd.sqrt(nd.clip(y, eps, 1))
        xx = nd.dot(qx, qx, transpose_b=True)
        yy = nd.dot(qy, qy, transpose_b=True)
        xy = nd.dot(qx, qy, transpose_b=True)

        def diffusion_kernel(a, tmpt, dim):
            # return (4 * np.pi * tmpt)**(-dim / 2) * nd.exp(- nd.square(nd.arccos(a)) / tmpt)
            return nd.exp(- nd.square(nd.arccos(a)) / tmpt)

        off_diag = 1 - nd.eye(n, ctx=ctx_model)
        k_xx = diffusion_kernel(nd.clip(xx, 0, 1-eps), t, d-1)
        k_yy = diffusion_kernel(nd.clip(yy, 0, 1-eps), t, d-1)
        k_xy = diffusion_kernel(nd.clip(xy, 0, 1-eps), t, d-1)
        sum_xx = (k_xx * off_diag).sum() / (n * (n-1))
        sum_yy = (k_yy * off_diag).sum() / (n * (n-1))
        sum_xy = 2 * k_xy.sum() / (n * n)
    return sum_xx + sum_yy - sum_xy
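A minimal usage sketch. The diffusion kernel treats each row as a point on the probability simplex (square roots, then arccos of inner products), so the inputs are normalized with a softmax here; shapes and context are illustrative.

import mxnet as mx
from mxnet import nd

x = nd.softmax(nd.random.uniform(shape=(16, 10)), axis=1)
y = nd.softmax(nd.random.uniform(shape=(16, 10)), axis=1)
loss = mmd_loss(x, y, ctx_model=mx.cpu(), t=0.1, kernel='diffusion')
print(loss)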
Example #8
def compute_eigenvals(A):
    A_11 = A[:, :, 0, 0]  # (N, P)
    A_12 = A[:, :, 0, 1]
    A_13 = A[:, :, 0, 2]
    A_22 = A[:, :, 1, 1]
    A_23 = A[:, :, 1, 2]
    A_33 = A[:, :, 2, 2]
    I = nd.eye(3, ctx=A.context)  # keep the identity on the same device as A
    p1 = nd.square(A_12) + nd.square(A_13) + nd.square(A_23)  # (N, P)
    q = (A_11 + A_22 + A_33) / 3  # (N, P)
    p2 = nd.square(A_11 - q) + nd.square(A_22 - q) + nd.square(A_33 - q) + 2 * p1  # (N, P)
    p = nd.sqrt(p2 / 6) + 1e-8  # (N, P)
    N = A.shape[0]
    q_4d = nd.reshape(q, (N, -1, 1, 1))  # (N, P, 1, 1)
    p_4d = nd.reshape(p, (N, -1, 1, 1))
    B = (1 / p_4d) * (A - q_4d * I)  # (N, P, 3, 3)
    r = nd.clip(compute_determinant(B) / 2, -1, 1)  # (N, P)
    phi = nd.arccos(r) / 3  # (N, P)
    eig1 = q + 2 * p * nd.cos(phi)  # (N, P)
    eig3 = q + 2 * p * nd.cos(phi + (2 * math.pi / 3))
    eig2 = 3 * q - eig1 - eig3
    return nd.abs(nd.stack([eig1, eig2, eig3], axis=2))  # (N, P, 3)
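compute_determinant is not shown in this snippet. A minimal sketch of a compatible implementation, assuming it maps a (N, P, 3, 3) batch to (N, P) determinants via cofactor expansion along the first row:

from mxnet import nd

def compute_determinant(A):
    # Hypothetical helper: batched determinant of 3x3 matrices, (N, P, 3, 3) -> (N, P).
    return (A[:, :, 0, 0] * (A[:, :, 1, 1] * A[:, :, 2, 2] - A[:, :, 1, 2] * A[:, :, 2, 1])
            - A[:, :, 0, 1] * (A[:, :, 1, 0] * A[:, :, 2, 2] - A[:, :, 1, 2] * A[:, :, 2, 0])
            + A[:, :, 0, 2] * (A[:, :, 1, 0] * A[:, :, 2, 1] - A[:, :, 1, 1] * A[:, :, 2, 0]))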
Example #9
    def folding_net(cls, symetries, ctx=mx.cpu(0), optimize=False):
        """NeuralNet generated by a set of symetries.
        Given the symetries, it computes the net which folds the space according to them.

        The symetries considered are ONLY the orthogonal reflections accros hyperplanes.
        Let the equation of the hyperplane be n.x + b = 0. Then the representation of the symetry
        is the tupple (n, b).

        One should notice than (-n, -b) represent the same hyperplane. But not the same operation for the NeuralNet :
        It will fold the space into the subspace pointed by n (where x.n + b >= 0)

        A list of tupple (n, b) is expected as symetries
        The optimize parameter allows you to break up the operations in more layers.
        """
        layers = []

        # For optimization
        n = symetries[0][0]
        weights_rest = nd.eye(len(n))
        bias_rest = nd.zeros(len(n))

        for n, b in symetries:
            ROT = compute_rot(n)
            if optimize:
                weights_rest = nd.dot(ROT, weights_rest)
                bias_rest = nd.dot(ROT, bias_rest)
            else:
                layers.append(
                    Layer(len(n),
                          len(n),
                          weights=ROT,
                          bias=nd.zeros(len(n)),
                          function=nd.identity,
                          fixed=True,
                          ctx=ctx))

            # Symmetry for the plane x = -b:
            # n1' = |n1 + b| - b, ni' = ni.
            function = [nd.relu] * 2 + [nd.identity] * (len(n) - 1)
            weights = nd.eye(len(n) + 1, len(n), -1)
            weights[0, 0] = -1
            bias = nd.array([-b, b] + [0] *
                            (len(n) - 1)) + nd.dot(weights, bias_rest)
            weights = nd.dot(weights, weights_rest)

            layers.append(
                Layer(len(n) + 1,
                      weights.shape[1],
                      weights=weights,
                      bias=bias,
                      function=function,
                      fixed=True,
                      ctx=ctx))

            bias = nd.array([-b] + [0] * (len(n) - 1))
            weights = nd.eye(len(n), len(n) + 1, 1)
            weights[0, 0] = 1

            if optimize:
                weights_rest = nd.dot(ROT.T, weights)
                bias_rest = nd.dot(ROT.T, bias)
            else:
                layers.append(
                    Layer(len(n),
                          len(n) + 1,
                          weights=weights,
                          bias=bias,
                          function=nd.identity,
                          fixed=True,
                          ctx=ctx))
                layers.append(
                    Layer(len(n),
                          len(n),
                          weights=ROT.T,
                          bias=nd.zeros(len(n)),
                          function=nd.identity,
                          fixed=True,
                          ctx=ctx))

        if optimize:
            layers.append(
                Layer(len(n),
                      len(n) + 1,
                      weights=weights_rest,
                      bias=bias_rest,
                      function=nd.identity,
                      fixed=True,
                      ctx=ctx))

        sizes = [layers[0].input_size]
        for layer in layers:
            sizes.append(layer.output_size)

        return cls(sizes, layers, ctx=ctx)
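A one-line check of the fold identity the layers implement: relu(x + b) + relu(-(x + b)) - b equals |x + b| - b, which reflects points below -b across the hyperplane x = -b and leaves the rest unchanged.

from mxnet import nd

x = nd.array([-5.0, 0.0, 3.0])
b = 2.0
print(nd.relu(x + b) + nd.relu(-(x + b)) - b)  # [1. 0. 3.]; -5 is reflected across -2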
Example #10
from mxnet import autograd, nd
from tqdm import tqdm

try:
    from config import (CTX, DATA_PATH, EVAL_BATCH_SIZE, MAX_ANSWER_LENS,
                        RAW_DEV_FILE, RAW_TRAIN_FILE)
except ImportError:
    from .config import (CTX, DATA_PATH, EVAL_BATCH_SIZE, MAX_ANSWER_LENS,
                         RAW_DEV_FILE, RAW_TRAIN_FILE)
from data_loader import DataLoader
from offical_evaluate import evaluate as offical_eval

ctx = CTX[0]
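# ANSWER_MASK_MATRIX, built below as a sum of shifted identities, is a band
# matrix with ANSWER_MASK_MATRIX[0, i, j] == 1 iff 0 <= j - i < MAX_ANSWER_LENS:
# it restricts candidate answer spans to end at or after their start position
# and to span at most MAX_ANSWER_LENS tokens.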
ANSWER_MASK_MATRIX = nd.zeros(shape=(1, 1000, 1000), ctx=ctx)
for idx in range(MAX_ANSWER_LENS):
    ANSWER_MASK_MATRIX += nd.eye(N=1000, M=1000, k=idx, ctx=ctx)


def evaluate(model, dataset_type='train', ema=None):
    r"""Evaluate the model on train/dev/test dataset.

    This function is just an encapsulation of the official evaluate function.

    The official evaluation code can be found at https://rajpurkar.github.io/SQuAD-explorer/

    Parameters
    ----------
    dataset_type : string, default 'train'
        which dataset to evaluate.
    ema : object or None, default None
        Whether to use the EMA shadow variables for evaluation.
    """
Example #11
args = get_args()
vision = Agent_Location_Classifier()
vision.load_parameters(path_to_clf_params)

rnn = mdn_rnn(input_dim=7, interface_dim=10, output_dim=4)
rnn.load_parameters(path_to_rnn_params)

env = Neurosmash.Environment(args)
agent = Neurosmash.Agent()
end, reward, previous_state = env.reset()
n_steps = 30

extr = Background_Extractor(env, agent, args)
background = extr.get_background(oned=True)
h, c = (nd.zeros((1, rnn.RNN.h_dim)), nd.zeros((1, rnn.RNN.c_dim)))
eye = nd.eye(args.move_dim)
prev_pred = nd.zeros((1,4))
while end == 0:
    # Encode the previous frame into a latent representation via the vision module
    z = vision(extr.clean_and_reshape(previous_state, args.size)/255)

    # Make random step
    a = np.random.randint(0,3)

    # Take the step and store the reward
    end, reward, state = env.step(a)

    # Get the new hidden states by feeding the RNN the previous state and the action
    action_onehot = eye[None, a]
    rnn_input = nd.concatenate([z, action_onehot], 1)
    pz, h, c = rnn(rnn_input, h, c)

    # Advance to the next frame so the next iteration encodes the newest state
    previous_state = state
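Rows of an identity matrix double as one-hot vectors, which is how the action above is encoded before being concatenated with the latent state; a tiny illustration:

from mxnet import nd

eye = nd.eye(3)
print(eye[1])        # [0. 1. 0.], the one-hot encoding of action 1
print(eye[None, 1])  # shape (1, 3), ready to concatenate with a batch of size 1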
Example #12
                                activation=activation_func)
        features.add(feature)
        in_units = layer_size
    return features, in_units

def build_model(A, X):
    model = nn.Sequential()
    with model.name_scope():
        features, out_units = bulid_features(A, X)
        model.add(features)
        classifier = LogisticRegressor(out_units)
        model.add(classifier)
    model.initialize(init.Uniform(1))
    return model, features
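
# With no external node features available, the identity matrix below serves as
# the input, giving every node a unique one-hot indicator feature (a standard
# featureless-GCN setup).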

X_1 = nd.eye(*A.shape)
model_1, features_1 = build_model(A, X_1)

model_1(X_1)

def train(model, features, X, X_train, y_train, epochs):

    cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    trainer = gluon.Trainer(model.collect_params(), 'sgd',
                            {'learning_rate': 0.0006, 'momentum': 1})
    features_representation = [features(X).asnumpy()]

    for e in range(1, epochs+1):
        cum_loss = 0.0
        cum_preds = []
        for i, x in enumerate(X_train):
Example #13
        train_loss += loss.mean().asscalar()
        acc.update(preds=output, labels=label)
        print('epoch: %d, iter: %d, acc: %.4f' % (e, i, acc.get()[1]))

        out_argmax = output.argmax(axis=1)
        label = label.reshape((-1,)).asnumpy().astype('int32')
        out_argmax = out_argmax.reshape((-1,)).asnumpy().astype('int32')
        # Accumulate per-class counts: t is the true label, p the prediction.
        for t, p in zip(label, out_argmax):
            confusion_matrix[t, p] += 1

    sum_confusion_matrix = confusion_matrix.sum(axis=1).reshape((-1, 1))
    confusion_matrix_ratio = confusion_matrix * (1.0 / sum_confusion_matrix)

    evalu_confusion_matrix_ratio, evalu_confusion_matrix = metric_confusion_matrix(
        val_datait, net4img, cfg.num_classes, ctx)
    eye_nd = nd.eye(cfg.num_classes, ctx=ctx)

    # Multiplying the row-normalized confusion matrix by the identity and summing
    # keeps only the diagonal, i.e. the trace (the summed per-class recall).
    train_acc = (confusion_matrix_ratio * eye_nd).sum().asscalar()
    val_acc = (evalu_confusion_matrix_ratio * eye_nd).sum().asscalar()
    train_loss_scalar = train_loss / len(train_datait)

    print('epoch: %d , loss: %.4f , train_acc: %.4f , val_acc: %.4f'
          % (e, train_loss_scalar, train_acc, val_acc))

    with open('evaluate_result.txt', 'a+') as f:
        f.write('epoch: %d , loss: %.4f , train_acc: %.4f , val_acc: %.4f\n'
                % (e, train_loss_scalar, train_acc, val_acc))

        f.write('train:\n')
        f.write(str(confusion_matrix_ratio) + '\n')
        f.write(str(confusion_matrix) + '\n')