def forward(self, adj, feat, lambda_max=None):
    r"""

    Description
    -----------
    Apply the (dense) Chebyshev spectral graph convolution.

    Parameters
    ----------
    adj : mxnet.NDArray
        Dense adjacency matrix of shape :math:`(N, N)`; a row represents
        the destination node and a column represents the source node.
    feat : mxnet.NDArray
        Input node features of shape :math:`(N, D_{in})`, with :math:`N`
        nodes and :math:`D_{in}` input channels.
    lambda_max : float or None, optional
        Largest eigenvalue used to rescale the Laplacian. When ``None`` it
        is taken as the maximum eigenvalue returned by
        ``nd.linalg.syevd`` on the normalized Laplacian. Default: None.

    Returns
    -------
    mxnet.NDArray
        Output features of shape :math:`(N, D_{out})`.
    """
    # Bring the adjacency onto the dtype / device of the features.
    A = adj.astype(feat.dtype).as_in_context(feat.context)
    num_nodes = A.shape[0]

    # Normalized Laplacian  L = I - D^{-1/2} A D^{-1/2}.
    # Row sums are clamped to >= 1 so isolated nodes do not divide by zero.
    row_sums = nd.clip(A.sum(axis=1), 1, float('inf'))
    D_invsqrt = nd.diag(1. / row_sums.sqrt())
    I = nd.eye(num_nodes, ctx=A.context)
    L = I - nd.dot(D_invsqrt, nd.dot(A, D_invsqrt))

    if lambda_max is None:
        # NOTE(zihao): this only works for directed graph.
        # NOTE(review): syevd is documented for symmetric matrices, so this
        # fallback presumably needs a symmetric L - confirm.
        lambda_max = (nd.linalg.syevd(L)[1]).max()

    # Rescale the spectrum for the Chebyshev basis.
    L_hat = 2 * L / lambda_max - I

    # Chebyshev recurrence T_0 = I, T_1 = L_hat, T_i = 2 L_hat T_{i-1} - T_{i-2}.
    # Only the last two terms are needed, so they are kept as a rolling pair.
    Zh = self.fc[0](feat)
    older, newer = None, nd.eye(num_nodes, ctx=A.context)
    for order in range(1, self._k):
        if order == 1:
            term = L_hat
        else:
            term = 2 * nd.dot(L_hat, newer) - older
        older, newer = newer, term
        Zh = Zh + nd.dot(term, self.fc[order](feat))

    if self.bias is not None:
        Zh = Zh + self.bias.data(feat.context)
    return Zh
def calculate_loss(x, y, model, loss, loss_name, class_weight, penalization_coeff):
    """Run the model and compute the loss including the attention penalty.

    Args:
        x (NDArray): input of the model
        y (NDArray): target
        model (Block): model; must return a ``(prediction, attention)`` pair
        loss (gluon.loss): loss function
        loss_name (str): name of the loss function, ``'sce'`` or ``'wsce'``
        class_weight (NDArray): per-category weight of the sample loss
            (only used when ``loss_name == 'wsce'``)
        penalization_coeff (float): attention penalty coefficient

    Returns:
        NDArray: output of the model
        NDArray: loss value (one entry per sample)

    Raises:
        ValueError: if ``loss_name`` is not one of the supported names.
    """
    # Fail fast with a clear message; previously an unknown name left the
    # loss variable unbound and crashed later with an UnboundLocalError.
    if loss_name not in ('sce', 'wsce'):
        raise ValueError(
            "unsupported loss_name %r; expected 'sce' or 'wsce'" % (loss_name,))

    pred, att = model(x)
    if loss_name == 'sce':
        l = loss(pred, y)
    else:
        l = loss(pred, y, class_weight, class_weight.shape[0])

    # Penalty: per-sample norm of (A A^T - I), where A is the 3-D attention
    # tensor; it is zero when the attention rows are orthonormal.
    att_t = nd.transpose(att, axes=(0, 2, 1))
    diversity_penalty = nd.batch_dot(att, att_t) - nd.eye(att.shape[1], ctx=att.context)
    l = l + penalization_coeff * diversity_penalty.norm(axis=(1, 2))

    return pred, l
def hybrid_forward(self, F, X, *args, **kwargs):
    """Map the input to a Gaussian mixture distribution.

    The input is passed through the four linear layers; the output vector
    is then split into mixing coefficients, per-component variances and
    per-component means (formulas 25-27 in Bishop).

    Parameters
    ----------
    F : module
        Ignored. The body calls ``nd`` directly, so ``X`` must be an
        NDArray.
    X : NDArray
        Network input. Only row 0 of the coefficients / variances is used
        and the means are reshaped to ``(n_components, t_dim)``, so this
        effectively assumes a single sample per call.

    Returns
    -------
    MixtureDistribution
        Mixture of ``n_components`` ``MultivariateGaussian`` objects.
    """
    # Perform neural network pass
    X = self.linear_1(X)
    X = self.linear_2(X)
    X = self.linear_3(X)
    X = self.linear_4(X)

    # Extract mixture coefficients according to formula 25 in Bishop
    z_alpha = X[:, :self.n_components]
    z_alpha_exp = nd.exp(z_alpha)
    alpha = (z_alpha_exp / nd.sum(z_alpha_exp))[0]

    # Extract variance according to formula 26 in Bishop
    z_sigma = X[:, self.n_components:2 * self.n_components]
    sigma = nd.exp(z_sigma)[0]

    # Extract mu according to formula 27 in Bishop
    mu = nd.reshape(X[:, 2 * self.n_components:],
                    (self.n_components, self.t_dim))

    # Build the identity once, on the same device as the network output.
    # Without an explicit ctx it lives on the default context and the
    # product with sigma fails when X is on another device.
    identity = nd.eye(self.t_dim, ctx=X.context)

    # One Gaussian per component, parameterised by its mean and the
    # Cholesky factor (potrf) of sigma_i * I.
    distributions = [
        MultivariateGaussian(mu[i], nd.linalg.potrf(sigma[i] * identity))
        for i in range(self.n_components)
    ]

    # Create mixture model
    p_t_X = MixtureDistribution(alpha, distributions)
    return p_t_X
def __init__(self, proto: NDArray):
    """Precompute stacks of shifted identity matrices for *proto*'s shape.

    Parameters
    ----------
    proto : NDArray
        Prototype array; its first two dimensions ``(h, w)`` and its
        context determine the size and device of the precomputed stacks.

    Attributes set
    --------------
    shape : shape of ``proto``.
    leftShifts : ``(h, h, h)`` array; slice ``i`` is the ``h x h`` identity
        after ``shiftY(slice, i)``.
    rightShifts : ``(w, w, w)`` array; slice ``i`` is the ``w x w``
        identity after ``shiftX(slice, i)``.
    """
    self.shape = proto.shape
    ctx = proto.context
    h = self.shape[0]
    w = self.shape[1]

    leftEye = nd.eye(h, ctx=ctx)
    rightEye = nd.eye(w, ctx=ctx)

    leftShifts = nd.zeros((h, h, h), ctx=ctx)
    for i in range(h):
        leftShifts[i] = leftEye
        # shiftY is defined elsewhere; its return value is discarded, so
        # it is expected to modify the slice in place.
        shiftY(leftShifts[i], i)

    # Allocate on the same context as leftShifts / proto. This was
    # previously created on the default context, leaving the two stacks on
    # different devices whenever proto was not on the default one.
    rightShifts = nd.zeros((w, w, w), ctx=ctx)
    for i in range(w):
        rightShifts[i] = rightEye
        shiftX(rightShifts[i], i)

    self.leftShifts = leftShifts
    self.rightShifts = rightShifts
def compute_rot(v):
    """Return the matrix M such that M.v = ||v|| e1.

    M is built as a product of plane rotations: for every non-zero
    component ``v[i]`` (i >= 1), a rotation in the (0, i) coordinate plane
    moves that component of the current image ``M.v`` onto the first axis.
    The starting matrix is the identity, or minus the identity when
    ``v[0]`` is negative.
    """
    dim = len(v)
    M = nd.eye(dim) if v[0] >= 0 else -nd.eye(dim)

    for axis in range(1, dim):
        if v[axis] == 0:
            # Nothing to rotate away on this axis.
            continue

        # Current image of v, and the angle between its projection onto
        # the (0, axis) plane and the first axis.
        image = nd.dot(M, v)
        theta = nd.arctan(image[axis] / image[0])
        cos_t = nd.cos(theta)
        sin_t = nd.sin(theta)

        # Rotation by -theta in the (0, axis) plane.
        plane_rot = nd.eye(dim)
        plane_rot[0, 0] = cos_t
        plane_rot[axis, axis] = cos_t
        plane_rot[0, axis] = sin_t
        plane_rot[axis, 0] = -sin_t

        M = nd.dot(plane_rot, M)

    return M
def __init__(self, A, in_units, out_units, activation='relu', **kwargs):
    """Set up a graph-convolution layer with a fixed normalised adjacency.

    Parameters
    ----------
    A : NDArray
        Square adjacency matrix of the graph.
    in_units : int
        Number of input features per node.
    out_units : int
        Number of output features per node.
    activation : str, optional
        ``'identity'`` for no activation, otherwise a name accepted by
        ``nn.Activation``. Default ``'relu'``.
    **kwargs
        Forwarded to the parent class constructor.
    """
    super().__init__(**kwargs)

    # Add self-loops: A_hat = A + I (identity on A's own context).
    I = nd.eye(*A.shape, ctx=A.context)
    A_hat = A.copy() + I

    # Symmetric normalisation D^{-1/2} A_hat D^{-1/2}.
    # D is a 1-D degree vector, so it has to be applied once along the rows
    # and once along the columns. Multiplying the 1-D vector on both sides
    # broadcasts along columns twice and yields A_hat D^{-1} instead.
    D = nd.sum(A_hat, axis=0)
    D_inv_sqrt = D ** -0.5
    A_hat = D_inv_sqrt.reshape((-1, 1)) * A_hat * D_inv_sqrt.reshape((1, -1))

    self.in_units, self.out_units = in_units, out_units

    with self.name_scope():
        # A_hat is registered as a constant; W is the learnable weight.
        self.A_hat = self.params.get_constant('A_hat', A_hat)
        self.W = self.params.get('W', shape=(self.in_units, self.out_units))
        if activation == 'identity':
            self.activation = lambda X: X
        else:
            self.activation = nn.Activation(activation)
def mmd_loss(x, y, ctx_model, t=0.1, kernel='diffusion'):
    '''
    Compute the MMD loss between two samples.

    :param x: first sample, batch_size x latent dimension
    :param y: second sample, same latent dimension as x (the number of rows
        may differ from x)
    :param ctx_model: context on which helper arrays are allocated
    :param t: temperature of the diffusion kernel (unused for 'tv')
    :param kernel: 'tv' for summed pairwise L1 distances; any other value
        selects the information diffusion kernel
    :return: sum_xx + sum_yy - sum_xy
    '''
    eps = 1e-6
    n, d = x.shape
    m = y.shape[0]

    if kernel == 'tv':
        # NOTE(review): the within-sample sums visit each unordered pair
        # once but divide by the ordered-pair count, and the cross term is
        # not doubled as in the kernel branch - confirm this is intended.
        sum_xx = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(i + 1, n):
                sum_xx = sum_xx + nd.norm(x[i] - x[j], ord=1)
        sum_xx = sum_xx / (n * (n - 1))

        sum_yy = nd.zeros(1, ctx=ctx_model)
        for i in range(m):
            for j in range(i + 1, m):
                sum_yy = sum_yy + nd.norm(y[i] - y[j], ord=1)
        sum_yy = sum_yy / (m * (m - 1))

        sum_xy = nd.zeros(1, ctx=ctx_model)
        for i in range(n):
            for j in range(m):
                sum_xy = sum_xy + nd.norm(x[i] - y[j], ord=1)
        # Normalise the cross term itself. This line used to divide sum_yy
        # a second time and left sum_xy as a raw, unnormalised sum.
        sum_xy = sum_xy / (n * m)
    else:
        # Square roots of the (clipped) inputs; their inner products are
        # clipped to [0, 1 - eps] before arccos.
        qx = nd.sqrt(nd.clip(x, eps, 1))
        qy = nd.sqrt(nd.clip(y, eps, 1))
        xx = nd.dot(qx, qx, transpose_b=True)
        yy = nd.dot(qy, qy, transpose_b=True)
        xy = nd.dot(qx, qy, transpose_b=True)

        def diffusion_kernel(a, tmpt, dim):
            # The normalising constant (4 * pi * tmpt) ** (-dim / 2) is
            # deliberately left out, so `dim` is currently unused.
            return nd.exp(- nd.square(nd.arccos(a)) / tmpt)

        k_xx = diffusion_kernel(nd.clip(xx, 0, 1 - eps), t, d - 1)
        k_yy = diffusion_kernel(nd.clip(yy, 0, 1 - eps), t, d - 1)
        k_xy = diffusion_kernel(nd.clip(xy, 0, 1 - eps), t, d - 1)

        # Exclude the diagonal (self-similarity) from the within-sample
        # terms. Each sample uses a mask and divisor of its own size, which
        # is identical to the previous behaviour when y has n rows.
        off_diag_x = 1 - nd.eye(n, ctx=ctx_model)
        off_diag_y = 1 - nd.eye(m, ctx=ctx_model)
        sum_xx = (k_xx * off_diag_x).sum() / (n * (n - 1))
        sum_yy = (k_yy * off_diag_y).sum() / (m * (m - 1))
        sum_xy = 2 * k_xy.sum() / (n * m)

    return sum_xx + sum_yy - sum_xy
def compute_eigenvals(A):
    """Closed-form eigenvalue magnitudes of batched 3x3 matrices.

    Uses the trigonometric formula for 3x3 matrices. Only the diagonal and
    the upper-triangular entries of each matrix are read when forming the
    scaling terms, so the matrices are assumed to be symmetric.

    Parameters
    ----------
    A : NDArray
        Array of shape (N, P, 3, 3).

    Returns
    -------
    NDArray
        Array of shape (N, P, 3) holding the absolute values of the three
        eigenvalues of every matrix.
    """
    A_11 = A[:, :, 0, 0]  # (N, P)
    A_12 = A[:, :, 0, 1]
    A_13 = A[:, :, 0, 2]
    A_22 = A[:, :, 1, 1]
    A_23 = A[:, :, 1, 2]
    A_33 = A[:, :, 2, 2]

    # Identity on A's own context; without ctx it would live on the default
    # context and the broadcast below fails when A is on another device.
    I = nd.eye(3, ctx=A.context)

    # p1: sum of squared off-diagonal entries; q: mean of the diagonal.
    p1 = nd.square(A_12) + nd.square(A_13) + nd.square(A_23)  # (N, P)
    q = (A_11 + A_22 + A_33) / 3  # (N, P)
    p2 = (nd.square(A_11 - q) + nd.square(A_22 - q) + nd.square(A_33 - q)
          + 2 * p1)  # (N, P)
    # The small constant keeps the division below finite when p2 == 0.
    p = nd.sqrt(p2 / 6) + 1e-8  # (N, P)

    N = A.shape[0]
    q_4d = nd.reshape(q, (N, -1, 1, 1))  # (N, P, 1, 1)
    p_4d = nd.reshape(p, (N, -1, 1, 1))
    # Shifted and scaled matrix B = (A - q I) / p.
    B = (1 / p_4d) * (A - q_4d * I)  # (N, P, 3, 3)

    # Clip to the domain of arccos to absorb rounding error.
    r = nd.clip(compute_determinant(B) / 2, -1, 1)  # (N, P)
    phi = nd.arccos(r) / 3  # (N, P)

    eig1 = q + 2 * p * nd.cos(phi)  # (N, P)
    eig3 = q + 2 * p * nd.cos(phi + (2 * math.pi / 3))
    # The three eigenvalues sum to the trace, which equals 3 * q.
    eig2 = 3 * q - eig1 - eig3

    return nd.abs(nd.stack([eig1, eig2, eig3], axis=2))  # (N, P, 3)
def folding_net(cls, symetries, ctx=mx.cpu(0), optimize=False):
    """Build the NeuralNet generated by a set of symmetries.

    Given the symmetries, this computes the net which folds the space
    according to them. The symmetries considered are ONLY the orthogonal
    reflections across hyperplanes.

    Let the equation of the hyperplane be n.x + b = 0. The symmetry is then
    represented by the tuple (n, b). Note that (-n, -b) represents the same
    hyperplane, but not the same operation for the NeuralNet: the net folds
    the space into the half-space pointed to by n (where x.n + b >= 0).

    Parameters
    ----------
    symetries : list of (n, b) tuples
        The symmetries to apply, in order. Must be non-empty (the first
        entry is read before the loop).
    ctx : context, optional
        Passed to every ``Layer`` and to ``cls``.
    optimize : bool, optional
        When False, each symmetry emits four layers: rotation, fold,
        unfold and inverse rotation. When True, the rotation and the
        unfold / inverse rotation are merged into the neighbouring fold
        layers, and one trailing layer applies what is left over.

    Returns
    -------
    The result of ``cls(sizes, layers, ctx=ctx)``, where ``sizes`` is the
    first layer's ``input_size`` followed by every layer's ``output_size``.
    """
    layers = []

    # For optimization: linear map / offset still pending from the previous
    # symmetry, folded into the next layer instead of being emitted.
    n = symetries[0][0]
    weights_rest = nd.eye(len(n))
    bias_rest = nd.zeros(len(n))

    for n, b in symetries:
        # Matrix sending n onto the first axis (see compute_rot).
        ROT = compute_rot(n)
        if optimize:
            weights_rest = nd.dot(ROT, weights_rest)
            bias_rest = nd.dot(ROT, bias_rest)
        else:
            layers.append(
                Layer(len(n), len(n), weights=ROT, bias=nd.zeros(len(n)),
                      function=nd.identity, fixed=True, ctx=ctx))

        # Symmetry for the plane x = -b
        # n1' = |n1 + b| - b. ni' = ni.
        # Fold layer: rows 0 and 1 carry -x1 - b and x1 + b through a relu,
        # the remaining rows pass x2.. through unchanged.
        # NOTE(review): assumes Layer applies function(weights . x + bias)
        # and takes (output_size, input_size) positionally - confirm.
        function = [nd.relu] * 2 + [nd.identity] * (len(n) - 1)
        weights = nd.eye(len(n) + 1, len(n), -1)
        weights[0, 0] = -1
        # In non-optimize mode bias_rest is zero and weights_rest is the
        # identity, so the two lines below leave bias / weights unchanged.
        bias = nd.array([-b, b] + [0] * (len(n) - 1)) + nd.dot(weights, bias_rest)
        weights = nd.dot(weights, weights_rest)
        layers.append(
            Layer(len(n) + 1, weights.shape[1], weights=weights, bias=bias,
                  function=function, fixed=True, ctx=ctx))

        # Unfold: add the two relu outputs and subtract b, giving
        # |x1 + b| - b on the first coordinate.
        bias = nd.array([-b] + [0] * (len(n) - 1))
        weights = nd.eye(len(n), len(n) + 1, 1)
        weights[0, 0] = 1
        if optimize:
            # Keep unfold + inverse rotation pending for the next layer.
            weights_rest = nd.dot(ROT.T, weights)
            bias_rest = nd.dot(ROT.T, bias)
        else:
            layers.append(
                Layer(len(n), len(n) + 1, weights=weights, bias=bias,
                      function=nd.identity, fixed=True, ctx=ctx))
            layers.append(
                Layer(len(n), len(n), weights=ROT.T, bias=nd.zeros(len(n)),
                      function=nd.identity, fixed=True, ctx=ctx))

    if optimize:
        # Emit whatever is still pending after the last symmetry.
        layers.append(
            Layer(len(n), len(n) + 1, weights=weights_rest, bias=bias_rest,
                  function=nd.identity, fixed=True, ctx=ctx))

    sizes = [layers[0].input_size]
    for layer in layers:
        sizes.append(layer.output_size)

    return cls(sizes, layers, ctx=ctx)
from mxnet import autograd, nd from tqdm import tqdm try: from config import (CTX, DATA_PATH, EVAL_BATCH_SIZE, MAX_ANSWER_LENS, RAW_DEV_FILE, RAW_TRAIN_FILE) except ImportError: from .config import (CTX, DATA_PATH, EVAL_BATCH_SIZE, MAX_ANSWER_LENS, RAW_DEV_FILE, RAW_TRAIN_FILE) from data_loader import DataLoader from offical_evaluate import evaluate as offical_eval ctx = CTX[0] ANSWER_MASK_MATRIX = nd.zeros(shape=(1, 1000, 1000), ctx=ctx) for idx in range(MAX_ANSWER_LENS): ANSWER_MASK_MATRIX += nd.eye(N=1000, M=1000, k=idx, ctx=ctx) def evaluate(model, dataset_type='train', ema=None): r"""Evaluate the model on train/dev/test dataset. This function is just an encapsulation of official evaluate function. The official evaluate code can be find in https://rajpurkar.github.io/SQuAD-explorer/ Parameters ---------- dataset_type : string, default 'train' which dataset to evaluate. ema : object or None, default None Whether use the shadow variable to evaluate.
# Roll out a random policy in the Neurosmash environment while feeding the
# classifier output and the chosen action through the MDN-RNN.
args = get_args()

# Load the pretrained location classifier and the MDN-RNN.
vision = Agent_Location_Classifier()
vision.load_parameters(path_to_clf_params)
rnn = mdn_rnn(input_dim=7, interface_dim=10, output_dim=4)
rnn.load_parameters(path_to_rnn_params)

env = Neurosmash.Environment(args)
agent = Neurosmash.Agent()
end, reward, previous_state = env.reset()
n_steps = 30  # not used in the visible part of the script

extr = Background_Extractor(env, agent, args)
background = extr.get_background(oned=True)  # not used in the visible part

# Initial hidden and cell state of the RNN (one row each).
h, c = (nd.zeros((1, rnn.RNN.h_dim)), nd.zeros((1, rnn.RNN.c_dim)))
# Rows of the identity serve as one-hot action encodings.
eye = nd.eye(args.move_dim)
prev_pred = nd.zeros((1,4))  # not used in the visible part

while end == 0:
    # Get the representation of the previous frame from the classifier
    # (input is cleaned / reshaped by the extractor and divided by 255).
    # NOTE(review): previous_state is not reassigned in the visible part
    # of this loop - confirm it is updated further down.
    z = vision(extr.clean_and_reshape(previous_state, args.size)/255)
    # Make random step: integer drawn from {0, 1, 2}.
    # NOTE(review): the 3 is hard-coded; presumably equals args.move_dim.
    a = np.random.randint(0,3)
    # Take the step and store the reward
    end, reward, state = env.step(a)
    # Get the new hidden states by feeding the RNN the previous state and the action
    action_onehot = eye[None, a]
    rnn_input = nd.concatenate([z, action_onehot], 1)
    pz, h, c = rnn(rnn_input, h, c)
activation=activation_func) features.add(feature) in_units = layer_size return features, in_units def build_model(A, X): model = nn.Sequential() with model.name_scope(): features, out_units = bulid_features(A, X) model.add(features) calssifier = LogisticRegressor(out_units) model.add(calssifier) model.initialize(init.Uniform(1)) return model, features X_1 = nd.eye(*A.shape) model_1, features_1 = build_model(A, X_1) model_1(X_1) def train(model, features, X, X_train, y_train, epochs): cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True) trainer = gluon.Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.0006, 'momentum': 1}) features_representation = [features(X).asnumpy()] for e in range(1, epochs+1): cum_loss = 0.0 cum_preds = [] for i, x in enumerate(X_train):
train_loss+=loss.mean().asscalar() acc.update(preds=output, labels=label) print('epoch: %d, iter: %d, acc: %.4f'%(e,i,acc.get()[1])) out_argmax=output.argmax(axis=1) label=label.reshape((-1,)).asnumpy().astype('int32') out_argmax=out_argmax.reshape((-1,)).asnumpy().astype('int32') for i,j in zip(label,out_argmax): confusion_matrix[i,j]+=1 sum_confusion_matrix=confusion_matrix.sum(axis=1).reshape((-1,1)) confusion_matrix_ratio=confusion_matrix*(1.0/sum_confusion_matrix) evalu_confusion_matrix_ratio,evalu_confusion_matrix=metric_confusion_matrix(val_datait,net4img,cfg.num_classes,ctx) eye_nd=nd.eye(cfg.num_classes,ctx=ctx) train_acc=(confusion_matrix_ratio*eye_nd).sum().asscalar() val_acc=(evalu_confusion_matrix_ratio*eye_nd).sum().asscalar() train_loss_scalar=train_loss/len(train_datait) print('epoch: %d , loss: %.4f , train_acc: %.4f , val_acc: %.4f'%(e,train_loss_scalar,train_acc,val_acc)) with open('evaluate_result.txt','a+') as f: f.write('epoch: %d , loss: %.4f , train_acc: %.4f , val_acc: %.4f\n' %(e,train_loss_scalar,train_acc,val_acc)) f.write('train:\n') f.write(str(confusion_matrix_ratio)+'\n') f.write(str(confusion_matrix)+'\n')