def verify_batch_dot(ashp, bshp, transpose_a, transpose_b):
    A_np = np.random.uniform(size=ashp)
    B_np = np.random.uniform(size=bshp)
    A = nd.array(A_np)
    B = nd.array(B_np)

    # original op
    y = nd.batch_dot(A, B, transpose_a, transpose_b)

    # rewritten op
    andims, bndims = len(ashp), len(bshp)
    assert andims == 3 and bndims == 3, \
        "batch_dot currently only supports 3D*3D arrays."
    if transpose_a:
        ashp = ashp[:-2] + (ashp[-1], ashp[-2])
        axes = tuple(range(andims - 2)) + (andims - 1, andims - 2)
        A = nd.transpose(A, axes=axes, name=N.n("transpose_a"))
    if transpose_b:
        bshp = bshp[:-2] + (bshp[-1], bshp[-2])
        bndims = len(bshp)
        axes = tuple(range(bndims - 2)) + (bndims - 1, bndims - 2)
        B = nd.transpose(B, axes=axes, name=N.n("transpose_b"))

    assert ashp[-1] == bshp[1]
    C, MATRIX_MAXIMUM_SIZE = ashp[-1], 4096
    if ashp[-1] <= MATRIX_MAXIMUM_SIZE:
        op = nd.batch_dot(A, B, name=N.n("batch_dot"))
    else:
        # split the contraction axis into chunks and sum the partial products
        C, nodes, step, start = ashp[-1], [], MATRIX_MAXIMUM_SIZE, 0
        while start < C:
            stop = min(start + step, C)

            begin, end = (0, 0, start), (ashp[0], ashp[1], stop)
            Ak = nd.slice(A, begin=begin, end=end, name=N.n("slice_a"))

            begin, end = (0, start, 0), (bshp[0], stop, bshp[2])
            Bk = nd.slice(B, begin=begin, end=end, name=N.n("slice_b"))

            tmp = nd.batch_dot(Ak, Bk, name=N.n("batch_dot"))
            nodes.append(tmp)
            start += step

        while len(nodes) > 1:
            A, B = nodes.pop(0), nodes.pop(0)
            tmp = nd.elemwise_add(A, B, name=N.n("elemwise_add"))
            nodes.append(tmp)
        op = nodes[0]
    z = op

    # compare
    assert z.shape == y.shape
    zn, zp = get_norm(z)
    yn, yp = get_norm(y)
    rn = np.linalg.norm(zp - yp)
    print(zn, yn, rn)
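# A minimal, self-contained sketch of the two rewrites that verify_batch_dot
# checks: the transpose flags of nd.batch_dot are equivalent to an explicit
# nd.transpose, and a large contraction axis can be split into chunks whose
# partial products are summed. The shapes below are illustrative assumptions.
import numpy as np
import mxnet as mx
from mxnet import nd

A = nd.array(np.random.uniform(size=(2, 3, 8)))
B = nd.array(np.random.uniform(size=(2, 5, 8)))

# transpose_b=True contracts over the last axis of both inputs ...
y = nd.batch_dot(A, B, transpose_b=True)               # (2, 3, 5)
# ... which matches transposing B explicitly before the product.
z = nd.batch_dot(A, nd.transpose(B, axes=(0, 2, 1)))   # (2, 3, 5)
print(np.allclose(y.asnumpy(), z.asnumpy()))

# Splitting the shared axis (size 8) into two chunks of 4 and adding the
# partial products reproduces the full product.
A2 = nd.array(np.random.uniform(size=(2, 3, 8)))
B2 = nd.array(np.random.uniform(size=(2, 8, 5)))
full = nd.batch_dot(A2, B2)
split = nd.batch_dot(A2[:, :, :4], B2[:, :4, :]) + \
    nd.batch_dot(A2[:, :, 4:], B2[:, 4:, :])
print(np.allclose(full.asnumpy(), split.asnumpy(), atol=1e-6))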
def fn(rel_id, num_chunks, head, tail, gpu_id, trace=False):
    # positive node, project to its relation
    projection = self.projection_emb(rel_id, gpu_id, trace)
    projection = projection.reshape(-1, self.entity_dim, self.relation_dim)
    head = head.reshape(-1, 1, self.entity_dim)
    head = nd.batch_dot(head, projection).squeeze()
    head = head.reshape(num_chunks, -1, self.relation_dim)

    projection = projection.reshape(num_chunks, -1, self.entity_dim,
                                    self.relation_dim)
    tail = tail.reshape(num_chunks, -1, 1, self.entity_dim)
    num_rels = projection.shape[1]
    num_nnodes = tail.shape[1]

    tails = []
    for i in range(num_chunks):
        tail_negs = []
        for j in range(num_nnodes):
            tail_neg = tail[i][j]
            tail_neg = tail_neg.reshape(1, 1, self.entity_dim)
            tail_neg = nd.broadcast_axis(tail_neg, axis=0, size=num_rels)
            tail_neg = nd.batch_dot(tail_neg, projection[i])
            tail_neg = tail_neg.squeeze(axis=1)
            tail_negs.append(tail_neg)
        tail_negs = nd.stack(*tail_negs, axis=1)
        tails.append(tail_negs)
    tail = nd.stack(*tails)
    return head, tail
def prepare(self, g, gpu_id, trace=False):
    head_ids, tail_ids = g.all_edges(order='eid')
    projection = self.projection_emb(g.edata['id'], gpu_id, trace)
    projection = projection.reshape(-1, self.entity_dim, self.relation_dim)
    head_emb = g.ndata['emb'][head_ids.as_in_context(
        g.ndata['emb'].context)].expand_dims(axis=-2)
    tail_emb = g.ndata['emb'][tail_ids.as_in_context(
        g.ndata['emb'].context)].expand_dims(axis=-2)
    g.edata['head_emb'] = nd.batch_dot(head_emb, projection).squeeze()
    g.edata['tail_emb'] = nd.batch_dot(tail_emb, projection).squeeze()
def forward(self, x):
    # input x is a 3D feature map
    self.P = F.batch_dot(
        F.broadcast_to(self.weight.data(), shape=(self.gram.shape)),
        self.gram.data())
    return F.batch_dot(
        F.SwapAxis(self.P, 1, 2).broadcast_to((x.shape[0], self.C, self.C)),
        x.reshape((0, 0, x.shape[2] * x.shape[3]))).reshape(x.shape)
def bilinear(x, W, y, input_size, seq_len, batch_size,
             num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    :param x: (input_size x seq_len) x batch_size
    :param W: (num_outputs x ny) x nx
    :param y: (input_size x seq_len) x batch_size
    :param input_size: input dimension
    :param seq_len: sequence length
    :param batch_size: batch size
    :param num_outputs: number of outputs
    :param bias_x: whether to concatenate a bias vector to input x
    :param bias_y: whether to concatenate a bias vector to input y
    :return: [seq_len_y x seq_len_x if output_size == 1
              else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)
    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin,
                               (seq_len, num_outputs, seq_len, batch_size))
    return blin
def forward(self, x):
    # x: 'nwc'
    # import pdb
    # pdb.set_trace()
    x = F.transpose(x, axes=(0, 2, 1))               # (nwc) -> (ncw)
    X_ = F.batch_dot(self.w1.data(ctx), x)           # (n, c, w) -> (n, c, w)
    # E = dot(X_, W)
    E = F.batch_dot(X_, self.w.data(ctx))            # (n, c, w) -> (n, c, w)
    attn_weights = F.softmax(E, axis=2)              # (n, c, w)
    attn_applied = F.elemwise_mul(attn_weights, X_)  # (n, c, w)
    output = self.c.data(ctx) * attn_applied + \
        (1 - self.c.data(ctx)) * X_                  # (n, c, w)
    output = F.batch_dot(output, self.w2.data(ctx)) + self.b.data(ctx)  # (n, c, w)
    output = F.transpose(output, axes=(0, 2, 1))     # (ncw) -> (nwc)
    return output
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    data = in_data[0]
    rois = in_data[1]
    BS, C, H, W = data.shape
    N = rois.shape[0]
    dout = out_grad[0]
    ddata = nd.zeros_like(data)
    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * self.spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), \
            np.clip(x2, 0, W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), \
            x2.astype(np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            continue
        h = y2 - y1
        w = x2 - x1
        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
        # (C, C)
        if self.type == "max":
            reduce_product = nd.max(out_product, axis=0)
            max_mask = out_product == reduce_product
            # max_index = nd.argmax(out_product, axis=0)
            # max_index = max_index.reshape((C * C))
            # d_max = nd.eye(h*w)[max_index].transpose((1, 0)).reshape((h*w, C, C))
            dout_product = nd.stack(*[dout[i] for _ in range(h * w)]) * max_mask
        elif self.type == "mean":
            dout_product = nd.stack(*[dout[i] for _ in range(h * w)]) / (h * w)
        else:
            raise NotImplementedError()
        droi_data = []
        for j in range(C):
            droi_data.append(
                nd.sum(dout_product[:, j, :] * roi_data[:, :, 0], axis=1) +
                nd.sum(dout_product[:, :, j] * roi_data[:, :, 0], axis=1))
        droi_data = nd.stack(*droi_data, axis=1)  # (hw, C)
        droi_data = droi_data.transpose((1, 0)).reshape((C, h, w))
        ddata[batch_id, :, y1:y2, x1:x2] = droi_data
    self.assign(in_grad[0], req[0], ddata)
    self.assign(in_grad[1], req[1], nd.zeros_like(in_data[1]))
def get_gram(self, feature):
    """Compute the Gram matrix of the given feature map.

    :param feature: feature map of shape (b, ch, h, w)
    :return: Gram matrix of shape (b, ch, ch)
    """
    (b, ch, h, w) = feature.shape
    features = feature.reshape((b, ch, w * h))
    gram = nd.batch_dot(features, features, transpose_b=True)
    return gram
def bilinear(x, W, y, input_size, seq_len, batch_size,
             num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    Parameters
    ----------
    x : NDArray
        (input_size x seq_len) x batch_size
    W : NDArray
        (num_outputs x ny) x nx
    y : NDArray
        (input_size x seq_len) x batch_size
    input_size : int
        input dimension
    seq_len : int
        sequence length
    batch_size : int
        batch size
    num_outputs : int
        number of outputs
    bias_x : bool
        whether to concatenate a bias vector to input x
    bias_y : bool
        whether to concatenate a bias vector to input y

    Returns
    -------
    output : NDArray
        [seq_len_y x seq_len_x if output_size == 1
         else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)
    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])  # May cause performance issues
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin,
                               (seq_len, num_outputs, seq_len, batch_size))
    return blin
def hybrid_forward(self, F, X, gram, weight):
    self.P = F.batch_dot(F.broadcast_to(weight, shape=(self.gram.shape)),
                         gram)
    if not isinstance(X, mx.symbol.Symbol):
        return F.batch_dot(
            F.SwapAxis(self.P, 1, 2).broadcast_to(
                (X.shape[0], self.C, self.C)),
            X.reshape((0, 0, X.shape[2] * X.shape[3]))).reshape(X.shape)
    else:
        width = X.slice_axis(axis=2, begin=0, end=0)
        width = width.ones_like()
        width = width.sum()
        height = X.slice_axis(axis=3, begin=0, end=0)
        height = height.ones_like()
        height = height.sum()
        print("width", width)
        print("height", height)
        arg_shapes, out_shapes, aux_shapes = X.infer_shape_partial(
            data=(1, 3, self.width, self.height))  # 1, RGB, width, height
        return F.batch_dot(
            F.SwapAxis(self.P, 1, 2).broadcast_to(
                (out_shapes[0][0], self.C, self.C)),
            X.reshape((0, 0, out_shapes[0][2] * out_shapes[0][3]))
        ).reshape(out_shapes[0])
def bilinear_roi_pooling(data, rois, spatial_scale, type="max"):
    """
    :param data: (BS, C, H, W)
    :param rois: (N, 5)
    :param spatial_scale: float
    :param type: "max" or "mean" reduction over the ROI positions
    :return: (N, C, C) bilinearly pooled features
    """
    assert isinstance(spatial_scale, float)
    BS, C, H, W = data.shape
    N = rois.shape[0]
    out_data = []
    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), \
            np.clip(x2, 0, W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), \
            x2.astype(np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            out_data.append(
                nd.zeros((C, C), ctx=data.context, dtype=data.dtype))
            continue
        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
        # (C, C)
        if type == "max":
            reduce_product = nd.max(out_product, axis=0)
        elif type == "mean":
            reduce_product = nd.mean(out_product, axis=0)
        else:
            raise NotImplementedError()
        out_data.append(reduce_product)
    out_data = nd.stack(*out_data)
    return out_data
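# A hypothetical call of bilinear_roi_pooling() above. The feature map and
# ROIs are random and spatial_scale=0.5 is just an assumption for the sketch;
# it assumes the function and its numpy/mxnet imports are in scope.
import numpy as np
import mxnet as mx
from mxnet import nd

data = nd.random.uniform(shape=(1, 4, 16, 16))   # (BS, C, H, W)
# each ROI row is (batch_id, x1, y1, x2, y2) in image coordinates
rois = nd.array([[0, 2, 2, 20, 24],
                 [0, 0, 0, 8, 8]])
pooled = bilinear_roi_pooling(data, rois, spatial_scale=0.5, type="max")
print(pooled.shape)  # (2, 4, 4): one (C, C) second-order descriptor per ROI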
def bilinear(x, W, y, input_size, seq_len, batch_size,
             num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    :param x: (input_size x seq_len) x batch_size
    :param W:
    :param y: (input_size x seq_len) x batch_size
    :param input_size:
    :param seq_len:
    :param batch_size:
    :param num_outputs:
    :param bias_x:
    :param bias_y:
    :return: [seq_len_y x seq_len_x if output_size == 1
              else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    if bias_x:
        x = nd.concat(x, nd.ones((1, seq_len, batch_size)), dim=0)
    if bias_y:
        y = nd.concat(y, nd.ones((1, seq_len, batch_size)), dim=0)
    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lin = nd.dot(W, x)
    if num_outputs > 1:
        lin = reshape_fortran(lin, (ny, num_outputs * seq_len, batch_size))
    y = y.transpose([2, 1, 0])
    lin = lin.transpose([2, 1, 0])
    blin = nd.batch_dot(lin, y, transpose_b=True)
    blin = blin.transpose([2, 1, 0])
    if num_outputs > 1:
        blin = reshape_fortran(blin,
                               (seq_len, num_outputs, seq_len, batch_size))
    return blin
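# A small numerical check of the single-output path of bilinear() above
# (num_outputs=1, no bias terms, so reshape_fortran is never reached). The
# sizes are illustrative assumptions, and the bilinear() defined above is
# assumed to be in scope.
import numpy as np
import mxnet as mx
from mxnet import nd

input_size, seq_len, batch_size = 4, 3, 2
x = nd.random.uniform(shape=(input_size, seq_len, batch_size))
y = nd.random.uniform(shape=(input_size, seq_len, batch_size))
W = nd.random.uniform(shape=(input_size, input_size))  # (ny, nx) when num_outputs == 1

out = bilinear(x, W, y, input_size, seq_len, batch_size,
               num_outputs=1, bias_x=False, bias_y=False)
print(out.shape)  # (seq_len, seq_len, batch_size)

# out[i, j, b] contracts W with x[:, i, b] and y[:, j, b]
ref = np.einsum('km,mib,kjb->ijb', W.asnumpy(), x.asnumpy(), y.asnumpy())
print(np.allclose(out.asnumpy(), ref, atol=1e-5))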
def forward(self, input, hidden, encoder_outputs):
    # input shape: (1,)
    embedded = self.embedding(input)
    if self.dropout_p > 0:
        embedded = self.dropout(embedded)
    attn_weights = F.softmax(
        self.attn(F.concat(embedded, hidden[0].flatten(), dim=1)))
    attn_applied = F.batch_dot(attn_weights.expand_dims(0),
                               encoder_outputs.expand_dims(0))
    output = F.concat(embedded.flatten(), attn_applied.flatten(), dim=1)
    output = self.attn_combine(output).expand_dims(0)
    for i in range(self.n_layers):
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
    output = self.out(output)
    return output, hidden, attn_weights
def forward(self, is_train, req, in_data, out_data, aux):
    data = in_data[0]
    rois = in_data[1]
    BS, C, H, W = data.shape
    N = rois.shape[0]
    out = []
    rois = rois.asnumpy()
    for i in range(N):
        roi = rois[i]
        batch_id = roi[0].astype(np.int64)
        x1, y1, x2, y2 = roi[1:] * self.spatial_scale
        x1, y1, x2, y2 = np.floor(x1), np.floor(y1), np.ceil(x2), np.ceil(y2)
        x1, y1, x2, y2 = np.clip(x1, 0, W), np.clip(y1, 0, H), \
            np.clip(x2, 0, W), np.clip(y2, 0, H)
        x1, y1, x2, y2 = x1.astype(np.int64), y1.astype(np.int64), \
            x2.astype(np.int64), y2.astype(np.int64)
        if x1 >= x2 or y1 >= y2:
            out.append(nd.zeros((C, C), ctx=data.context, dtype=data.dtype))
            continue
        # (C, h, w)
        roi_data = data[batch_id, :, y1:y2, x1:x2]
        # (h*w, C)
        roi_data = roi_data.reshape((C, -1)).transpose((1, 0))
        # (h*w, C, 1)
        roi_data = roi_data.reshape((0, 0, 1))
        # (h*w, C, C)
        out_product = nd.batch_dot(roi_data, roi_data.transpose((0, 2, 1)))
        if self.type == "max":
            reduce_product = nd.max(out_product, axis=0)
        elif self.type == "mean":
            reduce_product = nd.mean(out_product, axis=0)
        else:
            raise NotImplementedError()
        out.append(reduce_product)
    out = nd.stack(*out)
    self.assign(out_data[0], req[0], out)
def gram_matrix(y):
    (b, ch, h, w) = y.shape
    features = y.reshape((b, ch, w * h))
    gram = nd.batch_dot(features, features, transpose_b=True) / (h * w)
    return gram
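# A hedged usage sketch for gram_matrix() above: the Gram matrix of a random
# feature map has shape (b, ch, ch) and is symmetric in its last two axes.
# The input shape is an arbitrary assumption.
import numpy as np
import mxnet as mx
from mxnet import nd

y = nd.random.uniform(shape=(2, 8, 16, 16))  # (batch, channels, height, width)
g = gram_matrix(y)
print(g.shape)                                                    # (2, 8, 8)
print(np.allclose(g.asnumpy(), g.asnumpy().transpose(0, 2, 1)))   # True: symmetric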
def hybrid_forward(self, F, X):
    # (batch_size, num_channel_prev, h, w, dim_vector)
    # --> (batch_size, num_capsule_prev, 1, 1, dim_vector)
    X = X.reshape((0, -1, 1, 1, 0))
    self.num_capsules_prev = X.shape[1]
    self.batch_size = X.shape[0]
    # (batch_size, num_capsule_prev, out_channels, 1, dim_vector)
    X_tile = nd.tile(X, reps=(1, 1, self.out_channels, 1, 1))

    if self.routing_weight_initial:
        self.routing_weight = nd.random_normal(
            shape=(1, self.num_capsules_prev, self.out_channels,
                   self.dim_input_vector, self.dim_vector),
            name='routing_weight').as_in_context(mx.gpu(0))
        self.routing_weight_initial = False
    # (batch_size, num_capsule_prev, out_channels, dim_input_vector, dim_vector)
    # (64, 1152, 10, 8, 16)
    W_tile = nd.tile(self.routing_weight, reps=(self.batch_size, 1, 1, 1, 1))

    linear_combination_3d = nd.batch_dot(
        X_tile.reshape((-1, X_tile.shape[-2], X_tile.shape[-1])),
        W_tile.reshape((-1, W_tile.shape[-2], W_tile.shape[-1])))
    # (64, 1152, 10, 1, 16)
    linear_combination = linear_combination_3d.reshape(
        (self.batch_size, self.num_capsules_prev, self.out_channels, 1,
         self.dim_vector))

    # b_ij (1, 1152, 10, 1, 1)
    priors = nd.zeros((1, self.num_capsules_prev, self.out_channels, 1, 1))

    ############################################################################
    ##                                Routing                                 ##
    ############################################################################
    for iter_index in range(self.num_routing_iter):
        # NOTE: RoutingAlgorithm-line 4
        # b_ij (1, 1152, 10, 1, 1)
        softmax_prior = nd.softmax(priors, axis=2)  # on num_capsule dimension

        # NOTE: RoutingAlgorithm-line 5
        # (64, 1152, 10, 1, 16)
        # output = torch.mul(softmax_prior, linear_combination)
        output = softmax_prior * linear_combination
        # (64, 1, 10, 1, 16)
        output_sum = output.sum(axis=1, keepdims=True)  # s_J

        # NOTE: RoutingAlgorithm-line 6
        # (64, 1, 10, 1, 16)
        output_squashed = self.squash(output_sum)  # v_J

        # NOTE: RoutingAlgorithm-line 7
        # (64, 1152, 10, 1, 16)
        output_tile = nd.tile(output_squashed,
                              reps=(1, self.num_capsules_prev, 1, 1, 1))
        # (64, 1152, 10, 1, 16) x (64, 1152, 10, 1, 16) (transpose on last two axes)
        # ==> (64, 1152, 10, 1, 1)
        U_times_v = nd.batch_dot(
            linear_combination.reshape((-1, 1, self.dim_vector)),
            output_tile.reshape((-1, 1, self.dim_vector)),
            transpose_b=True)
        U_times_v = U_times_v.reshape(
            (self.batch_size, self.num_capsules_prev, self.out_channels, 1, 1))
        priors = priors + U_times_v.sum(axis=0).expand_dims(axis=0)

    return output_squashed  # v_J
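# batch_dot works on 3D arrays (see the assert in verify_batch_dot above), so
# the routing code collapses the leading axes before the product and restores
# them afterwards. A minimal sketch of that reshape trick, with made-up sizes:
import mxnet as mx
from mxnet import nd

b, n, m, k, d = 2, 3, 4, 5, 6
X = nd.random.uniform(shape=(b, n, m, 1, k))   # (..., 1, k)
W = nd.random.uniform(shape=(b, n, m, k, d))   # (..., k, d)

prod = nd.batch_dot(X.reshape((-1, 1, k)),     # (b*n*m, 1, k)
                    W.reshape((-1, k, d)))     # (b*n*m, k, d)
prod = prod.reshape((b, n, m, 1, d))           # restore the leading axes
print(prod.shape)  # (2, 3, 4, 1, 6)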
def debug_bilinear(x, W, y, input_size, seq_len, batch_size,
                   num_outputs=1, bias_x=False, bias_y=False):
    """Do xWy

    :param x: (input_size x seq_len) x batch_size
    :param W:
    :param y: (input_size x seq_len) x batch_size
    :param input_size:
    :param seq_len:
    :param batch_size:
    :param num_outputs:
    :param bias_x:
    :param bias_y:
    :return: [seq_len_y x seq_len_x if output_size == 1
              else seq_len_y x num_outputs x seq_len_x] x batch_size
    """
    import dynet as dy
    xd = dy.inputTensor(x, batched=True)
    xm = nd.array(x)
    yd = dy.inputTensor(y, batched=True)
    ym = nd.array(y)
    Wd = dy.inputTensor(W)
    Wm = nd.array(W)

    def allclose(dyarray, mxarray):
        a = dyarray.npvalue()
        b = mxarray.asnumpy()
        return np.allclose(a, b)

    if bias_x:
        xd = dy.concatenate(
            [xd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))])
        xm = nd.concat(xm, nd.ones((1, seq_len, batch_size)), dim=0)
        # print(allclose(xd, xm))
    if bias_y:
        yd = dy.concatenate(
            [yd, dy.inputTensor(np.ones((1, seq_len), dtype=np.float32))])
        ym = nd.concat(ym, nd.ones((1, seq_len, batch_size)), dim=0)
        # print(allclose(yd, ym))
    nx, ny = input_size + bias_x, input_size + bias_y
    # W: (num_outputs x ny) x nx
    lind = Wd * xd
    linm = nd.dot(Wm, xm)
    # print(allclose(lind, linm))
    if num_outputs > 1:
        lind = dy.reshape(lind, (ny, num_outputs * seq_len),
                          batch_size=batch_size)
        # linm = nd.reshape(linm, (ny, num_outputs * seq_len, batch_size))
        linm = reshape_fortran(linm, (ny, num_outputs * seq_len, batch_size))
        # print(allclose(lind, linm))
    blind = dy.transpose(yd) * lind
    ym = ym.transpose([2, 1, 0])
    linm = linm.transpose([2, 1, 0])
    blinm = nd.batch_dot(linm, ym, transpose_b=True)
    blinm = blinm.transpose([2, 1, 0])
    print(np.allclose(blind.npvalue(), blinm.asnumpy()))
    if num_outputs > 1:
        blind = dy.reshape(blind, (seq_len, num_outputs, seq_len),
                           batch_size=batch_size)
        blinm = reshape_fortran(blinm,
                                (seq_len, num_outputs, seq_len, batch_size))
        print(allclose(blind, blinm))
    return blind
def gram_matrix(y):
    (b, ch, h, w) = y.shape
    features = y.reshape((b, ch, w * h))
    # features_t = F.SwapAxis(features, 1, 2)
    gram = F.batch_dot(features, features, transpose_b=True) / (ch * h * w)
    return gram
def forward(self, X):
    # input X is a 3D feature map
    self.P = F.batch_dot(
        F.broadcast_to(self.weight.data(), shape=(self.gram.shape)),
        self.gram.data())
    return F.batch_dot(
        F.SwapAxis(self.P, 1, 2).broadcast_to((X.shape[0], self.C, self.C)),
        X.reshape((0, 0, X.shape[2] * X.shape[3]))).reshape(X.shape)
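# The forward() above depends on layer state (self.weight, self.gram, self.C);
# this standalone sketch reproduces the same sequence of ops with plain
# NDArrays so the shapes are visible. All sizes below are assumptions.
import mxnet as mx
from mxnet import nd

B, C, H, W = 2, 8, 16, 16
X = nd.random.uniform(shape=(B, C, H, W))     # content feature map
gram = nd.random.uniform(shape=(1, C, C))     # target Gram matrix
weight = nd.random.uniform(shape=(1, C, C))   # learned mixing weight

P = nd.batch_dot(nd.broadcast_to(weight, shape=gram.shape), gram)  # (1, C, C)
out = nd.batch_dot(
    nd.SwapAxis(P, 1, 2).broadcast_to((B, C, C)),                   # (B, C, C)
    X.reshape((0, 0, H * W))                                        # (B, C, H*W)
).reshape(X.shape)
print(out.shape)  # (2, 8, 16, 16)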
def msg_func(edges):
    w = weight[edges.data['type']]
    msg = F.batch_dot(edges.src['h'].expand_dims(1),
                      w).reshape(-1, self.out_feat)
    msg = msg * edges.data['norm']
    return {'msg': msg}
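# msg_func() above runs inside a DGL message-passing call and closes over
# `weight` (one projection matrix per relation type) and `self.out_feat`;
# this standalone sketch shows just the batched projection it performs.
# Names and sizes here are assumptions, not the library's API.
import mxnet as mx
from mxnet import nd

num_rels, in_feat, out_feat, num_edges = 3, 4, 5, 6
weight = nd.random.uniform(shape=(num_rels, in_feat, out_feat))
etype = nd.array([0, 2, 1, 1, 0, 2], dtype='int64')     # relation type per edge
src_h = nd.random.uniform(shape=(num_edges, in_feat))   # source node features
norm = nd.random.uniform(shape=(num_edges, 1))          # per-edge normalizer

w = weight[etype]                                        # (num_edges, in_feat, out_feat)
msg = nd.batch_dot(src_h.expand_dims(1), w).reshape(-1, out_feat)
msg = msg * norm
print(msg.shape)  # (6, 5)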