def test_edge_softmax(g, norm_by, shp, idtype):
    g = g.astype(idtype).to(F.ctx())
    edata = F.tensor(np.random.rand(g.number_of_edges(), *shp))
    e1 = F.attach_grad(F.clone(edata))

    with F.record_grad():
        score1 = edge_softmax(g, e1, norm_by=norm_by)
        F.backward(F.reduce_sum(score1))
        grad_edata = F.grad(e1)

    with F.record_grad():
        e2 = F.attach_grad(F.clone(edata))
        e2_2d = F.reshape(
            e2,
            (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]))
        if norm_by == 'src':
            score2 = F.softmax(e2_2d, 1)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        if norm_by == 'dst':
            score2 = F.softmax(e2_2d, 0)
            score2 = F.reshape(score2, (-1, *e2.shape[1:]))
        assert F.allclose(score1, score2)
        print('forward passed')

        F.backward(F.reduce_sum(score2))
        assert F.allclose(F.grad(e2), grad_edata)
        print('backward passed')
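# The test above relies on an equivalence: on a complete (bipartite) graph whose
# edges are stored in src-major order, edge softmax reduces to a row- or
# column-wise softmax of the dense (num_src, num_dst) score matrix. The
# standalone NumPy sketch below illustrates the 'dst' case; edge_softmax_ref and
# the toy graph are illustrative only and not part of the DGL API.
import numpy as np

def edge_softmax_ref(dst, scores, num_dst):
    """Softmax over edge scores, normalized within each destination node."""
    out = np.empty_like(scores)
    for v in range(num_dst):
        mask = dst == v
        s = np.exp(scores[mask] - scores[mask].max())   # numerically stable
        out[mask] = s / s.sum()
    return out

num_src, num_dst = 4, 3
src, dst = np.meshgrid(np.arange(num_src), np.arange(num_dst), indexing='ij')
src, dst = src.ravel(), dst.ravel()      # src-major edge order: (0,0), (0,1), ...
scores = np.random.rand(num_src * num_dst)

per_dst = edge_softmax_ref(dst, scores, num_dst)
dense = np.exp(scores.reshape(num_src, num_dst))
dense = dense / dense.sum(axis=0, keepdims=True)        # softmax over the src axis
assert np.allclose(per_dst, dense.ravel())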
def test_softmax_edges():
    # test#1: basic
    g0 = dgl.DGLGraph(nx.path_graph(10))
    feat0 = F.randn((g0.number_of_edges(), 10))
    g0.edata['x'] = feat0
    ground_truth = F.softmax(feat0, dim=0)
    assert F.allclose(dgl.softmax_edges(g0, 'x'), ground_truth)
    g0.edata.pop('x')

    # test#2: batched graph
    g1 = dgl.DGLGraph(nx.path_graph(5))
    g2 = dgl.DGLGraph(nx.path_graph(3))
    g3 = dgl.DGLGraph()
    g4 = dgl.DGLGraph(nx.path_graph(10))
    bg = dgl.batch([g0, g1, g2, g3, g4])
    feat1 = F.randn((g1.number_of_edges(), 10))
    feat2 = F.randn((g2.number_of_edges(), 10))
    feat4 = F.randn((g4.number_of_edges(), 10))
    bg.edata['x'] = F.cat([feat0, feat1, feat2, feat4], 0)
    ground_truth = F.cat([
        F.softmax(feat0, 0),
        F.softmax(feat1, 0),
        F.softmax(feat2, 0),
        F.softmax(feat4, 0)
    ], 0)
    assert F.allclose(dgl.softmax_edges(bg, 'x'), ground_truth)
def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3))
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with PyTorch built-in softmax.
    g = dgl.DGLGraph()
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    score.requires_grad_()
    grad = F.randn((900, 1))
    y = F.softmax(score.view(30, 30), dim=0).view(-1, 1)
    y.backward(grad)
    grad_score = score.grad.clone()  # clone before zeroing; score.grad is reused below
    score.grad.zero_()
    y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    y_dgl.backward(grad)
    # check gradient
    assert F.allclose(score.grad, grad_score)
    print(score.grad[:10], grad_score[:10])

    # Test 2
    def generate_rand_graph(n):
        arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
        return dgl.DGLGraph(arr, readonly=True)

    g = generate_rand_graph(50)
    a1 = F.randn((g.number_of_edges(), 1)).requires_grad_()
    a2 = a1.clone().detach().requires_grad_()
    g.edata['s'] = a1
    g.group_apply_edges('dst', lambda edges: {'ss': F.softmax(edges.data['s'], 1)})
    g.edata['ss'].sum().backward()
    builtin_sm = nn.edge_softmax(g, a2)
    builtin_sm.sum().backward()
    print(a1.grad - a2.grad)
    assert len(g.ndata) == 0
    assert len(g.edata) == 2
    assert F.allclose(a1.grad, a2.grad, rtol=1e-4, atol=1e-4)  # Follow tolerance in unittest backend
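# The tests above and below call a uniform_attention helper that is not shown in
# this file. Given how it is used (edge_softmax over constant scores should give
# every in-edge of a node the weight 1 / in-degree), a plausible reconstruction
# in PyTorch could look like the sketch below; this is an assumption about the
# helper, not its actual definition.
import torch

def uniform_attention(g, shape):
    # Each edge gets 1 / in-degree of its destination node, broadcast to `shape`.
    _, dst = g.edges()
    deg = g.in_degrees(dst).float().clamp(min=1)
    target_shape = (g.number_of_edges(),) + (1,) * (len(shape) - 1)
    return (1.0 / deg).view(target_shape).expand(*shape)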
def softmax(x):
    # TODO: inputs with ndim < 2 are not handled and fall through to None.
    if x.ndim == 2:
        return K.softmax(x)
    if x.ndim > 2:
        # Flatten all leading axes, apply softmax over the last axis,
        # then restore the original shape.
        shape = x.shape
        x_flatten = x.reshape((K.prod(shape[0:-1]), shape[-1]))
        return K.softmax(x_flatten).reshape(shape)
def softmax(x): """x can be tensor """ if x.ndim == 2: return K.softmax(x) if x.ndim > 2: shape = x.shape x_flatten = x.reshape((K.prod(shape[0:-1]), shape[-1])) return K.softmax(x_flatten).reshape(shape)
def _test(group_by):
    g.group_apply_edges(group_by=group_by, func=edge_udf)
    if group_by == 'src':
        u, v, eid = g.out_edges(1, form='all')
    else:
        u, v, eid = g.in_edges(5, form='all')
    out_feat = g.edges[eid].data['norm_feat']
    result = (g.nodes[u].data['h'] + g.nodes[v].data['h']) * g.edges[eid].data['feat']
    result = F.softmax(F.sum(result, dim=1), dim=0)
    assert F.allclose(out_feat, result)
def test_softmax(g, idtype):
    g = g.astype(idtype).to(F.ctx())
    g.ndata['h'] = F.randn((g.number_of_nodes(), 3))
    g.edata['h'] = F.randn((g.number_of_edges(), 2))

    # Test.1: node readout
    x = dgl.softmax_nodes(g, 'h')
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.ndata['h'], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.softmax_edges(g, 'h')
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.edata['h'], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))
def softmax(x):
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim == 3:
        # Numerically stable softmax over the last axis.
        e = K.exp(x - K.max(x, axis=-1, keepdims=True))
        s = K.sum(e, axis=-1, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor '
                         'that is not 2D or 3D. '
                         'Here, ndim=' + str(ndim))
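# The 3-D branch above subtracts the per-row maximum before exponentiating,
# which keeps the computation finite for large logits. A small NumPy sanity
# check of that pattern (independent of the Keras backend):
import numpy as np

x = np.array([[[1000.0, 1001.0, 1002.0]]])         # naive exp(1000) would overflow
e = np.exp(x - x.max(axis=-1, keepdims=True))      # largest exponent becomes exp(0)
stable = e / e.sum(axis=-1, keepdims=True)
assert np.isfinite(stable).all()
assert np.allclose(stable.sum(axis=-1), 1.0)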
def test_edge_softmax2(idtype, g):
    g = g.astype(idtype).to(F.ctx())
    g = g.local_var()
    g.srcdata.clear()
    g.dstdata.clear()
    g.edata.clear()
    a1 = F.randn((g.number_of_edges(), 1)).requires_grad_()
    a2 = a1.clone().detach().requires_grad_()
    g.edata['s'] = a1
    g.group_apply_edges('dst', lambda edges: {'ss': F.softmax(edges.data['s'], 1)})
    g.edata['ss'].sum().backward()
    builtin_sm = nn.edge_softmax(g, a2)
    builtin_sm.sum().backward()
    # print(a1.grad - a2.grad)
    assert len(g.srcdata) == 0
    assert len(g.dstdata) == 0
    assert len(g.edata) == 2
    assert F.allclose(a1.grad, a2.grad, rtol=1e-4, atol=1e-4)  # Follow tolerance in unittest backend
def test_edge_softmax(idtype):
    # Basic
    g = dgl.graph(nx.path_graph(3))
    g = g.astype(idtype).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with PyTorch built-in softmax.
    g = dgl.rand_graph(30, 900)
    g = g.astype(idtype).to(F.ctx())
    score = F.randn((900, 1))
    score.requires_grad_()
    grad = F.randn((900, 1))
    y = F.softmax(score.view(30, 30), dim=0).view(-1, 1)
    y.backward(grad)
    grad_score = score.grad.clone()  # clone before zeroing; score.grad is reused below
    score.grad.zero_()
    y_dgl = nn.edge_softmax(g, score)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    # check forward
    assert F.allclose(y_dgl, y)
    y_dgl.backward(grad)
    # check gradient
    assert F.allclose(score.grad, grad_score)
    print(score.grad[:10], grad_score[:10])
def edge_udf(edges):
    h = F.sum(edges.data['feat'] * (edges.src['h'] + edges.dst['h']), dim=2)
    normalized_feat = F.softmax(h, dim=1)
    return {"norm_feat": normalized_feat}
def test_edge_softmax():
    # Basic
    g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
    edata = F.ones((g.number_of_edges(), 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test higher dimension case
    edata = F.ones((g.number_of_edges(), 3, 1))
    a = nn.edge_softmax(g, edata)
    assert len(g.ndata) == 0
    assert len(g.edata) == 0
    assert F.allclose(a, uniform_attention(g, a.shape))

    # Test both forward and backward with TensorFlow built-in softmax.
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(30)
    # build a complete graph
    for i in range(30):
        for j in range(30):
            g.add_edge(i, j)

    score = F.randn((900, 1))
    with tf.GradientTape() as tape:
        tape.watch(score)
        grad = F.randn((900, 1))
        y = tf.reshape(F.softmax(tf.reshape(score, (30, 30)), dim=0), (-1, 1))
    grads = tape.gradient(y, [score])
    grad_score = grads[0]

    with tf.GradientTape() as tape:
        tape.watch(score)
        y_dgl = nn.edge_softmax(g, score)
        assert len(g.ndata) == 0
        assert len(g.edata) == 0
        # check forward
        assert F.allclose(y_dgl, y)
    grads = tape.gradient(y_dgl, [score])
    # check gradient
    assert F.allclose(grads[0], grad_score)
    print(grads[0][:10], grad_score[:10])

    # Test 2
    def generate_rand_graph(n):
        arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
        return dgl.DGLGraph(arr, readonly=True)

    g = generate_rand_graph(50).to(F.ctx())
    a1 = F.randn((g.number_of_edges(), 1))
    a2 = tf.identity(a1)
    with tf.GradientTape() as tape:
        tape.watch(a1)
        g.edata['s'] = a1
        g.group_apply_edges(
            'dst', lambda edges: {'ss': F.softmax(edges.data['s'], 1)})
        loss = tf.reduce_sum(g.edata['ss'])
    a1_grad = tape.gradient(loss, [a1])[0]

    with tf.GradientTape() as tape:
        tape.watch(a2)
        builtin_sm = nn.edge_softmax(g, a2)
        loss = tf.reduce_sum(builtin_sm)
    a2_grad = tape.gradient(loss, [a2])[0]

    print(a1_grad - a2_grad)
    assert len(g.ndata) == 0
    assert len(g.edata) == 2
    assert F.allclose(a1_grad, a2_grad, rtol=1e-4, atol=1e-4)  # Follow tolerance in unittest backend