def test_spmm(idtype, g, shp, msg, reducer):
    g = g.astype(idtype).to(F.ctx())
    print(g)
    print(g.idtype)
    hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
    he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))
    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        if reducer in ['max', 'min']:
            v = F.replace_inf_with_zero(v)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)
    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            v1 = g.dstdata['v']
            assert F.allclose(v, v1)
            print('forward passed')
            F.backward(F.reduce_sum(v1))
            if msg != 'copy_rhs':
                if reducer in ['min', 'max']:
                    # there might be some numerical errors
                    rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\
                        F.reduce_sum(F.abs(grad_u))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                if reducer in ['min', 'max']:
                    rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\
                        F.reduce_sum(F.abs(grad_e))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.edata['w']), grad_e)
            print('backward passed')
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')

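# A minimal usage sketch of the kernel exercised above (editorial addition,
# not part of the original test suite). It assumes DGL's public
# ``dgl.ops.gspmm`` entry point and a PyTorch backend: for every destination
# node, the kernel reduces the per-edge messages built from source-node and
# edge operands.
def _example_gspmm():
    import dgl
    import torch

    g = dgl.graph(([0, 1, 2], [1, 2, 2]))  # 3 nodes; edges 0->1, 1->2, 2->2
    u = torch.randn(g.num_nodes(), 4)      # per-source-node features
    e = torch.randn(g.num_edges(), 4)      # per-edge features
    # v[d] = sum over incoming edges (s, d) of u[s] * e[(s, d)]
    v = dgl.ops.gspmm(g, 'mul', 'sum', u, e)
    assert v.shape == (g.num_nodes(), 4)   # node 0 has no in-edges -> zeros
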
def get_optimization_func(self, target_dim_list, loss_func, optimizer, clip): """Compile and return optimization function. Args: target_dim_list: list of integars. targets' dimension. e.g. target_dim_list=[2] loss_func: string | function. optimizer: object. clip: None | real value. Return: optimization function. """ # set gt nodes self.set_gt_nodes(target_dim_list) # Default loss if type(loss_func) is str: assert len( self.out_nodes_ ) == 1, "If the number of out_layers > 1, you need define your own loss_func!" loss_node = obj.get(loss_func)(self.out_nodes_[0], self.gt_nodes_[0]) # User defined loss else: loss_node = loss_func(self) # Compute gradient gparams = K.grad(loss_node + self.reg_value_, self.params_) # Clip gradient if clip is not None: gparams = [K.clip(gparam, -clip, clip) for gparam in gparams] # Gradient based optimization param_updates = optimizer.get_updates(self.params_, gparams) # Get all updates updates = param_updates + self.inner_updates_ # Compile model inputs = self.in_nodes_ + self.gt_nodes_ + [K.common_tr_phase_node] outputs = [loss_node] f = K.function_no_given(inputs, outputs, updates) return f
def test_csrsum_backward(idtype, dtype, nelems):
    a, A = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, 'A', 'B', 'AB')
    b, B = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, 'A', 'B', 'AB')
    A_row, A_col = A.edges(order='eid')
    B_row, B_col = B.edges(order='eid')
    A_row = F.asnumpy(A_row)
    A_col = F.asnumpy(A_col)
    B_row = F.asnumpy(B_row)
    B_col = F.asnumpy(B_col)
    a_dense = F.attach_grad(F.tensor(a.todense(), dtype=dtype))
    b_dense = F.attach_grad(F.tensor(b.todense(), dtype=dtype))
    A.edata['w'] = F.attach_grad(A.edata['w'])
    B.edata['w'] = F.attach_grad(B.edata['w'])

    with F.record_grad():
        if nelems == 2:
            # Test for two element case
            C = dgl.adj_sum_graph([A, B], 'w')
            assert C.canonical_etypes == A.canonical_etypes
            C_dense = np.zeros((3, 4))
            C_row, C_col = C.edges(order='eid')
            C_row = F.asnumpy(C_row)
            C_col = F.asnumpy(C_col)
            C_dense[C_row, C_col] = F.asnumpy(C.edata['w'])
            c_dense = a_dense + b_dense
            assert np.allclose(C_dense, F.asnumpy(c_dense), rtol=1e-4, atol=1e-4)

            F.backward(F.reduce_sum(C.edata['w']) + F.reduce_sum(c_dense))
            a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col]
            b_dense_grad = F.asnumpy(F.grad(b_dense))[B_row, B_col]
            A_spspmm_grad = F.asnumpy(F.grad(A.edata['w']))
            B_spspmm_grad = F.asnumpy(F.grad(B.edata['w']))
            assert np.allclose(a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4)
            assert np.allclose(b_dense_grad, B_spspmm_grad, rtol=1e-4, atol=1e-4)
        elif nelems == 1:
            # Test for single element case
            C = dgl.adj_sum_graph([A], 'w')
            assert C.canonical_etypes == A.canonical_etypes
            C_dense = np.zeros((3, 4))
            C_row, C_col = C.edges(order='eid')
            C_row = F.asnumpy(C_row)
            C_col = F.asnumpy(C_col)
            C_dense[C_row, C_col] = F.asnumpy(C.edata['w'])
            c_dense = a_dense
            assert np.allclose(C_dense, F.asnumpy(c_dense), rtol=1e-4, atol=1e-4)

            F.backward(F.reduce_sum(C.edata['w']) + F.reduce_sum(c_dense))
            a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col]
            A_spspmm_grad = F.asnumpy(F.grad(A.edata['w']))
            assert np.allclose(a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4)

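# Minimal sketch of the API under test (editorial addition, assuming a
# PyTorch backend). ``dgl.adj_sum_graph`` sums the weighted adjacency
# matrices of graphs sharing the same metagraph and returns a new graph whose
# 'w' edge feature holds the nonzero entries of the sum.
def _example_adj_sum_graph():
    import dgl
    import torch

    num_nodes = {'A': 3, 'B': 4}
    A = dgl.heterograph({('A', 'AB', 'B'): ([0, 1], [1, 2])}, num_nodes)
    B = dgl.heterograph({('A', 'AB', 'B'): ([0, 2], [1, 3])}, num_nodes)
    A.edata['w'] = torch.rand(A.num_edges())
    B.edata['w'] = torch.rand(B.num_edges())
    C = dgl.adj_sum_graph([A, B], 'w')  # C = A + B as sparse matrices
    assert C.canonical_etypes == A.canonical_etypes
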
def get_optimization_func(self, target_dim_list, loss_func, optimizer, clip): """Compile and return optimization function. Args: target_dim_list: list of integars. targets' dimension. e.g. target_dim_list=[2] loss_func: string | function. optimizer: object. clip: None | real value. Return: optimization function. """ # set gt nodes self.set_gt_nodes(target_dim_list) # Default loss if type(loss_func) is str: assert len(self.out_nodes_)==1, "If the number of out_layers > 1, you need define your own loss_func!" loss_node = obj.get(loss_func)(self.out_nodes_[0], self.gt_nodes_[0]) # User defined loss else: loss_node = loss_func(self) # Compute gradient gparams = K.grad(loss_node + self.reg_value_, self.params_) # Clip gradient if clip is not None: gparams = [K.clip(gparam, -clip, clip) for gparam in gparams] # Gradient based optimization param_updates = optimizer.get_updates(self.params_, gparams) # Get all updates updates = param_updates + self.inner_updates_ # Compile model inputs = self.in_nodes_ + self.gt_nodes_ + [K.common_tr_phase_node] outputs = [loss_node] f = K.function_no_given(inputs, outputs, updates) return f
def test_spmm(g, shp, msg, reducer, index_dtype):
    if dgl.backend.backend_name == 'tensorflow' and (
            reducer in ['min', 'max'] or index_dtype == 'int32'):
        pytest.skip()  # tensorflow dlpack has problem writing into int32 arrays on GPU.
    if index_dtype == 'int32':
        g = g.int()
    else:
        g = g.long()
    print(g)
    print(g.idtype)
    hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
    he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))
    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        non_degree_indices = F.tensor(
            np.nonzero(F.asnumpy(g.in_degrees()) != 0)[0])
        v = F.gather_row(v, non_degree_indices)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)
    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            v1 = F.gather_row(g.dstdata['v'], non_degree_indices)
            assert F.allclose(v, v1)
            print('forward passed')
            F.backward(F.reduce_sum(v1))
            if msg != 'copy_rhs':
                if reducer in ['min', 'max']:
                    # there might be some numerical errors
                    rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\
                        F.reduce_sum(F.abs(grad_u))
                    assert F.as_scalar(rate) < 1e-3, rate
                else:
                    assert F.allclose(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                if reducer in ['min', 'max']:
                    rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\
                        F.reduce_sum(F.abs(grad_e))
                    assert F.as_scalar(rate) < 1e-3, rate
                else:
                    assert F.allclose(F.grad(g.edata['w']), grad_e)
            print('backward passed')
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')

def _test(lhs, rhs, binary_op):
    g = create_test_heterograph(idtype)

    n1 = F.randn((g.num_nodes('user'), feat_size))
    n2 = F.randn((g.num_nodes('developer'), feat_size))
    n3 = F.randn((g.num_nodes('game'), feat_size))

    x1 = F.randn((g.num_edges('plays'), feat_size))
    x2 = F.randn((g.num_edges('follows'), feat_size))
    x3 = F.randn((g.num_edges('develops'), feat_size))
    x4 = F.randn((g.num_edges('wishes'), feat_size))

    builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
    builtin_msg = getattr(fn, builtin_msg_name)

    #################################################################
    # apply_edges() is called on each relation type separately
    #################################################################

    F.attach_grad(n1)
    F.attach_grad(n2)
    F.attach_grad(n3)
    g.nodes['user'].data['h'] = n1
    g.nodes['developer'].data['h'] = n2
    g.nodes['game'].data['h'] = n3
    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    F.attach_grad(x4)
    g['plays'].edata['h'] = x1
    g['follows'].edata['h'] = x2
    g['develops'].edata['h'] = x3
    g['wishes'].edata['h'] = x4

    with F.record_grad():
        for rel in g.canonical_etypes:
            g.apply_edges(builtin_msg('h', 'h', 'm'), etype=rel)
        r1 = g['plays'].edata['m']
        loss = F.sum(r1.view(-1), 0)
        F.backward(loss)
        n_grad1 = F.grad(g.nodes['game'].data['h'])

    #################################################################
    # apply_edges() is called on all relation types
    #################################################################

    F.attach_grad(n1)
    F.attach_grad(n2)
    F.attach_grad(n3)
    g.nodes['user'].data['h'] = n1
    g.nodes['developer'].data['h'] = n2
    g.nodes['game'].data['h'] = n3
    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    F.attach_grad(x4)
    g['plays'].edata['h'] = x1
    g['follows'].edata['h'] = x2
    g['develops'].edata['h'] = x3
    g['wishes'].edata['h'] = x4

    with F.record_grad():
        g.apply_edges(builtin_msg('h', 'h', 'm'))
        r2 = g['plays'].edata['m']
        loss = F.sum(r2.view(-1), 0)
        F.backward(loss)
        n_grad2 = F.grad(g.nodes['game'].data['h'])

    # correctness check
    def _print_error(a, b):
        for i, (x, y) in enumerate(
                zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
            if not np.allclose(x, y):
                print('@{} {} v.s. {}'.format(i, x, y))

    if not F.allclose(r1, r2):
        _print_error(r1, r2)
    assert F.allclose(r1, r2)
    if n_grad1 is not None or n_grad2 is not None:
        if not F.allclose(n_grad1, n_grad2):
            print('node grad')
            _print_error(n_grad1, n_grad2)
        assert F.allclose(n_grad1, n_grad2)

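# Compact illustration of the builtin binary message functions exercised
# above (editorial addition, assuming a PyTorch backend).
# ``fn.u_mul_v('h', 'h', 'm')`` stores, per edge, the elementwise product of
# the source and destination 'h' features in edge field 'm'.
def _example_builtin_apply_edges():
    import dgl
    import dgl.function as fn
    import torch

    g = dgl.graph(([0, 1], [1, 2]))
    g.ndata['h'] = torch.randn(g.num_nodes(), 4)
    g.apply_edges(fn.u_mul_v('h', 'h', 'm'))
    src, dst = g.edges()
    assert torch.allclose(g.edata['m'], g.ndata['h'][src] * g.ndata['h'][dst])
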
def _test(lhs, rhs, binary_op, reducer):
    g = create_test_heterograph(idtype)

    x1 = F.randn((g.num_nodes('user'), feat_size))
    x2 = F.randn((g.num_nodes('developer'), feat_size))
    x3 = F.randn((g.num_nodes('game'), feat_size))

    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    g.nodes['user'].data['h'] = x1
    g.nodes['developer'].data['h'] = x2
    g.nodes['game'].data['h'] = x3

    x1 = F.randn((4, feat_size))
    x2 = F.randn((4, feat_size))
    x3 = F.randn((3, feat_size))
    x4 = F.randn((3, feat_size))
    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    F.attach_grad(x4)
    g['plays'].edata['h'] = x1
    g['follows'].edata['h'] = x2
    g['develops'].edata['h'] = x3
    g['wishes'].edata['h'] = x4

    builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
    builtin_msg = getattr(fn, builtin_msg_name)
    builtin_red = getattr(fn, reducer)

    #################################################################
    # multi_update_all(): call msg_passing separately for each etype
    #################################################################

    with F.record_grad():
        g.multi_update_all(
            {etype: (builtin_msg('h', 'h', 'm'), builtin_red('m', 'y'))
             for etype in g.canonical_etypes},
            'sum')
        r1 = g.nodes['game'].data['y']
        F.backward(r1, F.ones(r1.shape))
        n_grad1 = F.grad(r1)

    #################################################################
    # update_all(): call msg_passing for all etypes
    #################################################################

    g.update_all(builtin_msg('h', 'h', 'm'), builtin_red('m', 'y'))
    r2 = g.nodes['game'].data['y']
    F.backward(r2, F.ones(r2.shape))
    n_grad2 = F.grad(r2)

    # correctness check
    def _print_error(a, b):
        for i, (x, y) in enumerate(
                zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
            if not np.allclose(x, y):
                print('@{} {} v.s. {}'.format(i, x, y))

    if not F.allclose(r1, r2):
        _print_error(r1, r2)
    assert F.allclose(r1, r2)

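# Minimal sketch of ``multi_update_all`` (editorial addition, assuming a
# PyTorch backend): each relation type runs its own message/reduce pair, and
# the per-relation results landing on the same node type are combined with
# the cross-type reducer ('sum' here).
def _example_multi_update_all():
    import dgl
    import dgl.function as fn
    import torch

    g = dgl.heterograph({
        ('user', 'plays', 'game'): ([0, 1], [0, 1]),
        ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
    })
    g.nodes['user'].data['h'] = torch.randn(2, 4)
    g.nodes['developer'].data['h'] = torch.randn(2, 4)
    g.multi_update_all(
        {'plays': (fn.copy_u('h', 'm'), fn.sum('m', 'y')),
         'develops': (fn.copy_u('h', 'm'), fn.sum('m', 'y'))},
        'sum')  # cross-type reducer
    assert g.nodes['game'].data['y'].shape == (2, 4)
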
def _test(mfunc, rfunc):
    g = create_test_heterograph_large(idtype)
    g0 = create_test_heterograph_2(idtype)
    g1 = create_test_heterograph(idtype)
    cross_reducer = rfunc.__name__

    x1 = F.randn((g.num_edges('plays'), feat_size))
    x2 = F.randn((g.num_edges('follows'), feat_size))
    x3 = F.randn((g.num_edges('develops'), feat_size))
    x4 = F.randn((g.num_edges('wishes'), feat_size))
    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    F.attach_grad(x4)
    g['plays'].edata['eid'] = x1
    g['follows'].edata['eid'] = x2
    g['develops'].edata['eid'] = x3
    g['wishes'].edata['eid'] = x4

    #################################################################
    # multi_update_all(): call msg_passing separately for each etype
    #################################################################

    with F.record_grad():
        g.multi_update_all(
            {'plays': (mfunc('eid', 'm'), rfunc('m', 'y')),
             'follows': (mfunc('eid', 'm'), rfunc('m', 'y')),
             'develops': (mfunc('eid', 'm'), rfunc('m', 'y')),
             'wishes': (mfunc('eid', 'm'), rfunc('m', 'y'))},
            cross_reducer)
        r1 = g.nodes['game'].data['y'].clone()
        r2 = g.nodes['user'].data['y'].clone()
        loss = r1.sum() + r2.sum()
        F.backward(loss)
        e_grad1 = F.grad(g['develops'].edata['eid']).clone()
        e_grad2 = F.grad(g['plays'].edata['eid']).clone()
        e_grad3 = F.grad(g['wishes'].edata['eid']).clone()
        e_grad4 = F.grad(g['follows'].edata['eid']).clone()

    # clear the edge features of every relation type before the second pass
    for _, etype, _ in g.canonical_etypes:
        g[etype].edata.clear()

    #################################################################
    # update_all(): call msg_passing for all etypes
    #################################################################
    # TODO(Israt): output type can be None in multi_update and empty

    F.attach_grad(x1)
    F.attach_grad(x2)
    F.attach_grad(x3)
    F.attach_grad(x4)
    g['plays'].edata['eid'] = x1
    g['follows'].edata['eid'] = x2
    g['develops'].edata['eid'] = x3
    g['wishes'].edata['eid'] = x4

    with F.record_grad():
        g.update_all(mfunc('eid', 'm'), rfunc('m', 'y'))
        r3 = g.nodes['game'].data['y']
        r4 = g.nodes['user'].data['y']
        loss = r3.sum() + r4.sum()
        F.backward(loss)
        e_grad5 = F.grad(g['develops'].edata['eid'])
        e_grad6 = F.grad(g['plays'].edata['eid'])
        e_grad7 = F.grad(g['wishes'].edata['eid'])
        e_grad8 = F.grad(g['follows'].edata['eid'])

    # correctness check
    def _print_error(a, b):
        for i, (x, y) in enumerate(
                zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
            if not np.allclose(x, y):
                print('@{} {} v.s. {}'.format(i, x, y))

    assert F.allclose(r1, r3)
    assert F.allclose(r2, r4)
    assert F.allclose(e_grad1, e_grad5)
    assert F.allclose(e_grad2, e_grad6)
    assert F.allclose(e_grad3, e_grad7)
    assert F.allclose(e_grad4, e_grad8)

def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast='none'):
    hu, hv, he = generate_feature(g, broadcast)
    g.ndata['u'] = F.attach_grad(F.clone(hu))
    g.ndata['v'] = F.attach_grad(F.clone(hv))
    g.edata['e'] = F.attach_grad(F.clone(he))

    builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
    builtin_msg = getattr(fn, builtin_msg_name)
    builtin_red = getattr(fn, reducer)

    def target_feature_switch(g, target):
        if target == "u":
            return g.ndata["u"]
        elif target == "v":
            return g.ndata["v"]
        else:
            return g.edata["e"]

    with F.record_grad():
        if partial:
            g.pull(nid, builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
        else:
            g.update_all(builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
        r1 = g.ndata.pop('r1')
        F.backward(r1.sum())
        lhs_grad_1 = F.grad(target_feature_switch(g, lhs))
        rhs_grad_1 = F.grad(target_feature_switch(g, rhs))

    # reset grad
    g.ndata['u'] = F.attach_grad(F.clone(hu))
    g.ndata['v'] = F.attach_grad(F.clone(hv))
    g.edata['e'] = F.attach_grad(F.clone(he))

    def target_switch(edges, target):
        if target == "u":
            return edges.src
        elif target == "v":
            return edges.dst
        elif target == "e":
            return edges.data
        else:
            assert 0, "Unknown target {}".format(target)

    def mfunc(edges):
        op = getattr(F, binary_op)
        lhs_data = target_switch(edges, lhs)
        rhs_data = target_switch(edges, rhs)
        return {"m": op(lhs_data[lhs], rhs_data[rhs])}

    def rfunc(nodes):
        op = getattr(F, reducer)
        return {"r2": op(nodes.mailbox['m'], 1)}

    with F.record_grad():
        if partial:
            g.pull(nid, mfunc, rfunc)
        else:
            g.update_all(mfunc, rfunc)
        r2 = g.ndata.pop('r2')
        F.backward(r2.sum(), F.tensor([1.]))
        lhs_grad_2 = F.grad(target_feature_switch(g, lhs))
        rhs_grad_2 = F.grad(target_feature_switch(g, rhs))

    if reducer == 'prod':
        rtol = 1e-2
        atol = 1e-2
    else:
        rtol = 1e-4
        atol = 1e-4

    def _print_error(a, b):
        print("ERROR: Test {}_{}_{}_{} {}".format(
            lhs, binary_op, rhs, reducer, broadcast))
        print(a, b)
        for i, (x, y) in enumerate(
                zip(F.asnumpy(F.cpu(a)).flatten(),
                    F.asnumpy(F.cpu(b)).flatten())):
            if not np.allclose(x, y, rtol, atol):
                print('@{} {} v.s. {}'.format(i, x, y))

    if not F.allclose(r1, r2, rtol, atol):
        _print_error(r1, r2)
    assert F.allclose(r1, r2, rtol, atol)
    if not F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol):
        print("left grad")
        _print_error(lhs_grad_1, lhs_grad_2)
    assert F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol)
    if not F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol):
        print("right grad")
        _print_error(rhs_grad_1, rhs_grad_2)
    assert F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol)

def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast='none'):
    # initialize node/edge features with uniform(-1, 1)
    hu, hv, he = generate_feature(g, broadcast, binary_op)
    if binary_op == 'div':
        # op = div
        # lhs range: [-1, 1]
        # rhs range: [1, 2]
        # result range: [-1, 1]
        if rhs == 'u':
            hu = (hu + 3) / 2
        elif rhs == 'v':
            hv = (hv + 3) / 2
        elif rhs == 'e':
            he = (he + 3) / 2

    if binary_op == 'add' or binary_op == 'sub':
        # op = add, sub
        # lhs range: [-1/2, 1/2]
        # rhs range: [-1/2, 1/2]
        # result range: [-1, 1]
        hu = hu / 2
        hv = hv / 2
        he = he / 2

    g.ndata['u'] = F.attach_grad(F.clone(hu))
    g.ndata['v'] = F.attach_grad(F.clone(hv))
    g.edata['e'] = F.attach_grad(F.clone(he))

    builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
    builtin_msg = getattr(fn, builtin_msg_name)
    builtin_red = getattr(fn, reducer)

    def target_feature_switch(g, target):
        if target == "u":
            return g.ndata["u"]
        elif target == "v":
            return g.ndata["v"]
        else:
            return g.edata["e"]

    with F.record_grad():
        if partial:
            g.pull(nid, builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
        else:
            g.update_all(builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
        r1 = g.ndata.pop('r1')
        F.backward(F.reduce_sum(r1))
        lhs_grad_1 = F.grad(target_feature_switch(g, lhs))
        rhs_grad_1 = F.grad(target_feature_switch(g, rhs))

    # reset grad
    g.ndata['u'] = F.attach_grad(F.clone(hu))
    g.ndata['v'] = F.attach_grad(F.clone(hv))
    g.edata['e'] = F.attach_grad(F.clone(he))

    def target_switch(edges, target):
        if target == "u":
            return edges.src
        elif target == "v":
            return edges.dst
        elif target == "e":
            return edges.data
        else:
            assert 0, "Unknown target {}".format(target)

    def mfunc(edges):
        op = getattr(F, binary_op)
        lhs_data = target_switch(edges, lhs)[lhs]
        rhs_data = target_switch(edges, rhs)[rhs]
        # NOTE(zihao): we need to do batched broadcast
        # e.g. (68, 3, 1) op (68, 5, 3, 4)
        while F.ndim(lhs_data) < F.ndim(rhs_data):
            lhs_data = F.unsqueeze(lhs_data, 1)
        while F.ndim(rhs_data) < F.ndim(lhs_data):
            rhs_data = F.unsqueeze(rhs_data, 1)
        return {"m": op(lhs_data, rhs_data)}

    def rfunc(nodes):
        op = getattr(F, reducer)
        return {"r2": op(nodes.mailbox['m'], 1)}

    with F.record_grad():
        if partial:
            g.pull(nid, mfunc, rfunc)
        else:
            g.update_all(mfunc, rfunc)
        r2 = g.ndata.pop('r2')
        F.backward(F.reduce_sum(r2), F.tensor([1.]))
        lhs_grad_2 = F.grad(target_feature_switch(g, lhs))
        rhs_grad_2 = F.grad(target_feature_switch(g, rhs))

    rtol = 1e-4
    atol = 1e-4

    def _print_error(a, b):
        print("ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".format(
            lhs, binary_op, rhs, reducer, broadcast, partial))
        # dump the operands and the mismatching entries
        if lhs == 'u':
            lhs_data = hu
        elif lhs == 'v':
            lhs_data = hv
        elif lhs == 'e':
            lhs_data = he
        if rhs == 'u':
            rhs_data = hu
        elif rhs == 'v':
            rhs_data = hv
        elif rhs == 'e':
            rhs_data = he
        print("lhs", F.asnumpy(lhs_data).tolist())
        print("rhs", F.asnumpy(rhs_data).tolist())
        for i, (x, y) in enumerate(
                zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
            if not np.allclose(x, y, rtol, atol):
                print('@{} {} v.s. {}'.format(i, x, y))

    if not F.allclose(r1, r2, rtol, atol):
        _print_error(r1, r2)
    assert F.allclose(r1, r2, rtol, atol)
    if not F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol):
        print("left grad")
        _print_error(lhs_grad_1, lhs_grad_2)
    assert F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol)
    if not F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol):
        print("right grad")
        _print_error(rhs_grad_1, rhs_grad_2)
    assert F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol)

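# Minimal sketch of the partial-update path tested above (editorial addition,
# assuming a PyTorch backend): ``g.pull`` runs message passing only into the
# given destination nodes, whereas ``g.update_all`` covers every node.
def _example_pull():
    import dgl
    import dgl.function as fn
    import torch

    g = dgl.graph(([0, 1, 2], [1, 2, 0]))
    g.ndata['u'] = torch.randn(g.num_nodes(), 4)
    g.edata['e'] = torch.randn(g.num_edges(), 4)
    g.pull([1, 2], fn.u_mul_e('u', 'e', 'm'), fn.sum('m', 'r'))
    # nodes outside the pulled set keep a zero-initialized 'r'
    assert g.ndata['r'].shape == (g.num_nodes(), 4)
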
def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
    if lhs_target == rhs_target:
        return
    g = g.astype(idtype).to(F.ctx())
    if dgl.backend.backend_name == 'mxnet' and g.number_of_edges() == 0:
        pytest.skip()  # mxnet does not support zero-shape tensors
    print(g)
    print(g.idtype)
    len_lhs = select(
        lhs_target,
        g.number_of_src_nodes(),
        g.number_of_edges(),
        g.number_of_dst_nodes())
    lhs_shp = (len_lhs,) + shp[0]
    len_rhs = select(
        rhs_target,
        g.number_of_src_nodes(),
        g.number_of_edges(),
        g.number_of_dst_nodes())
    rhs_shp = (len_rhs,) + shp[1]
    feat_lhs = F.tensor(np.random.rand(*lhs_shp) + 1)
    feat_rhs = F.tensor(np.random.rand(*rhs_shp) + 1)
    print('lhs shape: {}, rhs shape: {}'.format(F.shape(feat_lhs), F.shape(feat_rhs)))
    lhs_frame = select(lhs_target, g.srcdata, g.edata, g.dstdata)
    rhs_frame = select(rhs_target, g.srcdata, g.edata, g.dstdata)
    lhs_frame['x'] = F.attach_grad(F.clone(feat_lhs))
    rhs_frame['y'] = F.attach_grad(F.clone(feat_rhs))
    msg_func = lhs_target + '_' + msg + '_' + rhs_target
    print('SDDMM(message func: {})'.format(msg_func))
    lhs = F.attach_grad(F.clone(feat_lhs))
    rhs = F.attach_grad(F.clone(feat_rhs))
    with F.record_grad():
        e = gsddmm(g, msg, lhs, rhs,
                   lhs_target=lhs_target, rhs_target=rhs_target)
        F.backward(F.reduce_sum(e))
        grad_lhs = F.grad(lhs)
        grad_rhs = F.grad(rhs)
    with F.record_grad():
        g.apply_edges(udf_apply_edges[msg_func])
        if g.number_of_edges() > 0:
            e1 = g.edata['m']
            assert F.allclose(e, e1)
            print('forward passed')
            if F.backend_name != "jax":
                F.backward(F.reduce_sum(e1))
                if msg != 'copy_rhs':
                    assert F.allclose(F.grad(lhs_frame['x']), grad_lhs)
                if msg != 'copy_lhs':
                    assert F.allclose(F.grad(rhs_frame['y']), grad_rhs)
                print('backward passed')
    lhs_frame.pop('x')
    rhs_frame.pop('y')
    if 'm' in g.edata:
        g.edata.pop('m')

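# Minimal sketch of the generalized SDDMM under test (editorial addition,
# assuming DGL's public ``dgl.ops.gsddmm`` entry point and a PyTorch
# backend). It produces one value per edge from operands gathered at the
# chosen targets (source node 'u', destination node 'v', or edge 'e').
def _example_gsddmm():
    import dgl
    import torch

    g = dgl.graph(([0, 1, 2], [1, 2, 0]))
    u = torch.randn(g.num_nodes(), 4)
    v = torch.randn(g.num_nodes(), 4)
    # e[(s, d)] = <u[s], v[d]>, the classic sampled dense-dense matmul
    e = dgl.ops.gsddmm(g, 'dot', u, v, lhs_target='u', rhs_target='v')
    assert e.shape == (g.num_edges(), 1)
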
def fit(self, x, y, batch_size=100, n_epochs=10,
        loss_func='categorical_crossentropy',
        optimizer=SGD(lr=0.01, rho=0.9), clip=None,
        callbacks=[], shuffle=True, verbose=1):
    x = to_list(x)
    y = to_list(y)

    # format
    x = [K.format_data(e) for e in x]
    y = [K.format_data(e) for e in y]

    # shuffle data
    if shuffle:
        x, y = supports.shuffle(x, y)

    # check data
    self._check_data(y, loss_func)

    # init gt_nodes
    self._gt_nodes_ = [K.placeholder(e.ndim) for e in y]

    # memory usage
    print("Train", self._show_memory_usage(self._layer_list_, batch_size))

    # default objective
    if type(loss_func) is str:
        assert len(self._out_nodes_) == len(self._gt_nodes_), \
            "If you are using default objectives, " \
            "out_node of out_layers must match ground truth!"
        loss_node = sum([obj.get(loss_func)(pred_node, gt_node)
                         for pred_node, gt_node
                         in zip(self._out_nodes_, self._gt_nodes_)])
    # user defined objective
    else:
        loss_node = loss_func(self._out_nodes_, self._any_nodes_, self._gt_nodes_)
        # loss_node = loss_func(self)

    # gradient
    gparams = K.grad(loss_node + self._reg_value_, self._params_)

    # clip gradient
    if clip is not None:
        gparams = [K.clip(gparam, -clip, clip) for gparam in gparams]

    # gradient based optimization
    param_updates = optimizer.get_updates(self._params_, gparams)

    # get all updates
    updates = param_updates + self._inner_updates_

    # compile for callback
    if callbacks is not None:
        callbacks = to_list(callbacks)
        for callback in callbacks:
            callback.compile(self)

    # compile model
    input_nodes = self._in_nodes_ + self._gt_nodes_
    output_nodes = [loss_node]
    f = K.function_no_given(input_nodes, self._tr_phase_node_, output_nodes, updates)

    # train
    N = len(x[0])
    batch_num = int(np.ceil(float(N) / batch_size))
    n_abs_epoch = n_epochs + self._epoch_

    # callback
    print('\n0th epoch:')
    for callback in callbacks:
        if self._epoch_ % callback.call_freq == 0:
            callback.call()

    while self._epoch_ < n_abs_epoch:
        self._epoch_ += 1

        # train
        t1 = time.time()
        loss_list = []
        for i2 in range(batch_num):
            batch_x = [e[i2 * batch_size:min((i2 + 1) * batch_size, N)]
                       for e in x]
            batch_y = [e[i2 * batch_size:min((i2 + 1) * batch_size, N)]
                       for e in y]
            in_list = batch_x + batch_y + [1.]
            loss = f(*in_list)[0]  # training phase
            loss_list.append(loss)
            if verbose == 1:
                self._print_progress(self._epoch_, batch_num, i2)
            if verbose == 2:
                self._print_progress_loss(self._epoch_, batch_num, i2, loss)
        t2 = time.time()
        self._tr_time_ += (t2 - t1)
        if verbose != 0:
            print('\n', ' tr_time: ', "%.2f" % (t2 - t1), 's')  # print an empty line

        # callback
        for callback in callbacks:
            if self._epoch_ % callback.call_freq == 0:
                callback.call()

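# Hypothetical usage sketch for ``fit`` (editorial addition; ``md``, ``tr_x``
# and ``tr_y`` are illustrative placeholders, not names from the original
# code base). ``SGD`` is the default optimizer referenced by this module.
def _example_fit(md, tr_x, tr_y):
    md.fit(x=tr_x, y=tr_y, batch_size=100, n_epochs=10,
           loss_func='categorical_crossentropy',
           optimizer=SGD(lr=0.01, rho=0.9), clip=None,
           callbacks=[], shuffle=True, verbose=1)
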
def test_edge_softmax(g, norm_by, idtype): print("params", norm_by, idtype) g = create_test_heterograph(idtype) x1 = F.randn((g.num_edges('plays'), feat_size)) x2 = F.randn((g.num_edges('follows'), feat_size)) x3 = F.randn((g.num_edges('develops'), feat_size)) x4 = F.randn((g.num_edges('wishes'), feat_size)) F.attach_grad(F.clone(x1)) F.attach_grad(F.clone(x2)) F.attach_grad(F.clone(x3)) F.attach_grad(F.clone(x4)) g['plays'].edata['eid'] = x1 g['follows'].edata['eid'] = x2 g['develops'].edata['eid'] = x3 g['wishes'].edata['eid'] = x4 ################################################################# # edge_softmax() on homogeneous graph ################################################################# with F.record_grad(): hm_g = dgl.to_homogeneous(g) hm_x = F.cat((x3, x2, x1, x4), 0) hm_e = F.attach_grad(F.clone(hm_x)) score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by) hm_g.edata['score'] = score_hm ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes) r1 = ht_g.edata['score'][('user', 'plays', 'game')] r2 = ht_g.edata['score'][('user', 'follows', 'user')] r3 = ht_g.edata['score'][('developer', 'develops', 'game')] r4 = ht_g.edata['score'][('user', 'wishes', 'game')] F.backward(F.reduce_sum(r1) + F.reduce_sum(r2)) grad_edata_hm = F.grad(hm_e) ################################################################# # edge_softmax() on heterogeneous graph ################################################################# e1 = F.attach_grad(F.clone(x1)) e2 = F.attach_grad(F.clone(x2)) e3 = F.attach_grad(F.clone(x3)) e4 = F.attach_grad(F.clone(x4)) e = { ('user', 'follows', 'user'): e2, ('user', 'plays', 'game'): e1, ('user', 'wishes', 'game'): e4, ('developer', 'develops', 'game'): e3 } with F.record_grad(): score = edge_softmax(g, e, norm_by=norm_by) r5 = score[('user', 'plays', 'game')] r6 = score[('user', 'follows', 'user')] r7 = score[('developer', 'develops', 'game')] r8 = score[('user', 'wishes', 'game')] F.backward(F.reduce_sum(r5) + F.reduce_sum(r6)) grad_edata_ht = F.cat((F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0) # correctness check assert F.allclose(r1, r5) assert F.allclose(r2, r6) assert F.allclose(r3, r7) assert F.allclose(r4, r8) assert F.allclose(grad_edata_hm, grad_edata_ht)
def assemble(self):
    u = TrialFunction(self.V)
    v = TestFunction(self.V)
    # bilinear form: mass term plus alpha times the diffusion (stiffness) term
    A = inner(u, v) * dx + alpha * inner(grad(u), grad(v)) * dx
    return assemble(A)

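# Hedged usage sketch (editorial addition, assuming legacy FEniCS/DOLFIN with
# ``from dolfin import *`` in scope; ``problem`` and ``f`` are illustrative
# placeholders). The assembled operator is the mass matrix plus ``alpha``
# times the stiffness matrix, so it can back a linear solve such as one
# implicit step of a heat-type equation.
def _example_solve_with_assembled_matrix(problem, f):
    v = TestFunction(problem.V)
    b = assemble(inner(f, v) * dx)            # right-hand side vector
    u = Function(problem.V)
    solve(problem.assemble(), u.vector(), b)  # (M + alpha*K) u = b
    return u
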