def _makekernel(self, **kwargs):
    """Construct the underlying MarginalizedGraphKernel from stored settings.

    Extra keyword arguments are forwarded verbatim to the kernel constructor.
    """
    node_kernel = TensorProduct(element=KroneckerDelta(self.element_prior))
    edge_kernel = TensorProduct(length=SquareExponential(self.edge_length_scale))
    self.kernel = MarginalizedGraphKernel(
        node_kernel,
        edge_kernel,
        q=self.stopping_probability,
        p=self.starting_probability,
        **kwargs
    )
def test_mlgk_starting_probability(caseitem):
    '''custom starting probability'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        # A constant starting probability p(v) = 2.0 should scale the
        # whole-graph kernel by 2.0 * 2.0 relative to the baseline.
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q, p=lambda node: 2.0)
        R = mlgk(G)
        R_nodal = mlgk(G, nodal=True)
        # MLGK serves as the reference (ground-truth) implementation here.
        gnd_R00 = MLGK(G[0], knode, kedge, q, q) * 2.0**2
        gnd_R11 = MLGK(G[1], knode, kedge, q, q) * 2.0**2
        assert (R[0, 0] == pytest.approx(gnd_R00, 1e-5))
        assert (R[1, 1] == pytest.approx(gnd_R11, 1e-5))
        # Row/column offsets of each graph's sub-block in the nodal matrix.
        n = np.array([len(g.nodes) for g in G])
        N = np.cumsum(n)
        start = N - n
        end = N
        # Each pairwise sub-block of the big nodal matrix must equal the
        # nodal kernel evaluated on that pair of graphs alone.
        for i1, j1, g1 in zip(start, end, G):
            for i2, j2, g2 in zip(start, end, G):
                gnd = R_nodal[i1:j1, :][:, i2:j2]
                sub = mlgk([g1], [g2], nodal=True)
                for r1, r2 in zip(sub, gnd):
                    assert (r1 == pytest.approx(r2, 1e-5))
def test_mlgk_on_permuted_graph():
    '''kernel value must be invariant under node permutation'''
    g = Graph.from_ase(molecule('C6H6'))
    # The kernel does not depend on the permutation being tested, so build
    # it once instead of re-creating it on every loop iteration.
    kernel = MarginalizedGraphKernel(
        TensorProduct(element=KroneckerDelta(0.5)),
        TensorProduct(length=SquareExponential(0.1)))
    for _ in range(10):
        # Randomly permute node order; similarity to the original graph
        # must equal the original graph's self-similarity.
        h = g.permute(np.random.permutation(len(g.nodes)))
        assert (kernel([g], [h]).item() == pytest.approx(kernel([g]).item()))
def test_mlgk_diag(caseitem):
    '''diagonal similarities'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R = mlgk(G)
        D = mlgk.diag(G)
        # diag() must reproduce the diagonal of the full Gram matrix.
        assert (len(D) == 2)
        assert (D[0] == pytest.approx(R[0, 0], 1e-7))
        assert (D[1] == pytest.approx(R[1, 1], 1e-7))
        '''nodal diags'''
        R_nodal = mlgk(G, nodal=True)
        # Cosine-normalize the nodal matrix: K = D^-1/2 R D^-1/2.
        d_nodal = np.diag(R_nodal)**-0.5
        K_nodal = np.diag(d_nodal).dot(R_nodal).dot(np.diag(d_nodal))
        '''check submatrices'''
        # Row/column offsets of each graph's nodal sub-block.
        n = np.array([len(g.nodes) for g in G])
        N = np.cumsum(n)
        start = N - n
        end = N
        assert (R_nodal.shape == (N[-1], N[-1]))
        # The nodal Gram matrix must be exactly symmetric.
        assert (np.count_nonzero(R_nodal - R_nodal.T) == 0)
        # Each diagonal sub-block must match the reference implementation.
        for k, (i, j) in enumerate(zip(N - n, N)):
            gnd = MLGK(G[k], knode, kedge, q, q, nodal=True).ravel()
            sub = R_nodal[i:j, :][:, i:j].ravel()
            for r1, r2 in zip(sub, gnd):
                assert (r1 == pytest.approx(r2, 1e-5))
        # After normalization every diagonal entry is exactly 1.
        for i in range(N[-1]):
            assert (K_nodal[i, i] == pytest.approx(1, 1e-7))
        '''check block-diags'''
        # diag(nodal=True) must match the diagonal of the nodal Gram matrix.
        D_nodal = mlgk.diag(G, nodal=True)
        assert (len(D_nodal) == N[-1])
        for k in range(2):
            i = start[k]
            j = end[k]
            sub = D_nodal[i:j]
            gnd = np.diag(R_nodal[i:j, :][:, i:j])
            for r1, r2 in zip(sub, gnd):
                assert (r1 == pytest.approx(r2, 1e-7))
def test_mlgk_kernel_range_check():
    '''constructor warns when micro-kernels may leave the valid range'''
    # These in-range combinations must construct without any warning.
    MarginalizedGraphKernel(
        node_kernel=KroneckerDelta(1e-7),
        edge_kernel=TensorProduct(attribute=SquareExponential(1.0)))
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(feature=KroneckerDelta(0.5)),
        edge_kernel=TensorProduct(attribute=SquareExponential(1.0)))
    # Each of the following (node, edge) kernel pairs must trigger a
    # DeprecationWarning at construction time.
    warned_cases = [
        (KroneckerDelta(0),
         TensorProduct(attribute=SquareExponential(1.0))),
        (TensorProduct(feature=KroneckerDelta(0.5)) + 1,
         SquareExponential(1.0)),
        (TensorProduct(feature=KroneckerDelta(0.5)),
         TensorProduct(attribute=SquareExponential(1.0)) + 1),
        (KroneckerDelta(0.5) * 2,
         TensorProduct(attribute=SquareExponential(1.0))),
        (TensorProduct(feature=KroneckerDelta(0.5)),
         TensorProduct(attribute=SquareExponential(1.0)) * 2),
    ]
    for knode, kedge in warned_cases:
        with pytest.warns(DeprecationWarning):
            MarginalizedGraphKernel(node_kernel=knode, edge_kernel=kedge)
def test_mlgk_gradient(caseitem, nodal):
    '''derivative w.r.t. hyperparameters'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        # NOTE: a stray np.set_printoptions(...) debugging call was removed
        # here; it mutated global NumPy print state as a test side effect.
        R, dR = mlgk(G, nodal=nodal, eval_gradient=True)
        # Gradient tensor has one trailing slice per hyperparameter.
        assert (len(dR.shape) == 3)
        assert (R.shape[0] == dR.shape[0])
        assert (R.shape[1] == dR.shape[1])
        assert (dR.shape[2] >= 1)
        for i in range(len(mlgk.theta)):
            theta = mlgk.theta
            eps = 1e-3
            # Central finite difference in log-hyperparameter space.
            t = np.copy(theta)
            t[i] += eps
            mlgk.theta = t
            Rr = mlgk(G, nodal=nodal)
            t = np.copy(theta)
            t[i] -= eps
            mlgk.theta = t
            Rl = mlgk(G, nodal=nodal)
            mlgk.theta = theta
            dR_dLogt = (Rr - Rl) / (2 * eps)
            # theta stores log-values; chain rule back to the linear scale.
            dLogt_dt = 1 / np.exp(theta)[i]
            dR_dt = dR_dLogt * dLogt_dt
            assert np.allclose(dR[:, :, i], dR_dt, rtol=0.05, atol=0.05)
class Tang2019MolecularKernel:
    """A marginalized graph kernel for **3D molecular structures** as in:
    Tang, Y. H., & de Jong, W. A. (2019). Prediction of atomization energy
    using graph kernel and active learning. *The Journal of chemical
    physics*, 150(4), 044107.
    The kernel can be directly used together with Graph.from_ase() to operate
    on molecular structures.

    Parameters
    ----------
    stopping_probability: float in (0, 1)
        The probability for the random walk to stop during each step.
    starting_probability: float
        The probability for the random walk to start from any node. See the
        `p` kwarg of :class:`graphdot.kernel.marginalized.MarginalizedGraphKernel`
    element_prior: float in (0, 1)
        The baseline similarity between distinct elements --- an element
        always has a similarity 1 to itself.
    edge_length_scale: float in (0, inf)
        length scale of the Gaussian kernel on edge length. A rule of thumb
        is that the similarity decays smoothly from 1 to nearly 0 around
        three times of the length scale.
    """

    def __init__(self, stopping_probability=0.01,
                 starting_probability='uniform',
                 element_prior=0.2,
                 edge_length_scale=0.05, **kwargs):
        self.stopping_probability = stopping_probability
        self.starting_probability = starting_probability
        self.element_prior = element_prior
        self.edge_length_scale = edge_length_scale
        # Build the wrapped kernel up front; extra kwargs are forwarded.
        self._makekernel(**kwargs)

    def _makekernel(self, **kwargs):
        # Compose node/edge base kernels from the stored hyperparameters.
        self.kernel = MarginalizedGraphKernel(
            TensorProduct(element=KroneckerDelta(self.element_prior, 1.0)),
            TensorProduct(length=SquareExponential(self.edge_length_scale)),
            q=self.stopping_probability,
            p=self.starting_probability,
            **kwargs)

    def __call__(self, X, Y=None, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.__call__`
        """
        return self.kernel(X, Y, **kwargs)

    def diag(self, X, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.diag`
        """
        return self.kernel.diag(X, **kwargs)
def test_mlgk_fixed_hyperparameters():
    '''fixed hyperparameters are excluded from theta/bounds/gradients'''
    # Small labeled graph: one branch node with two differently-weighted edges.
    g = nx.Graph()
    g.add_node(0, feature=0)
    g.add_node(1, feature=1)
    g.add_node(2, feature=0)
    g.add_edge(0, 1, attribute=1.0)
    g.add_edge(0, 2, attribute=2.0)
    G = [Graph.from_networkx(g)]
    # V = variable hyperparameter, F = fixed hyperparameter.
    knodeV = TensorProduct(feature=KroneckerDelta(0.5))
    knodeF = TensorProduct(feature=KroneckerDelta(0.5, h_bounds='fixed'))
    kedgeV = TensorProduct(attribute=SquareExponential(1.0))
    kedgeF = TensorProduct(
        attribute=SquareExponential(1.0, length_scale_bounds='fixed'))
    kernelVV = MarginalizedGraphKernel(knodeV, kedgeV)
    kernelVF = MarginalizedGraphKernel(knodeV, kedgeF)
    kernelFV = MarginalizedGraphKernel(knodeF, kedgeV)
    kernelFF = MarginalizedGraphKernel(knodeF, kedgeF)
    # Fixing one hyperparameter removes exactly one entry from theta/bounds.
    assert (len(kernelVV.theta) == len(kernelVF.theta) + 1)
    assert (len(kernelVV.theta) == len(kernelFV.theta) + 1)
    assert (len(kernelVV.theta) == len(kernelFF.theta) + 2)
    assert (len(kernelVV.bounds) == len(kernelVF.bounds) + 1)
    assert (len(kernelVV.bounds) == len(kernelFV.bounds) + 1)
    assert (len(kernelVV.bounds) == len(kernelFF.bounds) + 2)
    Rvv, dRvv = kernelVV(G, eval_gradient=True)
    Rvf, dRvf = kernelVF(G, eval_gradient=True)
    Rfv, dRfv = kernelFV(G, eval_gradient=True)
    Rff, dRff = kernelFF(G, eval_gradient=True)
    # Fixing hyperparameters must not change the kernel values themselves.
    assert (Rvv == pytest.approx(Rvf))
    assert (Rvv == pytest.approx(Rfv))
    assert (Rvv == pytest.approx(Rff))
    # ...but it does shrink the gradient's hyperparameter dimension.
    assert (dRvv.shape[2] == dRvf.shape[2] + 1)
    assert (dRvv.shape[2] == dRfv.shape[2] + 1)
    assert (dRvv.shape[2] == dRff.shape[2] + 2)
    # The remaining gradient slices must match the all-variable kernel's
    # gradient restricted to the active hyperparameters.
    assert (dRvv[:, :, kernelVF.active_theta_mask] == pytest.approx(dRvf))
    assert (dRvv[:, :, kernelFV.active_theta_mask] == pytest.approx(dRfv))
    assert (dRvv[:, :, kernelFF.active_theta_mask] == pytest.approx(dRff))
def test_marginalized_graph_kernel_2nd_launch(benchmark, batch):
    '''benchmark a repeated kernel evaluation on a batch of graphs'''
    graphs = [
        Graph.from_networkx(g, weight='weight')
        for g in make_graphs(batch, 48)
    ]
    node_kernel = TensorProduct(label=KroneckerDelta(0.5))
    edge_kernel = TensorProduct(label=KroneckerDelta(0.5))
    kernel = MarginalizedGraphKernel(node_kernel, edge_kernel)

    def fun():
        # Payload: whole-graph (non-nodal) Gram matrix evaluation.
        kernel(graphs, nodal=False)

    benchmark.pedantic(fun, iterations=3, rounds=3, warmup_rounds=0)
def test_mlgk_dtype():
    '''output dtype should follow the `dtype` constructor argument'''
    g = nx.Graph()
    n = 8
    # Random undirected graph from a symmetric 0/1 adjacency draw.
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)
    dfg = Graph.from_networkx(g, weight='weight')
    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)
    # `np.float` was merely a deprecated alias of the builtin `float` and was
    # removed in NumPy >= 1.24; use `float` directly (identical behavior).
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q,
                                       dtype=dtype)
        assert (mlgk([dfg]).dtype == dtype)
        assert (mlgk.diag([dfg]).dtype == dtype)
def test_mlgk_diag_gradient(caseitem, nodal):
    '''derivative w.r.t. hyperparameters'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R, dR = mlgk.diag(G, nodal=nodal, eval_gradient=True)
        # Diagonal gradient: one column per (active) hyperparameter.
        assert (len(dR.shape) == 2)
        assert (R.shape[0] == dR.shape[0])
        assert (dR.shape[1] >= 1)
        for i in range(len(mlgk.theta)):
            theta = mlgk.theta
            eps = 1e-3
            # Central finite difference in log-hyperparameter space.
            t = np.copy(theta)
            t[i] += eps
            mlgk.theta = t
            Rr = mlgk.diag(G, nodal=nodal, eval_gradient=False)
            t = np.copy(theta)
            t[i] -= eps
            mlgk.theta = t
            Rl = mlgk.diag(G, nodal=nodal, eval_gradient=False)
            mlgk.theta = theta
            dR_dLogt = (Rr - Rl) / (2 * eps)
            # theta stores log-values; chain rule back to the linear scale.
            dLogt_dt = 1 / np.exp(theta)[i]
            dR_dt = dR_dLogt * dLogt_dt
            for a, b in zip(dR[:, i].ravel(), dR_dt.ravel()):
                assert (a == pytest.approx(b, rel=0.05, abs=0.05))
def test_mlgk_lmin(caseitem):
    '''lmin=1 drops the zeroth-step (pure node kernel) contribution'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        g = G[0]
        R0 = mlgk([g], nodal=True, lmin=0)
        R1 = mlgk([g], nodal=True, lmin=1)
        # For every node pair, the lmin=0 value should exceed the lmin=1
        # value by exactly the node-kernel term knode(n1, n2).
        for i, n1 in g.nodes.iterrows():
            for j, n2 in g.nodes.iterrows():
                expected = R1[i, j] + knode(n1, n2)
                assert (R0[i, j] == pytest.approx(expected, abs=1e-7))
def test_mlgk_self_loops():
    '''random dense graphs (with self-loops) versus the reference MLGK'''
    knode = Constant(1.0)
    kedge = Constant(1.0)
    q = 0.1
    mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
    np.random.seed(2)
    for i in range(10):
        size = np.random.randint(4, 20)
        # Symmetric random weight matrix; nonzero diagonal -> self-loops.
        A = np.random.randn(size, size)
        A = A + A.T
        G = [Graph.from_networkx(nx.from_numpy_array(A), weight='weight')]
        K = mlgk(G).item()
        K0 = MLGK(G[0], knode, kedge, q, q, nodal=False)
        assert (K == pytest.approx(K0, 5e-4))
def test_mlgk_large():
    '''a larger random graph versus the reference MLGK implementation'''
    num_nodes = 24
    g = nx.Graph()
    # Build an undirected graph from a random 0/1 adjacency draw, keeping
    # only the strictly-lower-triangular entries as edges.
    for i, row in enumerate(np.random.randint(0, 2, (num_nodes, num_nodes))):
        g.add_node(i, type=0)
        for j in range(i):
            if row[j]:
                g.add_edge(i, j, weight=1)
    dfg = Graph.from_networkx(g, weight='weight')
    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q)
    dot = mlgk([dfg])
    gold = MLGK(dfg, node_kernel, edge_kernel, q, q)
    assert (dot.shape == (1, 1))
    assert (dot.item() == pytest.approx(gold))
def test_mlgk_self_similarity(caseitem):
    '''overall similarities within X'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R = mlgk(G)
        # Cosine normalization: K = D^-1/2 R D^-1/2.
        scale = np.diag(R)**-0.5
        K = np.diag(scale).dot(R).dot(np.diag(scale))
        assert (R.shape == (len(G), len(G)))
        # The Gram matrix must be exactly symmetric.
        assert (np.count_nonzero(R - R.T) == 0)
        # Diagonal entries must match the reference implementation.
        assert (R[0, 0] == pytest.approx(MLGK(G[0], knode, kedge, q, q),
                                         1e-5))
        assert (R[1, 1] == pytest.approx(MLGK(G[1], knode, kedge, q, q),
                                         1e-5))
        # After normalization the diagonal is exactly 1.
        assert (K[0, 0] == pytest.approx(1, 1e-7))
        assert (K[1, 1] == pytest.approx(1, 1e-7))
def test_mlgk_typecheck():
    '''mixing graphs with different node/edge schemas must raise TypeError'''
    node_kernel = Constant(1.0)
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=0.5)
    # Three graphs with mutually incompatible attribute layouts.
    G = [
        Graph.from_networkx(unlabeled_graph1),
        Graph.from_networkx(labeled_graph1),
        Graph.from_networkx(weighted_graph1, weight='w')
    ]
    # Every ordered pair of distinct graph types must be rejected.
    for a in range(len(G)):
        for b in range(len(G)):
            if a == b:
                continue
            with pytest.raises(TypeError):
                mlgk([G[a], G[b]])
def test_mlgk_cross_similarity(caseitem):
    '''similarities across X and Y'''
    _, case = caseitem
    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R = mlgk(G)
        # Evaluating with an explicit Y must reproduce the corresponding
        # row/column slices of the full Gram matrix.
        comparisons = [
            (mlgk(G[:1], G), R[:1, :]),
            (mlgk(G[1:], G), R[1:, :]),
            (mlgk(G, G[:1]), R[:, :1]),
            (mlgk(G, G[1:]), R[:, 1:]),
        ]
        for sub, ref in comparisons:
            for x, y in zip(sub.ravel(), ref.ravel()):
                assert (x == pytest.approx(y, 1e-6))
# Build molecular graphs from SMILES strings via RDKit, then split the data
# into train/test sets by striding.
graphs = list(map(lambda smi: Graph.from_rdkit(MolFromSmiles(smi)), smiles))
train_X = graphs[::2]
train_y = energy[::2]
test_X = graphs[1::2]
test_y = energy[1::2]
# Inducing ("core") points for the low-rank GPR approximation: every other
# training sample.
core = train_X[::2]
# Additive node/edge base kernels: each atom/bond attribute gets its own
# weighted micro-kernel; `.normalized` rescales the combination.
kernel = MarginalizedGraphKernel(
    node_kernel=Additive(
        aromatic=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        atomic_number=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        charge=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        chiral=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        hcount=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        hybridization=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        ring_list=kC(0.5, (0.01, 1.0)) * kConv(kDelta(0.5, (0.1, 0.9)))
    ).normalized,
    edge_kernel=Additive(
        aromatic=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        conjugated=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        order=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        ring_stereo=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        stereo=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9))
    ).normalized,
    # Starting probability is itself a tunable (Uniform) hyperparameter.
    p=Uniform(1.0, (0.1, 40.0)),
    q=0.05)
# Fit the low-rank GP on the core/train split, then predict the held-out set.
gpr = LowRankApproximateGPR(kernel=kernel, alpha=1.0, optimizer=True)
gpr.fit(core, train_X, train_y, verbose=True)
predict_y = gpr.predict(test_X)
print('Prediction:', predict_y)
print('Ground truth:', test_y)
def fun():
    # Benchmark payload: construct the kernel (knode/kedge are captured from
    # the enclosing scope) so that construction cost alone can be timed.
    return MarginalizedGraphKernel(knode, kedge)
def fun():
    # Benchmark payload: build the kernel (knode/kedge captured from the
    # enclosing scope) and evaluate the whole-graph Gram matrix.
    mlgk = MarginalizedGraphKernel(knode, kedge)
    mlgk(graphs, nodal=False)
molecules = [ molecule('CH4'), molecule('NH3'), molecule('CH3OH'), molecule('H2O'), ] graphs = [Graph.from_ase(m) for m in molecules] metric = MaxiMin(node_kernel=TensorProduct(element=KroneckerDelta(0.5)), edge_kernel=TensorProduct(length=SquareExponential(0.1)), q=0.01) kernel = Normalization( MarginalizedGraphKernel( node_kernel=TensorProduct(element=KroneckerDelta(0.5)), edge_kernel=TensorProduct(length=SquareExponential(0.1)), q=0.01)) def check_hausdorff(X, Y=None): # GPU direct computation D = metric(X, Y) # Manual approach K = kernel(X, Y, nodal=True) d = np.sqrt(np.maximum(0, 2 - 2 * K)) starts1 = np.cumsum([0] + [len(g.nodes) for g in X])[:-1] starts2 = np.cumsum([0] + [len(g.nodes) for g in Y])[:-1] if Y else starts1 d1 = np.maximum.reduceat(np.minimum.reduceat(d, starts2, axis=1), starts1, axis=0) d2 = np.maximum.reduceat(np.minimum.reduceat(d, starts1, axis=0),
# Path graph: 0 --- 1 --- 2
g2.add_node(2)
g2.add_edge(0, 1)
g2.add_edge(1, 2)
# Triangle graph:
# 0 --- 1
#  \   /
#    2
g3 = nx.Graph()
g3.add_node(0)
g3.add_node(1)
g3.add_node(2)
g3.add_edge(0, 1)
g3.add_edge(0, 2)
g3.add_edge(1, 2)
# define trivial node and edge kernels (constant 1 everywhere)
knode = Constant(1.0)
kedge = Constant(1.0)
# compose the marginalized graph kernel and compute pairwise similarity
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)
R = mlgk([Graph.from_networkx(g) for g in [g1, g2, g3]])
# normalize the similarity matrix: K = D^-1/2 R D^-1/2
d = np.diag(R)**-0.5
K = np.diag(d).dot(R).dot(np.diag(d))
# all entries should be approximately 1 plus round-off error
print(K)
from graphdot import Graph
from graphdot.kernel.marginalized import MarginalizedGraphKernel
from graphdot.kernel.fix import Normalization
from graphdot.microkernel import (TensorProduct, DotProduct, Constant)

# Two small molecules whose nodes carry a fixed-length 'soap' descriptor
# vector and whose edges carry a scalar weight 'w'.
g1 = nx.Graph()
for node_id in (0, 1):
    g1.add_node(node_id, soap=[0.5, 1.5, 2.5, 0.5])
g1.add_edge(0, 1, w=1.0)

g2 = nx.Graph()
node_soaps = {
    0: [0.5, 1.5, 2.5, 3.5],
    1: [1.5, 1.5, 0.5, 3.5],
    2: [0.5, 2.5, 2.5, 0.5],
}
for node_id, soap in node_soaps.items():
    g2.add_node(node_id, soap=soap)
for u, v, weight in [(0, 1, 2.0), (0, 2, 0.5), (1, 2, 0.5)]:
    g2.add_edge(u, v, w=weight)

# Compose a normalized marginalized graph kernel: node similarity is the
# normalized dot product of the 'soap' vectors, edges are compared trivially.
mlgk = Normalization(
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(soap=DotProduct().normalized),
        edge_kernel=Constant(1),
        q=0.05))

G = [Graph.from_networkx(g, weight='w') for g in [g1, g2]]
print(f'Whole-graph similarity\n{mlgk(G)}')
print(f'Nodal similarity\n{mlgk(G, nodal=True)}')