def test_mlgk_diag(caseitem):
    """Check ``MarginalizedGraphKernel.diag`` against the full kernel matrix.

    For every stopping probability listed in the case, both the graph-level
    diagonal and the node-level (``nodal=True``) diagonal are compared with
    the corresponding entries of the matrices obtained by calling the kernel
    directly. The node-level diagonal blocks are additionally compared with
    the ``MLGK`` reference evaluated on each graph individually.

    Parameters
    ----------
    caseitem: tuple
        A ``(name, case)`` pair. ``case`` supplies the ``'graphs'``,
        ``'knode'``, ``'kedge'`` and ``'q'`` entries read below. The
        assertions expect exactly two graphs.
    """
    _, spec = caseitem
    graphs = spec['graphs']
    node_kernel = spec['knode']
    edge_kernel = spec['kedge']

    for q in spec['q']:
        kernel = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q)

        # graph-level diagonal vs. diagonal of the full kernel matrix
        full = kernel(graphs)
        diagonal = kernel.diag(graphs)
        assert len(diagonal) == 2
        assert diagonal[0] == pytest.approx(full[0, 0], rel=1e-7)
        assert diagonal[1] == pytest.approx(full[1, 1], rel=1e-7)

        # node-level matrix, plus a copy rescaled by diag**-0.5 on both
        # sides, whose diagonal should therefore be all ones
        full_nodal = kernel(graphs, nodal=True)
        inv_sqrt = np.diag(full_nodal)**-0.5
        scaling = np.diag(inv_sqrt)
        normalized = scaling.dot(full_nodal).dot(scaling)

        # row/column range [starts[k], stops[k]) occupied by graph k
        sizes = np.array([len(g.nodes) for g in graphs])
        stops = np.cumsum(sizes)
        starts = stops - sizes
        total = stops[-1]

        assert full_nodal.shape == (total, total)
        assert np.count_nonzero(full_nodal - full_nodal.T) == 0

        # each diagonal block must reproduce the per-graph reference result
        for k, (lo, hi) in enumerate(zip(starts, stops)):
            expected = MLGK(graphs[k], node_kernel, edge_kernel, q, q,
                            nodal=True).ravel()
            actual = full_nodal[lo:hi, lo:hi].ravel()
            for got, ref in zip(actual, expected):
                assert got == pytest.approx(ref, rel=1e-5)

        for idx in range(total):
            assert normalized[idx, idx] == pytest.approx(1, rel=1e-7)

        # nodal diag() vs. the diagonal of each block of the nodal matrix
        diag_nodal = kernel.diag(graphs, nodal=True)
        assert len(diag_nodal) == total
        for k in range(2):
            lo, hi = starts[k], stops[k]
            actual = diag_nodal[lo:hi]
            expected = np.diag(full_nodal[lo:hi, lo:hi])
            for got, ref in zip(actual, expected):
                assert got == pytest.approx(ref, rel=1e-7)
def test_mlgk_diag_gradient(caseitem, nodal):
    """Compare the analytic gradient of ``diag`` with central differences.

    For every stopping probability in the case, each entry of the kernel's
    ``theta`` vector is perturbed by ``+/- eps`` and the resulting finite
    difference is compared, column by column, with the gradient returned by
    ``diag(..., eval_gradient=True)``.

    Parameters
    ----------
    caseitem: tuple
        A ``(name, case)`` pair. ``case`` supplies the ``'graphs'``,
        ``'knode'``, ``'kedge'`` and ``'q'`` entries read below.
    nodal:
        Forwarded unchanged as the ``nodal`` argument of every ``diag`` call.
    """
    _, spec = caseitem
    graphs = spec['graphs']
    node_kernel = spec['knode']
    edge_kernel = spec['kedge']

    def diag_with_shift(kernel, base, index, delta):
        # evaluate diag() with base[index] shifted by delta; the caller is
        # responsible for restoring kernel.theta afterwards
        shifted = np.copy(base)
        shifted[index] += delta
        kernel.theta = shifted
        return kernel.diag(graphs, nodal=nodal, eval_gradient=False)

    for q in spec['q']:
        kernel = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q)
        value, grad = kernel.diag(graphs, nodal=nodal, eval_gradient=True)

        # gradient is 2-D: one row per diagonal entry, at least one column
        assert len(grad.shape) == 2
        assert value.shape[0] == grad.shape[0]
        assert grad.shape[1] >= 1

        for i in range(len(kernel.theta)):
            theta = kernel.theta
            eps = 1e-3

            upper = diag_with_shift(kernel, theta, i, eps)
            lower = diag_with_shift(kernel, theta, i, -eps)
            kernel.theta = theta

            # central difference in theta, rescaled by 1 / exp(theta[i])
            # before comparison with the analytic gradient column
            fd_wrt_theta = (upper - lower) / (2 * eps)
            rescale = 1 / np.exp(theta)[i]
            fd_grad = fd_wrt_theta * rescale

            for analytic, numeric in zip(grad[:, i].ravel(), fd_grad.ravel()):
                assert analytic == pytest.approx(numeric, rel=0.05, abs=0.05)
class Tang2019MolecularKernel:
    """A marginalized graph kernel for **3D molecular structures** as in:
    Tang, Y. H., & de Jong, W. A. (2019). Prediction of atomization energy
    using graph kernel and active learning. *The Journal of chemical physics*,
    150(4), 044107.

    The kernel can be directly used together with Graph.from_ase() to operate
    on molecular structures.

    Parameters
    ----------
    stopping_probability: float in (0, 1)
        The probability for the random walk to stop during each step.
    starting_probability: float
        The probability for the random walk to start from any node. See the
        `p` kwarg of
        :class:`graphdot.kernel.marginalized.MarginalizedGraphKernel`
    element_prior: float in (0, 1)
        The baseline similarity between distinct elements --- an element
        always has a similarity of 1 to itself.
    edge_length_scale: float in (0, inf)
        Length scale of the Gaussian kernel on edge length. A rule of thumb is
        that the similarity decays smoothly from 1 to nearly 0 around three
        times of the length scale.
    **kwargs:
        Forwarded unchanged to the underlying ``MarginalizedGraphKernel``.
    """

    def __init__(self, stopping_probability=0.01,
                 starting_probability='uniform', element_prior=0.2,
                 edge_length_scale=0.05, **kwargs):
        self.stopping_probability = stopping_probability
        self.starting_probability = starting_probability
        self.element_prior = element_prior
        self.edge_length_scale = edge_length_scale
        self._makekernel(**kwargs)

    def _makekernel(self, **kwargs):
        """Build the wrapped kernel from the stored hyperparameters and keep
        it in ``self.kernel``."""
        # nodes are compared on their 'element' attribute
        node_kernel = TensorProduct(
            element=KroneckerDelta(self.element_prior, 1.0)
        )
        # edges are compared on their 'length' attribute
        edge_kernel = TensorProduct(
            length=SquareExponential(self.edge_length_scale)
        )
        self.kernel = MarginalizedGraphKernel(
            node_kernel,
            edge_kernel,
            q=self.stopping_probability,
            p=self.starting_probability,
            **kwargs
        )

    def __call__(self, X, Y=None, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.__call__`
        """
        wrapped = self.kernel
        return wrapped(X, Y, **kwargs)

    def diag(self, X, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.diag`
        """
        wrapped = self.kernel
        return wrapped.diag(X, **kwargs)
def test_mlgk_dtype():
    """Check that kernel outputs carry the requested floating-point dtype.

    A random 8-node graph is built, then ``MarginalizedGraphKernel`` is
    constructed once per dtype and both ``__call__`` and ``diag`` are
    required to return arrays of exactly that dtype.
    """
    g = nx.Graph()
    n = 8
    # random lower-triangular adjacency: node i may connect to any j < i
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    # `np.float` was only an alias for the builtin `float`; it was deprecated
    # in NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q,
                                       dtype=dtype)
        assert mlgk([dfg]).dtype == dtype
        assert mlgk.diag([dfg]).dtype == dtype