def parse_input(self, X):
    """Parse the input graphs and build the NSPD feature matrices.

    Parameters
    ----------
    X : iterable
        Each element must be one of:

        * an empty iterable, which is skipped with a warning,
        * an iterable with exactly three elements, passed positionally
          to ``Graph`` (graph structure, node labels, edge labels),
        * a ``Graph`` object, which is copied through its adjacency
          matrix and its vertex / edge labels.

    Returns
    -------
    M : dict
        Maps every ``(r, d)`` pair (radius, distance) for which at least
        one feature was counted to a ``csr_matrix`` of dtype ``int64``.
        Rows are the parsed (non-skipped) graphs in input order; columns
        are the indices assigned to pairs of neighborhood hashes.  When
        ``self._method_calling == 3`` the column indices already stored
        in ``self._fit_keys`` are reused and unseen pairs are appended
        after them.

    Raises
    ------
    TypeError
        If ``X`` is not iterable or an element has an unsupported form.
    ValueError
        If no graph was parsed (empty input or only empty elements).

    Notes
    -----
    With ``self._method_calling == 1`` the method stores the key index in
    ``self._fit_keys`` and the graph count in ``self._ngx``; with
    ``self._method_calling == 3`` it stores the graph count in
    ``self._ngy``.

    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``.  Imported locally so the block is self-contained.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2 fallback
        from collections import Iterable

    if not isinstance(X, Iterable):
        raise TypeError('input must be an iterable\n')

    # Number of graphs actually parsed (skipped elements are not counted)
    ng = 0

    # (r, d) -> {(graph row, column index): count}
    data = collections.defaultdict(dict)

    # (r, d) -> {hash pair: column index} for keys met in this call
    all_keys = collections.defaultdict(dict)

    for (idx, x) in enumerate(iter(X)):
        is_iter = False
        if isinstance(x, Iterable):
            is_iter, x = True, list(x)

        if is_iter and len(x) in [0, 3]:
            if len(x) == 0:
                warnings.warn('Ignoring empty element on index: ' + str(idx))
                continue
            g = Graph(x[0], x[1], x[2])
            g.change_format("adjacency")
        elif type(x) is Graph:
            g = Graph(
                x.get_adjacency_matrix(),
                x.get_labels(purpose="adjacency", label_type="vertex"),
                x.get_labels(purpose="adjacency", label_type="edge"))
        else:
            raise TypeError('each element of X must have either '
                            'a graph with labels for node and edge '
                            'or 3 elements consisting of a graph '
                            'type object, labels for vertices and '
                            'labels for edges.')

        # Bring to the desired format
        g.change_format(self._graph_format)

        # Take the vertices
        vertices = set(g.get_vertices(purpose=self._graph_format))

        # Extract the edge dictionary and convert the edges to tuples
        ed = g.get_edge_dictionary()
        edges = {(j, k) for j in ed.keys() for k in ed[j].keys()}

        # Extract labels for nodes and for edges
        Lv = g.get_labels(purpose=self._graph_format)
        Le = g.get_labels(purpose=self._graph_format, label_type="edge")

        # Produce all the neighborhoods and the distance pairs
        # up to the desired radius and maximum distance
        N, D, D_pair = g.produce_neighborhoods(
            self.r, purpose="dictionary", with_distances=True, d=self.d)

        # Hash all the neighborhoods
        H = self._hash_neighborhoods(vertices, edges, Lv, Le, N, D_pair)

        if self._method_calling == 1:
            for d in range(self.d + 1):
                if d not in D:
                    continue
                for (A, B) in D[d]:
                    for r in range(self.r + 1):
                        key = (H[r, A], H[r, B])
                        keys = all_keys[r, d]
                        col = keys.get(key, None)
                        if col is None:
                            # First time this pair is met: next free column
                            col = len(keys)
                            keys[key] = col
                        counts = data[r, d]
                        counts[ng, col] = counts.get((ng, col), 0) + 1

        elif self._method_calling == 3:
            for d in range(self.d + 1):
                if d not in D:
                    continue
                for (A, B) in D[d]:
                    # Based on the edges of the bidirected graph
                    for r in range(self.r + 1):
                        keys = all_keys[r, d]
                        fit_keys = self._fit_keys[r, d]
                        key = (H[r, A], H[r, B])
                        # Prefer the column assigned during fit
                        col = fit_keys.get(key, None)
                        if col is None:
                            col = keys.get(key, None)
                            if col is None:
                                # Unseen pair: place it after fitted columns
                                col = len(keys) + len(fit_keys)
                                keys[key] = col
                        counts = data[r, d]
                        counts[ng, col] = counts.get((ng, col), 0) + 1
        ng += 1

    if ng == 0:
        raise ValueError('parsed input is empty')

    if self._method_calling == 1:
        # A feature matrix for all levels
        M = dict()
        for (key, counts) in iteritems(data):
            if len(counts) == 0:
                continue
            indexes, values = zip(*iteritems(counts))
            rows, cols = zip(*indexes)
            M[key] = csr_matrix((values, (rows, cols)),
                                shape=(ng, len(all_keys[key])),
                                dtype=np.int64)
        self._fit_keys = all_keys
        self._ngx = ng

    elif self._method_calling == 3:
        # A feature matrix for all levels
        M = dict()
        for (key, counts) in iteritems(data):
            if len(counts) == 0:
                continue
            indexes, values = zip(*iteritems(counts))
            rows, cols = zip(*indexes)
            M[key] = csr_matrix(
                (values, (rows, cols)),
                shape=(ng, len(all_keys[key]) + len(self._fit_keys[key])),
                dtype=np.int64)
        self._ngy = ng

    return M
def parse_input(self, X):
    """Parse the input and precompute features for multiscale_laplacian.

    Parameters
    ----------
    X : iterable
        Each element must be one of:

        * an empty iterable, which is skipped with a warning,
        * an iterable with two or three elements; the first two (graph
          structure and node features) are passed to ``Graph`` and a
          third element, if present, is ignored,
        * a ``Graph`` object, which is converted in place through
          ``desired_format(self._graph_format)``.

    Returns
    -------
    out : list
        One tuple ``(A, phi, phi_outer, Q, L)`` per parsed graph, where
        ``A`` is the adjacency matrix, ``phi`` the array of node
        features, ``phi_outer`` the product ``phi . phi^T``, ``Q`` a
        dictionary ``level -> key -> {"n": neighborhood indexes,
        "l": inverse of the regularised neighborhood Laplacian}`` for
        levels ``1..self.L`` and ``L`` the inverse of the Laplacian of
        ``A`` after its diagonal is incremented by ``self.heta``.

    Raises
    ------
    TypeError
        If ``X`` is not iterable, an element has an unsupported form or
        the node features cannot be turned into lists.
    ValueError
        If no graph was parsed (empty input or only empty elements).

    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``.  Imported locally so the block is self-contained.
    try:
        from collections.abc import Iterable
    except ImportError:  # Python 2 fallback
        from collections import Iterable

    if not isinstance(X, Iterable):
        raise TypeError('input must be an iterable\n')

    ng = 0
    out = list()
    start = time.time()
    for (idx, x) in enumerate(iter(X)):
        is_iter = False
        if isinstance(x, Iterable):
            is_iter, x = True, list(x)

        if is_iter and len(x) in [0, 2, 3]:
            if len(x) == 0:
                warnings.warn('Ignoring empty element on index: ' + str(idx))
                continue
            x = Graph(x[0], x[1], {}, self._graph_format)
        elif type(x) is Graph:
            # Graph objects are accepted as they are; only their format
            # is adjusted.  (The test used to be inverted, which rejected
            # every Graph and called ``desired_format`` on anything else.)
            x.desired_format(self._graph_format)
        else:
            # NOTE(review): the message says "at least 1" but one-element
            # iterables end up here too; text kept for compatibility.
            raise TypeError('each element of X must be either a '
                            'graph or an iterable with at least 1 '
                            'and at most 3 elements\n')

        ng += 1
        phi_d = x.get_labels()
        A = x.get_adjacency_matrix()
        N = x.produce_neighborhoods(r=self.L, sort_neighbors=False)
        try:
            phi = np.array([list(phi_d[i]) for i in range(A.shape[0])])
        except TypeError:
            raise TypeError('Features must be iterable and castable '
                            'in total to a numpy array.')
        phi_outer = np.dot(phi, phi.T)

        # Regularised inverse Laplacian of the whole graph
        Lap = laplacian(A).astype(float)
        _increment_diagonal_(Lap, self.heta)
        L = inv(Lap)

        Q = dict()
        for level in range(1, self.L + 1):
            Q[level] = dict()
            for (key, item) in iteritems(N[level]):
                if len(item) < A.shape[0]:
                    # Proper sub-graph: invert the Laplacian of the
                    # adjacency restricted to the neighborhood
                    laplac = laplacian(A[item, :][:, item]).astype(float)
                    _increment_diagonal_(laplac, self.heta)
                    laplac = inv(laplac)
                else:
                    # Neighborhood as large as the graph: reuse L
                    laplac = L
                Q[level][key] = {"n": np.array(item), "l": laplac}

        out.append((A, phi, phi_outer, Q, L))

    if self.verbose:
        print("Preprocessing took:", time.time() - start, "s.")
    if ng == 0:
        raise ValueError('parsed input is empty')
    return out