def mutagenicity_graph(filepath=None):
    """Return the Mutagenicity graph dataset, fetching it first if missing.

    The dataset is downloaded and extracted only when no ``Mutagenicity``
    entry exists below ``filepath``; afterwards it is loaded via
    ``mutagenicity_load``.

    Args:
        filepath (str): Directory holding the dataset. If None, the
            ``data/mutagen`` folder below the user database directory
            is used. Default is None.

    Returns:
        list: [labels, nodes, edge_indices, edges, atoms]

        - labels (list): Mutagenicity label (0,1).
        - nodes (list): Atoms as atomic-number array.
        - edge_indices (list): Bond indices (i,j).
        - edges (list): Bond type.
        - atoms (list): Atom list as string.
    """
    if filepath is None:
        base_dir = setup_user_database_directory()
        filepath = os.path.join(base_dir, "data", "mutagen")
    print("Database path:", filepath)

    # The presence of the extracted folder is the "already cached" marker.
    dataset_dir = os.path.join(filepath, "Mutagenicity")
    already_cached = os.path.exists(dataset_dir)
    if not already_cached:
        mutagenicity_download_dataset(filepath)
        mutagenicity_extract_dataset(filepath)

    return mutagenicity_load(filepath)
def cora_graph(filepath=None):
    """Load and convert the cora citation dataset from ``cora.npz``.

    The archive is downloaded via ``cora_download_dataset`` when
    ``cora.npz`` is not found below ``filepath``. Its entries are then read
    into a plain dict and handed to ``cora_make_graph``.

    Args:
        filepath (str): Path to dataset directory. If None, the
            ``data/cora`` folder below the user database directory is used.
            Default is None.

    Returns:
        list: [adj_matrix, X, labels]

        - adj_matrix (sp.csr_matrix): Adjacency matrix.
        - X (sp.csr_matrix): Node features.
        - labels (np.array): Labels.
    """
    # Kept unconditional (as in the original) so that whatever setup the
    # helper performs still happens when an explicit `filepath` is given.
    user_default_base = setup_user_database_directory()
    if filepath is None:
        filepath = os.path.join(str(user_default_base), "data", "cora")
    print("Database path:", filepath)

    npz_path = os.path.join(filepath, "cora.npz")
    if not os.path.exists(npz_path):
        cora_download_dataset(filepath)

    # NOTE(security): allow_pickle=True can execute arbitrary code while
    # unpickling object arrays; only use this with a trusted cora.npz.
    # The NpzFile keeps the archive open, so read every entry into a dict
    # inside the context manager and let it close the file handle afterwards.
    with np.load(npz_path, allow_pickle=True) as npz_file:
        loader = dict(npz_file)

    data = cora_make_graph(loader)
    return data
def mutag_graph(filepath=None):
    """Return the MUTAG dataset, downloading and extracting it if needed.

    Args:
        filepath (str): Directory holding the dataset. If None, the
            ``data/mutagen`` folder below the user database directory
            is used. Default is None.

    Returns:
        list: [labels, nodes, edge_indices, edges]

        - labels (np.array): Labels.
        - nodes (list): Node features.
        - edge_indices (list): List of edge indices of connections per molecule.
        - edges (list): List of edge features.
    """
    if filepath is None:
        base_dir = setup_user_database_directory()
        filepath = os.path.join(base_dir, "data", "mutagen")
    print("Database path:", filepath)

    # Fetch the raw files only when the extracted "MUTAG" entry is absent.
    extracted_dir = os.path.join(filepath, "MUTAG")
    if not os.path.exists(extracted_dir):
        mutag_download_dataset(filepath)
        mutag_extract_dataset(filepath)

    print("Making graph ...", end='', flush=True)
    graph_data = mutag_load(filepath)
    print('done')
    return graph_data
def cora_graph(filepath=None):
    """Load and convert the cora citation dataset from the ``cora.tgz`` archive.

    Downloads ``cora.tgz`` when it is missing, extracts it when the
    ``cora_lu`` folder is missing, and then builds the graph from
    ``cora_lu/cora`` via ``cora_make_graph``.

    Args:
        filepath (str): Path to dataset directory. If None, the
            ``data/cora`` folder below the user database directory is used.
            Default is None.

    Returns:
        list: [nodes, edge_indices, labels, mapping]

        - nodes (np.array): Node features of shape (2708,1434)
        - edge_indices (np.array): Edge indices of shape (5429,2)
        - labels (np.array): Class labels of citations of shape (2708,)
        - mapping (dict): Class label mapping.
    """
    if filepath is None:
        base_dir = setup_user_database_directory()
        filepath = os.path.join(str(base_dir), "data", "cora")
    print("Database path:", filepath)

    # Download and extraction are checked independently of each other.
    archive_path = os.path.join(filepath, "cora.tgz")
    if not os.path.exists(archive_path):
        cora_download_dataset(filepath)

    extracted_root = os.path.join(filepath, "cora_lu")
    if not os.path.exists(extracted_root):
        cora_extract_dataset(filepath)

    raw_data_dir = os.path.join(filepath, "cora_lu", "cora")
    return cora_make_graph(raw_data_dir)
def cora_graph(filepath=None):
    """Load and convert the cora citation dataset.

    Ensures ``cora.tgz`` is present (downloading it otherwise) and that the
    ``cora_lu`` folder exists (extracting otherwise), then converts the
    content of ``cora_lu/cora`` with ``cora_make_graph``.

    Args:
        filepath (str): Path to dataset directory. If None, the
            ``data/cora`` folder below the user database directory is used.
            Default is None.

    Returns:
        list: [A, X, labels]

        - A (sp.csr_matrix): Adjacency matrix.
        - X (sp.csr_matrix): Node features.
        - labels (np.array): Labels.
    """
    if filepath is None:
        default_root = setup_user_database_directory()
        filepath = os.path.join(default_root, "data", "cora")
    print("Database path:", filepath)

    tgz_present = os.path.exists(os.path.join(filepath, "cora.tgz"))
    if not tgz_present:
        cora_download_dataset(filepath)

    # Re-extract only when the unpacked folder is not there yet.
    unpacked_present = os.path.exists(os.path.join(filepath, "cora_lu"))
    if not unpacked_present:
        cora_extract_dataset(filepath)

    source_dir = os.path.join(filepath, "cora_lu", "cora")
    graph = cora_make_graph(source_dir)
    return graph
def qm9_graph(filepath=None, max_distance=4, max_neighbours=15, gauss_distance=None, max_mols=133885):
    """Get list of graph np.arrays for the qm9 dataset.

    When ``qm9.pickle`` is missing below ``filepath`` the dataset is
    downloaded, extracted, written to a pickle and the extracted files are
    removed again; otherwise the cached pickle is loaded. The molecules are
    then converted with ``make_qm9_graph``.

    Args:
        filepath (str): Filepath to database. If None, the ``data/qm``
            folder below the user database directory is used.
        max_distance (int): Forwarded to ``make_qm9_graph``. Default is 4.
        max_neighbours (int): Forwarded to ``make_qm9_graph``. Default is 15.
        gauss_distance (dict): Forwarded to ``make_qm9_graph``. If None,
            ``{'gbins': 20, 'grange': 4, 'gsigma': 0.4}`` is used.
        max_mols (int): Maximum number of molecules to take from qm9.
            Default is 133885.

    Returns:
        list: List of graph props [labels, nodes, edges, edge_idx, gstates]

        - labels: All labels of qm9
        - nodes: List of atomic numbers for embedding layer
        - edges: Edge features (inverse distance, gauss distance)
        - edge_idx: Edge indices (N,2)
        - gstates: Graph states, mean molecular weight - 7 g/mol
    """
    if gauss_distance is None:
        gauss_distance = {'gbins': 20, 'grange': 4, 'gsigma': 0.4}

    # The user database directory is always set up, even with an explicit path.
    base_dir = setup_user_database_directory()
    if filepath is None:
        filepath = os.path.join(base_dir, "data", "qm")
    print("Database path:", filepath)

    pickle_path = os.path.join(filepath, "qm9.pickle")
    if os.path.exists(pickle_path):
        # Cached pickle available: just read it back.
        print("Loading qm9.pickle ...", end='', flush=True)
        with open(pickle_path, 'rb') as handle:
            molecules = pickle.load(handle)
        print('done')
    else:
        # No cache yet: fetch the raw data, build the pickle, clean up.
        qm9_download_dataset(filepath)
        qm9_extract_dataset(filepath)
        molecules = qm9_write_pickle(filepath)
        qm9_remove_extracted_dataset(filepath)

    # Make graph
    print("Making graph ...", end='', flush=True)
    graph_props = make_qm9_graph(
        molecules,
        max_distance=max_distance,
        max_neighbours=max_neighbours,
        gauss_distance=gauss_distance,
        max_mols=max_mols,
    )
    print('done')
    return graph_props