Esempio n. 1
0
def mutagenicity_graph(filepath=None):
    """
    Load the Mutagenicity dataset, fetching and unpacking it if it is missing.

    The dataset counts as present when a ``Mutagenicity`` entry exists
    inside ``filepath``; otherwise it is downloaded and extracted first.

    Args:
        filepath (str): Directory holding the dataset. If None, defaults to
            ``<user database directory>/data/mutagen``.

    Returns:
        list: [labels, nodes, edge_indices, edges, atoms]

        - labels (list): Mutagenicity label (0,1).
        - nodes (list): Atoms as atomic number array.
        - edge_indices (list): Bond indices (i,j).
        - edges (list): Bond type.
        - atoms (list): Atom list as string.
    """
    if filepath is None:
        base_dir = setup_user_database_directory()
        filepath = os.path.join(base_dir, "data", "mutagen")

    print("Database path:", filepath)

    # The extracted dataset lives in a "Mutagenicity" entry below filepath.
    extracted_dir = os.path.join(filepath, "Mutagenicity")
    dataset_missing = not os.path.exists(extracted_dir)
    if dataset_missing:
        mutagenicity_download_dataset(filepath)
        mutagenicity_extract_dataset(filepath)

    return mutagenicity_load(filepath)
Esempio n. 2
0
def cora_graph(filepath=None):
    """
    Load and convert cora citation dataset.

    The dataset is read from ``cora.npz`` inside ``filepath``; if that file
    does not exist it is downloaded first.

    Args:
        filepath (str): Path to dataset. If None, defaults to
            ``<user database directory>/data/cora``.

    Returns:
        list: [adj_matrix,X,labels]

        - adj_matrix (sp.csr_matrix): Adjacency matrix.
        - X (sp.csr_matrix): Node features.
        - labels (np.array): Labels.
    """
    # Called unconditionally (as before), so whatever setup this helper
    # performs still happens when a custom filepath is supplied.
    user_default_base = setup_user_database_directory()
    if filepath is None:
        filepath = os.path.join(str(user_default_base), "data", "cora")

    print("Database path:", filepath)
    npz_path = os.path.join(filepath, "cora.npz")
    if not os.path.exists(npz_path):
        cora_download_dataset(filepath)

    # NOTE(review): allow_pickle=True lets the archive execute arbitrary code
    # on load; only use this with a trusted cora.npz.
    # np.load returns an NpzFile that keeps the archive open; materialize all
    # arrays into a plain dict inside the context manager so the file handle
    # is closed instead of leaking.
    with np.load(npz_path, allow_pickle=True) as npz_file:
        loader = dict(npz_file)

    return cora_make_graph(loader)
Esempio n. 3
0
def mutag_graph(filepath=None):
    """
    Load the MUTAG dataset, fetching and unpacking it if it is missing.

    The dataset counts as present when a ``MUTAG`` entry exists inside
    ``filepath``; otherwise it is downloaded and extracted first.

    Args:
        filepath (str): Directory holding the dataset. If None, defaults to
            ``<user database directory>/data/mutagen``.

    Returns:
        list: [labels, nodes, edge_indices, edges]

        - labels (np.array): Labels.
        - nodes (list): Node features.
        - edge_indices (list): List of edge indices of connections per molecule.
        - edges (list): List of edge features.
    """
    if filepath is None:
        base_dir = setup_user_database_directory()
        filepath = os.path.join(base_dir, "data", "mutagen")

    print("Database path:", filepath)

    extracted_dir = os.path.join(filepath, "MUTAG")
    already_extracted = os.path.exists(extracted_dir)
    if not already_extracted:
        mutag_download_dataset(filepath)
        mutag_extract_dataset(filepath)

    # Progress message is completed with 'done' once loading has finished.
    print("Making graph ...", end='', flush=True)
    graph_data = mutag_load(filepath)
    print('done')

    return graph_data
Esempio n. 4
0
def cora_graph(filepath=None):
    """
    Load and convert the cora citation dataset from its extracted archive.

    Downloads ``cora.tgz`` into ``filepath`` if it is absent, extracts it if
    the ``cora_lu`` folder is absent, then builds the graph from
    ``cora_lu/cora``.

    Args:
        filepath (str): Path to dataset. If None, defaults to
            ``<user database directory>/data/cora``.

    Returns:
        list: [nodes, edge_indices, labels, mapping]

        - nodes (np.array): Node features of shape (2708,1434)
        - edge_indices (np.array): Edge indices of shape (5429,2)
        - labels (np.array): Class labels of citations of shape (2708,)
        - mapping (dict): Class label mapping.
    """
    if filepath is None:
        default_root = str(setup_user_database_directory())
        filepath = os.path.join(default_root, "data", "cora")

    print("Database path:", filepath)

    # Step 1: make sure the compressed archive is available.
    archive_path = os.path.join(filepath, "cora.tgz")
    if not os.path.exists(archive_path):
        cora_download_dataset(filepath)

    # Step 2: make sure the archive has been unpacked.
    extracted_path = os.path.join(filepath, "cora_lu")
    if not os.path.exists(extracted_path):
        cora_extract_dataset(filepath)

    # Step 3: convert the raw files below cora_lu/cora into graph data.
    raw_data_path = os.path.join(filepath, "cora_lu", "cora")
    return cora_make_graph(raw_data_path)
Esempio n. 5
0
def cora_graph(filepath=None):
    """
    Load and convert the cora citation dataset.

    Ensures ``cora.tgz`` exists in ``filepath`` (downloading otherwise) and
    that the ``cora_lu`` folder exists (extracting otherwise), then builds
    the graph from ``cora_lu/cora``.

    Args:
        filepath (str): Path to dataset. If None, defaults to
            ``<user database directory>/data/cora``.

    Returns:
        list: [A,X,labels]

        - A (sp.csr_matrix): Adjacency matrix.
        - X (sp.csr_matrix): Node features.
        - labels (np.array): Labels.
    """
    if filepath is None:
        database_root = setup_user_database_directory()
        filepath = os.path.join(database_root, "data", "cora")

    print("Database path:", filepath)

    have_archive = os.path.exists(os.path.join(filepath, "cora.tgz"))
    if not have_archive:
        cora_download_dataset(filepath)

    have_extracted = os.path.exists(os.path.join(filepath, "cora_lu"))
    if not have_extracted:
        cora_extract_dataset(filepath)

    source_dir = os.path.join(filepath, "cora_lu", "cora")
    graph = cora_make_graph(source_dir)
    return graph
Esempio n. 6
0
def qm9_graph(filepath=None,
              max_distance=4,
              max_neighbours=15,
              gauss_distance=None,
              max_mols=133885):
    """
    Build the list of graph arrays for the qm9 dataset.

    If ``qm9.pickle`` is not found in ``filepath``, the dataset is
    downloaded, extracted, written to a pickle and the extracted files are
    removed again. Otherwise the cached pickle is loaded. The resulting data
    is then converted into graph properties.

    Args:
        filepath (str): Filepath to database. If None, defaults to
            ``<user database directory>/data/qm``.
        max_distance (int): Forwarded to the graph builder. Default is 4.
        max_neighbours (int): Forwarded to the graph builder. Default is 15.
        gauss_distance (dict): Forwarded to the graph builder. If None,
            defaults to {'gbins': 20, 'grange': 4, 'gsigma': 0.4}.
        max_mols (int): Maximum number of molecules to take from qm9.
            Default is 133885.

    Returns:
        list: List of graph props [labels, nodes, edges, edge_idx, gstates]

        - labels: All labels of qm9
        - nodes: List of atomic numbers for embedding layer
        - edges: Edge features (inverse distance, gauss distance)
        - edge_idx: Edge indices (N,2)
        - gstates: Graph states, mean molecular weight - 7 g/mol
    """
    if gauss_distance is None:
        gauss_distance = {'gbins': 20, 'grange': 4, 'gsigma': 0.4}

    # Resolved up front regardless of whether a custom filepath was given.
    user_database = setup_user_database_directory()
    if filepath is None:
        filepath = os.path.join(user_database, "data", "qm")

    print("Database path:", filepath)

    pickle_path = os.path.join(filepath, "qm9.pickle")
    if os.path.exists(pickle_path):
        # Cached pickle available: load it directly.
        # NOTE(review): pickle.load can execute arbitrary code; presumably
        # this file is only ever produced by qm9_write_pickle - confirm.
        print("Loading qm9.pickle ...", end='', flush=True)
        with open(pickle_path, 'rb') as handle:
            qm9 = pickle.load(handle)
        print('done')
    else:
        # No cache yet: fetch raw data, build the pickle, then clean up.
        qm9_download_dataset(filepath)
        qm9_extract_dataset(filepath)
        qm9 = qm9_write_pickle(filepath)
        qm9_remove_extracted_dataset(filepath)

    # Convert the raw qm9 data into graph properties.
    print("Making graph ...", end='', flush=True)
    graph_options = {
        "max_distance": max_distance,
        "max_neighbours": max_neighbours,
        "gauss_distance": gauss_distance,
        "max_mols": max_mols,
    }
    out_graph = make_qm9_graph(qm9, **graph_options)
    print('done')

    return out_graph