Exemple #1
0
def sample_subgraphs(smiles, num_samples=10, frac=0.5, vis_dir=None):
    """Draw random cluster subsets of a molecule and return their atom sets.

    For every sample, ``int(frac * len(clusters))`` distinct clusters are
    picked with probability proportional to their size, and all atoms of the
    picked clusters are gathered into one set.

    Args:
        smiles: SMILES string of the molecule to sample from.
        num_samples: How many atom subsets to draw.
        frac: Fraction of the clusters picked for each subset.
        vis_dir: Optional output directory. When given, each sample writes
            two PNG files there (the molecule with the subset highlighted and
            the extracted subgraph) and prints the extracted SMILES.

    Returns:
        A list holding one set of atom indices per sample.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, _ = find_clusters(mol)

    # Sampling weights: larger clusters are proportionally more likely.
    weights = np.array([len(members) for members in clusters]).astype('float')
    weights /= weights.sum()

    samples = []
    for n in range(num_samples):
        picked = np.random.choice(
            len(clusters),
            int(frac * len(clusters)),
            p=weights,
            replace=False,
        )
        selected_atoms = {atom for idx in picked for atom in clusters[idx]}

        # The subgraph is extracted for every sample, even though its SMILES
        # is only consumed by the visualisation branch below.
        minimum_smiles, _ = extract_subgraph(smiles, selected_atoms)
        samples.append(selected_atoms)

        if vis_dir is None:
            continue

        highlighted_path = os.path.join(vis_dir, f'subgraph_{n}.png')
        Draw.MolToFile(mol, filename=highlighted_path, highlightAtoms=selected_atoms)

        extracted_path = os.path.join(vis_dir, f'subgraph_{n}_extracted.png')
        print(minimum_smiles)
        Draw.MolToFile(Chem.MolFromSmiles(minimum_smiles), filename=extracted_path)

    return samples
def mcts(smiles, scoring_function, n_rollout, max_atoms, prop_delta):
    """Run ``n_rollout`` MCTS rollouts on a molecule and collect the results.

    Args:
        smiles: SMILES string of the molecule to search over.
        scoring_function: Passed through unchanged to ``mcts_rollout``.
        n_rollout: Number of times ``mcts_rollout`` is invoked on the root.
        max_atoms: Upper bound (inclusive) on ``len(node.atoms)`` for a node
            to be returned.
        prop_delta: Lower bound (inclusive) on ``node.P`` for a node to be
            returned.

    Returns:
        A ``(smiles, rationales)`` tuple, where ``rationales`` is the list of
        nodes in the state map that satisfy both thresholds.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, atom_cls = find_clusters(mol)

    # For every cluster, gather the other clusters that share an atom with
    # it, and turn the cluster itself into a set (in place).
    nei_cls = []
    for idx, members in enumerate(clusters):
        adjacent = {nei for atom in members for nei in atom_cls[atom]}
        adjacent.discard(idx)
        nei_cls.append(adjacent)
        clusters[idx] = set(members)

    # Per-atom cluster memberships are converted to sets in place as well.
    for atom_idx in range(len(atom_cls)):
        atom_cls[atom_idx] = set(atom_cls[atom_idx])

    # The search starts from the full molecule: every atom is included.
    all_atoms = set(range(mol.GetNumAtoms()))
    root = MCTSNode(smiles, all_atoms)
    state_map = {smiles: root}

    for _ in range(n_rollout):
        mcts_rollout(
            root, state_map, smiles, clusters, atom_cls, nei_cls,
            scoring_function,
        )

    rationales = []
    for node in state_map.values():
        if len(node.atoms) <= max_atoms and node.P >= prop_delta:
            rationales.append(node)
    return smiles, rationales
Exemple #3
0
def extract_selected_subgraph(smiles, selected_atoms):
    """Expand a selection to whole multi-atom clusters and extract it.

    Any cluster with more than two atoms that already has at least two of
    its atoms selected is added to the selection in full (the original
    author's note describes this as selecting the whole aromatic ring).

    Args:
        smiles: SMILES string of the molecule.
        selected_atoms: Set of selected atom indices. It is extended in
            place with the atoms of every completed cluster.

    Returns:
        The first element of ``extract_subgraph(smiles, selected_atoms)``
        computed on the expanded selection.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, _ = find_clusters(mol)

    # Decide which clusters to complete BEFORE touching the selection, so
    # that one completed cluster cannot trigger the completion of another.
    clusters_to_fill = [
        members
        for members in clusters
        if len(members) > 2
        and sum(atom in selected_atoms for atom in members) >= 2
    ]

    for members in clusters_to_fill:
        for atom in members:
            selected_atoms.add(atom)

    minimum_smiles, _ = extract_subgraph(smiles, selected_atoms)
    return minimum_smiles
Exemple #4
0
def extract_selected_subgraph_for_gcn(smiles, selected_atoms, vis_dir=None):
    """Expand a selection to every cluster it touches and extract it.

    Every cluster containing at least one selected atom is added to the
    selection in full before the subgraph is extracted.

    Args:
        smiles: SMILES string of the molecule.
        selected_atoms: Set of selected atom indices. It is extended in
            place with the atoms of every touched cluster.
        vis_dir: Optional output directory. When given, a PNG of the
            molecule with the expanded selection highlighted is written
            there.

    Returns:
        The first element of ``extract_subgraph(smiles, selected_atoms)``
        computed on the expanded selection.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, atom_cls = find_clusters(mol)

    # Collect the touched clusters first; the selection is only grown once
    # this set is complete, so newly added atoms do not pull in more clusters.
    touched_clusters = {
        clusters[cls_idx]
        for atom in selected_atoms
        for cls_idx in atom_cls[atom]
    }

    for members in touched_clusters:
        for atom in members:
            selected_atoms.add(atom)

    minimum_smiles, _ = extract_subgraph(smiles, selected_atoms)

    if vis_dir is not None:
        # The file name encodes the size of the expanded selection.
        out_path = os.path.join(vis_dir, f'atoms_selected{len(selected_atoms)}.png')
        Draw.MolToFile(mol, filename=out_path, highlightAtoms=selected_atoms)

    return minimum_smiles
Exemple #5
0
def find_minimum_subgraph(smiles, selected_atoms, vis_dir=None):
    """Extract a pruned cluster subgraph that covers the selected atoms.

    The function works on the clusters returned by ``find_clusters``:

    1. Choose clusters that cover the selected atoms.
    2. Build the cluster adjacency (clusters sharing an atom are neighbors).
    3. Repeatedly prune unselected clusters that have at most one remaining
       neighbor, or whose remaining neighbors are all adjacent to each other.
    4. Extract the subgraph made of the atoms of all non-pruned clusters.

    Args:
        smiles: SMILES string of the molecule.
        selected_atoms: Iterable of selected atom indices (iterated twice;
            not modified here).
        vis_dir: Optional output directory. When given, three PNG files are
            written: the selection, the minimum atom set, and the extracted
            subgraph.

    Returns:
        The first element of ``extract_subgraph(smiles, minimum_atoms)``.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, atom_cls = find_clusters(mol)

    selected_clusters = set()
    cluster_votes = {}

    # Pass 1: a cluster is selected when a selected atom belongs to it alone,
    # or when at least two multi-cluster atoms vote for it.
    for atom in selected_atoms:
        memberships = atom_cls[atom]
        assert len(memberships) > 0
        if len(memberships) == 1:
            selected_clusters.add(memberships[0])
            continue
        for cls in memberships:
            cluster_votes[cls] = cluster_votes.get(cls, 0) + 1
            if cluster_votes[cls] >= 2:
                selected_clusters.add(cls)

    # Pass 2: any atom still not covered by a selected cluster falls back to
    # the first cluster it belongs to.
    for atom in selected_atoms:
        covered = any(cls in selected_clusters for cls in atom_cls[atom])
        if not covered:
            selected_clusters.add(atom_cls[atom][0])

    # Cluster adjacency: two clusters are neighbors when they share an atom.
    cluster_neighbor = {}
    for idx, members in enumerate(clusters):
        adjacent = set()
        for atom in members:
            adjacent.update(atom_cls[atom])
        adjacent.remove(idx)
        cluster_neighbor[idx] = adjacent

    # Iteratively prune unselected clusters until a full sweep changes nothing.
    leaf_clusters = set()
    pruned_any = True
    while pruned_any:
        pruned_any = False
        for idx in range(len(clusters)):
            if idx in selected_clusters or idx in leaf_clusters:
                continue

            neighbors = cluster_neighbor[idx]
            if len(neighbors) > 1:
                # With several neighbors, the cluster may only go when every
                # pair of neighbors is adjacent to each other.
                pairwise_adjacent = all(
                    j in cluster_neighbor[k] and k in cluster_neighbor[j]
                    for j in neighbors
                    for k in neighbors
                    if j < k
                )
                if not pairwise_adjacent:
                    continue

            leaf_clusters.add(idx)
            for j in neighbors:
                cluster_neighbor[j].remove(idx)
            pruned_any = True

    # Everything that survived pruning contributes its atoms.
    minimum_atoms = set()
    for idx, members in enumerate(clusters):
        if idx not in leaf_clusters:
            minimum_atoms.update(members)

    minimum_smiles, _ = extract_subgraph(smiles, minimum_atoms)

    if vis_dir is not None:
        selected_path = os.path.join(vis_dir, f'atoms_selected{len(selected_atoms)}.png')
        Draw.MolToFile(mol, filename=selected_path, highlightAtoms=selected_atoms)

        minimum_path = os.path.join(vis_dir, f'atoms_minimum{len(selected_atoms)}.png')
        Draw.MolToFile(mol, filename=minimum_path, highlightAtoms=minimum_atoms)

        extracted_path = os.path.join(vis_dir, f'atoms_minimum_extracted{len(selected_atoms)}.png')
        Draw.MolToFile(Chem.MolFromSmiles(minimum_smiles), filename=extracted_path)

    return minimum_smiles