Beispiel #1
0
def substructure(mol, query, largest_only=True, ignore_hydrogen=True):
    """Return True if mol is a substructure of the query.

    Both inputs are cloned first, so the caller's objects are not touched
    by the preprocessing below.

    Args:
      mol: Compound
      query: Compound
      largest_only: when true, both molecules are reduced with
        ``molutil.largest_graph`` before matching
      ignore_hydrogen: when true, both molecules are passed through
        ``molutil.make_Hs_implicit`` before matching

    Returns:
      False if either input has length 0 or the ``filter_`` pre-check
      rejects the pair; otherwise the result of
      ``GraphMatcher(...).subgraph_is_isomorphic()``.
    """
    # An empty molecule never matches anything (checked before any cloning).
    if not len(mol) or not len(query):
        return False

    def subset_filter(cnt1, cnt2):
        # Assumes Counter-like arguments (``subtract``/``values``) supplied
        # by filter_ -- TODO confirm against filter_.
        # NOTE: the subtraction is done in place on cnt2.
        cnt2.subtract(cnt1)
        for remaining in cnt2.values():
            if remaining < 0:
                # cnt1 held more of some key than cnt2 did.
                return True
        # Falls through with an implicit (falsy) None otherwise.
        return None

    target = molutil.clone(mol)
    pattern = molutil.clone(query)
    if largest_only:
        target = molutil.largest_graph(target)
        pattern = molutil.largest_graph(pattern)
    if ignore_hydrogen:
        target = molutil.make_Hs_implicit(target)
        pattern = molutil.make_Hs_implicit(pattern)

    # Cheap pre-check before the graph matching itself.
    if not filter_(target, pattern, f=subset_filter):
        return False
    # Query graph goes first, mol graph second; presumably networkx's
    # GraphMatcher, where this asks whether a subgraph of the first graph
    # is isomorphic to the second -- verify against the import.
    matcher = GraphMatcher(pattern.graph, target.graph, node_match=atom_match)
    return matcher.subgraph_is_isomorphic()
Beispiel #2
0
def equal(mol, query, largest_only=True, ignore_hydrogen=True):
    """Return True if mol is exactly the same structure as the query.

    Both inputs are cloned first, so the caller's objects are not touched
    by the preprocessing below.

    Args:
      mol: Compound
      query: Compound
      largest_only: when true, both molecules are reduced with
        ``molutil.largest_graph`` before matching
      ignore_hydrogen: when true, both molecules are passed through
        ``molutil.make_Hs_implicit`` before matching

    Returns:
      False if the molecular weights differ beyond floating-point
      tolerance; otherwise the result of ``GraphMatcher(...).is_isomorphic()``.
    """
    # Function-scope import: the file's import block is not visible here.
    import math

    m = molutil.clone(mol)
    q = molutil.clone(query)
    if largest_only:
        m = molutil.largest_graph(m)
        q = molutil.largest_graph(q)
    if ignore_hydrogen:
        m = molutil.make_Hs_implicit(m)
        q = molutil.make_Hs_implicit(q)
    # Cheap prefilter on molecular weight.  The weight is presumably a float
    # sum, so two identical structures with a different atom order could
    # differ in the last bits; an exact ``==`` would then reject them before
    # the graph check.  A tolerant compare is safe because the isomorphism
    # test below still makes the final decision.
    if not math.isclose(molutil.mw(m), molutil.mw(q)):
        return False
    gm = GraphMatcher(q.graph, m.graph, node_match=atom_match)
    return gm.is_isomorphic()
Beispiel #3
0
def ready_to_draw(mol):
    """Shortcut that prepares a molecule for drawing.

    The input is cloned and every preparation step is applied to the clone,
    which is then returned; the steps' own return values are discarded.
    Override this function to customize the appearance.

    Args:
      mol: molecule to prepare (passed to ``molutil.clone``)

    Returns:
      The prepared clone.
    """
    drawable = molutil.clone(mol)
    # Optional steps that are currently disabled:
    #   display_terminal_carbon  (would run before the steps below)
    #   spine_to_terminal_wedge  (would run after equalize_terminal_double_bond)
    preparation_steps = (
        equalize_terminal_double_bond,
        scale_and_center,
        format_ring_double_bond,
    )
    for prepare in preparation_steps:
        prepare(drawable)
    return drawable
Beispiel #4
0
def comparison_array(molecule, diameter=10, size=30, ignore_hydrogen=True):
    """Generate the comparison array of a molecule.

    The comparison array consists of node pairs of the bond (line) graph
    plus a collater integer per pair.  Collater layout as packed below
    (42 bits, assuming each field stays within its width):

        6 bits of distance
        18 bits of bond attribute x 2
            9 bits of atom attribute x 2
                7 bits of atom number
                2 bits of atom pi

    [Sheridan, R.P. and Miller, M.D., J. Chem. Inf. Comput. Sci. 38 (1998) 915]

    Design note from the original author: using possible path lengths
    instead of the shortest path length, and intersecting the distance sets,
    would detect roundabout matching paths and allow exact graph
    isomorphism, but the set intersection is too costly for the gain.

    Args:
        molecule: Compound; must satisfy ``require("Valence")``.  It is
            cloned, so the salt/water/metal removal does not touch it.
        diameter: forwarded to ``_reachables``; presumably the maximum
            distance still considered -- confirm against ``_reachables``.
        size: forwarded to ``_reachables``; presumably a cap on the number
            of reachable nodes -- confirm against ``_reachables``.
        ignore_hydrogen: when true, the clone is passed through
            ``molutil.make_Hs_implicit`` first.

    Returns:
        arr(list): comparison array. list of tuple (node1, node2, collater)
        max_size(int): size of the largest clique found in the graph of
            reachable pairs (0 if there are no cliques)
        int_to_node(dict): int -> node index pair reconverter dict
    """
    molecule.require("Valence")
    mol = molutil.clone(molecule)
    if ignore_hydrogen:
        mol = molutil.make_Hs_implicit(mol)
    # Salt and water are ignored; multivalent coordinated metals are dropped
    # as well because they notably affect the performance.
    remover.remove_salt(mol)
    remover.remove_water(mol)
    remover.remove_coordinated_metal(mol)

    # Each node of the line graph is a bond, i.e. a pair of atom indices.
    bond_graph = nx.line_graph(mol.graph)
    node_to_int = {}
    for index, bond in enumerate(bond_graph.nodes()):
        node_to_int[bond] = index
        end_atoms = (mol.atom(bond[0]), mol.atom(bond[1]))
        # Atom attribute: atom number in the high bits, pi in the low 2 bits.
        atom_codes = [atom.number << 2 | atom.pi for atom in end_atoms]
        # Sorting makes the bond attribute independent of atom order.
        low, high = sorted(atom_codes)
        # NOTE(review): ``Graph.node`` was removed in newer networkx releases
        # (``Graph.nodes[...]`` replaces it) -- confirm the pinned version.
        bond_graph.node[bond]["type"] = low << 9 | high

    # Replace the index-pair labels with their integer expression and keep
    # the reverse mapping for the caller.
    bond_graph = nx.relabel_nodes(bond_graph, node_to_int)
    int_to_node = {number: bond for bond, number in node_to_int.items()}

    arr = []
    matrix = nx.Graph()
    for src, src_attrs in bond_graph.nodes(data=True):
        reachable = _reachables(bond_graph, src, diameter, size)
        src_type = src_attrs["type"]
        for dst, distance in reachable.items():
            # A falsy distance (e.g. 0 for the node itself) is skipped.
            if distance:
                matrix.add_edge(src, dst)
                dst_type = bond_graph.node[dst]["type"]
                collater = ((distance << 18 | src_type) << 18) | dst_type
                arr.append((src, dst, collater))

    # ``default=[]`` keeps this from failing when no clique exists.
    largest_clique = max(find_cliques(matrix), key=len, default=[])
    return arr, len(largest_clique), int_to_node
Beispiel #5
0
 def test_clone(self):
     # A clone must report the same length as the molecule it was made from.
     # Original author's timing note: this takes 0.003 ms
     source_mol = reader.mol_from_text(MOL["Indinavir"])
     cloned_mol = molutil.clone(source_mol)
     self.assertEqual(len(source_mol), len(cloned_mol))