Example #1
0
def hydrolise(mol):
    """Cleave every amide and ester bond of ``mol`` and return the fragments.

    The patterns ``C(=O)NC`` and ``C(=O)OC`` are matched against ``mol``.
    For each match the bond between the carbonyl carbon (match position 0)
    and the N/O heteroatom (match position 2) is removed and a new
    single-bonded oxygen atom is attached to the carbonyl carbon.

    A bond is cleaved only once even when several matches share it (for
    example an N-substituted amide matches once per carbon on the nitrogen);
    otherwise the carbonyl carbon would receive one extra oxygen per
    duplicate match and the topology check below would be skewed.

    A short topology message is printed to stdout.

    Args:
        mol: RDKit molecule to hydrolyse.

    Returns:
        The fragments from ``Chem.GetMolFrags(..., asMols=True)``.
    """
    peptide_bond = Chem.MolFromSmiles('C(=O)NC')
    ester_bond = Chem.MolFromSmiles('C(=O)OC')
    peptide_ids = mol.GetSubstructMatches(peptide_bond)
    ester_ids = mol.GetSubstructMatches(ester_bond)

    # Unique (carbonyl C, heteroatom) pairs, amides first and then esters,
    # in the order in which the matches were reported.
    cleaved = []
    seen = set()
    for match in tuple(peptide_ids) + tuple(ester_ids):
        bond_key = (match[0], match[2])
        if bond_key not in seen:
            seen.add(bond_key)
            cleaved.append(bond_key)

    nm = Chem.EditableMol(mol)
    for carbonyl_c, hetero in cleaved:
        nm.RemoveBond(carbonyl_c, hetero)
        nm.AddBond(carbonyl_c, nm.AddAtom(Chem.Atom('O')),
                   Chem.BondType.SINGLE)

    h_m = nm.GetMol()
    fragments = Chem.GetMolFrags(h_m, asMols=True)
    n_cleaved = len(cleaved)
    print()
    # NOTE(review): cutting a linear chain n times normally yields n + 1
    # fragments; the "- 1" below is kept from the original -- confirm.
    if len(fragments) == n_cleaved:
        print('Cyclic structure!', end='')
    elif len(fragments) == n_cleaved - 1:
        print('Linear structure!')
    else:
        print('Unknown molecule topology!')

    return fragments
Example #2
0
def delete_bonds(mol, bonds, ftype, hac):
    """Break the given bonds, cap both ends with dummy atoms and select fragments.

    Args:
        mol: parent RDKit molecule (an editable copy is modified).
        bonds: iterable of bonds; the first two items of each entry are the
            atom indices of the bond to break.
        ftype: forwarded unchanged to ``select_fragments``.
        hac: forwarded unchanged to ``select_fragments``.

    Returns:
        Whatever ``select_fragments`` returns for the fragmented SMILES.
    """
    editable = Chem.EditableMol(mol)

    for bond in bonds:
        first, second = bond[0], bond[1]
        editable.RemoveBond(first, second)

        # Attachment points: one dummy atom (atomic number 0) per bond end,
        # first end handled before the second.
        for endpoint in (first, second):
            attachment = editable.AddAtom(Chem.Atom(0))
            editable.AddBond(endpoint, attachment, Chem.BondType.SINGLE)

    # Deliberately not sanitised: the valences are expected to still be fine.
    fragmented_smi = Chem.MolToSmiles(editable.GetMol(), True)

    return select_fragments(fragmented_smi, ftype, hac)
Example #3
0
def fragment_into_dummy_smiles(offmol, cleave_bonds=(), unique_r_groups=True):
    """Cut ``offmol`` at the given bonds and return the fragments as SMILES.

    Each cleaved bond is replaced by two dummy atoms (``*``), one on each
    side, bonded with the original bond type and labelled with atom map
    numbers taken from an R-group pair.

    Args:
        offmol: object exposing ``to_rdkit()``.
        cleave_bonds: iterable of ``(atom_index, atom_index)`` pairs to cut.
            Defaults to an empty tuple (an immutable default avoids the
            shared-mutable-default pitfall).
        unique_r_groups: if true, bond ``k`` (0-based) gets the labels
            ``(2k + 1, 2k + 2)``; otherwise every bond gets ``(1, 2)``.

    Returns:
        ``(smiles, r_linkages)`` where ``smiles`` holds one string per
        fragment and ``r_linkages`` maps the first label of each pair to a
        one-element list holding the second label.
    """
    rdmol = Chem.RWMol(offmol.to_rdkit())
    for atom in rdmol.GetAtoms():
        atom.SetAtomMapNum(0)
    utils.assign_stereochemistry(rdmol)
    dummy = Chem.Atom("*")
    r_linkages = {}

    if unique_r_groups:
        r_groups = [(i, i + 1)
                    for i in range(1, (len(cleave_bonds) + 1) * 2, 2)]
    else:
        r_groups = [(1, 2)] * len(cleave_bonds)

    for bond, rs in zip(cleave_bonds, r_groups):
        # Remember the bond order so the dummy atoms are attached with it.
        bond_type = rdmol.GetBondBetweenAtoms(*bond).GetBondType()
        rdmol.RemoveBond(*bond)
        r_linkages[rs[0]] = [rs[1]]
        for atom_index, r in zip(bond, rs):
            dummy_copy = Chem.Atom(dummy)
            dummy_copy.SetAtomMapNum(r)
            new_atom_index = rdmol.AddAtom(dummy_copy)
            rdmol.AddBond(atom_index, new_atom_index, bond_type)

    mols = Chem.GetMolFrags(rdmol, asMols=True)
    for mol in mols:
        counter = 1
        Chem.AssignStereochemistry(mol)
        # Number the real atoms 1..n per fragment; dummy atoms keep their
        # R-group label.
        for atom in mol.GetAtoms():
            if atom.GetSymbol() != "*":
                atom.SetAtomMapNum(counter)
                counter += 1
    smiles = [utils.mol_to_smiles(m) for m in mols]
    return smiles, r_linkages
Example #4
0
def HToOtherElement(m, cn_idx, Z=None):
    """
    arguments: mol object, connection atom index, Z (optional)

    returns: new mol object

    this function replaces a H atom bound to the atom with index cn_idx with
    another atom with atomic number Z; when Z is omitted an element is drawn
    at random from Z_list. The new atom is then padded with hydrogens, the
    molecule is sanitized, embedded and MMFF-optimized.

    raises: ValueError if the atom at cn_idx has no explicit hydrogen
    neighbour (previously this surfaced as an UnboundLocalError)
    """
    print('chose HToOtherElement')
    if Z is None:
        Z = choice(Z_list)  #pick random element
    mw = Chem.RWMol(m)

    # first explicit hydrogen atom attached to the connection atom
    H_idx = next((at.GetIdx()
                  for at in mw.GetAtomWithIdx(cn_idx).GetNeighbors()
                  if at.GetSymbol() == 'H'), None)
    if H_idx is None:
        raise ValueError(
            'atom {} has no explicit hydrogen neighbour to replace'.format(
                cn_idx))

    mw.ReplaceAtom(H_idx, Chem.Atom(Z))
    Chem.SanitizeMol(mw)

    #add Hs to satisfy the valence of the new atom
    while len(mw.GetAtomWithIdx(H_idx).GetNeighbors()) < mw.GetAtomWithIdx(
            H_idx).GetTotalValence():
        idx = mw.AddAtom(Chem.Atom(1))
        mw.AddBond(H_idx, idx, Chem.BondType.SINGLE)

    Chem.SanitizeMol(mw)
    AllChem.EmbedMolecule(mw)
    AllChem.MMFFOptimizeMolecule(mw)

    return mw
Example #5
0
def split_heterocycle_bonds(rwmol, bonds):
    """Remove the given bonds and cap the freed carbon ends with oxygen atoms.

    ``bonds`` holds bond indices; they are processed from the highest index
    down. Every carbon that was an end of a removed bond has its explicit H
    count reset to 0 and, depending on its explicit valence afterwards,
    receives: valence 1 -> one single- and one double-bonded O;
    valence 2 -> one double-bonded O; valence 3 -> one single-bonded O.
    ``rwmol`` is modified in place and returned.
    """
    # TODO: sometimes adds OH when rBAN doesn't (when valence == 1 and type == HETEROCYCLE),
    # but in other cases (e.g. val == 1, type == Oxazole), the result matches w/ rBAN
    carbon_ends = set()
    for bond_idx in sorted(bonds, reverse=True):
        bond = rwmol.GetBondWithIdx(bond_idx)
        endpoints = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        rwmol.RemoveBond(*endpoints)
        carbon_ends.update(
            idx for idx in endpoints
            if rwmol.GetAtomWithIdx(idx).GetSymbol() == 'C')

    # Bond orders of the oxygens to attach, keyed by explicit valence.
    caps_by_valence = {
        1: (rdc.BondType.SINGLE, rdc.BondType.DOUBLE),
        2: (rdc.BondType.DOUBLE,),
        3: (rdc.BondType.SINGLE,),
    }

    for carbon_idx in carbon_ends:
        carbon = rwmol.GetAtomWithIdx(carbon_idx)
        carbon.SetNumExplicitHs(0)
        carbon.UpdatePropertyCache()
        for bond_type in caps_by_valence.get(carbon.GetExplicitValence(), ()):
            rwmol.AddBond(carbon_idx, rwmol.AddAtom(rdc.Atom(8)), bond_type)

    return rwmol
 def ReadSymbols(self, tree):
     """Translate the symbol in ``tree[0]`` into an RDKit atom or query atom.

     Checked in this order: 'any atom'/'$', 'heteroatom'/'&' (N, O, P or
     S), 'heavy atom'/'X', a symbol starting with a lowercase letter
     (aromatic element), 'M', and finally a plain element symbol.
     Raises RINGReaderError when RDKit rejects the element symbol.
     """
     token = tree[0]

     if token in ['any atom', '$']:
         return rdqueries.AtomNumGreaterQueryAtom(0)

     if token in ['heteroatom', '&']:
         # N, then O, P, S OR-ed into the same query
         atom = rdqueries.AtomNumEqualsQueryAtom(7)
         for atomic_num in (8, 15, 16):
             atom.ExpandQuery(
                 rdqueries.AtomNumEqualsQueryAtom(atomic_num),
                 how=Chem.rdchem.CompositeQueryType.COMPOSITE_OR)
         return atom

     if token in ['heavy atom', 'X']:
         # heavier than H
         return rdqueries.AtomNumGreaterQueryAtom(1)

     if token[0].islower():
         # aromatic atom: capitalise the symbol and flag the atom aromatic
         symbol = token[0].upper() + token[1:]
         try:
             atom = Chem.Atom(symbol)
             atom.SetIsAromatic(True)
         except RuntimeError:
             raise RINGReaderError('Element aromatic ' + symbol + ' not found')
         return atom

     if token == 'M':
         # metal: atomic number greater than 19
         return rdqueries.AtomNumGreaterQueryAtom(19)

     try:
         element = Chem.Atom(token)
         atom = rdqueries.AtomNumEqualsQueryAtom(element.GetAtomicNum())
     except RuntimeError:
         raise RINGReaderError('Element ' + token + ' not found')
     return atom
def split_hetero(rwmol, bonds):
    """Delete the bonds with the given indices and oxidise the freed carbons.

    Bonds are removed from the highest index down. Each carbon end of a
    removed bond gets its explicit hydrogens cleared; then, by explicit
    valence: 1 -> add -O and =O, 2 -> add =O, 3 -> add -O. The molecule is
    edited in place and also returned.
    """

    def attach_oxygen(carbon_idx, bond_type):
        # New oxygen atom bonded to the carbon with the requested order.
        oxygen_idx = rwmol.AddAtom(rdc.Atom(8))
        rwmol.AddBond(carbon_idx, oxygen_idx, bond_type)

    carbon_ends = set()
    for bond_index in sorted(bonds, reverse=True):
        bond = rwmol.GetBondWithIdx(bond_index)
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        rwmol.RemoveBond(begin_idx, end_idx)
        for atom_idx in (begin_idx, end_idx):
            if rwmol.GetAtomWithIdx(atom_idx).GetSymbol() == 'C':
                carbon_ends.add(atom_idx)

    for carbon_idx in carbon_ends:
        carbon = rwmol.GetAtomWithIdx(carbon_idx)
        carbon.SetNumExplicitHs(0)
        carbon.UpdatePropertyCache()
        explicit_valence = carbon.GetExplicitValence()

        # The single-bonded oxygen (when any) is added before the double one.
        if explicit_valence in (1, 3):
            attach_oxygen(carbon_idx, rdc.BondType.SINGLE)
        if explicit_valence in (1, 2):
            attach_oxygen(carbon_idx, rdc.BondType.DOUBLE)
    return rwmol
Example #8
0
def IonizeAtom(mol, atomI, ion_mode):
    """Build an ionised copy of ``mol`` at atom ``atomI``.

    * "protonated": bond a new H to the atom, formal charge +1.
    * "sodiated": bond a new Na to the atom, formal charge +1.
    * "deprotonated" (only when the atom's total H count, neighbours
      included, is positive): remove the last hydrogen neighbour found,
      formal charge -1.

    When a copy was built it is embedded with ``AllChem.EmbedMolecule``
    using the module-level ``potential``.

    Returns ``(ionized_mol, charge)``; both are empty lists when no branch
    applied.

    NOTE(review): in the deprotonated branch the hydrogen index is only set
    when an explicit H atom is among the neighbours; with implicit
    hydrogens only, the removal fails on an unassigned variable.
    """
    ionized_mol = []
    charge = []

    # Atomic number of the adduct atom for the two cationic modes.
    adduct_number = None
    if ion_mode == "protonated":
        adduct_number = 1
    elif ion_mode == "sodiated":
        adduct_number = 11

    if adduct_number is not None:
        charge = 1
        ionized_mol = Chem.RWMol(mol)
        adduct_idx = ionized_mol.AddAtom((Chem.Atom(adduct_number)))
        ionized_mol.AddBond(atomI, adduct_idx, Chem.BondType.SINGLE)
        ionized_mol.GetAtomWithIdx(atomI).SetFormalCharge(charge)
    elif (ion_mode == "deprotonated"
          and mol.GetAtomWithIdx(atomI).GetTotalNumHs(
              includeNeighbors=True) > 0):
        charge = -1
        ionized_mol = Chem.RWMol(mol)
        for neighbor in ionized_mol.GetAtomWithIdx(atomI).GetNeighbors():
            if neighbor.GetAtomicNum() == 1:
                hydrogen_idx = neighbor.GetIdx()
        ionized_mol.RemoveAtom(hydrogen_idx)
        ionized_mol.GetAtomWithIdx(atomI).SetFormalCharge(charge)

    if ionized_mol:
        AllChem.EmbedMolecule(ionized_mol, potential)

    return ionized_mol, charge
def join_mols(mol):
    '''join fragged mols, based on dummy atom positions

    The dummy atoms reported by ``find_dummy_atoms(mol)`` are printed, the
    first two and last two are turned into carbons, the first two are
    bonded (aromatic) to the atoms referenced by the last two entries, and
    the last two dummy atoms are then removed. The result is sanitized and
    returned.

    NOTE(review): each entry is used as ``entry[0]`` = dummy atom index and
    ``entry[1][0]`` = index of an atom attached to it -- confirm against
    ``find_dummy_atoms``.
    '''
    dummy_atoms = find_dummy_atoms(mol)
    # Parenthesised so it works as both a Python 2 statement and a
    # Python 3 function call.
    print(dummy_atoms)
    temp_em = Chem.EditableMol(mol)
    temp_em.ReplaceAtom(dummy_atoms[1][0], Chem.Atom(6))
    temp_em.ReplaceAtom(dummy_atoms[0][0], Chem.Atom(6))
    temp_em.ReplaceAtom(dummy_atoms[-1][0], Chem.Atom(6))
    temp_em.ReplaceAtom(dummy_atoms[-2][0], Chem.Atom(6))
    temp_em.AddBond(dummy_atoms[0][0], dummy_atoms[-2][1][0],
                    Chem.BondType.AROMATIC)
    temp_em.AddBond(dummy_atoms[1][0], dummy_atoms[-1][1][0],
                    Chem.BondType.AROMATIC)
    # NOTE(review): removing an atom renumbers the atoms after it, so this
    # order presumes dummy_atoms[-1] has a higher index than
    # dummy_atoms[-2] -- confirm.
    temp_em.RemoveAtom(dummy_atoms[-1][0])
    temp_em.RemoveAtom(dummy_atoms[-2][0])
    tm = temp_em.GetMol()
    Chem.SanitizeMol(tm)
    return tm
Example #10
0
def add_atoms(new_mol, node_symbol, dataset):
    """Append one atom to ``new_mol`` for every entry of ``node_symbol``.

    For 'qm9' and 'cep' the atom comes from the dataset's 'number_to_atom'
    table. For 'zinc' the formal charge is additionally parsed from the
    text between parentheses in the matching 'atom_types' entry. Any other
    dataset name adds nothing.
    """
    plain_atoms = dataset == 'qm9' or dataset == 'cep'
    charged_atoms = dataset == 'zinc'

    for number in node_symbol:
        if plain_atoms:
            element = dataset_info(dataset)['number_to_atom'][number]
            new_mol.AddAtom(Chem.Atom(element))
        elif charged_atoms:
            new_atom = Chem.Atom(dataset_info(dataset)['number_to_atom'][number])
            type_label = dataset_info(dataset)['atom_types'][number]
            # the charge is whatever follows '(' with any ')' stripped
            new_atom.SetFormalCharge(int(type_label.split('(')[1].strip(')')))
            new_mol.AddAtom(new_atom)
Example #11
0
def test_correct_mol():
    """Print the SMILES of a small C/C/C/N chain before and after ``correct_mol``."""
    order = Chem.rdchem.BondType
    mol = Chem.RWMol()

    # three carbons followed by one nitrogen
    for atomic_num in (6, 6, 6, 7):
        mol.AddAtom(Chem.Atom(atomic_num))

    for begin, end, bond_order in ((0, 1, order.DOUBLE),
                                   (1, 2, order.TRIPLE),
                                   (0, 3, order.TRIPLE)):
        mol.AddBond(begin, end, bond_order)

    print(Chem.MolToSmiles(mol))  # C#C=C#N
    mol = correct_mol(mol)
    print(Chem.MolToSmiles(mol))  # C=C=C=N
Example #12
0
def create_rdkit_molecule(molecule_name, molecules, molecule_numberings,
                          bonds):
    '''
    Create an RDKit molecule from dictionaries describing its structure.

    A placeholder atom (atomic number 0) is added first and one more for
    every atom number missing from the molecule, so that atom number ``k``
    ends up at RDKit index ``k``.

    Args:
        molecule_name (str): name of a molecule
        molecules (dict): for each molecule, a list of (atom number, atom type) tuples
        molecule_numberings (dict): for each molecule, a list of its atom numbers
        bonds (dict): for each molecule, a list of its bonds as tuples (atom_number_1, atom_number_2, bond_type)
    Returns:
        rdmol (Mol): the molecule as an RDKit Mol
    Raises:
        ValueError: if the molecule has no atom numbers (from ``max``)
    '''
    # Create an editable molecule.
    rdedmol = Chem.EditableMol(Chem.Mol())

    # Sorted so that, for a repeated atom number, the largest entry wins.
    atoms = {
        atom_tuple[0]: atom_tuple[1]
        for atom_tuple in sorted(molecules[molecule_name])
    }
    highest_number = max(molecule_numberings[molecule_name])

    # Add a first atom to keep most of the atom numbering.
    rdedmol.AddAtom(Chem.Atom(0))

    # Add atoms from the molecule; absent numbers become placeholder atoms
    # so the numbering is kept.
    for atom_number in range(1, highest_number + 1):
        rdedmol.AddAtom(Chem.Atom(atoms.get(atom_number, 0)))

    # Add bonds from the molecule.
    for bond in bonds[molecule_name]:
        begin, end, bond_type = tuple(bond)[:3]
        rdedmol.AddBond(begin, end, bond_type)

    # Create molecule.
    return rdedmol.GetMol()
Example #13
0
def test_dative_bond():
    """Check transition-metal detection and dative-bond assignment on a Co complex."""
    # sodium is not a transition metal, cobalt is
    assert dm.is_transition_metal(Chem.Atom("Co"))
    assert not dm.is_transition_metal(Chem.Atom("Na"))

    smis = "CC1=CC=CC(=C1N\\2O[Co]3(ON(\\C=[N]3\\C4=C(C)C=CC=C4C)C5=C(C)C=CC=C5C)[N](=C2)\\C6=C(C)C=CC=C6C)C"
    expected_result = (
        "CC1=CC=CC(C)=C1N1C=N(C2=C(C)C=CC=C2C)->[Co]2(<-N(C3=C(C)C=CC=C3C)=CN(C3=C(C)C=CC=C3C)O2)O1"
    )

    unsanitized = Chem.MolFromSmiles(smis, sanitize=False)
    mol = dm.set_dative_bonds(unsanitized)

    assert Chem.MolToSmiles(mol) == expected_result
    # the dative SMILES must be parseable again
    assert dm.to_mol(Chem.MolToSmiles(mol)) is not None
Example #14
0
def split_aa_pk_hybrid(smi):
    """Split a molecule at the bond reported by ``find_aa_pk_bond``.

    The first atom of that bond is completed with oxygen atoms before the
    bond is removed; what is added depends on whether it already carries an
    oxygen neighbour and on the bond orders involved, which also decides
    the returned condensation label.

    Returns:
        ``(aa_part_smi, pk_part_smi, cond_type)``: the SMILES of the
        fragment containing the first bond atom, the SMILES of the other
        fragment, and one of 'KS', 'KS+KR', 'KS+KR+DH', 'KS+KR+DH+ER'.

    Raises:
        PKError: if the SMILES cannot be parsed, the first bond atom has
            more than one oxygen neighbour, a bond order is neither single
            nor double, or either fragment SMILES is missing or empty.
    """
    mol = rdc.MolFromSmiles(smi)
    if not mol:
        raise PKError
    bond_break = find_aa_pk_bond(mol)
    anchor = bond_break[0]

    single = rdc.BondType.SINGLE
    double = rdc.BondType.DOUBLE

    mw = rdc.RWMol(mol)
    oxs = [
        nbr.GetIdx() for nbr in mw.GetAtomWithIdx(anchor).GetNeighbors()
        if nbr.GetAtomicNum() == 8
    ]
    if len(oxs) > 1:
        raise PKError

    if oxs:
        # One oxygen already present: add the complementary one.
        existing = mw.GetBondBetweenAtoms(anchor, oxs[0]).GetBondType()
        if existing == double:
            cond_type, added_order = 'KS', single
        elif existing == single:
            cond_type, added_order = 'KS+KR', double
        else:
            raise PKError
        mw.AddBond(anchor, mw.AddAtom(rdc.Atom(8)), added_order)
    else:
        # No oxygen yet: the label follows the order of the bond to break,
        # and both a single- and a double-bonded oxygen are added.
        broken_order = mw.GetBondBetweenAtoms(*bond_break).GetBondType()
        if broken_order == double:
            cond_type = 'KS+KR+DH'
        elif broken_order == single:
            cond_type = 'KS+KR+DH+ER'
        else:
            raise PKError
        first_ox = mw.AddAtom(rdc.Atom(8))
        second_ox = mw.AddAtom(rdc.Atom(8))
        mw.AddBond(anchor, first_ox, single)
        mw.AddBond(anchor, second_ox, double)
    mw.RemoveBond(*bond_break)

    aa_part_smi = None
    pk_part_smi = None
    for frag_atoms in rdc.GetMolFrags(mw):
        fragment_smi = rdc.MolFragmentToSmiles(mw, frag_atoms)
        if anchor in frag_atoms:
            aa_part_smi = fragment_smi
        else:
            pk_part_smi = fragment_smi
    if not (aa_part_smi and pk_part_smi):
        raise PKError
    return aa_part_smi, pk_part_smi, cond_type
Example #15
0
def de_featurizer(nodes, edges):
    '''Build molecules from a graph representation with nodes and edges.

    Only consecutive atoms ``(a, a + 1)`` are bonded. The upper-triangle
    entry ``edges[a][a + 1]`` drives the first molecule and the
    lower-triangle entry ``edges[a + 1][a]`` drives the second one.

    Parameters:
    ------
    nodes: an array of the molecule with atomic numbers
    edges: a matrix containing bond information between each atom of the molecule

    Return:
    ------
    two possible rdkit molecules (the edge matrix holds one candidate bond
    code on each side of the diagonal)
    '''
    single = Chem.rdchem.BondType.SINGLE
    bond_types = [
        Chem.rdchem.BondType.ZERO,
        single,
        Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE,
        Chem.rdchem.BondType.AROMATIC,
    ]
    # decoder of bond type corresponding with numbers
    decoder = dict(enumerate(bond_types))

    def bond_for(code):
        # Codes outside (0, 6) fall back to a single bond, as before. Codes
        # inside that range without a decoder entry (e.g. 5) used to yield
        # None, which AddBond rejects; they now fall back to single too.
        if 0 < code < 6:
            return decoder.get(code, single)
        return single

    mol1 = Chem.RWMol()  #initiate two molecules
    mol2 = Chem.RWMol()

    #create atoms
    for atom in nodes:
        atomic_num = int(atom)
        mol1.AddAtom(Chem.Atom(atomic_num))
        mol2.AddAtom(Chem.Atom(atomic_num))

    #loop through the matrix to defeaturize bonds
    for a in range(len(edges) - 1):
        b = a + 1
        mol1.AddBond(a, b, bond_for(edges[a][b]))
        mol2.AddBond(a, b, bond_for(edges[b][a]))

    return mol1, mol2
Example #16
0
def LumpH(molecule):
    """
    Replace the explicit hydrogens on each heavy atom by one pseudoatom.

    Si, Al, Mg and Na stand in for groups of 4, 3, 2 and 1 hydrogens and are
    attached with a quadruple, triple, double and single bond respectively.
    Their 'smilesSymbol' property is set to H4/H3/H2/H so that printed
    SMILES show the lumped hydrogens. Works on a copy, which is returned.
    """
    # hydrogen count -> (pseudoatom element, SMILES label, bond order)
    lumps = {
        4: ('Si', 'H4', Chem.rdchem.BondType.QUADRUPLE),
        3: ('Al', 'H3', Chem.rdchem.BondType.TRIPLE),
        2: ('Mg', 'H2', Chem.rdchem.BondType.DOUBLE),
        1: ('Na', 'H', Chem.rdchem.BondType.SINGLE),
    }

    molecule = Chem.RWMol(molecule)
    hydrogen_indices = []

    # The atom count is taken once, so pseudoatoms added below are not
    # visited themselves.
    for i in range(molecule.GetNumAtoms()):
        atom = molecule.GetAtomWithIdx(i)
        if atom.GetSymbol() == 'H':
            continue

        attached = [
            nbr.GetIdx() for nbr in atom.GetNeighbors()
            if nbr.GetSymbol() == 'H'
        ]
        hydrogen_indices.extend(attached)

        lump = lumps.get(len(attached))
        if lump is None:
            continue
        element, label, bond_order = lump
        pseudo = Chem.Atom(element)
        pseudo.SetProp('smilesSymbol', label)
        pseudo_idx = molecule.AddAtom(pseudo)
        molecule.AddBond(atom.GetIdx(), pseudo_idx, bond_order)
        molecule.GetAtomWithIdx(pseudo_idx).SetNoImplicit(True)

    # Remove from the highest index down so pending indices stay valid.
    hydrogen_indices.sort(reverse=True)
    for h_idx in hydrogen_indices:
        molecule.RemoveAtom(h_idx)
    return molecule
Example #17
0
 def decode(self, matrix):
     """Decode each entry of ``matrix`` into a fragment and a molecule SMILES.

     Every entry is a sequence of 5-value rows (atom, curr, prev, bond,
     frag), all cast to int. Rows whose atom token is 'EOS' or 'GO' are
     skipped. A non-'*' atom token adds an atom to the full molecule (and
     to the sub-molecule when frag is non-zero); a non-zero bond adds a
     bond between ``curr`` and ``prev`` in the same way. Errors while
     building or sanitizing are swallowed after printing the offending
     entry.

     Returns ``(frags, smiles)``, two lists of SMILES strings.
     """
     frags, smiles = [], []
     for adj in matrix:
         emol = Chem.RWMol()
         esub = Chem.RWMol()
         try:
             for row in adj:
                 atom, curr, prev, bond, frag = map(int, row)
                 if atom == self.tk2ix['EOS'] or atom == self.tk2ix['GO']:
                     continue
                 in_fragment = frag != 0
                 if atom != self.tk2ix['*']:
                     new_atom = Chem.Atom(self.ix2nr[atom])
                     new_atom.SetFormalCharge(self.ix2ch[atom])
                     emol.AddAtom(new_atom)
                     if in_fragment:
                         esub.AddAtom(new_atom)
                 if bond != 0:
                     bond_type = Chem.BondType(bond)
                     emol.AddBond(curr, prev, bond_type)
                     if in_fragment:
                         esub.AddBond(curr, prev, bond_type)
             Chem.SanitizeMol(emol)
             Chem.SanitizeMol(esub)
         except Exception:
             # best effort: report the entry and keep whatever was built
             print(adj)
         frags.append(Chem.MolToSmiles(esub))
         smiles.append(Chem.MolToSmiles(emol))
     return frags, smiles
 def _place_between(self, mol: Chem.RWMol, a: int, b: int, aromatic=True):
     """Insert a new carbon atom in the middle of the bond between ``a`` and ``b``.

     Prints 'FAIL' and returns None when the two atoms are not bonded.
     Otherwise the a-b bond is replaced by a-new and b-new bonds (aromatic,
     or of the original bond's type when ``aromatic`` is false), the new
     atom is positioned halfway between the two atoms in the molecule's
     conformer, and all three atoms are tagged with the '_Novel' property.
     The molecule is edited in place.
     """
     oribond = mol.GetBondBetweenAtoms(a, b)
     if oribond is None:
         print('FAIL')
         return None  # fail
     bond_type = Chem.BondType.AROMATIC if aromatic else oribond.GetBondType()

     new_idx = mol.AddAtom(Chem.Atom(6))
     involved = (mol.GetAtomWithIdx(new_idx),
                 mol.GetAtomWithIdx(a),
                 mol.GetAtomWithIdx(b))
     if aromatic:
         for atom in involved:
             atom.SetIsAromatic(True)
     # prevent constraints
     for atom in involved:
         atom.SetBoolProp('_Novel', True)

     # fix position: midpoint of the two bonded atoms
     conf = mol.GetConformer()
     pos_a = conf.GetAtomPosition(a)
     pos_b = conf.GetAtomPosition(b)
     midpoint = Point3D(pos_a.x / 2 + pos_b.x / 2,
                        pos_a.y / 2 + pos_b.y / 2,
                        pos_a.z / 2 + pos_b.z / 2)
     conf.SetAtomPosition(new_idx, midpoint)

     # fix bonds
     mol.RemoveBond(a, b)
     for neighbour in (a, b):
         mol.AddBond(neighbour, new_idx, bond_type)
Example #19
0
def find_context(atom_ids, my_mol, my_sub_m, cont_sub):
    """Return the SMILES of the core atoms plus their matching context.

    The atoms kept are ``atom_ids`` plus every atom of a ``cont_sub``
    substructure match that is also in ``my_sub_m``. All other atoms are
    replaced by dummy atoms (atomic number 0) and then deleted. The SMILES
    of the original and of the reduced molecule are printed.

    Args:
        atom_ids: atom indices of the core structure.
        my_mol: RDKit molecule to reduce.
        my_sub_m: container of atom indices; membership is tested with ``in``.
        cont_sub: RDKit query molecule describing the context.

    Returns:
        SMILES string of the reduced molecule.
    """
    # Atoms that are in both substructures
    out_m = [
        y for x in my_mol.GetSubstructMatches(cont_sub) for y in x
        if y in my_sub_m
    ]
    # Add the core structure
    out_m.extend(atom_ids)
    keep = set(out_m)  # constant-time membership in the loop below

    # get an editable mol and replace every other atom with a gap
    em = Chem.EditableMol(my_mol)
    for my_id in my_mol.GetAtoms():
        if my_id.GetIdx() not in keep:
            em.ReplaceAtom(my_id.GetIdx(), Chem.Atom(0))

    # Now remove all these atoms
    star_mol = em.GetMol()
    out_mol = Chem.DeleteSubstructs(star_mol, Chem.MolFromSmarts('[#0]'))
    out_ans = Chem.MolToSmiles(out_mol)
    # Single-argument, parenthesised prints give the same output on
    # Python 2 and Python 3.
    print("ORIG MOL %s" % Chem.MolToSmiles(my_mol))
    print("FRACT MOL %s" % out_ans)
    return out_ans
Example #20
0
def classification_report(data, model, session, sample=False):
    """Print classification reports and confusion matrices for one validation batch.

    The model's edge and node predictions (Gumbel-argmax outputs when
    ``sample`` is true, plain argmax outputs otherwise) are compared with
    the batch labels that are fed to ``model.edges_labels`` and
    ``model.nodes_labels``. The labels are passed as ``y_true`` and the
    model output as ``y_pred``; the original code had the two swapped,
    which transposes the confusion matrix and exchanges precision and
    recall.

    Args:
        data: provides ``next_validation_batch()``, ``bond_decoder_m`` and
            ``atom_decoder_m``.
        model: provides the tensors run and fed below.
        session: session whose ``run`` evaluates the model tensors.
        sample: use the Gumbel-argmax outputs instead of the argmax ones.
    """
    _, _, _, a, x, _, f, _, _ = data.next_validation_batch()

    fetches = ([model.nodes_gumbel_argmax, model.edges_gumbel_argmax]
               if sample else [model.nodes_argmax, model.edges_argmax])
    n, e = session.run(fetches,
                       feed_dict={model.edges_labels: a,
                                  model.nodes_labels: x,
                                  model.node_features: f,
                                  model.training: False,
                                  model.variational: False})
    n, e = np.argmax(n, axis=-1), np.argmax(e, axis=-1)

    # Bonds: labels `a` are the ground truth, `e` the prediction.
    y_true = a.flatten()
    y_pred = e.flatten()
    target_names = [str(Chem.rdchem.BondType.values[int(code)])
                    for code in data.bond_decoder_m.values()]
    labels = list(range(len(target_names)))

    print('######## Classification Report ########\n')
    print(sk_classification_report(y_true, y_pred, labels=labels,
                                   target_names=target_names))

    print('######## Confusion Matrix ########\n')
    print(confusion_matrix(y_true, y_pred, labels=labels))

    # Atoms: labels `x` are the ground truth, `n` the prediction.
    y_true = x.flatten()
    y_pred = n.flatten()
    target_names = [Chem.Atom(number).GetSymbol()
                    for number in data.atom_decoder_m.values()]
    labels = list(range(len(target_names)))

    print('######## Classification Report ########\n')
    print(sk_classification_report(y_true, y_pred, labels=labels,
                                   target_names=target_names))

    print('\n######## Confusion Matrix ########\n')
    print(confusion_matrix(y_true, y_pred, labels=labels))
Example #21
0
def numpy_to_rdkit(adj, nf, ef, sanitize=False):
    """
    Converts a molecule from numpy to RDKit format.

    One atom is added per entry of `nf` whose integer value is positive. A
    bond between i and j is added only when `adj` is 1 in both directions,
    no bond exists yet, and both directions of `ef[..., 0]` map to the same
    bond type in `BOND_MAP`.
    :param adj: binary numpy array of shape (N, N)
    :param nf: numpy array whose entries are each converted with `int()`
    (atomic numbers) -- NOTE(review): originally documented as shape (N, F);
    confirm
    :param ef: numpy array of shape (N, N, S)
    :param sanitize: whether to sanitize the molecule after conversion
    :return: an RDKit molecule
    """
    if rdc is None:
        raise ImportError('`numpy_to_rdkit` requires RDKit.')

    mol = rdc.RWMol()
    for features in nf:
        atomic_num = int(features)
        if atomic_num > 0:
            mol.AddAtom(rdc.Atom(atomic_num))

    # Strict upper triangle: every unordered pair once, diagonal excluded.
    rows, cols = np.triu_indices(adj.shape[-1], k=1)
    for i, j in zip(rows, cols):
        if not (adj[i, j] == adj[j, i] == 1):
            continue
        if mol.GetBondBetweenAtoms(int(i), int(j)):
            continue
        forward_type = BOND_MAP[int(ef[i, j, 0])]
        backward_type = BOND_MAP[int(ef[j, i, 0])]
        if forward_type == backward_type:
            mol.AddBond(int(i), int(j), forward_type)

    mol = mol.GetMol()
    if sanitize:
        rdc.SanitizeMol(mol)
    return mol
Example #22
0
class AtomicProperty(Descriptor):
    """Descriptor that evaluates a per-atom property getter over a molecule.

    ``prop`` may be a key of ``getters``, any callable, or an existing
    ``AtomicProperty`` (which is then returned as-is by ``__new__``).
    """

    __slots__ = "explicit_hydrogens", "prop", "_initialized"

    # Reference carbon atom used by the ``carbon`` property.
    _carbon = Chem.Atom(6)

    def __new__(cls, explicit_hydrogens, prop):
        if not isinstance(prop, cls):
            return super(AtomicProperty, cls).__new__(cls)

        # Reuse the instance; the flag makes the following __init__ a no-op.
        prop._initialized = True
        return prop

    def __init__(self, explicit_hydrogens, prop):
        if getattr(self, "_initialized", False):
            return

        self.explicit_hydrogens = explicit_hydrogens
        self.prop = getters.get(prop)

        if self.prop is None:
            # not a registered getter: accept any callable, reject the rest
            if not callable(prop):
                raise TypeError(
                    "atomic property is not callable: {!r}".format(prop))
            self.prop = prop

    def __str__(self):
        return "Prop{}".format(self.as_argument)

    def parameters(self):
        return self.explicit_hydrogens, self.prop

    def get_long(self):
        """Return the getter's ``long`` attribute, else its ``__name__``."""
        return getattr(self.prop, "long", self.prop.__name__)

    @property
    def as_argument(self):
        """The getter's ``short`` attribute, else its ``__name__``."""
        return getattr(self.prop, "short", self.prop.__name__)

    @property
    def carbon(self):
        """Value of the property for a bare carbon atom."""
        return self.prop(self._carbon)

    def calculate(self):
        """Return the property for every atom; fail when any value is NaN."""
        if getattr(self.prop, "gasteiger_charges", False):
            ComputeGasteigerCharges(self.mol)

        values = atoms_to_numpy(self.prop, self.mol)

        missing = np.isnan(values)
        if np.any(missing):
            symbols = np.array([a.GetSymbol() for a in self.mol.GetAtoms()])
            affected = set(symbols[missing])
            message = "missing {} for {}".format(self.get_long(),
                                                 list(affected))
            self.fail(ValueError(message))

        return values
Example #23
0
    def to_RDKMol(self):
        """Return an RDKMol object for the configuration, template, or subset.

        Atoms are created from ``self.atoms.atomic_numbers``, bonds from
        ``self.bonds.bonds()`` (bond orders 1, 2 and 3 only), and the atom
        coordinates are stored in a conformer. The molecule is sanitized
        before it is returned.
        """
        rdk_mol = Chem.RWMol()

        # atom id -> RDKit index, plus the indices in creation order
        index = {}
        indices = []
        for atno, _id in zip(self.atoms.atomic_numbers, self.atoms.ids):
            rdk_idx = rdk_mol.AddAtom(Chem.Atom(atno))
            index[_id] = rdk_idx
            indices.append(rdk_idx)

        order_to_type = {
            1: Chem.BondType.SINGLE,
            2: Chem.BondType.DOUBLE,
            3: Chem.BondType.TRIPLE,
        }
        for row in self.bonds.bonds():
            begin = index[row["i"]]
            end = index[row["j"]]
            rdk_mol.AddBond(begin, end, order_to_type[row["bondorder"]])

        conf = Chem.Conformer(self.atoms.n_atoms)
        for rdk_idx, xyz in zip(indices, self.atoms.coordinates):
            conf.SetAtomPosition(rdk_idx, xyz)
        rdk_mol.AddConformer(conf)

        Chem.rdmolops.SanitizeMol(rdk_mol)
        return rdk_mol
Example #24
0
def to_rdmol(plams_mol, sanitize=True):
    """
    Translate a PLAMS molecule into an RDKit molecule type

    Atoms carry the PLAMS atomic number and, when ``properties.charge`` is
    an int, that value as formal charge. Bonds use the PLAMS bond order.
    The coordinates are attached as a conformer; the molecule is sanitized
    first unless ``sanitize`` is false.
    """
    builder = Chem.EditableMol(Chem.Mol())

    for plams_atom in plams_mol.atoms:
        rd_atom = Chem.Atom(plams_atom.atnum)
        charge = plams_atom.properties.charge
        if isinstance(charge, int):
            rd_atom.SetFormalCharge(charge)
        builder.AddAtom(rd_atom)

    for bond in plams_mol.bonds:
        # RDKit indices follow the position in the PLAMS atom list
        begin = plams_mol.atoms.index(bond.atom1)
        end = plams_mol.atoms.index(bond.atom2)
        builder.AddBond(begin, end, Chem.BondType(bond.order))

    rdmol = builder.GetMol()
    if sanitize:
        Chem.SanitizeMol(rdmol)

    conf = Chem.Conformer()
    for position, plams_atom in enumerate(plams_mol.atoms):
        point = Geometry.Point3D(plams_atom._getx(), plams_atom._gety(),
                                 plams_atom._getz())
        conf.SetAtomPosition(position, point)
    rdmol.AddConformer(conf)
    return rdmol
Example #25
0
def nx_to_mol(G):
    """Rebuild an RDKit molecule from a networkx graph and sanitize it.

    Node attributes read: 'atomic_num', 'chiral_tag', 'formal_charge',
    'is_aromatic', 'hybridization', 'num_explicit_hs'.
    Edge attribute read: 'bond_type'.
    """
    atomic_nums = nx.get_node_attributes(G, 'atomic_num')
    chiral_tags = nx.get_node_attributes(G, 'chiral_tag')
    formal_charges = nx.get_node_attributes(G, 'formal_charge')
    node_is_aromatics = nx.get_node_attributes(G, 'is_aromatic')
    node_hybridizations = nx.get_node_attributes(G, 'hybridization')
    num_explicit_hss = nx.get_node_attributes(G, 'num_explicit_hs')

    mol = Chem.RWMol()
    node_to_idx = {}
    for node in G.nodes():
        rd_atom = Chem.Atom(atomic_nums[node])
        rd_atom.SetChiralTag(chiral_tags[node])
        rd_atom.SetFormalCharge(formal_charges[node])
        rd_atom.SetIsAromatic(node_is_aromatics[node])
        rd_atom.SetHybridization(node_hybridizations[node])
        rd_atom.SetNumExplicitHs(num_explicit_hss[node])
        node_to_idx[node] = mol.AddAtom(rd_atom)

    bond_types = nx.get_edge_attributes(G, 'bond_type')
    for first, second in G.edges():
        mol.AddBond(node_to_idx[first], node_to_idx[second],
                    bond_types[first, second])

    Chem.SanitizeMol(mol)
    return mol
Example #26
0
def set_structure(mol, cmat, atom_list):
    """Add atoms and single bonds to a molecule from a contact matrix.

    One atom is added per entry of ``atom_list``. A single bond is then
    added for every ``(i, j)`` where ``cmat[i, j] == 1``, except for pairs
    that involve a lithium atom. Finally implicit hydrogens are disabled on
    every atom. The molecule is modified in place; nothing is returned.

    Parameters
    ----------
    mol : rdkit.Chem.RWMol
    cmat : numpy.ndarray
    atom_list : list"""

    for element in atom_list:
        mol.AddAtom(Chem.Atom(element))

    def is_lithium(atom_idx):
        return mol.GetAtomWithIdx(atom_idx).GetSymbol() == 'Li'

    rows, cols = np.where(cmat[:, :] == 1)
    for row, col in zip(rows, cols):
        begin, end = int(row), int(col)
        # contacts involving Li are not turned into bonds
        if is_lithium(begin) or is_lithium(end):
            continue
        mol.AddBond(begin, end, Chem.BondType.SINGLE)

    for atom in mol.GetAtoms():
        atom.SetNoImplicit(True)

    return
Example #27
0
    def __init__(self,
                 atom_map1: int,
                 new_atoms_map_nums: List[int],
                 ring_key: str,
                 action_vocab: dict,
                 is_hard: bool = False):
        """Set up an add-ring action anchored at the atom with map number ``atom_map1``.

        ``new_atoms_map_nums`` is rotated so that ``atom_map1`` comes first,
        followed by the entries after it and then the entries before it.
        Feature vectors are precomputed for a new aromatic carbon atom and
        a new aromatic bond, both flagged as edited.
        """
        super(AddRingAction, self).__init__(atom_map1, -1, action_vocab,
                                            is_hard)
        self.ring_key = ring_key

        # order new atom map nums so map num of the existing atom is first
        pivot = new_atoms_map_nums.index(self.atom_map1)
        after_pivot = new_atoms_map_nums[pivot + 1:]
        before_pivot = new_atoms_map_nums[:pivot]
        self.new_atoms_map_nums = [self.atom_map1] + after_pivot + before_pivot

        ring_atom = Chem.Atom(6)
        ring_atom.SetIsAromatic(True)
        ring_atom.SetBoolProp('is_edited', True)
        self.new_atom_features = get_atom_features(
            ring_atom, ORDERED_ATOM_OH_KEYS, atom_prop2oh=self.prop2oh['atom'])

        bond_props = (
            ('bond_type', Chem.rdchem.BondType.AROMATIC),
            ('bond_stereo', 0),
            ('is_edited', 1),
        )
        self.new_bond_features = [
            self.prop2oh['bond'][key][val] for key, val in bond_props
        ]
Example #28
0
	def graph_to_mol(nodes, adjacency, allow_submolecule=False):
		"""Convert node types and an adjacency matrix into an RDKit RWMol.

		Node ids that are negative or not below
		``MosesDataset.num_node_types()`` are treated as invalid. When some
		(but not all) nodes are invalid, they are dropped from both ``nodes``
		and ``adjacency``. Each remaining node becomes an atom via
		``MOSES_ATOMIC_NUM_LIST``; each non-zero upper-triangle adjacency
		entry becomes a bond via ``MOSES_BOND_DECODER``.

		Args:
			nodes: 1-D array or torch tensor of node type ids.
			adjacency: 2-D array or torch tensor of bond codes (0 = no bond).
			allow_submolecule: if true, reduce the graph with
				``find_largest_submolecule`` first.

		Returns:
			The (unsanitized) ``Chem.RWMol``.
		"""
		global MOSES_ATOMIC_NUM_LIST, MOSES_BOND_DECODER

		if isinstance(nodes, torch.Tensor):
			nodes = nodes.detach().cpu().numpy()
		if isinstance(adjacency, torch.Tensor):
			adjacency = adjacency.detach().cpu().numpy()

		# Parentheses matter: `|` binds tighter than `>=`, so without them the
		# expression compared `(nodes < 0) | nodes` against the type count and
		# negative ids were never flagged as invalid.
		invalid_atoms = (nodes < 0) | (nodes >= MosesDataset.num_node_types())
		if np.any(invalid_atoms) and not np.all(invalid_atoms):
			valid_idx = np.where(~invalid_atoms)[0]
			nodes = nodes[valid_idx]
			adjacency = adjacency[valid_idx,:][:,valid_idx]

		if allow_submolecule:
			nodes, adjacency = find_largest_submolecule(nodes, adjacency)

		mol = Chem.RWMol()
		for n in nodes:
			# still needed when every node was invalid (no filtering above)
			if n < 0:
				continue
			mol.AddAtom(Chem.Atom(int(MOSES_ATOMIC_NUM_LIST[n])))

		for i in range(adjacency.shape[0]):
			for j in range(i+1, adjacency.shape[1]):
				if adjacency[i,j] == 0:
					continue
				mol.AddBond(i, j, MOSES_BOND_DECODER[adjacency[i,j]])
		return mol
Example #29
0
  def assertBondStereoRoundTrips(self, fname):
    """Assert that bond stereo of the mol file ``fname`` survives a rebuild.

    The file's ``_Name`` property holds the reference SMILES. It must match
    the isomeric SMILES of the parsed molecule and of a copy rebuilt atom
    by atom from atomic numbers, formal charges, bonds and coordinates.
    """
    path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', fname)
    mol = Chem.MolFromMolFile(path)
    refSmiles = mol.GetProp("_Name")
    self.assertTrue(len(refSmiles) > 0)
    self.assertEqual(Chem.MolToSmiles(mol, isomericSmiles=True), refSmiles)

    # now test Chem.DetectBondStereoChemistry more directly by constructing the molecule from scratch
    source_conf = mol.GetConformer(0)
    rebuilt_conf = Chem.Conformer(mol.GetNumAtoms())
    rebuilt = Chem.RWMol()

    for source_atom in mol.GetAtoms():
        copy_atom = Chem.Atom(source_atom.GetAtomicNum())
        copy_atom.SetFormalCharge(source_atom.GetFormalCharge())
        rebuilt.AddAtom(copy_atom)

        # carry the coordinates over at the same atom index
        idx = source_atom.GetIdx()
        rebuilt_conf.SetAtomPosition(idx, source_conf.GetAtomPosition(idx))

    for source_bond in mol.GetBonds():
        rebuilt.AddBond(source_bond.GetBeginAtomIdx(),
                        source_bond.GetEndAtomIdx(),
                        Chem.BondType(source_bond.GetBondType()))
    rebuilt.AddConformer(rebuilt_conf)

    Chem.SanitizeMol(rebuilt)
    Chem.DetectBondStereoChemistry(rebuilt, rebuilt.GetConformer())

    # these aren't necessary for this specific test case, but are for
    # a more general conversion routine, so would like to see them
    # tested eventually
    # Chem.AssignAtomChiralTagsFromStructure(rebuilt)
    # Chem.AssignStereochemistry(rebuilt)

    self.assertEqual(Chem.MolToSmiles(rebuilt, isomericSmiles=True), refSmiles)
Example #30
0
def CHToN(m, cn_idx, subst=None):
    """
    arguments: mol object, connection atom index, substituent (optional and not necessary for this function,
    but when called from Modifications2 it requires subst as an argument)

    returns: new mol object

    this function replaces the ring atom with index cn_idx with a N atom, after removing
    its neighbours that are not part of the ring

    raises: ValueError if the atom at cn_idx is not part of any ring (previously an
    UnboundLocalError)

    note: removing an atom renumbers every atom after it, so the neighbours are removed
    from the highest index down and cn_idx is shifted accordingly before the replacement;
    the nitrogen may therefore sit at a lower index than the cn_idx passed in
    """

    print('chose CHtoN')
    mw = Chem.RWMol(m)
    Chem.Kekulize(mw, clearAromaticFlags=True)

    #the ring the atom is a part of (the last one listed when there are several)
    target_ring = None
    for ring in mw.GetRingInfo().AtomRings():
        if cn_idx in ring:
            target_ring = ring  #ring containing the connection atom
    if target_ring is None:
        raise ValueError('atom {} is not part of a ring'.format(cn_idx))

    #collect the ngbs that are not part of the ring before editing anything
    to_remove = sorted(
        (ngb.GetIdx() for ngb in mw.GetAtomWithIdx(cn_idx).GetNeighbors()
         if ngb.GetIdx() not in target_ring),
        reverse=True)

    #highest index first keeps the remaining indices to remove valid
    for ngb_idx in to_remove:
        mw.RemoveAtom(ngb_idx)
        if ngb_idx < cn_idx:
            cn_idx -= 1  #the connection atom moved down by one

    #replace C with N
    mw.ReplaceAtom(cn_idx, Chem.Atom(7))

    return mw