def hydrolise(mol):
    """Hydrolyse every peptide and ester bond of ``mol`` and return the pieces.

    Each match of ``C(=O)NC`` (peptide) or ``C(=O)OC`` (ester) has the bond
    between the carbonyl carbon and the hetero atom removed, and a new oxygen
    is attached to the carbonyl carbon by a single bond.

    A short topology diagnosis is printed: cutting ``n`` bonds that all lie on
    one ring leaves ``n`` fragments, cutting ``n`` bonds of a chain leaves
    ``n + 1`` fragments.

    Args:
        mol: RDKit molecule to hydrolyse.

    Returns:
        The fragments as produced by ``Chem.GetMolFrags(..., asMols=True)``.
    """
    peptide_bond = Chem.MolFromSmiles('C(=O)NC')
    ester_bond = Chem.MolFromSmiles('C(=O)OC')
    peptide_ids = mol.GetSubstructMatches(peptide_bond)
    ester_ids = mol.GetSubstructMatches(ester_bond)

    nm = Chem.EditableMol(mol)
    # A match is ordered like the query atoms: (carbonyl C, carbonyl O,
    # hetero atom, next C); only the first and third entries are needed.
    for matches in (peptide_ids, ester_ids):
        for x, _, y, __ in matches:
            nm.RemoveBond(x, y)
            nm.AddBond(x, nm.AddAtom(Chem.Atom('O')), Chem.BondType.SINGLE)

    h_m = nm.GetMol()
    fragments = Chem.GetMolFrags(h_m, asMols=True)

    n_cleaved = len(peptide_ids) + len(ester_ids)
    print()
    if len(fragments) == n_cleaved:
        print('Cyclic structure!', end='')
    elif len(fragments) == n_cleaved + 1:
        # A chain cut in n places falls apart into n + 1 pieces.
        print('Linear structure!')
    else:
        print('Unknown molecule topology!')
    return fragments
def delete_bonds(mol, bonds, ftype, hac):
    """Break the given bonds, cap both ends with dummy atoms, select fragments.

    Args:
        mol: parent RDKit molecule (not modified; an editable copy is used).
        bonds: iterable of atom-index pairs; ``b[0]``/``b[1]`` are the bonded atoms.
        ftype: passed through unchanged to ``select_fragments``.
        hac: passed through unchanged to ``select_fragments``.

    Returns:
        Whatever ``select_fragments`` returns for the fragmented SMILES.
    """
    editable = Chem.EditableMol(mol)
    single = Chem.BondType.SINGLE

    for bond in bonds:
        begin, end = bond[0], bond[1]
        editable.RemoveBond(begin, end)
        # Mark each freshly cut end with an attachment point (atomic number 0).
        for endpoint in (begin, end):
            attachment = editable.AddAtom(Chem.Atom(0))
            editable.AddBond(endpoint, attachment, single)

    # The molecule is deliberately NOT sanitised: every removed bond is
    # replaced by a single bond to a dummy atom on both ends.
    fragmented_smiles = Chem.MolToSmiles(editable.GetMol(), True)
    return select_fragments(fragmented_smiles, ftype, hac)
def fragment_into_dummy_smiles(offmol, cleave_bonds=None, unique_r_groups=True):
    """Cleave bonds of a molecule and return the fragments as SMILES.

    Every cleaved bond is replaced, on both sides, by a dummy atom (``*``)
    bonded with the original bond type and labelled with an atom-map number
    (the "R group" label).

    Args:
        offmol: object exposing ``to_rdkit()`` that returns an RDKit molecule.
        cleave_bonds: sequence of atom-index pairs to cleave. ``None`` (the
            default) means no bond is cleaved.
        unique_r_groups: when true, the k-th cleaved bond gets the labels
            ``(2k + 1, 2k + 2)``; otherwise every bond gets ``(1, 2)``.

    Returns:
        ``(smiles, r_linkages)``: one SMILES string per fragment, and a dict
        mapping the first label of each cleaved bond to a one-element list
        holding the second label.
    """
    # Avoid a shared mutable default argument.
    if cleave_bonds is None:
        cleave_bonds = []

    rdmol = Chem.RWMol(offmol.to_rdkit())
    for atom in rdmol.GetAtoms():
        atom.SetAtomMapNum(0)
    utils.assign_stereochemistry(rdmol)

    dummy = Chem.Atom("*")
    r_linkages = {}

    n_bonds = len(cleave_bonds)
    if unique_r_groups:
        r_groups = [(i, i + 1) for i in range(1, n_bonds * 2, 2)]
    else:
        r_groups = [(1, 2)] * n_bonds

    for bond, rs in zip(cleave_bonds, r_groups):
        bond_type = rdmol.GetBondBetweenAtoms(*bond).GetBondType()
        rdmol.RemoveBond(*bond)
        r_linkages[rs[0]] = [rs[1]]
        for atom_index, r in zip(bond, rs):
            dummy_copy = Chem.Atom(dummy)
            dummy_copy.SetAtomMapNum(r)
            new_atom_index = rdmol.AddAtom(dummy_copy)
            rdmol.AddBond(atom_index, new_atom_index, bond_type)

    mols = Chem.GetMolFrags(rdmol, asMols=True)
    for mol in mols:
        counter = 1
        Chem.AssignStereochemistry(mol)
        # Number the real atoms of each fragment consecutively from 1;
        # dummy atoms keep their R-group label.
        for atom in mol.GetAtoms():
            if atom.GetSymbol() != "*":
                atom.SetAtomMapNum(counter)
                counter += 1

    smiles = [utils.mol_to_smiles(m) for m in mols]
    return smiles, r_linkages
def HToOtherElement(m, cn_idx, Z=None):
    """Replace one H bound to atom ``cn_idx`` with an atom of atomic number Z.

    arguments: mol object, connection atom index, Z (optional; when omitted
               an element is drawn at random from ``Z_list``)
    returns:   new mol object (an embedded, MMFF-optimised ``RWMol``)
    raises:    ValueError if atom ``cn_idx`` has no explicit hydrogen neighbour

    After the replacement, explicit hydrogens are added to the new atom until
    its neighbour count reaches its total valence.
    """
    print('chose HToOtherElement')
    if Z is None:
        Z = choice(Z_list)  # pick random element
    mw = Chem.RWMol(m)

    # First explicit hydrogen attached to the connection atom.
    H_idx = next(
        (at.GetIdx()
         for at in mw.GetAtomWithIdx(cn_idx).GetNeighbors()
         if at.GetSymbol() == 'H'),
        None)
    if H_idx is None:
        raise ValueError(
            'atom {} has no explicit hydrogen neighbour to replace'.format(cn_idx))

    mw.ReplaceAtom(H_idx, Chem.Atom(Z))
    Chem.SanitizeMol(mw)

    # add Hs to satisfy the valence of the new atom
    while (len(mw.GetAtomWithIdx(H_idx).GetNeighbors())
           < mw.GetAtomWithIdx(H_idx).GetTotalValence()):
        idx = mw.AddAtom(Chem.Atom(1))
        mw.AddBond(H_idx, idx, Chem.BondType.SINGLE)

    Chem.SanitizeMol(mw)
    AllChem.EmbedMolecule(mw)
    AllChem.MMFFOptimizeMolecule(mw)
    return mw
def split_heterocycle_bonds(rwmol, bonds):
    """Break the given bonds and cap the carbon ends with oxygen atoms.

    Args:
        rwmol: editable RDKit molecule; modified in place.
        bonds: iterable of bond indices to remove.

    Returns:
        The same ``rwmol`` object.
    """
    # TODO: sometimes adds OH when rBAN doesn't (when valence == 1 and type == HETEROCYCLE),
    # but in other cases (e.g. val == 1, type == Oxazole), the result matches w/ rBAN
    carbons = set()
    # Bonds are processed from the highest index down.
    for bond_idx in sorted(bonds, reverse=True):
        bond = rwmol.GetBondWithIdx(bond_idx)
        ends = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        rwmol.RemoveBond(*ends)
        carbons.update(
            idx for idx in ends
            if rwmol.GetAtomWithIdx(idx).GetSymbol() == 'C')

    # Explicit valence of the cut carbon -> bond orders of the oxygens to add.
    caps_by_valence = {
        1: (rdc.BondType.SINGLE, rdc.BondType.DOUBLE),
        2: (rdc.BondType.DOUBLE,),
        3: (rdc.BondType.SINGLE,),
    }
    for idx in carbons:
        carbon = rwmol.GetAtomWithIdx(idx)
        carbon.SetNumExplicitHs(0)
        carbon.UpdatePropertyCache()
        for order in caps_by_valence.get(carbon.GetExplicitValence(), ()):
            oxygen = rwmol.AddAtom(rdc.Atom(8))
            rwmol.AddBond(idx, oxygen, order)
    return rwmol
def ReadSymbols(self, tree):
    """Translate the symbol in ``tree[0]`` into an RDKit (query) atom.

    Recognised symbols, checked in this order:
      * ``'any atom'`` / ``'$'``   - atomic number > 0
      * ``'heteroatom'`` / ``'&'`` - N, O, P or S
      * ``'heavy atom'`` / ``'X'`` - atomic number > 1
      * lower-case first letter    - aromatic atom of that element
      * ``'M'``                    - "metal": atomic number > 19
      * anything else              - query for that element symbol

    Raises:
        RINGReaderError: if RDKit does not know the element symbol.
    """
    token = tree[0]

    if token in ['any atom', '$']:
        return rdqueries.AtomNumGreaterQueryAtom(0)

    if token in ['heteroatom', '&']:
        # N, O, P, S
        atom = rdqueries.AtomNumEqualsQueryAtom(7)
        for atomic_num in (8, 15, 16):
            atom.ExpandQuery(
                rdqueries.AtomNumEqualsQueryAtom(atomic_num),
                how=Chem.rdchem.CompositeQueryType.COMPOSITE_OR)
        return atom

    if token in ['heavy atom', 'X']:
        # heavier than H
        return rdqueries.AtomNumGreaterQueryAtom(1)

    if token[0].islower():
        # aromatic atom: capitalise to obtain the element symbol
        symbol = token[0].upper() + token[1:]
        try:
            atom = Chem.Atom(symbol)
            atom.SetIsAromatic(True)
        except RuntimeError:
            msg = 'Element aromatic ' + symbol + ' not found'
            raise RINGReaderError(msg)
        return atom

    if token == 'M':
        # metal
        return rdqueries.AtomNumGreaterQueryAtom(19)

    try:
        atom = Chem.Atom(token)
        atom = rdqueries.AtomNumEqualsQueryAtom(atom.GetAtomicNum())
    except RuntimeError:
        msg = 'Element ' + token + ' not found'
        raise RINGReaderError(msg)
    return atom
def split_hetero(rwmol, bonds):
    """Remove the listed bonds and add oxygens to the carbons that were cut.

    Depending on the explicit valence left on a cut carbon:
      * 1 -> one single-bonded and one double-bonded oxygen are added
      * 2 -> one double-bonded oxygen is added
      * 3 -> one single-bonded oxygen is added
    Other valences leave the carbon unchanged.

    Args:
        rwmol: editable RDKit molecule; modified in place.
        bonds: iterable of bond indices to remove.

    Returns:
        The same ``rwmol`` object.
    """
    def attach_oxygen(carbon_idx, order):
        # Add a new oxygen atom and bond it to the carbon.
        oxygen_idx = rwmol.AddAtom(rdc.Atom(8))
        rwmol.AddBond(carbon_idx, oxygen_idx, order)

    cut_carbons = set()
    for bond_index in sorted(bonds, reverse=True):
        bond = rwmol.GetBondWithIdx(bond_index)
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        rwmol.RemoveBond(begin_idx, end_idx)
        for atom_idx in (begin_idx, end_idx):
            if rwmol.GetAtomWithIdx(atom_idx).GetSymbol() == 'C':
                cut_carbons.add(atom_idx)

    for carbon_idx in cut_carbons:
        carbon = rwmol.GetAtomWithIdx(carbon_idx)
        carbon.SetNumExplicitHs(0)
        carbon.UpdatePropertyCache()
        valence = carbon.GetExplicitValence()
        if valence == 1:
            attach_oxygen(carbon_idx, rdc.BondType.SINGLE)
            attach_oxygen(carbon_idx, rdc.BondType.DOUBLE)
        elif valence == 2:
            attach_oxygen(carbon_idx, rdc.BondType.DOUBLE)
        elif valence == 3:
            attach_oxygen(carbon_idx, rdc.BondType.SINGLE)
    return rwmol
def IonizeAtom(mol, atomI, ion_mode):
    """Build an ionised copy of ``mol`` at atom ``atomI``.

    Args:
        mol: RDKit molecule (left untouched; a copy is modified).
        atomI: index of the atom to ionise.
        ion_mode: ``"protonated"`` (add H, charge +1), ``"sodiated"`` (add Na,
            charge +1) or ``"deprotonated"`` (remove one H, charge -1; only
            applied when the atom carries at least one hydrogen).

    Returns:
        ``(ionized_mol, charge)``. When the mode does not apply, both values
        are the empty list, as before.

    Note:
        Deprotonation removes an atom, so atoms with a higher index than the
        removed hydrogen are renumbered in the returned molecule.
    """
    ionized_mol = []
    charge = []

    if ion_mode in ("protonated", "sodiated"):
        charge = 1
        ionized_mol = Chem.RWMol(mol)
        adduct_number = 1 if ion_mode == "protonated" else 11
        adduct = ionized_mol.AddAtom(Chem.Atom(adduct_number))
        ionized_mol.AddBond(atomI, adduct, Chem.BondType.SINGLE)
        ionized_mol.GetAtomWithIdx(atomI).SetFormalCharge(charge)
    elif (ion_mode == "deprotonated"
          and mol.GetAtomWithIdx(atomI).GetTotalNumHs(includeNeighbors=True) > 0):
        charge = -1
        ionized_mol = Chem.RWMol(mol)
        site = ionized_mol.GetAtomWithIdx(atomI)
        explicit_hs = [
            nbr.GetIdx() for nbr in site.GetNeighbors()
            if nbr.GetAtomicNum() == 1
        ]
        # Set the charge BEFORE removing the hydrogen: RemoveAtom renumbers
        # every atom after the removed one, so atomI may afterwards refer to
        # a different atom.
        site.SetFormalCharge(charge)
        if explicit_hs:
            # Same hydrogen as before: the last explicit H neighbour.
            ionized_mol.RemoveAtom(explicit_hs[-1])
        # NOTE(review): when the hydrogens are only implicit there is no atom
        # to delete; only the formal charge is changed here. Confirm this is
        # the desired handling for molecules without explicit Hs.

    if ionized_mol:
        # NOTE(review): `potential` is not defined in this function; it is
        # presumably a module-level name - confirm.
        AllChem.EmbedMolecule(ionized_mol, potential)
    return ionized_mol, charge
def join_mols(mol):
    '''join fragged mols, based on dummy atom positions

    The first two and the last two entries returned by ``find_dummy_atoms``
    are used. All four dummy atoms are turned into carbons, the first two are
    bonded (aromatic) to the neighbours of the last two, and the last two
    dummy atoms are then deleted.

    Returns the sanitised joined molecule.
    '''
    # presumably each entry is (dummy atom index, [neighbour indices, ...]);
    # verify against find_dummy_atoms
    dummy_atoms = find_dummy_atoms(mol)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(dummy_atoms)

    temp_em = Chem.EditableMol(mol)
    for entry in (dummy_atoms[1], dummy_atoms[0],
                  dummy_atoms[-1], dummy_atoms[-2]):
        temp_em.ReplaceAtom(entry[0], Chem.Atom(6))

    temp_em.AddBond(dummy_atoms[0][0], dummy_atoms[-2][1][0],
                    Chem.BondType.AROMATIC)
    temp_em.AddBond(dummy_atoms[1][0], dummy_atoms[-1][1][0],
                    Chem.BondType.AROMATIC)

    # Removing an atom renumbers the atoms after it, so always delete the
    # higher index first; otherwise the second removal could hit the wrong atom.
    for idx in sorted((dummy_atoms[-1][0], dummy_atoms[-2][0]), reverse=True):
        temp_em.RemoveAtom(idx)

    tm = temp_em.GetMol()
    Chem.SanitizeMol(tm)
    return tm
def add_atoms(new_mol, node_symbol, dataset):
    """Append one atom per entry of ``node_symbol`` to ``new_mol``.

    Args:
        new_mol: editable RDKit molecule that receives the atoms.
        node_symbol: iterable of atom-type numbers, looked up in
            ``dataset_info(dataset)``.
        dataset: ``'qm9'``, ``'cep'`` or ``'zinc'``; any other value adds
            nothing.

    For ``'zinc'`` the formal charge is parsed from the text between the
    parentheses of the ``atom_types`` entry.
    """
    for number in node_symbol:
        if dataset in ('qm9', 'cep'):
            element = dataset_info(dataset)['number_to_atom'][number]
            new_mol.AddAtom(Chem.Atom(element))
        elif dataset == 'zinc':
            new_atom = Chem.Atom(dataset_info(dataset)['number_to_atom'][number])
            type_label = dataset_info(dataset)['atom_types'][number]
            charge_text = type_label.split('(')[1].strip(')')
            new_atom.SetFormalCharge(int(charge_text))
            new_mol.AddAtom(new_atom)
def test_correct_mol():
    """Build a small over-bonded C3N molecule and print it before/after ``correct_mol``."""
    bond_type = Chem.rdchem.BondType

    mol = Chem.RWMol()
    # three carbons followed by one nitrogen
    for atomic_num in (6, 6, 6, 7):
        mol.AddAtom(Chem.Atom(atomic_num))
    for begin, end, order in (
            (0, 1, bond_type.DOUBLE),
            (1, 2, bond_type.TRIPLE),
            (0, 3, bond_type.TRIPLE),
    ):
        mol.AddBond(begin, end, order)

    print(Chem.MolToSmiles(mol))  # C#C=C#N
    mol = correct_mol(mol)
    print(Chem.MolToSmiles(mol))  # C=C=C=N
def create_rdkit_molecule(molecule_name, molecules, molecule_numberings, bonds):
    '''
    Create an RDKit molecule from dictionaries describing its structure.

    A placeholder atom (atomic number 0) is inserted at index 0 and for every
    atom number that is absent, so that an atom's number equals its index in
    the RDKit molecule.

    Args:
        molecule_name (str): name of a molecule
        molecules (dict): for each molecule, a list of
            (atom number, atom type) tuples
        molecule_numberings (dict): for each molecule, a list of the atom
            numbers in the molecule
        bonds (dict): for each molecule, the list of its bonds as tuples
            (atom_number_1, atom_number_2, bond_type)

    Returns:
        rdmol (Mol): the molecule as an RDKit Mol (not sanitised)

    Raises:
        ValueError: if the molecule has no atom numbers (``max`` of an empty
            sequence).
    '''
    # Create an editable molecule.
    rdedmol = Chem.EditableMol(Chem.Mol())

    atoms = {
        atom_tuple[0]: atom_tuple[1]
        for atom_tuple in sorted(molecules[molecule_name])
    }
    atom_numberings = sorted(molecule_numberings[molecule_name])

    # Add a first atom to keep most of the atom numbering.
    rdedmol.AddAtom(Chem.Atom(0))

    # Add atoms from the molecule.
    # Add absent atoms (as atomic number 0) to keep the atom numbering.
    for atom_number in range(1, max(atom_numberings) + 1):
        rdedmol.AddAtom(Chem.Atom(atoms.get(atom_number, 0)))

    # Add bonds from the molecule.
    for bond in bonds[molecule_name]:
        bond = tuple(bond)
        rdedmol.AddBond(bond[0], bond[1], bond[2])

    # Create molecule.
    return rdedmol.GetMol()
def test_dative_bond():
    """Check transition-metal detection and dative-bond assignment on a Co complex."""
    # cobalt is a transition metal
    assert dm.is_transition_metal(Chem.Atom("Co"))
    # sodium is not a transition metal
    assert not dm.is_transition_metal(Chem.Atom("Na"))

    input_smiles = "CC1=CC=CC(=C1N\\2O[Co]3(ON(\\C=[N]3\\C4=C(C)C=CC=C4C)C5=C(C)C=CC=C5C)[N](=C2)\\C6=C(C)C=CC=C6C)C"
    expected_smiles = (
        "CC1=CC=CC(C)=C1N1C=N(C2=C(C)C=CC=C2C)->[Co]2(<-N(C3=C(C)C=CC=C3C)=CN(C3=C(C)C=CC=C3C)O2)O1"
    )

    unsanitized = Chem.MolFromSmiles(input_smiles, sanitize=False)
    with_dative = dm.set_dative_bonds(unsanitized)
    output_smiles = Chem.MolToSmiles(with_dative)

    assert output_smiles == expected_smiles
    # the produced SMILES must be readable again
    assert dm.to_mol(output_smiles) is not None
def split_aa_pk_hybrid(smi):
    """Split a hybrid SMILES at the bond reported by ``find_aa_pk_bond``.

    Oxygen atoms are added to the first atom of that bond, depending on the
    oxygens it already carries; the bond is then removed and the two sides
    are written out separately.

    Args:
        smi: SMILES string of the hybrid molecule.

    Returns:
        ``(aa_part_smi, pk_part_smi, cond_type)`` where ``aa_part_smi`` is the
        fragment containing the first atom of the broken bond and
        ``cond_type`` is one of ``'KS'``, ``'KS+KR'``, ``'KS+KR+DH'``,
        ``'KS+KR+DH+ER'``.

    Raises:
        PKError: unparsable SMILES, more than one oxygen on the anchor atom,
            an unsupported bond type, or a missing fragment after the split.
    """
    mol = rdc.MolFromSmiles(smi)
    if not mol:
        raise PKError

    bond_break = find_aa_pk_bond(mol)
    mw = rdc.RWMol(mol)
    anchor = bond_break[0]
    single, double = rdc.BondType.SINGLE, rdc.BondType.DOUBLE

    oxs = [
        nbr.GetIdx()
        for nbr in mw.GetAtomWithIdx(anchor).GetNeighbors()
        if nbr.GetAtomicNum() == 8
    ]
    if len(oxs) > 1:
        raise PKError

    if oxs:
        # One oxygen already present: add the complementary one.
        existing = mw.GetBondBetweenAtoms(anchor, oxs[0]).GetBondType()
        if existing == double:
            cond_type, added_order = 'KS', single
        elif existing == single:
            cond_type, added_order = 'KS+KR', double
        else:
            raise PKError
        new_ox = mw.AddAtom(rdc.Atom(8))
        mw.AddBond(anchor, new_ox, added_order)
    else:
        # No oxygen: classify by the order of the bond being broken, then add
        # both a single- and a double-bonded oxygen.
        broken_order = mw.GetBondBetweenAtoms(*bond_break).GetBondType()
        if broken_order == double:
            cond_type = 'KS+KR+DH'
        elif broken_order == single:
            cond_type = 'KS+KR+DH+ER'
        else:
            raise PKError
        first_ox = mw.AddAtom(rdc.Atom(8))
        second_ox = mw.AddAtom(rdc.Atom(8))
        mw.AddBond(anchor, first_ox, single)
        mw.AddBond(anchor, second_ox, double)

    mw.RemoveBond(*bond_break)

    aa_part_smi = None
    pk_part_smi = None
    for frag_atoms in rdc.GetMolFrags(mw):
        frag_smiles = rdc.MolFragmentToSmiles(mw, frag_atoms)
        if anchor in frag_atoms:
            aa_part_smi = frag_smiles
        else:
            pk_part_smi = frag_smiles

    if not (aa_part_smi and pk_part_smi):
        raise PKError
    return aa_part_smi, pk_part_smi, cond_type
def de_featurizer(nodes, edges):
    '''Build molecules from a graph representation with nodes and edges.

    Only consecutive atoms ``(a, a + 1)`` are bonded. The bond code is read
    from above the diagonal for the first molecule and from below the
    diagonal for the second one.

    Parameters:
    ------
    nodes: an array of the molecule with atomic numbers
    edges: a matrix containing bond codes between each pair of atoms
           (1 single, 2 double, 3 triple, 4 aromatic)

    Return:
    ------
    two rdkit molecules (one per triangle of the edge matrix). A code that
    is not positive or has no known bond type is treated as a single bond.
    '''
    mol1 = Chem.RWMol()  # built from edges[a][b]
    mol2 = Chem.RWMol()  # built from edges[b][a]

    bond_types = [
        Chem.rdchem.BondType.ZERO,
        Chem.rdchem.BondType.SINGLE,
        Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE,
        Chem.rdchem.BondType.AROMATIC,
    ]
    # bond code -> bond type
    decoder = dict(enumerate(bond_types))
    single = Chem.rdchem.BondType.SINGLE

    def decode(code):
        # The previous bound (0 < code < 6) let code 5 through although it
        # has no decoder entry, handing None to AddBond. Unknown codes now
        # fall back to a single bond like every other out-of-range value.
        if code > 0:
            return decoder.get(code, single)
        return single

    # create atoms
    for atom in nodes:
        mol1.AddAtom(Chem.Atom(int(atom)))
        mol2.AddAtom(Chem.Atom(int(atom)))

    # bond each atom to its successor
    for a in range(len(edges) - 1):
        b = a + 1
        mol1.AddBond(a, b, decode(edges[a][b]))
        mol2.AddBond(a, b, decode(edges[b][a]))
    return mol1, mol2
def LumpH(molecule):
    """
    Lump the hydrogen atoms on each heavy atom into a single pseudoatom.

    Si, Al, Mg and Na stand in for four, three, two and one hydrogen; they
    are attached with a quadruple, triple, double and single bond
    respectively. This does not affect printed SMILES because the
    ``smilesSymbol`` property is set to ``H4``/``H3``/``H2``/``H``.

    Returns a new ``RWMol``; the input molecule is not modified.
    """
    molecule = Chem.RWMol(molecule)
    bond_type = Chem.rdchem.BondType

    # hydrogen count -> (pseudoatom element, SMILES symbol, bond order)
    lumps = {
        4: ('Si', 'H4', bond_type.QUADRUPLE),
        3: ('Al', 'H3', bond_type.TRIPLE),
        2: ('Mg', 'H2', bond_type.DOUBLE),
        1: ('Na', 'H', bond_type.SINGLE),
    }

    hydrogen_indices = []
    # The atom count is read once, so pseudoatoms appended below are not visited.
    for i in range(molecule.GetNumAtoms()):
        atom = molecule.GetAtomWithIdx(i)
        if atom.GetSymbol() == 'H':
            continue

        attached = [
            nbr.GetIdx() for nbr in atom.GetNeighbors()
            if nbr.GetSymbol() == 'H'
        ]
        hydrogen_indices.extend(attached)

        lump = lumps.get(len(attached))
        if lump is None:
            continue
        element, smiles_symbol, order = lump
        pseudo = Chem.Atom(element)
        pseudo.SetProp('smilesSymbol', smiles_symbol)
        idx = molecule.AddAtom(pseudo)
        molecule.AddBond(atom.GetIdx(), idx, order)
        molecule.GetAtomWithIdx(idx).SetNoImplicit(True)

    # Delete hydrogens from the highest index down.
    for idx in sorted(hydrogen_indices, reverse=True):
        molecule.RemoveAtom(idx)
    return molecule
def decode(self, matrix):
    """Decode encoded molecules into fragment and full-molecule SMILES.

    Each item of ``matrix`` is a sequence of rows
    ``(atom, curr, prev, bond, frag)``. Rows whose atom token is ``EOS`` or
    ``GO`` are skipped; a row with the ``*`` token adds no atom but may still
    add a bond. Rows with ``frag != 0`` are also added to the fragment
    molecule.

    If building or sanitising a molecule raises, the offending item is
    printed and whatever was built so far is still written out.

    Returns:
        ``(frags, smiles)``: two lists of SMILES strings, one entry per item.
    """
    frags, smiles = [], []
    for adj in matrix:
        full_mol = Chem.RWMol()
        frag_mol = Chem.RWMol()
        try:
            for row in adj:
                atom, curr, prev, bond, frag = (int(value) for value in row)
                if atom == self.tk2ix['EOS'] or atom == self.tk2ix['GO']:
                    continue
                in_fragment = frag != 0
                if atom != self.tk2ix['*']:
                    new_atom = Chem.Atom(self.ix2nr[atom])
                    new_atom.SetFormalCharge(self.ix2ch[atom])
                    full_mol.AddAtom(new_atom)
                    if in_fragment:
                        frag_mol.AddAtom(new_atom)
                if bond != 0:
                    bond_type = Chem.BondType(bond)
                    full_mol.AddBond(curr, prev, bond_type)
                    if in_fragment:
                        frag_mol.AddBond(curr, prev, bond_type)
            Chem.SanitizeMol(full_mol)
            Chem.SanitizeMol(frag_mol)
        except Exception as e:
            print(adj)
            # raise e
        frags.append(Chem.MolToSmiles(frag_mol))
        smiles.append(Chem.MolToSmiles(full_mol))
    return frags, smiles
def _place_between(self, mol: Chem.RWMol, a: int, b: int, aromatic=True):
    """Insert a new carbon atom in the middle of the bond between ``a`` and ``b``.

    The bond a-b is replaced by a-new and new-b. With ``aromatic=True`` both
    new bonds are aromatic and all three atoms are flagged aromatic;
    otherwise the new bonds copy the type of the original bond. The new atom
    is placed at the midpoint of ``a`` and ``b`` in the molecule's conformer,
    and all three atoms get the boolean property ``_Novel``.

    Prints ``FAIL`` and returns ``None`` when ``a`` and ``b`` are not bonded;
    the molecule is modified in place.
    """
    oribond = mol.GetBondBetweenAtoms(a, b)
    if oribond is None:
        print('FAIL')
        return None  # fail
    bt = Chem.BondType.AROMATIC if aromatic else oribond.GetBondType()

    idx = mol.AddAtom(Chem.Atom(6))
    involved = (mol.GetAtomWithIdx(idx),
                mol.GetAtomWithIdx(a),
                mol.GetAtomWithIdx(b))
    if aromatic:
        for atom in involved:
            atom.SetIsAromatic(True)
    # prevent constraints
    for atom in involved:
        atom.SetBoolProp('_Novel', True)

    # fix position: midpoint between the two original atoms
    conf = mol.GetConformer()
    pos_A = conf.GetAtomPosition(a)
    pos_B = conf.GetAtomPosition(b)
    midpoint = Point3D(pos_A.x / 2 + pos_B.x / 2,
                       pos_A.y / 2 + pos_B.y / 2,
                       pos_A.z / 2 + pos_B.z / 2)
    conf.SetAtomPosition(idx, midpoint)

    # fix bonds
    mol.RemoveBond(a, b)
    for end in (a, b):
        mol.AddBond(end, idx, bt)
def find_context(atom_ids, my_mol, my_sub_m, cont_sub):
    """Return the SMILES of a fragment together with its context atoms.

    The atoms kept are ``atom_ids`` (the core) plus every atom that is part
    of a ``cont_sub`` match and is also contained in ``my_sub_m``. All other
    atoms are replaced by dummy atoms and deleted.

    Args:
        atom_ids: iterable of atom indices forming the core.
        my_mol: RDKit molecule to cut down.
        my_sub_m: container of atom indices allowed into the context.
        cont_sub: substructure query describing the context.

    Returns:
        SMILES string of the remaining atoms. The original and the reduced
        SMILES are also printed.
    """
    # A set gives constant-time membership tests in the loop below.
    keep = set(atom_ids)
    for match in my_mol.GetSubstructMatches(cont_sub):
        # Only atoms present in both substructures are kept.
        keep.update(idx for idx in match if idx in my_sub_m)

    # Replace everything else with a gap (atomic number 0) ...
    em = Chem.EditableMol(my_mol)
    for atom in my_mol.GetAtoms():
        if atom.GetIdx() not in keep:
            em.ReplaceAtom(atom.GetIdx(), Chem.Atom(0))

    # ... and remove all the gaps in one go.
    star_mol = em.GetMol()
    out_mol = Chem.DeleteSubstructs(star_mol, Chem.MolFromSmarts('[#0]'))
    out_ans = Chem.MolToSmiles(out_mol)

    # Single-argument print gives identical output on Python 2 and 3.
    print("ORIG MOL " + Chem.MolToSmiles(my_mol))
    print("FRACT MOL " + out_ans)
    return out_ans
def classification_report(data, model, session, sample=False):
    """Print classification reports and confusion matrices for bonds and atoms.

    One validation batch is pulled from ``data`` and run through ``model``.
    The batch's edge and node labels are the ground truth and the model
    outputs are the predictions.

    Args:
        data: dataset providing ``next_validation_batch()``,
            ``bond_decoder_m`` and ``atom_decoder_m``.
        model: model exposing the tensors fetched and fed below.
        session: session whose ``run`` evaluates the model.
        sample: when true, the ``*_gumbel_argmax`` outputs are used instead
            of the plain ``*_argmax`` outputs.
    """
    _, _, _, a, x, _, f, _, _ = data.next_validation_batch()

    if sample:
        fetches = [model.nodes_gumbel_argmax, model.edges_gumbel_argmax]
    else:
        fetches = [model.nodes_argmax, model.edges_argmax]
    n, e = session.run(fetches,
                       feed_dict={model.edges_labels: a,
                                  model.nodes_labels: x,
                                  model.node_features: f,
                                  model.training: False,
                                  model.variational: False})
    n, e = np.argmax(n, axis=-1), np.argmax(e, axis=-1)

    # Bonds. The labels (a) are the truth and the model output (e) is the
    # prediction; they used to be passed the other way round, which
    # transposes the confusion matrix and swaps precision and recall.
    y_true = a.flatten()
    y_pred = e.flatten()
    target_names = [str(Chem.rdchem.BondType.values[int(bond)])
                    for bond in data.bond_decoder_m.values()]
    labels = list(range(len(target_names)))

    print('######## Classification Report ########\n')
    print(sk_classification_report(y_true, y_pred, labels=labels,
                                   target_names=target_names))

    print('######## Confusion Matrix ########\n')
    print(confusion_matrix(y_true, y_pred, labels=labels))

    # Atoms: same convention, labels (x) versus model output (n).
    y_true = x.flatten()
    y_pred = n.flatten()
    target_names = [Chem.Atom(atomic_num).GetSymbol()
                    for atomic_num in data.atom_decoder_m.values()]
    labels = list(range(len(target_names)))

    print('######## Classification Report ########\n')
    print(sk_classification_report(y_true, y_pred, labels=labels,
                                   target_names=target_names))

    print('\n######## Confusion Matrix ########\n')
    print(confusion_matrix(y_true, y_pred, labels=labels))
def numpy_to_rdkit(adj, nf, ef, sanitize=False):
    """
    Converts a molecule from numpy to RDKit format.

    Entries of `nf` that are not positive add no atom. A bond is added for
    each pair `i < j` whose adjacency entries are 1 in both directions and
    whose two edge features map to the same bond type.
    :param adj: binary numpy array of shape (N, N)
    :param nf: numpy array of shape (N, F)
    :param ef: numpy array of shape (N, N, S)
    :param sanitize: whether to sanitize the molecule after conversion
    :return: an RDKit molecule
    """
    if rdc is None:
        raise ImportError('`numpy_to_rdkit` requires RDKit.')

    mol = rdc.RWMol()
    for atomic_num in map(int, nf):
        if atomic_num > 0:
            mol.AddAtom(rdc.Atom(atomic_num))

    # k=1 walks the strict upper triangle, i.e. every pair with i < j.
    rows, cols = np.triu_indices(adj.shape[-1], k=1)
    for i, j in zip(rows, cols):
        if not (adj[i, j] == adj[j, i] == 1):
            continue
        begin, end = int(i), int(j)
        if mol.GetBondBetweenAtoms(begin, end):
            continue
        forward_type = BOND_MAP[int(ef[i, j, 0])]
        backward_type = BOND_MAP[int(ef[j, i, 0])]
        if forward_type == backward_type:
            mol.AddBond(begin, end, forward_type)

    mol = mol.GetMol()
    if sanitize:
        rdc.SanitizeMol(mol)
    return mol
class AtomicProperty(Descriptor):
    """Descriptor that evaluates a per-atom property function.

    ``prop`` is first looked up in ``getters``; if that yields nothing, a
    callable ``prop`` is used as-is. Passing an existing ``AtomicProperty``
    as ``prop`` returns that same instance unchanged.
    """

    # Attributes an instance may carry.
    __slots__ = "explicit_hydrogens", "prop", "_initialized"

    def __str__(self):
        """Return ``"Prop"`` followed by the property's short name."""
        return "Prop{}".format(self.as_argument)

    def get_long(self):
        """Return the property's ``long`` attribute, or its ``__name__`` if absent."""
        return getattr(self.prop, "long", self.prop.__name__)

    @property
    def as_argument(self):
        """Return the property's ``short`` attribute, or its ``__name__`` if absent."""
        return getattr(self.prop, "short", self.prop.__name__)

    def parameters(self):
        """Return the tuple ``(explicit_hydrogens, prop)``."""
        return self.explicit_hydrogens, self.prop

    def __new__(cls, explicit_hydrogens, prop):
        # An existing instance passed as `prop` is returned as-is. It is
        # flagged so that the __init__ call that follows does not overwrite it.
        if isinstance(prop, cls):
            prop._initialized = True
            return prop

        return super(AtomicProperty, cls).__new__(cls)

    def __init__(self, explicit_hydrogens, prop):
        """Store ``explicit_hydrogens`` and resolve ``prop`` to a property function.

        Raises:
            TypeError: if ``prop`` is neither known to ``getters`` nor callable.
        """
        # Set by __new__ when an existing instance is being reused.
        if getattr(self, "_initialized", False):
            return

        self.explicit_hydrogens = explicit_hydrogens

        # A result from `getters` takes precedence ...
        self.prop = getters.get(prop)
        if self.prop is not None:
            return

        # ... otherwise any callable is accepted directly.
        if callable(prop):
            self.prop = prop
            return

        raise TypeError("atomic property is not callable: {!r}".format(prop))

    def calculate(self):
        """Evaluate the property for the atoms of ``self.mol``.

        Gasteiger charges are computed first when the property function has a
        truthy ``gasteiger_charges`` attribute. If the result contains NaN,
        ``self.fail`` is called with a ``ValueError`` naming the affected
        element symbols.

        Returns:
            The array produced by ``atoms_to_numpy``.
        """
        if getattr(self.prop, "gasteiger_charges", False):
            ComputeGasteigerCharges(self.mol)

        r = atoms_to_numpy(self.prop, self.mol)

        nans = np.isnan(r)
        if np.any(nans):
            # Distinct element symbols of the atoms whose value is NaN.
            atms = set(
                np.array([a.GetSymbol() for a in self.mol.GetAtoms()])[nans])
            self.fail(
                ValueError("missing {} for {}".format(self.get_long(), list(atms))))

        return r

    # Shared carbon atom (atomic number 6) used by `carbon` below.
    _carbon = Chem.Atom(6)

    @property
    def carbon(self):
        """Return the property function's value for a carbon atom."""
        return self.prop(self._carbon)
def to_RDKMol(self):
    """Return an RDKMol object for the configuration, template, or subset.

    Atoms are created from ``self.atoms.atomic_numbers``, bonds from
    ``self.bonds.bonds()`` (bond orders 1, 2 and 3 only) and a conformer from
    ``self.atoms.coordinates``. The molecule is sanitized before it is
    returned.
    """
    rdk_mol = Chem.RWMol()

    # Map our atom ids to RDKit atom indices, keeping creation order too.
    id_to_index = {}
    created = []
    for atomic_number, atom_id in zip(self.atoms.atomic_numbers, self.atoms.ids):
        rdk_index = rdk_mol.AddAtom(Chem.Atom(atomic_number))
        id_to_index[atom_id] = rdk_index
        created.append(rdk_index)

    order_to_type = {
        1: Chem.BondType.SINGLE,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
    }
    for bond in self.bonds.bonds():
        begin = id_to_index[bond["i"]]
        end = id_to_index[bond["j"]]
        rdk_mol.AddBond(begin, end, order_to_type[bond["bondorder"]])

    conformer = Chem.Conformer(self.atoms.n_atoms)
    for rdk_index, xyz in zip(created, self.atoms.coordinates):
        conformer.SetAtomPosition(rdk_index, xyz)
    rdk_mol.AddConformer(conformer)

    Chem.rdmolops.SanitizeMol(rdk_mol)
    return rdk_mol
def to_rdmol(plams_mol, sanitize=True):
    """
    Translate a PLAMS molecule into an RDKit molecule type

    Atoms keep their order; a formal charge is copied only when
    ``atom.properties.charge`` is an ``int``. Bond orders are converted with
    ``Chem.BondType(bond.order)``, and the coordinates are stored in a
    conformer added to the result.

    :param plams_mol: PLAMS molecule to convert
    :param sanitize: whether to sanitize the RDKit molecule
    :return: the RDKit molecule
    """
    # Create rdkit molecule
    editable = Chem.EditableMol(Chem.Mol())

    for plams_atom in plams_mol.atoms:
        rd_atom = Chem.Atom(plams_atom.atnum)
        charge = plams_atom.properties.charge
        if isinstance(charge, int):
            rd_atom.SetFormalCharge(charge)
        editable.AddAtom(rd_atom)

    for bond in plams_mol.bonds:
        begin = plams_mol.atoms.index(bond.atom1)
        end = plams_mol.atoms.index(bond.atom2)
        editable.AddBond(begin, end, Chem.BondType(bond.order))

    rdmol = editable.GetMol()
    if sanitize:
        Chem.SanitizeMol(rdmol)

    # Copy the coordinates into a conformer.
    conformer = Chem.Conformer()
    for position, plams_atom in enumerate(plams_mol.atoms):
        point = Geometry.Point3D(plams_atom._getx(),
                                 plams_atom._gety(),
                                 plams_atom._getz())
        conformer.SetAtomPosition(position, point)
    rdmol.AddConformer(conformer)
    return rdmol
def nx_to_mol(G):
    """Convert a networkx graph into a sanitized RDKit molecule.

    Nodes must carry the attributes ``atomic_num``, ``chiral_tag``,
    ``formal_charge``, ``is_aromatic``, ``hybridization`` and
    ``num_explicit_hs``; edges must carry ``bond_type``.
    """
    mol = Chem.RWMol()

    atomic_nums = nx.get_node_attributes(G, 'atomic_num')
    # Atom setter -> per-node values, applied in this order.
    atom_setters = (
        ('SetChiralTag', nx.get_node_attributes(G, 'chiral_tag')),
        ('SetFormalCharge', nx.get_node_attributes(G, 'formal_charge')),
        ('SetIsAromatic', nx.get_node_attributes(G, 'is_aromatic')),
        ('SetHybridization', nx.get_node_attributes(G, 'hybridization')),
        ('SetNumExplicitHs', nx.get_node_attributes(G, 'num_explicit_hs')),
    )

    node_to_idx = {}
    for node in G.nodes():
        atom = Chem.Atom(atomic_nums[node])
        for setter_name, values in atom_setters:
            getattr(atom, setter_name)(values[node])
        node_to_idx[node] = mol.AddAtom(atom)

    bond_types = nx.get_edge_attributes(G, 'bond_type')
    for first, second in G.edges():
        mol.AddBond(node_to_idx[first], node_to_idx[second],
                    bond_types[first, second])

    Chem.SanitizeMol(mol)
    return mol
def set_structure(mol, cmat, atom_list):
    """Creates single bonds between atoms based on contact matrix.

    Adds one atom per entry of ``atom_list``, then a single bond for every
    matrix entry equal to 1, except where either atom is lithium. Finally
    implicit hydrogens are disabled on all atoms. ``mol`` is modified in
    place and nothing is returned.

    Parameters
    ----------
    mol : rdkit.Chem.RWMol
    cmat : numpy.ndarray
    atom_list : list
    """
    for element in atom_list:
        mol.AddAtom(Chem.Atom(element))

    for i, j in zip(*np.where(cmat[:, :] == 1)):
        begin, end = int(i), int(j)
        touches_lithium = (
            mol.GetAtomWithIdx(begin).GetSymbol() == 'Li'
            or mol.GetAtomWithIdx(end).GetSymbol() == 'Li')
        if not touches_lithium:
            mol.AddBond(begin, end, Chem.BondType.SINGLE)

    for atom in mol.GetAtoms():
        atom.SetNoImplicit(True)
    return
def __init__(self, atom_map1: int, new_atoms_map_nums: List[int], ring_key: str, action_vocab: dict, is_hard: bool = False):
    """Set up a ring-addition action anchored at an existing atom.

    Args:
        atom_map1: map number of the existing atom the ring is attached to.
        new_atoms_map_nums: map numbers of the ring atoms; must contain
            ``atom_map1`` (``list.index`` raises ``ValueError`` otherwise).
        ring_key: identifier of the ring, stored as ``self.ring_key``.
        action_vocab: forwarded to the parent constructor.
        is_hard: forwarded to the parent constructor.
    """
    super(AddRingAction, self).__init__(atom_map1, -1, action_vocab, is_hard)
    self.ring_key = ring_key

    # Rotate the map numbers so that the existing atom comes first.
    pivot = new_atoms_map_nums.index(self.atom_map1)
    after_pivot = new_atoms_map_nums[pivot + 1:]
    before_pivot = new_atoms_map_nums[:pivot]
    self.new_atoms_map_nums = [self.atom_map1] + after_pivot + before_pivot

    # Features of a new ring atom: an aromatic carbon flagged as edited.
    ring_atom = Chem.Atom(6)
    ring_atom.SetIsAromatic(True)
    ring_atom.SetBoolProp('is_edited', True)
    self.new_atom_features = get_atom_features(
        ring_atom, ORDERED_ATOM_OH_KEYS, atom_prop2oh=self.prop2oh['atom'])

    # Features of a new ring bond: aromatic, stereo 0, flagged as edited.
    bond_properties = (
        ('bond_type', Chem.rdchem.BondType.AROMATIC),
        ('bond_stereo', 0),
        ('is_edited', 1),
    )
    bond_features = []
    for key, val in bond_properties:
        bond_features.append(self.prop2oh['bond'][key][val])
    self.new_bond_features = bond_features
def graph_to_mol(nodes, adjacency, allow_submolecule=False):
    """Build an RDKit molecule from node types and an adjacency matrix.

    Args:
        nodes: 1-D array (or ``torch.Tensor``) of node-type indices into
            ``MOSES_ATOMIC_NUM_LIST``.
        adjacency: 2-D array (or ``torch.Tensor``) of bond codes; ``0`` means
            "no bond", other values are looked up in ``MOSES_BOND_DECODER``.
        allow_submolecule: when true, ``find_largest_submolecule`` is applied
            before the molecule is built.

    Returns:
        An ``RWMol`` (not sanitised).

    Node types outside ``[0, MosesDataset.num_node_types())`` are dropped
    together with their adjacency rows and columns, unless every node is
    invalid, in which case nothing is filtered.
    """
    if isinstance(nodes, torch.Tensor):
        nodes = nodes.detach().cpu().numpy()
    if isinstance(adjacency, torch.Tensor):
        adjacency = adjacency.detach().cpu().numpy()

    # Parentheses are required: `|` binds tighter than `>=`, so the previous
    # unparenthesised form evaluated `((nodes < 0) | nodes) >= K` and never
    # flagged negative node ids.
    invalid_atoms = (nodes < 0) | (nodes >= MosesDataset.num_node_types())
    if np.any(invalid_atoms) and not np.all(invalid_atoms):
        valid_idx = np.where(~invalid_atoms)[0]
        nodes = nodes[valid_idx]
        adjacency = adjacency[valid_idx, :][:, valid_idx]

    if allow_submolecule:
        nodes, adjacency = find_largest_submolecule(nodes, adjacency)

    mol = Chem.RWMol()
    for n in nodes:
        if n < 0:
            continue
        mol.AddAtom(Chem.Atom(int(MOSES_ATOMIC_NUM_LIST[n])))

    # Only the upper triangle is read; each pair is bonded at most once.
    for i in range(adjacency.shape[0]):
        for j in range(i + 1, adjacency.shape[1]):
            if adjacency[i, j] == 0:
                continue
            mol.AddBond(i, j, MOSES_BOND_DECODER[adjacency[i, j]])
    return mol
def assertBondStereoRoundTrips(self, fname):
    """Assert that bond stereo of a test mol file survives rebuilding the molecule.

    The file's ``_Name`` property holds the reference SMILES. The molecule is
    checked as read, then rebuilt from scratch (atoms, charges, bonds and
    coordinates only) and checked again after ``DetectBondStereoChemistry``.
    """
    mol_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', fname)
    original = Chem.MolFromMolFile(mol_path)

    expected = original.GetProp("_Name")
    self.assertTrue(len(expected) > 0)
    self.assertEqual(Chem.MolToSmiles(original, isomericSmiles=True), expected)

    # now test Chem.DetectBondStereoChemistry more directly by constructing the molecule from scratch
    source_conf = original.GetConformer(0)
    rebuilt_conf = Chem.Conformer(original.GetNumAtoms())
    rebuilt = Chem.RWMol()

    for source_atom in original.GetAtoms():
        copy_atom = Chem.Atom(source_atom.GetAtomicNum())
        copy_atom.SetFormalCharge(source_atom.GetFormalCharge())
        rebuilt.AddAtom(copy_atom)

        atom_index = source_atom.GetIdx()
        rebuilt_conf.SetAtomPosition(atom_index,
                                     source_conf.GetAtomPosition(atom_index))

    for source_bond in original.GetBonds():
        rebuilt.AddBond(source_bond.GetBeginAtomIdx(),
                        source_bond.GetEndAtomIdx(),
                        Chem.BondType(source_bond.GetBondType()))
    rebuilt.AddConformer(rebuilt_conf)

    Chem.SanitizeMol(rebuilt)
    Chem.DetectBondStereoChemistry(rebuilt, rebuilt.GetConformer())

    # these aren't necessary for this specific test case, but are for
    # a more general conversion routine, so would like to see them
    # tested eventually
    # Chem.AssignAtomChiralTagsFromStructure(newmol)
    # Chem.AssignStereochemistry(newmol)

    self.assertEqual(Chem.MolToSmiles(rebuilt, isomericSmiles=True), expected)
def CHToN(m, cn_idx, subst=None):
    """Replace the ring atom ``cn_idx`` with a nitrogen.

    arguments: mol object, connection atom index, substituent (optional and
               not used by this function, but when called from
               Modifications2 it requires subst as an argument)
    returns:   new mol object (a kekulized ``RWMol``)
    raises:    ValueError if atom ``cn_idx`` is not part of any ring

    The neighbours of the atom that are not part of its ring are removed.
    Because atoms are deleted, atoms with a higher index than a removed one
    are renumbered in the returned molecule.
    """
    print('chose CHtoN')
    mw = Chem.RWMol(m)
    Chem.Kekulize(mw, clearAromaticFlags=True)

    # the ring the atom is a part of (the last one listed, if several)
    target_ring = None
    for ring in mw.GetRingInfo().AtomRings():
        if cn_idx in ring:
            target_ring = ring
    if target_ring is None:
        raise ValueError('atom {} is not part of a ring'.format(cn_idx))

    # Collect the neighbours to drop before editing anything, so the
    # neighbour list is not modified while it is being walked.
    to_remove = sorted(
        (ngb.GetIdx()
         for ngb in mw.GetAtomWithIdx(cn_idx).GetNeighbors()
         if ngb.GetIdx() not in target_ring),
        reverse=True)

    # replace C with N while cn_idx still refers to the intended atom:
    # RemoveAtom renumbers every atom after the removed one
    mw.ReplaceAtom(cn_idx, Chem.Atom(7))

    # remove the ngbs that are not part of the ring, highest index first so
    # the remaining indices in the list stay valid
    for idx in to_remove:
        mw.RemoveAtom(idx)
    return mw