def to_graph(smiles, dataset):
    """Convert a SMILES string into one-hot node features and typed edges.

    The molecule is parsed with RDKit, kekulized when ``need_kekulize`` says
    so, and stripped of stereochemistry before it is encoded.

    Args:
        smiles: SMILES string describing the molecule.
        dataset: Dataset name selecting the atom encoding. ``'qm9'`` and
            ``'cep'`` encode the bare atom symbol; ``'zinc'`` encodes the
            symbol, total valence and formal charge, e.g. ``"C4(0)"``.
            For any other value no node features are produced.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom and ``edges`` holds ``(begin_idx, bond_code, end_idx)``
        tuples. ``([], [])`` is returned when the SMILES cannot be parsed or,
        for ``'zinc'``, when an atom type is not in the dataset vocabulary.

    Raises:
        ValueError: For ``'qm9'``/``'cep'`` when an atom symbol is missing
            from the dataset's atom types (raised by ``.index``).
        AssertionError: If a bond still maps to code 3 after kekulization.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return [], []

    # Kekulize in place when required.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)

    # Remove stereo information, such as inward and outward edges.
    Chem.RemoveStereochemistry(mol)

    edges = []
    for bond in mol.GetBonds():
        bond_code = bond_dict[str(bond.GetBondType())]
        # Code 3 must not survive kekulization (presumably the aromatic
        # entry of bond_dict -- confirm against its definition).
        assert bond_code != 3
        edges.append((bond.GetBeginAtomIdx(), bond_code, bond.GetEndAtomIdx()))

    nodes = []
    if dataset not in ('qm9', 'cep', 'zinc'):
        # Unknown dataset names have never produced node features; keep that.
        return nodes, edges

    # Fetch the vocabulary once instead of several times per atom.
    atom_types = dataset_info(dataset)['atom_types']
    num_types = len(atom_types)
    for atom in mol.GetAtoms():
        if dataset == 'zinc':
            # Encode as symbol + total valence + "(formal charge)".
            atom_str = "%s%i(%i)" % (
                atom.GetSymbol(),
                atom.GetTotalValence(),
                atom.GetFormalCharge(),
            )
            if atom_str not in atom_types:
                print('unrecognized atom type %s' % atom_str)
                return [], []
        else:
            atom_str = atom.GetSymbol()
        nodes.append(onehot(atom_types.index(atom_str), num_types))

    return nodes, edges
def _build_drug_graph(self, smiles):
    """Build a molecular graph from a SMILES string.

    TODO: cite the original source of this routine.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A ``(nodes, edges, mol)`` tuple. ``nodes`` is a float tensor built
        from one ``onehot(...)`` row per atom, ``edges`` is a tensor built
        from ``(begin_idx, bond_code, end_idx)`` rows, and ``mol`` is the
        RDKit molecule after optional kekulization and stereo removal.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``, or if an atom symbol is
            missing from ``self.dataset_info['atom_types']`` (raised by
            ``.index``).
        AssertionError: If a bond still maps to code 3 after kekulization.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError('Molecule construction failed on SMILES %s' % smiles)

    # Kekulize in place when required.
    if self.need_kekulize(mol):
        rdmolops.Kekulize(mol)

    # Remove stereo information, such as inward and outward edges.
    Chem.RemoveStereochemistry(mol)

    edges = []
    for bond in mol.GetBonds():
        bond_code = self.bond_dict[str(bond.GetBondType())]
        # Code 3 must not survive kekulization (presumably the aromatic
        # entry of bond_dict -- confirm against its definition).
        assert bond_code != 3
        edges.append((bond.GetBeginAtomIdx(), bond_code, bond.GetEndAtomIdx()))

    # Resolve the vocabulary once rather than twice per atom.
    atom_types = self.dataset_info['atom_types']
    num_types = len(atom_types)
    nodes = [
        onehot(atom_types.index(atom.GetSymbol()), num_types)
        for atom in mol.GetAtoms()
    ]

    nodes = torch.tensor(nodes).float()
    # NOTE: for a molecule without bonds this is torch.tensor([]), i.e. an
    # empty float tensor of shape (0,), not an integer tensor of shape (0, 3).
    edges = torch.tensor(edges)
    return nodes, edges, mol
def to_graph(smiles, dataset):
    """Convert a SMILES string into an explicit-hydrogen molecular graph.

    Atoms are encoded from their symbol, total valence and formal charge,
    e.g. ``"C4(0)"``, against the dataset's atom-type vocabulary.

    Args:
        smiles: SMILES string describing the molecule.
        dataset: Dataset name passed to ``dataset_info`` to obtain the
            ``'atom_types'`` vocabulary.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom (hydrogens included) and ``edges`` holds
        ``(begin_idx, bond_code, end_idx)`` tuples. ``([], [])`` is returned
        when the SMILES cannot be parsed or when any atom type is not in the
        vocabulary.
    """
    mol = Chem.MolFromSmiles(smiles)
    # The parse result has to be checked before AddHs: MolFromSmiles yields
    # None for an invalid SMILES, and AddHs cannot accept None.
    if mol is None:
        return [], []
    mol = Chem.AddHs(mol)

    # Kekulize in place when required, then drop stereo information.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)
    Chem.RemoveStereochemistry(mol)

    edges = [
        (
            bond.GetBeginAtomIdx(),
            bond_dict[str(bond.GetBondType())],
            bond.GetEndAtomIdx(),
        )
        for bond in mol.GetBonds()
    ]

    # Fetch the vocabulary once instead of three times per atom.
    atom_types = dataset_info(dataset)['atom_types']
    num_types = len(atom_types)
    nodes = []
    for atom in mol.GetAtoms():
        atom_str = "%s%i(%i)" % (
            atom.GetSymbol(),
            atom.GetTotalValence(),
            atom.GetFormalCharge(),
        )
        if atom_str not in atom_types:
            # One unknown atom type invalidates the whole molecule.
            return [], []
        nodes.append(onehot(atom_types.index(atom_str), num_types))

    return nodes, edges
def prepare_mol(self, mol: rdchem.Mol) -> Tuple[str, rdchem.Mol]:
    """Standardize a molecule and its SMILES to common rules.

    The molecule is written out as canonical SMILES and parsed back, so the
    returned molecule is rebuilt from that canonical string. This method
    should be called before `get_input_feats`.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest.

    Returns:
    --------
    canonical_smiles: str
        Canonical SMILES representation of the molecule.

    mol: rdkit.Chem.rdchem.Mol
        Molecule rebuilt from the canonical SMILES, with hydrogens added
        when `self.add_Hs` is set and kekulized when `self.kekulize` is set.
    """
    smiles_text = rdmolfiles.MolToSmiles(mol, canonical=True)
    rebuilt = rdmolfiles.MolFromSmiles(smiles_text)

    # AddHs returns a new molecule, whereas Kekulize edits its argument.
    prepared = rdmolops.AddHs(rebuilt) if self.add_Hs else rebuilt
    if self.kekulize:
        rdmolops.Kekulize(prepared)

    return smiles_text, prepared
def to_graph(mol, dataset):
    """Encode an RDKit molecule as one-hot node features and typed edges.

    Note that ``mol`` is modified in place: it is kekulized when
    ``need_kekulize`` says so and its stereochemistry is removed.

    Args:
        mol: RDKit molecule, or ``None``.
        dataset: Dataset name passed to ``get_atom_types`` to obtain the
            atom-type vocabulary.

    Returns:
        A ``(nodes, edges)`` pair. ``nodes`` holds one ``onehot(...)`` result
        per atom and ``edges`` holds ``(begin_idx, bond_code, end_idx)``
        tuples. ``([], [])`` is returned when ``mol`` is ``None``.

    Raises:
        ValueError: If an atom symbol is missing from the vocabulary (raised
            by ``.index``).
        AssertionError: If a bond still maps to code 3 after kekulization.
    """
    if mol is None:
        return [], []

    # Kekulize in place when required, then drop stereo information.
    if need_kekulize(mol):
        rdmolops.Kekulize(mol)
    Chem.RemoveStereochemistry(mol)

    atom_types = get_atom_types(dataset)

    edges = []
    for bond in mol.GetBonds():
        bond_code = bond_dict[str(bond.GetBondType())]
        # Code 3 must not survive kekulization (presumably the aromatic
        # entry of bond_dict -- confirm against its definition).
        assert bond_code != 3
        edges.append((bond.GetBeginAtomIdx(), bond_code, bond.GetEndAtomIdx()))

    num_types = len(atom_types)
    nodes = [
        onehot(atom_types.index(str(atom.GetSymbol())), num_types)
        for atom in mol.GetAtoms()
    ]
    return nodes, edges
if test_mol != None: energy = get_BO_energy(raw_mol) if smiles not in smiles_list and energy_of_reactant - energy < E_cutoff: smiles_list.append(smiles) molecules.append(raw_mol) smiles_list.insert(0, Chem.MolToSmiles(mol)) molecules.insert(0, mol) return smiles_list, molecules if __name__ == "__main__": smiles_list = ['CC', 'C=C', 'C#C'] smiles_list = ['C=C.C=CC=C'] smiles_list = ['C(=O)O'] heterolytic = False E_cutoff = 200 for smiles in smiles_list: mol = Chem.MolFromSmiles(smiles) rdmolops.Kekulize(mol, clearAromaticFlags=True) charge = Chem.GetFormalCharge(mol) mol = Chem.AddHs(mol) elementary_smiles, elementary_mols = take_elementary_step( mol, charge, E_cutoff, heterolytic) print "len(elementary_smiles)", len(elementary_smiles) print elementary_smiles