def str_to_mol(identifier, single_bonds=False):
    """
    Build a molecule from an InChI or SMILES string.

    Identifiers that start with 'InChI' are read with ``fromInChI`` using the
    'rdkit-first' backend; every other identifier is read with ``fromSMILES``.

    Args:
        identifier: string identifier (InChI or SMILES)
        single_bonds: when True, return ``toSingleBonds()`` of the parsed
                      molecule instead of the parsed molecule itself
    """
    if identifier.startswith('InChI'):
        parsed = molecule.Molecule().fromInChI(identifier, backend='rdkit-first')
    else:
        parsed = molecule.Molecule().fromSMILES(identifier)

    return parsed.toSingleBonds() if single_bonds else parsed
def _rdkit_translator(input_object, identifier_type, mol=None): """ Converts between formats using RDKit. If input is a :class:`Molecule`, the identifier_type is used to determine the output type. If the input is a `str`, then the identifier_type is used to identify the input, and the desired output is assumed to be a :class:`Molecule` object. Args: input_object: either molecule or string identifier identifier_type: format of string identifier 'inchi' -> InChI 'inchikey' -> InChI Key 'sma' -> SMARTS 'smi' -> SMILES mol: molecule object for output (optional) """ if identifier_type == 'inchi' and not Chem.inchi.INCHI_AVAILABLE: raise DependencyError("RDKit installed without InChI. Please reinstall to read and write InChI strings.") if isinstance(input_object, str): # We are converting from a string identifier to a molecule if identifier_type == 'inchi': rdkitmol = Chem.inchi.MolFromInchi(input_object, removeHs=False) elif identifier_type == 'sma': rdkitmol = Chem.MolFromSmarts(input_object) elif identifier_type == 'smi': rdkitmol = Chem.MolFromSmiles(input_object) else: raise ValueError('Identifier type {0} is not supported for reading using RDKit.'.format(identifier_type)) if rdkitmol is None: raise ValueError("Could not interpret the identifier {0!r}".format(input_object)) if mol is None: mol = mm.Molecule() output = fromRDKitMol(mol, rdkitmol) elif isinstance(input_object, mm.Molecule): # We are converting from a molecule to a string identifier if identifier_type == 'smi': rdkitmol = toRDKitMol(input_object, sanitize=False) else: rdkitmol = toRDKitMol(input_object, sanitize=True) if identifier_type == 'inchi': output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon') elif identifier_type == 'inchikey': inchi = toInChI(input_object) output = Chem.inchi.InchiToInchiKey(inchi) elif identifier_type == 'sma': output = Chem.MolToSmarts(rdkitmol) elif identifier_type == 'smi': if input_object.isAromatic(): output = Chem.MolToSmiles(rdkitmol) else: output = 
Chem.MolToSmiles(rdkitmol, kekuleSmiles=True) else: raise ValueError('Identifier type {0} is not supported for writing using RDKit.'.format(identifier_type)) else: raise ValueError('Unexpected input format. Should be a Molecule or a string.') return output
def geo_to_rmg_mol(geo):
    """
    Convert a ``(symbols, coords)`` geometry into a molecule.

    A geometry made of exactly two 'H' atoms is built directly with
    ``Molecule.fromXYZ``; any other geometry is converted with
    ``geo_to_pybel_mol`` and then ``pybel_to_rmg``.

    Args:
        geo: pair of (atomic symbols, coordinates)
    """
    symbols, coords = geo

    is_h2 = len(symbols) == 2 and all(sym == 'H' for sym in symbols)
    if not is_h2:
        return pybel_to_rmg(geo_to_pybel_mol(geo))

    atomic_numbers = np.array([atomic_symbol_dict[sym] for sym in symbols])
    h2 = molecule.Molecule()
    h2.fromXYZ(atomic_numbers, coords)
    return h2
def _openbabel_translator(input_object, identifier_type, mol=None):
    """
    Translate between string identifiers and molecules with OpenBabel.

    The direction is chosen by the type of ``input_object``: a `str` is
    parsed (``identifier_type`` is passed to OpenBabel as the input format)
    and a :class:`Molecule` is returned, while a :class:`Molecule` is
    written out as a string in the format named by ``identifier_type``.

    Args:
        input_object: either molecule or string identifier
        identifier_type: format of string identifier
            'inchi'    -> InChI
            'inchikey' -> InChI Key
            'smi'      -> SMILES
        mol: molecule object for output (optional)

    Raises:
        ValueError: unknown identifier type when writing, or an input that
                    is neither a string nor a Molecule.
    """
    converter = openbabel.OBConversion()

    if isinstance(input_object, str):
        # String identifier -> Molecule
        converter.SetInFormat(identifier_type)
        parsed = openbabel.OBMol()
        # NOTE: the result of ReadString is not inspected here.
        converter.ReadString(parsed, input_object)
        parsed.AddHydrogens()
        # In OpenBabel 3+ obmol.AssignSpinMultiplicity(True) does nothing.
        # Rather than calling obatom.SetSpinMultiplicity on each atom here,
        # multiplicities are left blank and fixed in from_ob_mol().
        target = mm.Molecule() if mol is None else mol
        return from_ob_mol(target, parsed)

    if not isinstance(input_object, mm.Molecule):
        raise ValueError('Unexpected input format. Should be a Molecule or a string.')

    # Molecule -> string identifier: pick the output format and its options.
    if identifier_type == 'inchi':
        out_format, options = 'inchi', ('w',)
    elif identifier_type == 'inchikey':
        out_format, options = 'inchi', ('w', 'K')
    elif identifier_type == 'smi':
        # 'i' turns off isomer and stereochemistry information
        out_format, options = 'can', ('i',)
    else:
        raise ValueError('Unexpected identifier type {0}.'.format(identifier_type))

    converter.SetOutFormat(out_format)
    for option in options:
        converter.AddOption(option)

    return converter.WriteString(to_ob_mol(input_object)).strip()
def to_rmg_mol(self):
    """
    Return the RMG molecule for this object, building it on first use.

    The built molecule is stored on ``self.rmg_mol`` and handed back
    unchanged on later calls. Each atom contributes its ``symbol`` and
    ``coords``; one bond is added per entry of ``get_all_connections()``,
    constructed from the two atoms only.
    """
    cached = self.rmg_mol
    if cached is not None:
        return cached

    # Imported lazily, only when a molecule actually has to be built.
    import rmgpy.molecule.molecule as rmg_molecule

    rmg_atoms = []
    for atom in self:
        rmg_atoms.append(rmg_molecule.Atom(element=atom.symbol, coords=atom.coords))

    # Lookup from this object's atoms to their RMG counterparts.
    mapping = dict(zip(self.atoms, rmg_atoms))

    rmg_bonds = [
        rmg_molecule.Bond(mapping[link.atom1], mapping[link.atom2])
        for link in self.get_all_connections()
    ]

    built = rmg_molecule.Molecule(atoms=rmg_atoms)
    for rmg_bond in rmg_bonds:
        built.add_bond(rmg_bond)

    self.rmg_mol = built
    return built
def pybel_to_rmg(pybelmol, addh=False):
    """
    Convert a pybel molecule to an RMG molecule, keeping only elements,
    coordinates and connectivity.

    Charge, multiplicity, and bond orders are ignored; the returned value
    is the result of ``toSingleBonds()`` on the assembled molecule.

    Args:
        pybelmol: pybel molecule to convert
        addh: when True, call ``pybelmol.addh()`` before converting
    """
    rmg_mol = molecule.Molecule()

    if addh:
        pybelmol.addh()

    for ob_atom in pybelmol:
        rmg_mol.vertices.append(
            molecule.Atom(
                element=elements.getElement(ob_atom.atomicnum),
                coords=np.array(ob_atom.coords),
            )
        )

    for ob_bond in pybel.ob.OBMolBondIter(pybelmol.OBMol):
        # Bond atom indices are shifted by one to index the vertex list.
        first = rmg_mol.vertices[ob_bond.GetBeginAtomIdx() - 1]
        second = rmg_mol.vertices[ob_bond.GetEndAtomIdx() - 1]
        rmg_mol.addBond(molecule.Bond(first, second))

    return rmg_mol.toSingleBonds()
def geo_to_mol(geo):
    """
    Convert a ``(symbols, coords)`` geometry into a molecule.

    Open Babel handles most cases because it is better at recognizing long
    bonds. RMG handles the two-hydrogen case because Open Babel can't do it
    for mysterious reasons.

    Args:
        geo: pair of (atomic symbols, coordinates)
    """
    symbols, coords = geo

    if len(symbols) == 2 and all(sym == 'H' for sym in symbols):
        atomic_numbers = np.array([atomic_symbol_dict[sym] for sym in symbols])
        h2 = molecule.Molecule()
        h2.fromXYZ(atomic_numbers, coords)
        return h2

    # Assemble an XYZ-format string: atom count, blank comment line, then
    # one "symbol x y z" line per atom.
    atom_lines = [
        '{0} {1[0]: .10f} {1[1]: .10f} {1[2]: .10f}'.format(sym, position)
        for sym, position in zip(*geo)
    ]
    xyz_text = '{}\n\n'.format(len(symbols)) + '\n'.join(atom_lines)

    return pybel_to_rmg(pybel.readstring('xyz', xyz_text))