def get_iupac(molecule):
    """
    Generate the IUPAC name of a molecule with the OpenEye OEIUPAC toolkit.

    Parameters
    ----------
    molecule : `oechem.OEMol`
        Molecule to name.

    Returns
    -------
    str
        IUPAC name as returned by ``oeiupac.OECreateIUPACName``.

    Raises
    ------
    ImportError
        If ``has_openeye`` is false, or if ``oeiupac.OEIUPACIsLicensed()``
        reports that no OEIUPAC license is available.
    """
    if not has_openeye:
        # The two literals are joined by implicit concatenation, so the first
        # one has to end with its own sentence separator.
        raise ImportError(
            "OpenEye is not installed. You can use the canonicalization='rdkit' to use the RDKit backend. "
            "The Conda recipe for cmiles installs rdkit")

    # Deferred import: only attempted after the has_openeye guard has passed.
    from openeye import oeiupac

    if not oeiupac.OEIUPACIsLicensed():
        raise ImportError("Must have OEIUPAC license!")
    return oeiupac.OECreateIUPACName(molecule)
def _create_implicit_solvent_openmm(self, mol):
    """
    Build an OpenMM system and positions for a single molecule.

    The molecule is written to a Tripos mol2 file, passed through
    ``run_antechamber`` and ``run_tleap``, and the resulting AMBER
    prmtop/inpcrd pair is loaded to create the system.

    Parameters
    ----------
    mol : oemol
        oemol to be turned into system, positions

    Returns
    -------
    system : simtk.openmm.System
        openmm system corresponding to molecule
    positions : np.array, Quantity nm
        array of atomic positions
    """
    molecule_name = oeiupac.OECreateIUPACName(mol)

    # enter_temp_directory() is a context manager; calling it as a bare
    # statement never enters the temporary directory, so every intermediate
    # file would be written into (and left in) the caller's working directory.
    with openmoltools.openeye.enter_temp_directory():
        _, tripos_mol2_filename = openmoltools.openeye.molecule_to_mol2(
            mol,
            tripos_mol2_filename=molecule_name + '.tripos.mol2',
            conformer=0,
            residue_name='MOL')
        gaff_mol2, frcmod = openmoltools.amber.run_antechamber(
            molecule_name, tripos_mol2_filename)
        prmtop_file, inpcrd_file = openmoltools.amber.run_tleap(
            molecule_name, gaff_mol2, frcmod)

        # Everything that depends on the generated files happens before the
        # temporary directory is left.
        prmtop = app.AmberPrmtopFile(prmtop_file)
        crd = app.AmberInpcrdFile(inpcrd_file)
        system = prmtop.createSystem(implicitSolvent=self.implicit_solvent,
                                     constraints=self.constraints,
                                     removeCMMotion=False)
        positions = crd.getPositions(asNumpy=True)

    return system, positions
def generate_molecule_from_smiles(smiles, name=None):
    """
    Create an OpenEye molecule from a SMILES string.

    Aromaticity is assigned with the OpenEye model, explicit hydrogens are
    added, a title is set, and atom names are (re)assigned when any atom has
    an empty name.

    Parameters
    ----------
    smiles : str
        The canonical isomeric SMILES string.
    name : str, optional, default=None
        If specified, the molecule title will be set to this; if not, the
        IUPAC name will be assigned.

    Returns
    -------
    oechem.OEMol
        The newly built molecule.

    Raises
    ------
    ValueError
        If ``oechem.OEParseSmiles`` reports that the SMILES cannot be parsed.
    """
    mol = oechem.OEMol()
    parsed_ok = oechem.OEParseSmiles(mol, smiles)
    if not parsed_ok:
        raise ValueError("The supplied SMILES '%s' could not be parsed." % smiles)

    # Perceive aromaticity, then make all hydrogens explicit.
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModelOpenEye)
    oechem.OEAddExplicitHydrogens(mol)

    # Fall back to the IUPAC name only when the caller supplied no title.
    title = oeiupac.OECreateIUPACName(mol) if name is None else name
    mol.SetTitle(title)

    # One unnamed atom is enough to trigger renaming of all atoms.
    has_unnamed_atom = any(atom.GetName() == '' for atom in mol.GetAtoms())
    if has_unnamed_atom:
        oechem.OETriposAtomNames(mol)

    return mol
def normalize_molecule(molecule, name=None):
    """Normalize a copy of the molecule by checking aromaticity, adding explicit hydrogens, and setting its title.

    Parameters
    ----------
    molecule : OEMol
        the molecule to be normalized.
    name : str, optional, default=None
        If specified, the title of the copy is set to this; if not, the
        IUPAC name is assigned (the behavior of the one-argument call).

    Returns
    -------
    molcopy : OEMol
        A (copied) version of the normalized molecule
    """
    # Work on a copy so the caller's molecule is not modified.
    molcopy = oechem.OEMol(molecule)

    # Assign aromaticity.
    oechem.OEAssignAromaticFlags(molcopy, oechem.OEAroModelOpenEye)

    # Add hydrogens.
    oechem.OEAddExplicitHydrogens(molcopy)

    # Set title: an explicit name wins, otherwise use the IUPAC name.
    if name is None:
        name = oeiupac.OECreateIUPACName(molcopy)
    molcopy.SetTitle(name)

    # Check for any missing atom names, if found reassign all of them.
    if any(atom.GetName() == '' for atom in molcopy.GetAtoms()):
        oechem.OETriposAtomNames(molcopy)

    return molcopy
def __makeChemCompIdentifierCategory(self, ccId, oeMol):
    """Build the row dictionaries of the pdbx_chem_comp_identifier category.

    One row is created per naming style, in this order: "SYSTEMATIC NAME"
    (style "systematic"), "COMMON" (style "traditional") and "SYNONYM"
    (style "acdname"). Every name is generated with OECreateIUPACName and
    converted with OEToUTF8.

    Example of the target category:

    loop_
    _pdbx_chem_comp_identifier.comp_id
    _pdbx_chem_comp_identifier.type
    _pdbx_chem_comp_identifier.program
    _pdbx_chem_comp_identifier.program_version
    _pdbx_chem_comp_identifier.identifier
    ATP "SYSTEMATIC NAME" ACDLabs 10.04
    ;adenosine 5'-(tetrahydrogen triphosphate)
    ;
    ATP "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-..."
    #

    Args:
        ccId: value stored under "comp_id" in every row.
        oeMol: molecule passed to OECreateIUPACName.

    Returns:
        list of dict: one row per naming style, in the order given above.
    """
    # (value of the "type" column, OEIUPAC style name), in output order.
    typeAndStyle = (
        ("SYSTEMATIC NAME", "systematic"),
        ("COMMON", "traditional"),
        ("SYNONYM", "acdname"),
    )
    rowL = []
    for rowType, styleName in typeAndStyle:
        aRow = {
            "comp_id": ccId,
            "type": rowType,
            "program": "OpenEye OEToolkits",
            "program_version": self.__oeVersion,
        }
        style = oeiupac.OEGetIUPACNamStyle(styleName)
        aRow["identifier"] = oeiupac.OEToUTF8(oeiupac.OECreateIUPACName(oeMol, style))
        rowL.append(aRow)
    return rowL
def __makeChemCompCategory(self, ccId, oeMol, site="RCSB", missingModelXyz=False, skipAnnotations=False):
    """Build the single row dictionary of the chem_comp category.

    Args:
        ccId: component identifier; stored under "id", and its part before
            the first "_" is stored under "three_letter_code".
        oeMol: molecule from which formula, formal charge, molecular weight
            and (unless skipAnnotations) the systematic name are computed.
        site: value stored under "pdbx_processing_site".
        missingModelXyz: when true, "pdbx_model_coordinates_missing_flag"
            is "Y", otherwise "N".
        skipAnnotations: when true, ccId is used as the name instead of a
            generated systematic name.

    Returns:
        dict: the chem_comp row; values that are None are written as "?".
    """
    today = time.strftime("%Y-%m-%d", time.localtime())
    formula = oechem.OEMolecularFormula(oeMol)
    charge = self.__getFormalCharge(oeMol)
    fW = oechem.OECalculateMolecularWeight(oeMol)

    if skipAnnotations:
        name = ccId
    else:
        systematicStyle = oeiupac.OEGetIUPACNamStyle("systematic")
        name = oeiupac.OEToUTF8(oeiupac.OECreateIUPACName(oeMol, systematicStyle))

    # Keys are listed in the order in which they must appear in the row.
    # "pdbx_synonyms" is not emitted here — NOTE(review): confirm that is intended.
    return {
        "id": ccId,
        "name": "?" if name is None else name,
        "type": "NON-POLYMER",
        "pdbx_type": "?",
        "formula": "?" if formula is None else formula,
        "mon_nstd_parent_comp_id": "?",
        "pdbx_formal_charge": "?" if charge is None else charge,
        "pdbx_ambiguous_flag": "N",
        # Creation and modification dates are both the current local date.
        "pdbx_initial_date": today,
        "pdbx_modified_date": today,
        "pdbx_release_status": "HOLD",
        "pdbx_replaced_by": "?",
        "pdbx_replaces": "?",
        "formula_weight": "?" if fW is None else "%0.3f" % fW,
        "one_letter_code": "?",
        "three_letter_code": ccId.split("_")[0],
        "pdbx_model_coordinates_details": "?",
        "pdbx_ideal_coordinates_details": "?",
        "pdbx_model_coordinates_missing_flag": "Y" if missingModelXyz else "N",
        "pdbx_model_coordinates_db_code": "?",
        "pdbx_processing_site": site,
        "pdbx_subcomponent_list": "?",
    }
def Mol2Nam(itf):
    """
    Generate a name for every molecule of the input file.

    Options are read from ``itf``: ``-in``, ``-out``, ``-language``,
    ``-encoding``, ``-style``, ``-capitalize``, ``-delim`` and ``-tag``.
    When ``-out`` is given, each molecule is written to that file with the
    generated name as its title (optionally prefixed by the old title and
    ``-delim``, and optionally stored as SD data under ``-tag``); otherwise
    the name is printed.

    Parameters
    ----------
    itf : interface object
        Parsed command-line options, queried with ``GetString``,
        ``HasString`` and ``GetBool``.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-in")):
        oechem.OEThrow.Fatal("Unable to open '%s' for reading" % itf.GetString("-in"))

    ofs = oechem.oemolostream()
    outname = None
    if itf.HasString("-out"):
        outname = itf.GetString("-out")
        if not ofs.open(outname):
            # This is the output stream, so the failure is a write failure.
            oechem.OEThrow.Fatal("Unable to open '%s' for writing" % outname)

    language = oeiupac.OEGetIUPACLanguage(itf.GetString("-language"))
    charset = oeiupac.OEGetIUPACCharSet(itf.GetString("-encoding"))
    style = oeiupac.OEGetIUPACNamStyle(itf.GetString("-style"))

    # The character set does not change per molecule, so the matching
    # converter is looked up once; unknown character sets leave names as is.
    converters = {
        oeiupac.OECharSet_ASCII: oeiupac.OEToAscii,
        oeiupac.OECharSet_UTF8: oeiupac.OEToUTF8,
        oeiupac.OECharSet_HTML: oeiupac.OEToHTML,
        oeiupac.OECharSet_SJIS: oeiupac.OEToSJIS,
        oeiupac.OECharSet_EUCJP: oeiupac.OEToEUCJP,
    }
    convert = converters.get(charset)

    for mol in ifs.GetOEGraphMols():
        name = oeiupac.OECreateIUPACName(mol, style)

        if language > 0:
            name = oeiupac.OEToLanguage(name, language)
        if itf.GetBool("-capitalize"):
            name = oeiupac.OECapitalizeName(name)
        if convert is not None:
            name = convert(name)

        if outname:
            if itf.HasString("-delim"):
                title = mol.GetTitle()
                name = title + itf.GetString("-delim") + name

            if itf.HasString("-tag"):
                oechem.OESetSDData(mol, itf.GetString("-tag"), name)

            mol.SetTitle(name)
            oechem.OEWriteMolecule(ofs, mol)
        else:
            print(name)
def _state_transition_to_iupac(self, state_transition): """ Convenience function to convert SMILES to IUPAC names Parameters ---------- state_transition : (str, str) Pair of smiles strings for the state transition Returns ------- state_transition_iupac : [str, str] The pair of molecules in IUPAC names """ state_transition_iupac = [] for state in state_transition: mol = oechem.OEMol() oechem.OESmilesToMol(mol, state) iupac = oeiupac.OECreateIUPACName(mol) state_transition_iupac.append(iupac) return state_transition_iupac
template_script.format(yaml_filename)  # tail of a definition that begins outside this view; kept unchanged


if __name__ == "__main__":
    # Script entry point: read the molecules of the SMILES database, name each
    # one with its IUPAC name, and write one YAML file per ordered pair of
    # names, derived from the template YAML.
    template_script_file_eq = "submit-eq.sh"
    template_script_file_neq = "submit-neq.sh"
    substituted_benzene_smilefile = "filtered_database.smi"

    istream = oechem.oemolistream(substituted_benzene_smilefile)
    # Each molecule is copied before naming, as GetOEMols() is consumed lazily.
    substituted_benzenes_iupac = [
        oeiupac.OECreateIUPACName(oechem.OEMol(mol))
        for mol in istream.GetOEMols()
    ]

    with open("rj_neq_template.yaml", "r") as yamlfile:
        # yaml.load() without an explicit Loader is deprecated and rejected by
        # recent PyYAML releases; safe_load parses plain YAML data without
        # constructing arbitrary Python objects.
        template_yamldict = yaml.safe_load(yamlfile)

    for pair in itertools.permutations(substituted_benzenes_iupac, 2):
        # permutations() pairs distinct positions, but two different input
        # molecules can share a name, so identical names are still skipped.
        if pair[0] == pair[1]:
            continue
        # Deep copy so each pair starts from an unmodified template.
        new_yaml_dict = create_yaml_file(pair[0], pair[1], copy.deepcopy(template_yamldict))
        new_yaml_filename = "{}_{}_rjneq.yaml".format(pair[0], pair[1])
        with open(new_yaml_filename, 'w') as yaml_outfile:
            yaml.dump(new_yaml_dict, yaml_outfile)
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    This function generates fragments from molecules. The output is a dictionary that maps SMILES of molecules to SMILES
    for fragments. The default SMILES are generated with openeye.oechem.OEMolToSmiles. These SMILES strings are canonical
    isomeric SMILES.
    The dictionary also includes a provenance field which defines how the fragments were generated.

    Parameters
    ----------
    molecule: OEMol or iterable of OEMol
        Molecule(s) to fragment. An argument that cannot be turned into a list is treated as a single molecule.
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        Note: This applies to the molecule being fragmented. Not the fragments.
        If True, omega will generate conformation with stereochemistry defined in the SMILES string for charging.
    remove_map: bool
        If True, the index tags will be removed. This will remove duplicate fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments
    """
    fragments = dict()

    # Accept either one molecule or an iterable of molecules.
    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for a in mol.GetAtoms():
                a.SetMapIdx(0)

        generated = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not generated:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue

        # The first element is the charged molecule, the last one the fragments.
        charged = generated[0]
        frags = generated[-1]
        frag_list = list(frags.values())
        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            IUPAC = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            # When the title is just the IUPAC name, build the file name from the SMILES instead.
            if IUPAC == name:
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frags)

        # Drop the per-molecule objects before the next iteration.
        del charged, frags, generated

    if json_filename:
        # The context manager closes the file even if serialization fails.
        with open(json_filename, 'w') as f:
            json.dump(fragments, f, indent=2, sort_keys=True)

    return fragments