def ProcessSpecifiedPropertyNames():
    """Resolve the property names requested through the mode option.

    Reads OptionsInfo["Mode"], removes its spaces and stores the resulting
    list of names in OptionsInfo["SpecifiedPropertyNames"]. A value of "All"
    (any case) selects every name returned by RetrievePropertyNames().
    Otherwise the value is split on commas and each entry is matched
    case-insensitively against those names. An empty value or an unknown
    entry is reported through MiscUtil.PrintError.
    """
    AvailableNames = RetrievePropertyNames()
    OptionsInfo["SpecifiedPropertyNames"] = []

    RequestedNames = re.sub(" ", "", OptionsInfo["Mode"])
    if not RequestedNames:
        MiscUtil.PrintError("No valid property names specifed using \"-m, --mode\" option")

    if re.match("^All$", RequestedNames, re.I):
        OptionsInfo["SpecifiedPropertyNames"] = AvailableNames
        return

    # Map lower-cased names to the spelling used by RetrievePropertyNames()...
    NamesByLowerCase = {Name.lower(): Name for Name in AvailableNames}

    SelectedNames = OptionsInfo["SpecifiedPropertyNames"]
    for RequestedName in RequestedNames.split(","):
        LookupKey = RequestedName.lower()
        if LookupKey not in NamesByLowerCase:
            MiscUtil.PrintError("The property name specified, %s, using \"-m, --mode\" option is not a valid name." % RequestedName)
        SelectedNames.append(NamesByLowerCase[LookupKey])
def AlignMolecules():
    """Align probe molecules to reference molecules and write the results.

    Reads the reference and probe files named in OptionsInfo, sets up a
    molecule writer for the output file and hands off to
    PerformOneToOneAlignment or PerformFirstToAllAlignment depending on
    OptionsInfo["Mode"]. Prints a summary of total, valid, failed and
    ignored molecule counts once the writer is closed.
    """
    Mode = OptionsInfo["Mode"]
    if not re.match("^(OneToOne|FirstToAll)$", Mode, re.I):
        MiscUtil.PrintError("Alignment couldn't be performed: Specified mode, %s, is not supported" % Mode)

    RefFile = OptionsInfo["RefFile"]
    ProbeFile = OptionsInfo["ProbeFile"]
    Outfile = OptionsInfo["Outfile"]

    # Read reference and probe molecules; empty molecules are disallowed...
    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = False

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **InfileParams)

    MiscUtil.PrintInfo("Processing file %s..." % (ProbeFile))
    ValidProbeMols, ProbeMolCount, ValidProbeMolCount = RDKitUtil.ReadAndValidateMolecules(ProbeFile, **InfileParams)

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    AlignmentFailedCount = 0
    if re.match("^OneToOne$", Mode, re.I):
        AlignmentFailedCount = PerformOneToOneAlignment(ValidRefMols, ValidProbeMols, Writer)
    elif re.match("^FirstToAll$", Mode, re.I):
        AlignmentFailedCount = PerformFirstToAllAlignment(ValidRefMols, ValidProbeMols, Writer)
    else:
        MiscUtil.PrintError("Alignment couldn't be performed: Specified mode, %s, is not supported" % Mode)

    if Writer is not None:
        Writer.close()

    # Probe molecules that failed alignment count as ignored as well...
    IgnoredRefMolCount = RefMolCount - ValidRefMolCount
    IgnoredProbeMolCount = ProbeMolCount - ValidProbeMolCount + AlignmentFailedCount

    MiscUtil.PrintInfo("\nTotal number of molecules: Reference - %d; Probe - %d" % (RefMolCount, ProbeMolCount))
    MiscUtil.PrintInfo("Number of valid molecules: Reference - %d; Probe - %d" % (ValidRefMolCount, ValidProbeMolCount))
    MiscUtil.PrintInfo("Number of probe molecules failed during alignment: %d" % AlignmentFailedCount)
    MiscUtil.PrintInfo("Number of ignored molecules: Reference - %d; Probe - %d" % (IgnoredRefMolCount, IgnoredProbeMolCount))
def PerformHierarchicalClustering(Mols, MolsFingerprints):
    """Group molecules into clusters using hierarchical clustering.

    Arguments:
        Mols: Sequence of molecules, indexed by the member indices of each
            cluster returned by the cluster picker.
        MolsFingerprints: Fingerprints passed to
            GenerateLowerTriangularDistanceMatrix to build the distances.

    Returns:
        list: One list of molecules per cluster, in the order the cluster
            picker returned them.
    """
    try:
        import numpy
    except ImportError:
        MiscUtil.PrintError("Failed to import numpy python module. This is required to cluster molecules using hierarchical clustering methodology.")

    RequestedClusters = OptionsInfo["NumClusters"]
    MolCount = len(Mols)
    if RequestedClusters > MolCount:
        MiscUtil.PrintError("The number of clusters, %d, specified using \"-n, --numClusters\" must be less than total number of valid molecules, %d" % (RequestedClusters, MolCount))

    MiscUtil.PrintInfo("\nCluster molecules using %s hierarchical clustering methodology and %s similarity metric..." % (OptionsInfo["SpecifiedHierarchicalClusteringMethod"], OptionsInfo["SimilarityMetric"]))

    FingerprintCount = len(MolsFingerprints)
    Distances = GenerateLowerTriangularDistanceMatrix(MolsFingerprints)

    Picker = HierarchicalClusterPicker(OptionsInfo["SpecifiedHierarchicalClusteringMethodID"])
    ClusteredMolIndices = Picker.Cluster(numpy.asarray(Distances), FingerprintCount, RequestedClusters)

    # Translate member indices back into the molecules themselves...
    return [[Mols[MolIndex] for MolIndex in Cluster] for Cluster in ClusteredMolIndices]
def SetupOutputFiles():
    """Open the combined output file and, optionally, per-category files.

    The combined file handle is stored in OptionsInfo["OutFH"] and its
    result counter OptionsInfo["OutfileResCount"] is reset to 0. When
    OptionsInfo["MultipleOutFiles"] is set, one additional file is opened
    for every entry in OptionsInfo["Categories"]; the handles and zeroed
    counters are stored in OptionsInfo["CategoriesOutFHs"] and
    OptionsInfo["CategoriesResCount"], keyed by category.

    The handles are not closed here; they are left in OptionsInfo for the
    caller to use and close.
    """
    if OptionsInfo["MultipleOutFiles"]:
        MiscUtil.PrintInfo("\nGenerating output files: %s" % (", ".join(OptionsInfo["OutfilesList"])))
    else:
        MiscUtil.PrintInfo("\nGenerating output file %s..." % (OptionsInfo["Outfile"]))

    # Open combined output file...
    Outfile = OptionsInfo["Outfile"]
    OutFH = open(Outfile, "w")
    if OutFH is None:
        MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))

    OptionsInfo["OutFH"] = OutFH
    OptionsInfo["OutfileResCount"] = 0

    if not OptionsInfo["MultipleOutFiles"]:
        return

    # Open output files for different categories...
    OptionsInfo["CategoriesOutFHs"] = {}
    OptionsInfo["CategoriesResCount"] = {}
    for Category in OptionsInfo["Categories"]:
        CategoryOutfile = OptionsInfo["CategoriesOutfiles"][Category]
        CategoryOutFH = open(CategoryOutfile, "w")
        # Check the handle that was just opened, not the file name...
        if CategoryOutFH is None:
            MiscUtil.PrintError("Couldn't open output file: %s.\n" % (CategoryOutfile))

        OptionsInfo["CategoriesOutFHs"][Category] = CategoryOutFH
        OptionsInfo["CategoriesResCount"][Category] = 0
def SetupMoleculeWriters(CombineMatchResults, Outfile, GroupsOutfiles):
    """Set up molecule writers for output files.

    Arguments:
        CombineMatchResults: When true, a single writer is created for
            Outfile; otherwise one writer is created per group output file.
        Outfile: Name of the combined output file.
        GroupsOutfiles: Names of the per-group output files.

    Returns:
        tuple: (Writer, GroupOutfilesWriters). Writer is None when
            per-group files are written; GroupOutfilesWriters is empty when
            a combined file is written.
    """
    Writer = None
    GroupOutfilesWriters = []

    if CombineMatchResults:
        Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)
        return (Writer, GroupOutfilesWriters)

    for GroupOutfile in GroupsOutfiles:
        GroupOutfileWriter = RDKitUtil.MoleculesWriter(GroupOutfile, **OptionsInfo["OutfileParams"])
        if GroupOutfileWriter is None:
            # Report the file that failed; the combined Writer is always
            # None on this path, so it must not be used in the message...
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % GroupOutfile)
        GroupOutfilesWriters.append(GroupOutfileWriter)

    GroupsCount = len(GroupsOutfiles)
    if GroupsCount > 4:
        # Too many names to list; show the naming pattern instead...
        MiscUtil.PrintInfo("Generating %d output files with the following file name format: %s<GroupName>.%s" % (GroupsCount, OptionsInfo["OutfileBasename"], OptionsInfo["OutfileExt"]))
    else:
        OutfileNames = ', '.join(GroupsOutfiles)
        MiscUtil.PrintInfo("Generating %d output files: %s..." % (GroupsCount, OutfileNames))

    return (Writer, GroupOutfilesWriters)
def ProcessSpecifiedFingerprintsType():
    """Work out the fingerprints vector type and store it in OptionsInfo.

    The result, "BitVect" or "IntVect", is written to
    OptionsInfo["SpecifiedFingerprintsType"]. For the "auto" type, IntVect
    is chosen only when the fingerprints are not MACCS166Keys or PathLength
    and the similarity metric is Tanimoto or Dice. An explicit IntVect
    request that conflicts with either condition is reported through
    MiscUtil.PrintError, as is an unrecognized type.
    """
    FingerprintsName = OptionsInfo["SpecifiedFingerprints"]
    FingerprintsType = OptionsInfo["FingerprintsType"]
    SimilarityName = OptionsInfo["SimilarityMetric"]

    def IsBitVectOnlyFingerprints():
        return re.match("^(MACCS166Keys|PathLength)$", FingerprintsName, re.I) is not None

    def MetricHandlesIntVect():
        # RDKit similarity functions, besides Dice and Tanimoto, are not able to handle int bit vectors...
        return re.match("^(Tanimoto|Dice)$", SimilarityName, re.I) is not None

    if re.match("^auto$", FingerprintsType, re.I):
        if IsBitVectOnlyFingerprints() or not MetricHandlesIntVect():
            ResolvedType = "BitVect"
        else:
            ResolvedType = "IntVect"
    elif re.match("^IntVect$", FingerprintsType, re.I):
        ResolvedType = "IntVect"
        if IsBitVectOnlyFingerprints():
            MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for fingerprints %s." % (FingerprintsType, FingerprintsName))
        if not MetricHandlesIntVect():
            MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for similarity metric %s.\nSupported similarity metrics: Tanimoto or Dice" % (FingerprintsType, SimilarityName))
    elif re.match("^BitVect$", FingerprintsType, re.I):
        ResolvedType = "BitVect"
    else:
        MiscUtil.PrintError("The fingerprints Type, %s, is not supported." % (FingerprintsType))

    OptionsInfo["SpecifiedFingerprintsType"] = ResolvedType
def SetupChainAndLigandOutfiles():
    """Derive output file names for the specified chains and ligands.

    Chain files are named <InfileRoot>_Chain<ChainID>.pdb and ligand files
    <InfileRoot>_Chain<ChainID>_<LigandID>.<LigandFileExt>. The names are
    stored under the "ChainOutfiles" and "LigandOutfiles" keys of
    OptionsInfo["SpecifiedChainsAndLigandsInfo"]. A name that already
    exists on disk is reported through MiscUtil.PrintError unless
    OptionsInfo["Overwrite"] is set.
    """
    ChainsAndLigandsInfo = OptionsInfo["SpecifiedChainsAndLigandsInfo"]
    ChainsAndLigandsInfo["ChainOutfiles"] = {}
    ChainsAndLigandsInfo["LigandOutfiles"] = {}

    InfileRoot = OptionsInfo["InfileRoot"]
    LigandFileExt = OptionsInfo["LigandFileExt"]

    for ChainID in ChainsAndLigandsInfo["ChainIDs"]:
        ChainOutfileRoot = "%s_Chain%s" % (InfileRoot, ChainID)
        ChainOutfile = "%s.pdb" % (ChainOutfileRoot)
        ChainsAndLigandsInfo["ChainOutfiles"][ChainID] = ChainOutfile
        if os.path.exists(ChainOutfile) and not OptionsInfo["Overwrite"]:
            MiscUtil.PrintError("\nThe chain output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (ChainOutfile))

        # Ligand file names are nested under their chain...
        ChainLigandOutfiles = {}
        ChainsAndLigandsInfo["LigandOutfiles"][ChainID] = ChainLigandOutfiles
        for LigandID in ChainsAndLigandsInfo["LigandIDs"][ChainID]:
            LigandOutfile = "%s_%s.%s" % (ChainOutfileRoot, LigandID, LigandFileExt)
            ChainLigandOutfiles[LigandID] = LigandOutfile
            if os.path.exists(LigandOutfile) and not OptionsInfo["Overwrite"]:
                MiscUtil.PrintError("\nThe ligand output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (LigandOutfile))
def RetrieveReferenceMolecule():
    """Read the reference file and return its first valid molecule.

    Empty molecules are disallowed while reading. A file with no valid
    molecules is reported through MiscUtil.PrintError; a file with more
    than one produces a warning and the first is used. When
    OptionsInfo["UseScaffoldSMARTS"] is set, the scaffold SMARTS must parse
    and must match the reference molecule.

    Returns:
        The first valid molecule read from OptionsInfo["RefFile"].
    """
    RefFile = OptionsInfo["RefFile"]
    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))

    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = False
    ValidRefMols, _, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **InfileParams)

    if ValidRefMolCount == 0:
        MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
    elif ValidRefMolCount > 1:
        MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))

    RefMol = ValidRefMols[0]
    if not OptionsInfo["UseScaffoldSMARTS"]:
        return RefMol

    # The scaffold pattern must be valid and present in the reference...
    ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
    if ScaffoldPatternMol is None:
        MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))

    if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
        MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))

    return RefMol
def SetupMoleculeWriters():
    """Create the writers for the main and filtered output files.

    Returns:
        tuple: (Writer, WriterFiltered). Both are None in count mode.
            WriterFiltered is also None unless
            OptionsInfo["OutfileFilteredMode"] is set.
    """
    if OptionsInfo["CountMode"]:
        # Nothing is written in count mode...
        return (None, None)

    Outfile = OptionsInfo["Outfile"]
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    WriterFiltered = None
    if OptionsInfo["OutfileFilteredMode"]:
        OutfileFiltered = OptionsInfo["OutfileFiltered"]
        WriterFiltered = RDKitUtil.MoleculesWriter(OutfileFiltered, **OptionsInfo["OutfileParams"])
        if WriterFiltered is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OutfileFiltered)
        MiscUtil.PrintInfo("Generating file %s..." % OutfileFiltered)

    return (Writer, WriterFiltered)
def CalculateRMSDValues():
    """Calculate RMSD between reference and probe files.

    Opens OptionsInfo["Outfile"] for writing, writes the column labels,
    reinitializes PyMOL and dispatches to the calculation routine matching
    OptionsInfo["Mode"] (OneToOne, AllToAll or FirstToAll). An unsupported
    mode is reported through MiscUtil.PrintError.

    The output file is managed by a with-statement so that the handle is
    closed even when a calculation routine raises.
    """
    Outfile = OptionsInfo["Outfile"]
    OutDelim = OptionsInfo["OutDelim"]

    MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)

    # open() raises on failure rather than returning None, so no explicit
    # handle check is needed here...
    with open(Outfile, "w") as OutFH:
        WriteColumnLabels(OutFH, OutDelim)

        pymol.cmd.reinitialize()

        Mode = OptionsInfo["Mode"]
        if re.match("^OneToOne$", Mode, re.I):
            CalculateOneToOneRMSDValues(OutFH, OutDelim)
        elif re.match("^AllToAll$", Mode, re.I):
            CalculateAllToAllRMSDValues(OutFH, OutDelim)
        elif re.match("^FirstToAll$", Mode, re.I):
            CalculateFirstToAllRMSDValues(OutFH, OutDelim)
        else:
            MiscUtil.PrintError("RMSD couldn't be calculated: Specified mode, %s, is not supported" % Mode)
def ProcessChEMBLAlertsMode():
    """Resolve the ChEMBL filter types selected by the alerts mode option.

    Calls RetrieveChEMBLFiltersInfo() first, then stores the selected
    filter types in OptionsInfo["SpecifiedFilterTypes"]. An alerts mode of
    "All" (any case) keeps every type listed in
    OptionsInfo["ChEMBLFiltersMap"]["FilterTypes"]. Otherwise the mode is
    treated as a comma delimited list matched case-insensitively against
    those types. Empty or unknown values are reported through
    MiscUtil.PrintError.
    """
    # Retrieve filters information...
    RetrieveChEMBLFiltersInfo()

    # Default to all available filter types...
    AvailableFilterTypes = OptionsInfo["ChEMBLFiltersMap"]["FilterTypes"]
    OptionsInfo["SpecifiedFilterTypes"] = AvailableFilterTypes

    if re.match("^All$", OptionsInfo["AlertsMode"], re.I):
        return

    AlertsMode = re.sub(" ", "", OptionsInfo["AlertsMode"])
    if len(AlertsMode) == 0:
        MiscUtil.PrintError("The alerts mode specified using \"-a, --alertsMode\" option are empty.")

    FilterTypesByLowerCase = {FilterType.lower(): FilterType for FilterType in AvailableFilterTypes}

    SelectedFilterTypes = []
    for RequestedType in AlertsMode.split(","):
        LookupKey = RequestedType.lower()
        if LookupKey not in FilterTypesByLowerCase:
            MiscUtil.PrintError("The altert mode, %s, specified using \"-a, --alertsMode\" is not valid. Supported alert modes: %s" % (RequestedType, ", ".join(OptionsInfo["ChEMBLFiltersMap"]["FilterTypes"])))
        SelectedFilterTypes.append(FilterTypesByLowerCase[LookupKey])

    OptionsInfo["SpecifiedFilterTypes"] = SelectedFilterTypes
def ValidateOptions():
    """Validate option values.

    Runs the MiscUtil validators over the command line values held in
    Options. Two extra rules are enforced here: descriptor names must be
    supplied when the mode is "Specify", and a SMILES input file is
    rejected for the "3D" and "All" modes.
    """
    MiscUtil.ValidateOptionTextValue("-a, --autocorr2DExclude", Options["--autocorr2DExclude"], "yes no")
    MiscUtil.ValidateOptionTextValue("-f, --fragmentCount", Options["--fragmentCount"], "yes no")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "2D 3D All FragmentCountOnly Specify")
    if re.match("^Specify$", Options["--mode"], re.I):
        if re.match("^none$", Options["--descriptorNames"], re.I):
            MiscUtil.PrintError("The name(s) of molecular descriptors must be specified using \"-d, --descriptorNames\" option during \"Specify\" value of \"-m, --mode\" option.")

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")

    # Group the alternation so that both anchors apply to each mode name;
    # the ungrouped form "^3D|All$" anchors each alternative only on one side...
    if re.match("^(3D|All)$", Options["--mode"], re.I):
        if MiscUtil.CheckFileExt(Options["--infile"], "smi"):
            # The message has a %s placeholder, so supply the input file name...
            MiscUtil.PrintError("The input SMILES file, %s, is not valid for \"3D or All\" value of \"-m, --mode\" option." % Options["--infile"])

    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd csv tsv txt")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])

    MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
    MiscUtil.ValidateOptionTextValue("-s, --smilesOut", Options["--smilesOut"], "yes no")
def ProcessSpecifiedFingerprintsParameters():
    """Process specified fingerprints parameters.

    Parses OptionsInfo["ParamsFingerprints"] as a comma delimited list of
    alternating parameter names and values. Each validated value is stored
    in OptionsInfo["FingerprintsParams"] under the fingerprints named by
    OptionsInfo["SpecifiedFingerprints"]. Nothing is done for "auto".

    Names are matched case-insensitively against the parameters already
    present for the fingerprints. UseChirality takes Yes/No/True/False and
    is stored as a bool; every other parameter must be an integer greater
    than zero. Invalid input is reported through MiscUtil.PrintError.
    """
    if re.match("^auto$", OptionsInfo["ParamsFingerprints"], re.I):
        # Nothing to process...
        return

    SpecifiedFingerprintsName = OptionsInfo["SpecifiedFingerprints"]

    # Parse specified fingerprints parameters...
    ParamsFingerprints = re.sub(" ", "", OptionsInfo["ParamsFingerprints"])
    if not ParamsFingerprints:
        MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-p, --paramsFingerprints\" option corrresponding to fingerprints %s." % (SpecifiedFingerprintsName))

    ParamsFingerprintsWords = ParamsFingerprints.split(",")
    if len(ParamsFingerprintsWords) % 2:
        MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-p, --paramsFingerprints\" option must be an even number." % (len(ParamsFingerprintsWords)))

    # Setup canonical parameter names for specified fingerprints...
    ValidParamNames = sorted(OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName])
    CanonicalParamNamesMap = {ParamName.lower(): ParamName for ParamName in ValidParamNames}

    # Validate and set parameter names and values; words alternate name, value...
    for Name, Value in zip(ParamsFingerprintsWords[0::2], ParamsFingerprintsWords[1::2]):
        CanonicalName = Name.lower()
        if CanonicalName not in CanonicalParamNamesMap:
            MiscUtil.PrintError("The parameter name, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid name. Supported parameter names: %s" % (Name, SpecifiedFingerprintsName, " ".join(ValidParamNames)))

        ParamName = CanonicalParamNamesMap[CanonicalName]
        if re.match("^UseChirality$", ParamName, re.I):
            if not re.match("^(Yes|No|True|False)$", Value, re.I):
                MiscUtil.PrintError("The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: Yes No True False" % (Value, SpecifiedFingerprintsName))
            ParamValue = bool(re.match("^(Yes|True)$", Value, re.I))
        else:
            # A non-integer value is a user input error, not a crash: send
            # it down the same validation path as an out-of-range value...
            try:
                ParamValue = int(Value)
            except ValueError:
                ParamValue = None
            if ParamValue is None or ParamValue <= 0:
                MiscUtil.PrintError("The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: > 0" % (Value, SpecifiedFingerprintsName))

        # Set value...
        OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName][ParamName] = ParamValue
def ProcessSpecifiedDescriptorNames():
    """Process and validate specified descriptor names.

    Stores the list of descriptor names to use in
    OptionsInfo["SpecifiedDescriptorNames"]. For the 2D, 3D, All and
    FragmentCountOnly modes the list is assembled from DescriptorNamesMap,
    with fragment count names added to 2D and All when
    OptionsInfo["FragmentCount"] is set. Otherwise the comma delimited
    names in OptionsInfo["DescriptorNames"] are matched case-insensitively
    against DescriptorNamesMap["ComputeFunction"]. Unknown names, and 3D
    names combined with a SMILES input file, are reported through
    MiscUtil.PrintError.

    The preset lists are copied before being extended or stored, so the
    name lists held in DescriptorNamesMap are never modified.
    """
    OptionsInfo["SpecifiedDescriptorNames"] = []

    Mode = OptionsInfo["Mode"]
    if not re.match("^(2D|3D|All|FragmentCountOnly|Specify)$", Mode, re.I):
        MiscUtil.PrintError("Mode value, %s, using \"-m, --mode\" option is not a valid value." % Mode)

    if re.match("^2D$", Mode, re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^3D$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["3D"]["Names"])
        return
    elif re.match("^All$", Mode, re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        Names.extend(DescriptorNamesMap["3D"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^FragmentCountOnly$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["FragmentCount"]["Names"])
        return

    # Set up a canonical descriptor names map for checking specified names...
    CanonicalNameMap = {Name.lower(): Name for Name in DescriptorNamesMap["ComputeFunction"]}

    # Parse and validate specified names...
    DescriptorNames = re.sub(" ", "", OptionsInfo["DescriptorNames"])
    if not DescriptorNames:
        MiscUtil.PrintError("No descriptor names specified for \"-d, --descriptorNames\" option")

    # 3D descriptor names are only tracked when the input is a SMILES file...
    SMILESInfile = MiscUtil.CheckFileExt(Options["--infile"], "smi")
    Canonical3DNameMap = {}
    if SMILESInfile:
        for Name in DescriptorNamesMap["3D"]["Names"]:
            Canonical3DNameMap[Name.lower()] = Name

    SpecifiedDescriptorNames = []
    for Name in DescriptorNames.split(","):
        CanonicalName = Name.lower()
        if CanonicalName in CanonicalNameMap:
            SpecifiedDescriptorNames.append(CanonicalNameMap[CanonicalName])
        else:
            MiscUtil.PrintError("The descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid name." % (Name))
        if SMILESInfile:
            if CanonicalName in Canonical3DNameMap:
                MiscUtil.PrintError("The 3D descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid for SMILES input file." % (Name))

    if not SpecifiedDescriptorNames:
        MiscUtil.PrintError("No valid descriptor name specified for \"-d, --descriptorNames\" option")

    OptionsInfo["SpecifiedDescriptorNames"] = SpecifiedDescriptorNames
def SetupMoleculeWriters(ClustersCount):
    """Create writers for SD or text output, one file or one per cluster.

    Arguments:
        ClustersCount: Number of cluster output files to create when
            OptionsInfo["SingleOutFileMode"] is not set.

    Returns:
        tuple: (Writer, ClustersOutfilesWriters). Writer is None when
            per-cluster files are written; the list is empty in single
            output file mode.
    """
    TextOutFileMode = OptionsInfo["TextOutFileMode"]
    TextOutFileDelim = OptionsInfo["TextOutFileDelim"]
    TextOutFileTitleLine = OptionsInfo["TextOutFileTitleLine"]

    def OpenWriter(Outfile):
        # Text files are plain file handles; other formats go through RDKitUtil...
        if TextOutFileMode:
            NewWriter = open(Outfile, "w")
        else:
            NewWriter = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
        if NewWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
        if TextOutFileMode and TextOutFileTitleLine:
            WriteTextFileHeaderLine(NewWriter, TextOutFileDelim)
        return NewWriter

    if OptionsInfo["SingleOutFileMode"]:
        Outfile = OptionsInfo["Outfile"]
        Writer = OpenWriter(Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)
        return (Writer, [])

    ClustersOutfiles = OptionsInfo["ClustersOutfiles"]
    ClustersOutfilesWriters = [OpenWriter(ClustersOutfiles[ClusterIndex]) for ClusterIndex in range(0, ClustersCount)]

    if ClustersCount > 4:
        MiscUtil.PrintInfo("Generating %d output files with the following file name format: %s_Cluster<Num>.%s" % (ClustersCount, OptionsInfo["OutfileBasename"], OptionsInfo["OutfileExt"]))
    else:
        OutfileNames = ','.join(OptionsInfo["ClustersOutfiles"])
        MiscUtil.PrintInfo("Generating %d output files: %s..." % (ClustersCount, OutfileNames))

    return (None, ClustersOutfilesWriters)
def CompareMoleculeShapes():
    """Compare shape of molecules.

    Reads the reference and probe files named in OptionsInfo, writes a
    header line to OptionsInfo["Outfile"] and dispatches to the shape
    comparison routine matching OptionsInfo["Mode"] (OneToOne, AllToAll or
    FirstToAll). A summary of total, valid and ignored molecule counts is
    printed at the end. Unsupported mode or alignment values are reported
    through MiscUtil.PrintError.

    The output file is managed by a with-statement so that the handle is
    closed even when a comparison routine raises.
    """
    if not re.match("^(OneToOne|AllToAll|FirstToAll)$", OptionsInfo["Mode"], re.I):
        MiscUtil.PrintError("Shape comparison couldn't be performed: Specified mode, %s, is not supported" % OptionsInfo["Mode"])

    if not re.match("^(Open3A|CrippenOpen3A)$", OptionsInfo["Alignment"], re.I):
        MiscUtil.PrintError("Shape couldn't be performed: Specified alignment mode, %s, is not supported" % OptionsInfo["Alignment"])

    RefFile = OptionsInfo["RefFile"]
    ProbeFile = OptionsInfo["ProbeFile"]

    Outfile = OptionsInfo["Outfile"]
    OutDelim = OptionsInfo["OutDelim"]

    # Read reference and probe molecules...
    OptionsInfo["InfileParams"]["AllowEmptyMols"] = False

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])

    MiscUtil.PrintInfo("Processing file %s..." % (ProbeFile))
    ValidProbeMols, ProbeMolCount, ValidProbeMolCount = RDKitUtil.ReadAndValidateMolecules(ProbeFile, **OptionsInfo["InfileParams"])

    # Set up output file...
    MiscUtil.PrintInfo("Generating file %s...\n" % Outfile)

    # open() raises on failure rather than returning None, so no explicit
    # handle check is needed here...
    with open(Outfile, "w") as OutFH:
        # Header: the score column depends on the alignment method; the
        # distance columns are optional...
        if OptionsInfo["UseCrippenOpen3A"]:
            Line = "RefMolID%sProbeMolID%sCrippenOpen3AScore" % (OutDelim, OutDelim)
        else:
            Line = "RefMolID%sProbeMolID%sOpen3AScore" % (OutDelim, OutDelim)

        if OptionsInfo["CalcTanimotoDistance"]:
            Line = "%s%sTanimotoDistance" % (Line, OutDelim)
        if OptionsInfo["CalcProtrudeDistance"]:
            Line = "%s%sProtrudeDistance" % (Line, OutDelim)
        OutFH.write("%s\n" % Line)

        if re.match("^OneToOne$", OptionsInfo["Mode"], re.I):
            PerformOneToOneShapeComparison(ValidRefMols, ValidProbeMols, OutFH, OutDelim)
        elif re.match("^AllToAll$", OptionsInfo["Mode"], re.I):
            PerformAllToAllShapeComparison(ValidRefMols, ValidProbeMols, OutFH, OutDelim)
        elif re.match("^FirstToAll$", OptionsInfo["Mode"], re.I):
            PerformFirstToAllShapeComparison(ValidRefMols, ValidProbeMols, OutFH, OutDelim)
        else:
            MiscUtil.PrintError("Shape comaprison couldn't be performed: Specified mode, %s, is not supported" % OptionsInfo["Mode"])

    MiscUtil.PrintInfo("\nTotal number of molecules: Reference - %d; Probe - %d" % (RefMolCount, ProbeMolCount))
    MiscUtil.PrintInfo("Number of valid molecules: Reference - %d; Probe - %d" % (ValidRefMolCount, ValidProbeMolCount))
    MiscUtil.PrintInfo("Number of ignored molecules: Reference - %d; Probe - %d" % ((RefMolCount - ValidRefMolCount), (ProbeMolCount - ValidProbeMolCount)))
def SetupCoreScaffoldsByMCS(Mols):
    """Derive a core scaffold for the molecules using MCS.

    Runs rdFMCS.FindMCS on Mols with the settings held in
    OptionsInfo["MCSParams"]. A canceled search, an empty SMARTS result, or
    a scaffold smaller than the configured minimum atom or bond count is
    reported through MiscUtil.PrintError.

    Arguments:
        Mols: Molecules passed to rdFMCS.FindMCS.

    Returns:
        list: A single-element list holding the pattern molecule built from
            the MCS SMARTS string.
    """
    MiscUtil.PrintInfo("\nSetting up core scaffold using MCS...")

    MCSParams = OptionsInfo["MCSParams"]

    # Map the stored parameter names onto FindMCS keyword arguments...
    FindMCSArgs = {
        "maximizeBonds": MCSParams["MaximizeBonds"],
        "threshold": MCSParams["Threshold"],
        "timeout": MCSParams["TimeOut"],
        "verbose": MCSParams["Verbose"],
        "matchValences": MCSParams["MatchValences"],
        "ringMatchesRingOnly": MCSParams["RingMatchesRingOnly"],
        "completeRingsOnly": MCSParams["CompleteRingsOnly"],
        "matchChiralTag": MCSParams["MatchChiralTag"],
        "atomCompare": MCSParams["AtomCompare"],
        "bondCompare": MCSParams["BondCompare"],
        "seedSmarts": MCSParams["SeedSMARTS"],
    }
    MCSResult = rdFMCS.FindMCS(Mols, **FindMCSArgs)

    if MCSResult.canceled:
        MiscUtil.PrintError("MCS failed to identify a core scaffold. Specify a different set of parameters using \"-m, --mcsParams\" option and try again.")

    AtomCount = MCSResult.numAtoms
    BondCount = MCSResult.numBonds
    CoreSMARTS = MCSResult.smartsString

    if len(CoreSMARTS) == 0:
        MiscUtil.PrintError("MCS failed to identify a core scaffold. Specify a different set of parameters using \"-m, --mcsParams\" option and try again.")

    MiscUtil.PrintInfo("SMARTS core scaffold: %s\nNumber of atoms in core scaffold: %s\nNumber of bonds in core scaffold: %s" % (CoreSMARTS, AtomCount, BondCount))

    if AtomCount < MCSParams["MinNumAtoms"]:
        MiscUtil.PrintError("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (AtomCount, MCSParams["MinNumAtoms"]))

    if BondCount < MCSParams["MinNumBonds"]:
        MiscUtil.PrintError("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (BondCount, MCSParams["MinNumBonds"]))

    return [Chem.MolFromSmarts(CoreSMARTS)]
def ProcessOptions():
    """Process and validate command line arguments and options.

    Calls ValidateOptions() and then copies the processed values from
    Options into OptionsInfo: input and output files with their parameters,
    the overwrite flag, count mode, multiprocessing settings, and the salts
    mode together with its salts file or SMARTS pattern.

    Exactly one of the three salts mode flags is set to True for a valid
    "--saltsMode" value; any other value is reported through
    MiscUtil.PrintError.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["Infile"] = Options["--infile"]
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])

    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OptionsInfo["CountMode"] = bool(re.match("^count$", Options["--mode"], re.I))

    OptionsInfo["MPMode"] = bool(re.match("^yes$", Options["--mp"], re.I))
    OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])

    # The three patterns are fully anchored, so at most one can match...
    SaltsByComponentsMode = bool(re.match("^ByComponent$", Options["--saltsMode"], re.I))
    # This flag was previously left False even when BySMARTSFile was requested.
    # NOTE(review): confirm no consumer relied on it always being False.
    SaltsBySMARTSFileMode = bool(re.match("^BySMARTSFile$", Options["--saltsMode"], re.I))
    SaltsBySMARTSMode = bool(re.match("^BySMARTS$", Options["--saltsMode"], re.I))
    if not (SaltsByComponentsMode or SaltsBySMARTSFileMode or SaltsBySMARTSMode):
        MiscUtil.PrintError("The salts mode specified, %s, using \"--saltsMode\" option is not valid." % Options["--saltsMode"])
    OptionsInfo["SaltsByComponentsMode"] = SaltsByComponentsMode
    OptionsInfo["SaltsBySMARTSFileMode"] = SaltsBySMARTSFileMode
    OptionsInfo["SaltsBySMARTSMode"] = SaltsBySMARTSMode

    # An "auto" salts file is stored as None...
    SaltsFile = None
    if SaltsBySMARTSFileMode:
        if not re.match("^auto$", Options["--saltsFile"], re.I):
            SaltsFile = Options["--saltsFile"]
    OptionsInfo["SaltsFile"] = SaltsFile

    SaltsSMARTS = None
    if SaltsBySMARTSMode:
        if not Options["--saltsSMARTS"]:
            MiscUtil.PrintError("No salts SMARTS pattern specified using \"--saltsSMARTS\" option during \"BySMARTS\" value of \"-s, --saltsMode\" option")
        SaltsSMARTS = Options["--saltsSMARTS"].strip(" ")
        if not len(SaltsSMARTS):
            MiscUtil.PrintError("Empty SMARTS pattern specified using \"--saltsSMARTS\" option during \"BySMARTS\" value of \"-s, --saltsMode\" option")
        # Space delimited patterns are converted to one pattern per line...
        if re.search(" ", SaltsSMARTS):
            SaltsSMARTS = re.sub('[ ]+', '\n', SaltsSMARTS)
    OptionsInfo["SaltsSMARTS"] = SaltsSMARTS
def ValidateOptions():
    """Validate option values.

    Runs the MiscUtil validators over the command line values held in
    Options. The alignment and highlight SMARTS values, unless "None", must
    parse with Chem.MolFromSmarts. A "--scrollYSize" value ending in "vh"
    is exempt from the integer check.
    """
    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")

    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])

    if not re.match("^None$", Options["--alignmentSMARTS"], re.I):
        PatternMol = Chem.MolFromSmarts(Options["--alignmentSMARTS"])
        if PatternMol is None:
            MiscUtil.PrintError("The value specified, %s, using option \"--alignmentSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--alignmentSMARTS"])

    MiscUtil.ValidateOptionIntegerValue("--atomLabelFontSize", Options["--atomLabelFontSize"], {">": 0})
    MiscUtil.ValidateOptionFloatValue("-b, --bondLineWidth", Options["--bondLineWidth"], {">": 0.0})

    MiscUtil.ValidateOptionTextValue("--compute2DCoords", Options["--compute2DCoords"], "yes no auto")

    MiscUtil.ValidateOptionTextValue("--counterCol", Options["--counterCol"], "yes no")
    MiscUtil.ValidateOptionTextValue("--colVisibility", Options["--colVisibility"], "yes no")

    # The label was previously "--f, -fontBold" with the dashes transposed.
    # NOTE(review): confirm "-f" is the short form declared in the usage text.
    MiscUtil.ValidateOptionTextValue("-f, --fontBold", Options["--fontBold"], "yes no")

    if not re.match("^None$", Options["--highlightSMARTS"], re.I):
        PatternMol = Chem.MolFromSmarts(Options["--highlightSMARTS"])
        if PatternMol is None:
            MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"])

    MiscUtil.ValidateOptionTextValue("--kekulize", Options["--kekulize"], "yes no")

    MiscUtil.ValidateOptionTextValue("-k, --keysNavigation", Options["--keysNavigation"], "yes no")

    MiscUtil.ValidateOptionNumberValues("-m, --molImageSize", Options["--molImageSize"], 2, ",", "integer", {">": 0})
    MiscUtil.ValidateOptionTextValue("--molImageEncoded", Options["--molImageEncoded"], "yes no")

    MiscUtil.ValidateOptionIntegerValue("--numOfMolsPerRow", Options["--numOfMolsPerRow"], {">": 0})

    MiscUtil.ValidateOptionTextValue("-p, --paging", Options["--paging"], "yes no")
    MiscUtil.ValidateOptionIntegerValue("--pageLength", Options["--pageLength"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--popover", Options["--popover"], "yes no")
    MiscUtil.ValidateOptionIntegerValue("--popoverDataCount", Options["--popoverDataCount"], {">": 0})
    MiscUtil.ValidateOptionIntegerValue("--popoverTextWidth", Options["--popoverTextWidth"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--showMolName", Options["--showMolName"], "yes no")

    MiscUtil.ValidateOptionTextValue("--scrollX", Options["--scrollX"], "yes no")
    MiscUtil.ValidateOptionTextValue("--scrollY", Options["--scrollY"], "yes no")
    # Values ending in "vh" skip the integer check...
    if not re.search("vh$", Options["--scrollYSize"], re.I):
        MiscUtil.ValidateOptionIntegerValue("--scrollYSize", Options["--scrollYSize"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--tableFooter", Options["--tableFooter"], "yes no")
    MiscUtil.ValidateOptionTextValue("--tableHeader", Options["--tableHeader"], "yes no")
def RetrievePAINSPatterns(PAINSFilterMode):
    """Retrieve PAINS patterns for specified PAINS mode.

    Reads PAINSFilters.csv from the directory returned by
    MiscUtil.GetMayaChemToolsLibDataPath(). Lines starting with "#" and
    blank lines are ignored, and the first remaining line is skipped as the
    header. In each data row the first column is the filter type and the
    third column is the SMARTS pattern.

    Arguments:
        PAINSFilterMode: Filter type to select, compared case-insensitively
            against the first column; "All" selects every row.

    Returns:
        list: SMARTS pattern strings for the selected filter type.
    """
    MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
    PAINSFiltersFilePath = os.path.join(MayaChemToolsDataDir, "PAINSFilters.csv")

    MiscUtil.PrintInfo("\nRetrieving PAINS SMARTS patterns for PAINS filter type, %s, from file %s" % (PAINSFilterMode, PAINSFiltersFilePath))

    if not os.path.exists(PAINSFiltersFilePath):
        MiscUtil.PrintError("The PAINS filters file, %s, doesn't exist.\n" % (PAINSFiltersFilePath))

    # Collect all PAINS filter lines; the with-statement closes the file
    # even if reading fails part way through...
    HeaderLine = True
    FiltersLines = []
    with open(PAINSFiltersFilePath, "r") as FilterFile:
        for Line in FilterFile:
            Line = Line.rstrip()

            # Ignore blank lines (they would parse to an empty csv row and
            # fail on column access) and comments...
            if not Line or Line.startswith("#"):
                continue

            # Ignore header line...
            if HeaderLine:
                HeaderLine = False
                continue

            FiltersLines.append(Line)

    # Process PAINS filter lines using csv reader...
    MatchAllTypes = bool(re.match("^All$", PAINSFilterMode, re.I))
    RequestedFilterType = PAINSFilterMode.lower()

    SMARTSPatterns = []
    FiltersReader = csv.reader(FiltersLines, delimiter=',', quotechar='"')
    for LineWords in FiltersReader:
        FilterType = LineWords[0]
        SMARTS = LineWords[2]

        if MatchAllTypes or FilterType.lower() == RequestedFilterType:
            SMARTSPatterns.append(SMARTS)

    MiscUtil.PrintInfo("Total number of PAINS SMARTS patterns: %d" % (len(SMARTSPatterns)))

    return SMARTSPatterns
def ProcessSpecifiedFunctionalGroups():
    """Parse the functional groups option and store the validated names.

    Reads OptionsInfo["FunctionalGroups"] and writes two parallel lists:
    OptionsInfo["SpecifiedFunctionalGroups"] holds group names as spelled in
    FunctionalGroupsMap['Names'], and
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] holds, per name,
    whether it was prefixed with '!' in the specification.

    The value "All" (case insensitive) selects every known group with no
    negation. Otherwise the value is a comma delimited list matched against
    known names ignoring case and spaces. Unknown names are reported through
    MiscUtil.PrintWarning and skipped; an empty specification or one without
    any valid name is reported through MiscUtil.PrintError.

    """

    OptionsInfo["SpecifiedFunctionalGroups"] = []
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = []

    if re.match("^All$", OptionsInfo["FunctionalGroups"], re.I):
        AllNames = FunctionalGroupsMap['Names']
        OptionsInfo["SpecifiedFunctionalGroups"] = AllNames
        OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = [False] * len(AllNames)
        return

    # Lower case name -> name as listed in the functional groups map...
    KnownNames = {Known.lower(): Known for Known in FunctionalGroupsMap['Names']}

    # Drop spaces before splitting the specification on commas...
    Specification = re.sub(" ", "", OptionsInfo["FunctionalGroups"])
    if not Specification:
        MiscUtil.PrintError("No functional group name specified for \"-f, --functionalGroups\" option")

    ValidNames = []
    NegateFlags = []
    for Token in Specification.split(","):
        Lookup = Token.lower()

        # A single leading "!" requests a negated match for the group...
        Negate = Lookup.startswith("!")
        if Negate:
            Lookup = Lookup[1:]

        if Lookup not in KnownNames:
            MiscUtil.PrintWarning("The functional group name, %s, specified using \"-f, --functionalGroups\" option is not a valid name." % (Token))
            continue

        ValidNames.append(KnownNames[Lookup])
        NegateFlags.append(Negate)

    if not ValidNames:
        MiscUtil.PrintError("No valid functional group names specified for \"-f, --functionalGroups\" option")

    OptionsInfo["SpecifiedFunctionalGroups"] = ValidNames
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = NegateFlags
def ValidateOptions():
    """Validate the command line option values held in Options.

    Checks the input file path and extension, the output file extension,
    and the text values of "--outfileFiltered", "-m, --mode", "--mp", and
    "-n, --negate". For "filter" mode, it additionally checks output file
    overwrite, distinct input and output file names, and that an output
    file has been specified.

    """

    Infile = Options["--infile"]
    MiscUtil.ValidateOptionFilePath("-i, --infile", Infile)
    MiscUtil.ValidateOptionFileExt("-i, --infile", Infile, "sdf sd smi txt csv tsv")

    Outfile = Options["--outfile"]
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Outfile, "sdf sd smi")

    # The mode test is made once and reused for both filter specific checks...
    FilterMode = re.match("^filter$", Options["--mode"], re.I)
    if FilterMode:
        MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Outfile, "--overwrite", Options["--overwrite"])
        MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Infile, "-o, --outfile", Outfile)

    MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
    if FilterMode and not Outfile:
        MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")

    MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
    MiscUtil.ValidateOptionTextValue("-n, --negate", Options["--negate"], "yes no")
def RetrieveRefFileInfo():
    """Collect information about the alignment reference file.

    Stores a dictionary in OptionsInfo["RefFileInfo"] with the keys
    "RefFileName", "RefFileRoot", "PyMOLObjectName", and "ChainIDs". The
    chain IDs are taken from the first input file entry when the reference
    is "FirstInputFile"; otherwise they are retrieved from the reference
    file by RetrieveChainIDs. An error is reported through
    MiscUtil.PrintError when no chain IDs are available and the align mode
    is "FirstChain".

    """

    RefFile = OptionsInfo["RefFileName"]

    # Only the file name part is used as the root for object naming...
    _, RefFileRoot, _ = MiscUtil.ParseFileName(RefFile)

    UseFirstInputFile = re.match("^FirstInputFile$", OptionsInfo["AlignRefFile"], re.I)
    if not UseFirstInputFile:
        MiscUtil.PrintInfo("\nRetrieving chains information for alignment reference file %s..." % RefFile)
        ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot)
    else:
        ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0]

    if len(ChainIDs) == 0 and re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
        MiscUtil.PrintError("The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file." % (OptionsInfo["AlignMode"]))

    OptionsInfo["RefFileInfo"] = {
        "RefFileName": RefFile,
        "RefFileRoot": RefFileRoot,
        "PyMOLObjectName": "AlignRef_%s" % RefFileRoot,
        "ChainIDs": ChainIDs,
    }
def AlignInputObject(FileIndex):
    """Align the PyMOL object of an input file to the reference object.

    Arguments:
        FileIndex (int): Index into the lists held in
            OptionsInfo["InfilesInfo"] identifying the input object.

    For the "FirstChain" align mode, the selections are restricted to the
    first chain of each object; otherwise whole objects are used. The
    alignment is carried out by pymol.cmd.align, pymol.cmd.cealign, or
    pymol.cmd.super according to OptionsInfo["AlignMethod"]; any other
    method is reported through MiscUtil.PrintError.

    """

    RefObject = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
    FitObject = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]

    MiscUtil.PrintInfo("\nAligning %s to %s..." % (FitObject, RefObject))

    FirstChainOnly = re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I)
    if not FirstChainOnly:
        RefSelection, FitSelection = RefObject, FitObject
    else:
        RefChainID = OptionsInfo["RefFileInfo"]["ChainIDs"][0]
        RefSelection = "(%s and chain %s)" % (RefObject, RefChainID)

        FitChainID = RetrieveFirstChainID(FileIndex)
        FitSelection = "(%s and chain %s)" % (FitObject, FitChainID)

    Method = OptionsInfo["AlignMethod"]
    if re.match("^align$", Method, re.I):
        pymol.cmd.align(FitSelection, RefSelection)
        return

    if re.match("^cealign$", Method, re.I):
        # cealign is called with the reference selection first, unlike
        # align and super...
        pymol.cmd.cealign(RefSelection, FitSelection)
        return

    if re.match("^super$", Method, re.I):
        pymol.cmd.super(FitSelection, RefSelection)
        return

    MiscUtil.PrintError("Invalid alignment method: %s" % OptionsInfo["AlignMethod"])
def GetInterfaceChainsAndResiduesInfo(MolName1, ChainIDs1, MolName2, ChainIDs2, Method, Cutoff):
    """Retrieve interface chains and residues for two sets of chains.

    Arguments:
        MolName1: Name of the first molecule, passed on to PyMOLUtil.
        ChainIDs1: Iterable of chain ID strings for the first molecule.
        MolName2: Name of the second molecule, passed on to PyMOLUtil.
        ChainIDs2: Iterable of chain ID strings for the second molecule.
        Method (str): One of "BySASAChange", "ByHeavyAtomsDistance", or
            "ByCAlphaAtomsDistance" (case insensitive).
        Cutoff: Cutoff value passed on unchanged to the PyMOLUtil function.

    Returns:
        tuple: The pair of values returned by the selected PyMOLUtil
            function, one for each molecule. For an unrecognized method,
            an error is reported through MiscUtil.PrintError and both
            values are None.

    """

    # Chain IDs are handed to PyMOLUtil as comma delimited strings...
    ChainNames1 = ",".join(ChainIDs1)
    ChainNames2 = ",".join(ChainIDs2)

    if re.match("^BySASAChange$", Method, re.I):
        RetrieveInfo = PyMOLUtil.GetInterfaceChainsResiduesBySASAChange
    elif re.match("^ByHeavyAtomsDistance$", Method, re.I):
        # NOTE(review): "Getnterface" spelling is kept exactly as used by
        # this script - confirm against the PyMOLUtil module before renaming.
        RetrieveInfo = PyMOLUtil.GetnterfaceChainsResiduesByHeavyAtomsDistance
    elif re.match("^ByCAlphaAtomsDistance$", Method, re.I):
        RetrieveInfo = PyMOLUtil.GetInterfaceChainsResiduesByCAlphaAtomsDistance
    else:
        MiscUtil.PrintError("Failed to retrieve interface residues information: Method %s is not valid..." % Method)
        return None, None

    ResiduesInfo1, ResiduesInfo2 = RetrieveInfo(MolName1, ChainNames1, MolName2, ChainNames2, Cutoff)

    return ResiduesInfo1, ResiduesInfo2
def ValidateOptions():
    """Validate the command line option values held in Options.

    Checks the text values of "--alignMethod" and "--alignMode", expands
    the "-i, --infiles" value into individual file names, requires at least
    two of them, and validates the path and extension of each one. The
    expanded list is stored in Options["--infileNames"]. Unless
    "--alignRefFile" is "FirstInputFile" (case insensitive), its path and
    extension are validated as well.

    """

    MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"], "align cealign super")
    MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"], "FirstChain Complex")

    # Expand infiles to handle presence of multiple input files...
    InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")
    if len(InfileNames) < 2:
        # Two input files are accepted by the check above, so the message
        # states the minimum rather than claiming more than two are needed.
        MiscUtil.PrintError("Number of input files specified for \"-i, --infiles\" option, %d, must be greater than or equal to 2..." % (len(InfileNames)))

    # Validate file extensions...
    for Infile in InfileNames:
        MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
        MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
    Options["--infileNames"] = InfileNames

    # A separate reference file is validated only when one is specified...
    if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
        AlignRefFile = Options["--alignRefFile"]
        MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile)
        MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile, "pdb cif")
def ProcessClusteringMethodParameter():
    """Resolve the specified clustering method to a hierarchical method.

    Sets OptionsInfo["SpecifiedHierarchicalClusteringMethod"] and
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"]. Both stay empty
    strings for the "Butina" method. For any other value, the method name is
    matched ignoring case against rdSimDivPickers.ClusterMethod.names and the
    matching name along with its entry in that mapping is stored. An
    unrecognized name is reported through MiscUtil.PrintError.

    """

    OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = ""
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = ""

    SpecifiedMethod = OptionsInfo["ClusteringMethod"]
    if re.match("^Butina$", SpecifiedMethod, re.I):
        # Nothing to process...
        return

    # Set up method name -> ID and lower case name -> method name maps...
    MethodNames = rdSimDivPickers.ClusterMethod.names
    SortedNames = sorted(MethodNames)
    MethodIDs = {MethodName: MethodNames[MethodName] for MethodName in SortedNames}
    NamesByLowerCase = {MethodName.lower(): MethodName for MethodName in SortedNames}

    LookupName = SpecifiedMethod.lower()
    if LookupName not in NamesByLowerCase:
        MiscUtil.PrintError("The clustering method, %s, specified using \"-c, --clusteringMethod\" option is not a valid name." % (OptionsInfo["ClusteringMethod"]))

    MatchedName = NamesByLowerCase[LookupName]
    OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = MatchedName
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = MethodIDs[MatchedName]
def GetEnergy(Mol, ConfID = None):
    """Calculate the forcefield energy of a molecule conformer.

    Arguments:
        Mol: Molecule passed to the AllChem forcefield functions.
        ConfID: Conformer ID; None is replaced by -1 before it is passed on
            as the confId argument.

    Returns:
        tuple: (Status, Energy). Status is False and Energy is None when no
            forcefield object could be set up for the molecule; otherwise
            Status is True and Energy is the value returned by CalcEnergy().
            When neither OptionsInfo["UseUFF"] nor OptionsInfo["UseMMFF"]
            is set, an error is reported through MiscUtil.PrintError and
            (True, None) is the result.

    """

    ConformerID = -1 if ConfID is None else ConfID

    if OptionsInfo["UseUFF"]:
        Forcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConformerID)
    elif OptionsInfo["UseMMFF"]:
        MMFFProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
        Forcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFProperties, confId = ConformerID)
    else:
        MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
        return (True, None)

    # No forcefield object means the energy can't be calculated...
    if Forcefield is None:
        return (False, None)

    return (True, Forcefield.CalcEnergy())
def PerformConstrainedMinimization():
    """Run constrained minimization for the input molecules.

    Retrieves the reference molecule, reads molecules from
    OptionsInfo["Infile"] with empty molecules allowed, sets up a writer for
    OptionsInfo["Outfile"], hands the reference molecule, molecules, and
    writer to ProcessMolecules, closes the writer, and prints the counts
    returned by ProcessMolecules.

    """

    # Read and validate reference molecule...
    RefMol = RetrieveReferenceMolecule()

    # Setup a molecule reader for input file...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = True
    Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **InfileParams)

    # Set up a molecule writer...
    Outfile = OptionsInfo["Outfile"]
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    Counts = ProcessMolecules(RefMol, Mols, Writer)
    MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = Counts

    if Writer is not None:
        Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
    MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)

    # Ignored molecules: invalid ones plus those dropped during processing...
    IgnoredCount = MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (IgnoredCount))
def SetupCoreScaffoldsBySMARTSOrSMILES():
    """Set up core scaffold molecule(s) from specified SMARTS or SMILES.

    Each entry of OptionsInfo["SMARTSOrSMILESCoreScaffoldList"] is converted
    using Chem.MolFromSmarts when OptionsInfo["CoreScaffold"] is "BySMARTS"
    (case insensitive) and Chem.MolFromSmiles otherwise. A failed conversion
    is reported through MiscUtil.PrintError.

    Returns:
        list: Core scaffold molecules in the order of the specified list.

    """

    BySMARTS = bool(re.match("^BySMARTS$", OptionsInfo["CoreScaffold"], re.I))
    CoreScaffoldList = OptionsInfo["SMARTSOrSMILESCoreScaffoldList"]

    if not BySMARTS:
        MiscUtil.PrintInfo("\nSetting up core scaffold(s) using SMILES...\nSMILES core scaffold(s): %s" % " ".join(CoreScaffoldList))
    else:
        MiscUtil.PrintInfo("\nSetting up core scaffold(s) using SMARTS...\nSMARTS core scaffold(s): %s" % " ".join(CoreScaffoldList))

    CoreMols = []
    for Core in CoreScaffoldList:
        CoreMol = Chem.MolFromSmarts(Core) if BySMARTS else Chem.MolFromSmiles(Core)
        if CoreMol is None:
            MiscUtil.PrintError("Failed to generate mol for core scaffold: %s" % (Core))
        CoreMols.append(CoreMol)

    return CoreMols