def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then copies the RMSD related options, the
    reference/probe file names, the input file parameters and the output file
    settings from the module-level Options mapping into OptionsInfo. The output
    delimiter is derived from the extension of the output file.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    CalcRMSD = Options["--calcRMSD"]
    OptionsInfo["CalcRMSD"] = CalcRMSD
    OptionsInfo["UseBestRMSD"] = bool(re.match("^BestRMSD$", CalcRMSD, re.I))

    OptionsInfo["MaxIters"] = int(Options["--maxIters"])

    OptionsInfo["Mode"] = Options["--mode"]

    OptionsInfo["RefFile"] = Options["--reffile"]
    OptionsInfo["ProbeFile"] = Options["--probefile"]

    # Only input file parameters are processed; no RDKit specific
    # --outfileParams are needed...
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"])

    Outfile = Options["--outfile"]
    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Pick the delimiter from the first matching extension group; the
    # for/else reports an error when no group matches...
    OptionsInfo["OutDelim"] = " "
    for FileExts, Delim in (("csv", ","), ("tsv txt", "\t")):
        if MiscUtil.CheckFileExt(Outfile, FileExts):
            OptionsInfo["OutDelim"] = Delim
            break
    else:
        MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (Outfile))
def MoleculesWriter(FileName, **KeyWordArgs):
    """Create a molecule writer matching the extension of the output file.

    Arguments:
        FileName (str): Name of the output file.
        **KeyWordArgs (dictionary) : Optional overrides for the writer
            parameters Compute2DCoords, Kekulize, SMILESDelimiter,
            SMILESIsomeric, SMILESTitleLine and SMILESMolName. Any other
            keyword is ignored.

    Returns:
        RDKit object : Chem.SDWriter for "sdf sd", Chem.PDBWriter for "pdb"
            and Chem.SmilesWriter for "smi" files; None, after a warning,
            for any other file type.

    """

    # Default value for each supported parameter; a caller supplied value
    # takes precedence...
    DefaultArgs = {
        "Compute2DCoords": False,
        "Kekulize": False,
        "SMILESDelimiter": ' ',
        "SMILESIsomeric": True,
        "SMILESTitleLine": True,
        "SMILESMolName": True,
    }
    WriterArgs = {
        Name: KeyWordArgs.get(Name, Default)
        for Name, Default in DefaultArgs.items()
    }

    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        Writer = Chem.SDWriter(FileName)
        if WriterArgs["Kekulize"]:
            Writer.SetKekulize(True)
        return Writer

    if MiscUtil.CheckFileExt(FileName, "pdb"):
        return Chem.PDBWriter(FileName)

    if MiscUtil.CheckFileExt(FileName, "smi"):
        # A blank name header leaves the name column out of the title line...
        if WriterArgs["SMILESMolName"]:
            NameHeader = 'Name'
        else:
            NameHeader = ''
        return Chem.SmilesWriter(
            FileName,
            delimiter=WriterArgs["SMILESDelimiter"],
            nameHeader=NameHeader,
            includeHeader=WriterArgs["SMILESTitleLine"],
            isomericSmiles=WriterArgs["SMILESIsomeric"],
            kekuleSmiles=WriterArgs["Kekulize"])

    MiscUtil.PrintWarning(
        "RDKitUtil.WriteMolecules: Non supported file type: %s" % FileName)
    return None
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the descriptor selection
    options, input/output file names and their parameters, the text output
    mode and delimiter, multiprocessing settings, precision and the SMILES
    output flag in OptionsInfo.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    Infile = Options["--infile"]
    Outfile = Options["--outfile"]

    # Yes/no options are kept as booleans...
    OptionsInfo["Autocorr2DExclude"] = bool(
        re.match("^Yes$", Options["--autocorr2DExclude"], re.I))
    OptionsInfo["FragmentCount"] = bool(
        re.match("^Yes$", Options["--fragmentCount"], re.I))

    OptionsInfo["DescriptorNames"] = Options["--descriptorNames"]
    OptionsInfo["Mode"] = Options["--mode"]

    OptionsInfo["Infile"] = Infile
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], Infile)

    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], Infile, Outfile)

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Text output is used for csv, tsv and txt files only; any other
    # extension leaves text mode off with an empty delimiter...
    if MiscUtil.CheckFileExt(Outfile, "csv"):
        TextMode, TextDelim = True, ","
    elif MiscUtil.CheckFileExt(Outfile, "tsv txt"):
        TextMode, TextDelim = True, "\t"
    else:
        TextMode, TextDelim = False, ""
    OptionsInfo["TextOutFileMode"] = TextMode
    OptionsInfo["TextOutFileDelim"] = TextDelim

    OptionsInfo["MPMode"] = bool(re.match("^yes$", Options["--mp"], re.I))
    OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters(
        "--mpParams", Options["--mpParams"])

    OptionsInfo["Precision"] = int(Options["--precision"])

    OptionsInfo["SMILESOut"] = bool(
        re.match("^Yes$", Options["--smilesOut"], re.I))
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the input file list, output
    file settings, keep/quiet flags, precision, mode and chain IDs in
    OptionsInfo. The helpers ProcessKeepSelectionOptions(),
    ProcessSpecifiedPropertyNames(), RetrieveInfilesInfo() and
    ProcessChainIDs() are called in the same order as before, since they may
    read values stored earlier.
    """

    def IsYes(OptionName):
        """Return True when the named option is a case-insensitive 'yes'."""
        return bool(re.match("^Yes$", Options[OptionName], re.I))

    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    OptionsInfo["Addhydrogens"] = IsYes("--addHydrogens")

    OptionsInfo["Infiles"] = Options["--infiles"]
    OptionsInfo["InfilesNames"] = Options["--infilesNames"]

    Outfile = Options["--outfile"]
    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Pick the delimiter from the first matching extension group; the
    # for/else reports an error when no group matches...
    OptionsInfo["OutDelim"] = " "
    for FileExts, Delim in (("csv", ","), ("tsv txt", "\t")):
        if MiscUtil.CheckFileExt(Outfile, FileExts):
            OptionsInfo["OutDelim"] = Delim
            break
    else:
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (Outfile))

    OptionsInfo["KeepInorganics"] = IsYes("--keepInorganics")
    OptionsInfo["KeepLigands"] = IsYes("--keepLigands")
    OptionsInfo["KeepSolvents"] = IsYes("--keepSolvents")
    ProcessKeepSelectionOptions()

    # Overwrite is stored a second time here, exactly as in the original
    # flow; it is redundant unless ProcessKeepSelectionOptions() changes it...
    OptionsInfo["Overwrite"] = Options["--overwrite"]
    OptionsInfo["Quiet"] = int(IsYes("--quiet"))

    OptionsInfo["Precision"] = int(Options["--precision"])

    OptionsInfo["Mode"] = Options["--mode"]
    ProcessSpecifiedPropertyNames()

    RetrieveInfilesInfo()

    ChainIDs = Options["--chainIDs"]
    OptionsInfo["ChainIDs"] = ChainIDs
    OptionsInfo["AllChains"] = bool(re.match("^All$", ChainIDs, re.I))
    ProcessChainIDs()
def DrawMolecules():
    """Read the molecules from the input file and draw them to the output file.

    Steps, in order: read and validate the molecules, optionally compute 2D
    coordinates, set up the atom lists to highlight and the legends, align
    the molecules and finally generate an SVG, HTML or other image file
    depending on the extension of the output file.
    """
    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Read molecules...
    MiscUtil.PrintInfo("\nReading file %s..." % Infile)
    ValidMols, MolCount, ValidMolCount = RDKitUtil.ReadAndValidateMolecules(
        Infile, **OptionsInfo["InfileParams"])

    MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))

    # Compute 2D coordinates...
    if OptionsInfo["Compute2DCoords"]:
        MiscUtil.PrintInfo("\nComputing 2D coordinates...")
        for Mol in ValidMols:
            AllChem.Compute2DCoords(Mol)

    MiscUtil.PrintInfo("Generating image grid...")

    # Atoms to highlight; no bonds are highlighted...
    AtomLists = SetupAtomListsToHighlight(ValidMols)
    BondLists = None

    # Legends: one name per molecule, numbered from 1, or no legends at all...
    if OptionsInfo["ShowMolName"]:
        MolNames = [
            RDKitUtil.GetMolName(Mol, MolNum)
            for MolNum, Mol in enumerate(ValidMols, 1)
        ]
    else:
        MolNames = None

    # Perform alignment to a common template...
    PerformAlignment(ValidMols)

    # The extension of the output file selects the generator...
    if MiscUtil.CheckFileExt(Outfile, "svg"):
        Generate = GenerateSVGImageFile
    elif MiscUtil.CheckFileExt(Outfile, "html htm"):
        Generate = GenerateHTMLTableFile
    else:
        Generate = GenerateImageFile
    Generate(ValidMols, MolNames, AtomLists, BondLists)
def ValidateOptions():
    """Validate option values.

    Checks the text, file and integer options in the module-level Options
    mapping using the MiscUtil validation helpers. Problems specific to this
    script (missing descriptor names in "Specify" mode, a SMILES input file
    in "3D" or "All" mode) are reported through MiscUtil.PrintError.
    """
    MiscUtil.ValidateOptionTextValue("-a, --autocorr2DExclude", Options["--autocorr2DExclude"], "yes no")
    MiscUtil.ValidateOptionTextValue("-f, --fragmentCount", Options["--fragmentCount"], "yes no")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "2D 3D All FragmentCountOnly Specify")

    # "Specify" mode needs an explicit list of descriptor names...
    if re.match("^Specify$", Options["--mode"], re.I):
        if re.match("^none$", Options["--descriptorNames"], re.I):
            MiscUtil.PrintError("The name(s) of molecular descriptors must be specified using \"-d, --descriptorNames\" option during \"Specify\" value of \"-m, --mode\" option.")

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")

    # SMILES input is rejected for "3D" and "All" modes. The alternation is
    # grouped so that both anchors apply to each alternative...
    if re.match("^(3D|All)$", Options["--mode"], re.I):
        if MiscUtil.CheckFileExt(Options["--infile"], "smi"):
            # Fill the file name into the message; the placeholder used to be
            # printed literally...
            MiscUtil.PrintError("The input SMILES file, %s, is not valid for \"3D or All\" value of \"-m, --mode\" option." % Options["--infile"])

    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd csv tsv txt")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])

    MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
    MiscUtil.ValidateOptionTextValue("-s, --smilesOut", Options["--smilesOut"], "yes no")
def GenerateImageFile(ValidMols, MolNames, AtomLists, BondLists):
    """Draw the molecules as a grid and save it as a non SVG image file.

    Arguments:
        ValidMols: Molecules passed to Draw.MolsToGridImage.
        MolNames: Legends for the molecules, or None.
        AtomLists: Atom lists to highlight, passed as highlightAtomLists.
        BondLists: Bond lists to highlight, passed as highlightBondLists.

    The grid layout, image size, font size, bond line width and kekulize
    flag are read from OptionsInfo; the image is saved to
    OptionsInfo["Outfile"].
    """
    Outfile = OptionsInfo["Outfile"]
    MolsPerRow = OptionsInfo["NumOfMolsPerRow"]
    ImageSize = (OptionsInfo["MolImageWidth"], OptionsInfo["MolImageHeight"])

    # Setup drawing options...
    DrawOpts = DrawingOptions()
    DrawOpts.atomLabelFontSize = int(OptionsInfo["AtomLabelFontSize"])
    DrawOpts.bondLineWidth = float(OptionsInfo["BondLineWidth"])

    GridImage = Draw.MolsToGridImage(
        ValidMols,
        molsPerRow=MolsPerRow,
        subImgSize=ImageSize,
        legends=MolNames,
        highlightAtomLists=AtomLists,
        highlightBondLists=BondLists,
        useSVG=False,
        kekulize=OptionsInfo["Kekulize"],
        options=DrawOpts)

    MiscUtil.PrintInfo("\nGenerating image file %s..." % Outfile)

    # An RGBA image is converted to RGB before being saved as PDF...
    if MiscUtil.CheckFileExt(Outfile, "pdf") and GridImage.mode == 'RGBA':
        GridImage = GridImage.convert('RGB')

    GridImage.save(Outfile)
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the input/output file names
    and their root names, the output delimiter and mode, the chain ID and
    category output flags, precision and chain IDs in OptionsInfo. The
    helpers RetrieveInfileInfo(), ProcessChainIDs() and
    SetupCategoryOutfiles() are called in the same order as before.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    Infile = Options["--infile"]
    OptionsInfo["Infile"] = Infile
    _, InfileRoot, _ = MiscUtil.ParseFileName(Infile)
    OptionsInfo["InfileRoot"] = InfileRoot

    Outfile = Options["--outfile"]
    OptionsInfo["Outfile"] = Outfile
    _, OutfileRoot, _ = MiscUtil.ParseFileName(Outfile)
    OptionsInfo["OutfileRoot"] = OutfileRoot

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Pick the delimiter from the first matching extension group; the
    # for/else reports an error when no group matches...
    OptionsInfo["OutDelim"] = " "
    for FileExts, Delim in (("csv", ","), ("tsv txt", "\t")):
        if MiscUtil.CheckFileExt(Outfile, FileExts):
            OptionsInfo["OutDelim"] = Delim
            break
    else:
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (Outfile))

    OutMode = Options["--outMode"]
    OptionsInfo["OutMode"] = OutMode
    OptionsInfo["MultipleOutFiles"] = bool(
        re.match("^MultipleFiles$", OutMode, re.I))

    OptionsInfo["OutChainID"] = bool(
        re.match("^Yes$", Options["--outChainID"], re.I))
    OptionsInfo["OutCategory"] = bool(
        re.match("^Yes$", Options["--outCategory"], re.I))

    # Overwrite is stored a second time here, exactly as in the original flow...
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OptionsInfo["Precision"] = int(Options["--precision"])

    RetrieveInfileInfo()

    OptionsInfo["ChainIDs"] = Options["--chainIDs"]
    ProcessChainIDs()

    SetupCategoryOutfiles()
def ProcessSpecifiedDescriptorNames():
    """Process and validate specified descriptor names.

    Stores the descriptor names to calculate in
    OptionsInfo["SpecifiedDescriptorNames"]:

    - "2D", "3D", "All" and "FragmentCountOnly" modes use the predefined
      name lists from DescriptorNamesMap; fragment count names are added to
      "2D" and "All" when OptionsInfo["FragmentCount"] is set.
    - Otherwise the comma delimited names from
      OptionsInfo["DescriptorNames"] are matched case-insensitively against
      the keys of DescriptorNamesMap["ComputeFunction"]. Unknown names, and
      3D names combined with a SMILES input file, are reported through
      MiscUtil.PrintError.

    The predefined lists are copied before use, so extending the result never
    modifies DescriptorNamesMap itself.
    """
    OptionsInfo["SpecifiedDescriptorNames"] = []

    Mode = OptionsInfo["Mode"]
    if not re.match("^(2D|3D|All|FragmentCountOnly|Specify)$", Mode, re.I):
        MiscUtil.PrintError("Mode value, %s, using \"-m, --mode\" option is not a valid value." % Mode)

    # Predefined sets of names. Always work on a copy: extending the list
    # held by DescriptorNamesMap would silently change the shared map...
    if re.match("^2D$", Mode, re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^3D$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["3D"]["Names"])
        return
    elif re.match("^All$", Mode, re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        Names.extend(DescriptorNamesMap["3D"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^FragmentCountOnly$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["FragmentCount"]["Names"])
        return

    # Set up a canonical descriptor names map for checking specified names...
    CanonicalNameMap = {
        Name.lower(): Name for Name in DescriptorNamesMap["ComputeFunction"]
    }

    # Parse and validate specified names...
    DescriptorNames = re.sub(" ", "", OptionsInfo["DescriptorNames"])
    if not DescriptorNames:
        MiscUtil.PrintError("No descriptor names specified for \"-d, --descriptorNames\" option")

    # 3D descriptor names are only tracked for a SMILES input file, where
    # they are not allowed...
    SMILESInfile = MiscUtil.CheckFileExt(Options["--infile"], "smi")
    Canonical3DNameMap = {}
    if SMILESInfile:
        Canonical3DNameMap = {
            Name.lower(): Name for Name in DescriptorNamesMap["3D"]["Names"]
        }

    SpecifiedDescriptorNames = []
    for Name in DescriptorNames.split(","):
        CanonicalName = Name.lower()
        if CanonicalName in CanonicalNameMap:
            SpecifiedDescriptorNames.append(CanonicalNameMap[CanonicalName])
        else:
            MiscUtil.PrintError("The descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid name." % (Name))
        if SMILESInfile:
            if CanonicalName in Canonical3DNameMap:
                MiscUtil.PrintError("The 3D descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid for SMILES input file." % (Name))

    if not SpecifiedDescriptorNames:
        MiscUtil.PrintError("No valid descriptor name specified for \"-d, --descriptorNames\" option")

    OptionsInfo["SpecifiedDescriptorNames"] = SpecifiedDescriptorNames
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the alignment method and mode,
    the probe and reference file options and the output file settings in
    OptionsInfo. RetrieveProbeFilesInfo() and RetrieveRefFilesInfo() are
    called once the file options are stored. The output delimiter is derived
    from the extension of the output file.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    # The alignment method is kept in lower case...
    OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower()
    OptionsInfo["AlignMode"] = Options["--alignMode"]

    OptionsInfo["Mode"] = Options["--mode"]

    # Copy the probe and reference file options before retrieving file info...
    for InfoName, OptionName in (
            ("ProbeFiles", "--probefiles"),
            ("ProbeFilesNames", "--probeFilesNames"),
            ("RefFiles", "--reffiles"),
            ("RefFilesNames", "--refFilesNames")):
        OptionsInfo[InfoName] = Options[OptionName]

    RetrieveProbeFilesInfo()
    RetrieveRefFilesInfo()

    Outfile = Options["--outfile"]
    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["OutMatrix"] = bool(
        re.match("^Yes$", Options["--outMatrix"], re.I))

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Pick the delimiter from the first matching extension group; the
    # for/else reports an error when no group matches...
    OptionsInfo["OutDelim"] = " "
    for FileExts, Delim in (("csv", ","), ("tsv txt", "\t")):
        if MiscUtil.CheckFileExt(Outfile, FileExts):
            OptionsInfo["OutDelim"] = Delim
            break
    else:
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (Outfile))
def ReadMolecules(FileName, **KeyWordArgs):
    """Read molecules from an input file using a reader chosen by file extension.

    Arguments:
        FileName (str): Name of the input file.
        **KeyWordArgs (dictionary) : Optional overrides for the reader
            parameters Sanitize, RemoveHydrogens, StrictParsing,
            SMILESDelimiter, SMILESColumn, SMILESNameColumn and
            SMILESTitleLine. Any other keyword is ignored.

    Returns:
        list : The value returned by the format specific reader (a list of
            RDKit molecule objects, per the original documentation); an
            empty list, after a warning, for an unsupported file type.

    Notes:
        The SMILES column numbers are specified starting from 1 and are
        passed on to the SMILES reader as zero based indices.

    """

    # Default value for each supported parameter; a caller supplied value
    # takes precedence...
    DefaultArgs = {
        "Sanitize": True,
        "RemoveHydrogens": True,
        "StrictParsing": True,
        "SMILESDelimiter": ' ',
        "SMILESColumn": 1,
        "SMILESNameColumn": 2,
        "SMILESTitleLine": True,
    }
    ReaderArgs = {
        Name: KeyWordArgs.get(Name, Default)
        for Name, Default in DefaultArgs.items()
    }

    # For SMILES and text files, these flags are converted to 1 (only for
    # the value True) or 0...
    if MiscUtil.CheckFileExt(FileName, "smi csv tsv txt"):
        for FlagName in ("Sanitize", "SMILESTitleLine"):
            ReaderArgs[FlagName] = 1 if ReaderArgs[FlagName] is True else 0

    Sanitize = ReaderArgs["Sanitize"]
    RemoveHydrogens = ReaderArgs["RemoveHydrogens"]

    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        return ReadMoleculesFromSDFile(
            FileName, Sanitize, RemoveHydrogens, ReaderArgs['StrictParsing'])

    if MiscUtil.CheckFileExt(FileName, "mol"):
        return ReadMoleculesFromMolFile(
            FileName, Sanitize, RemoveHydrogens, ReaderArgs['StrictParsing'])

    if MiscUtil.CheckFileExt(FileName, "mol2"):
        return ReadMoleculesFromMol2File(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "pdb"):
        return ReadMoleculesFromPDBFile(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "smi txt csv tsv"):
        # Column numbers are converted to zero based indices...
        SMILESColumnIndex = ReaderArgs["SMILESColumn"] - 1
        SMILESNameColumnIndex = ReaderArgs["SMILESNameColumn"] - 1
        return ReadMoleculesFromSMILESFile(
            FileName,
            ReaderArgs["SMILESDelimiter"],
            SMILESColumnIndex,
            SMILESNameColumnIndex,
            ReaderArgs["SMILESTitleLine"],
            Sanitize)

    MiscUtil.PrintWarning("RDKitUtil.ReadMolecules: Non supported file type: %s" % FileName)
    return []
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the clustering, fingerprints
    and similarity options, the input/output file names and their
    parameters, the output file mode and the text output settings in
    OptionsInfo. The helpers ProcessClusteringMethodParameter(),
    ProcessSimilarityMetricParameter() and ProcessFingerprintsParameters()
    are each called right after the option they process is stored.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    Infile = Options["--infile"]
    Outfile = Options["--outfile"]

    OptionsInfo["ButinaSimilarityCutoff"] = float(
        Options["--butinaSimilarityCutoff"])
    OptionsInfo["ButinaReordering"] = bool(
        re.match("^Yes$", Options["--butinaReordering"], re.I))

    OptionsInfo["Fingerprints"] = Options["--fingerprints"]
    OptionsInfo["FingerprintsType"] = Options["--fingerprintsType"]

    OptionsInfo["ClusteringMethod"] = Options["--clusteringMethod"]
    ProcessClusteringMethodParameter()

    OptionsInfo["NumClusters"] = int(Options["--numClusters"])

    OptionsInfo["Infile"] = Infile
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], Infile)

    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], Infile, Outfile)

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OutFileMode = Options["--outfileMode"]
    OptionsInfo["OutFileMode"] = OutFileMode
    OptionsInfo["SingleOutFileMode"] = bool(
        re.match("^SingleFile$", OutFileMode, re.I))

    _, OutfileBasename, OutfileExt = MiscUtil.ParseFileName(Outfile)
    OptionsInfo["OutfileBasename"] = OutfileBasename
    OptionsInfo["OutfileExt"] = OutfileExt

    # Text output is used for csv, tsv, txt and smi files. SMILES files take
    # the delimiter and title line setting from the output file parameters;
    # any other extension leaves text mode off...
    TextMode, TextDelim, TextTitleLine = False, "", True
    if MiscUtil.CheckFileExt(Outfile, "csv"):
        TextMode, TextDelim = True, ","
    elif MiscUtil.CheckFileExt(Outfile, "tsv txt"):
        TextMode, TextDelim = True, "\t"
    elif MiscUtil.CheckFileExt(Outfile, "smi"):
        TextMode = True
        TextDelim = OptionsInfo["OutfileParams"]["SMILESDelimiter"]
        TextTitleLine = OptionsInfo["OutfileParams"]["SMILESTitleLine"]

    OptionsInfo["TextOutFileMode"] = TextMode
    OptionsInfo["TextOutFileDelim"] = TextDelim
    OptionsInfo["TextOutFileTitleLine"] = TextTitleLine

    OptionsInfo["SimilarityMetric"] = Options["--similarityMetric"]
    ProcessSimilarityMetricParameter()

    OptionsInfo["ParamsFingerprints"] = Options["--paramsFingerprints"]
    ProcessFingerprintsParameters()
def ProcessOptions():
    """Validate the command line options and store processed values in OptionsInfo.

    Runs ValidateOptions() first, then stores the core scaffold mode, the
    input/output file names and their parameters, the text output mode,
    delimiter and quoting, the unmatched output file name, the
    decomposition and MCS parameters and the SMARTS/SMILES core scaffold
    specification in OptionsInfo.
    """
    MiscUtil.PrintInfo("Processing options...")

    # Reject bad option values before anything is stored...
    ValidateOptions()

    Infile = Options["--infile"]
    Outfile = Options["--outfile"]
    CoreScaffold = Options["--coreScaffold"]

    OptionsInfo["CoreScaffold"] = CoreScaffold

    OptionsInfo["Infile"] = Infile
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], Infile)

    OptionsInfo["Outfile"] = Outfile
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], Infile, Outfile)

    # Text output is used for csv, tsv and txt files only...
    if MiscUtil.CheckFileExt(Outfile, "csv"):
        TextMode, TextDelim = True, ","
    elif MiscUtil.CheckFileExt(Outfile, "tsv txt"):
        TextMode, TextDelim = True, "\t"
    else:
        TextMode, TextDelim = False, ""
    OptionsInfo["TextOutFileMode"] = TextMode
    OptionsInfo["TextOutFileDelim"] = TextDelim

    # Quoting: "auto" turns it on for csv files only; otherwise an explicit
    # "yes" is required...
    if re.match("^auto$", Options["--quote"], re.I):
        TextQuote = bool(MiscUtil.CheckFileExt(Outfile, "csv"))
    else:
        TextQuote = bool(re.match("^yes$", Options["--quote"], re.I))
    OptionsInfo["TextOutQuote"] = TextQuote

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The unmatched file name is built from the root name and extension of
    # the output file; the directory part is not used...
    RemoveUnmatched = bool(
        re.match("^yes$", Options["--removeUnmatched"], re.I))
    UnmatchedOutfile = None
    if RemoveUnmatched:
        _, OutfileRoot, OutfileExt = MiscUtil.ParseFileName(
            OptionsInfo["Outfile"])
        UnmatchedOutfile = "%sUnmatched.%s" % (OutfileRoot, OutfileExt)
    OptionsInfo["RemoveUnmatchedMode"] = RemoveUnmatched
    OptionsInfo["UnmatchedOutfile"] = UnmatchedOutfile

    OptionsInfo["SpecifiedDecompositionParams"] = Options[
        "--decompositionParams"]
    ProcessDecompositionParameters()

    OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
    ProcessMCSParameters()

    ScaffoldSpec = Options["--smartsOrSmilesCoreScaffold"]
    ScaffoldText = ""
    ScaffoldList = []
    # NOTE(review): this condition holds for every string value, including
    # "none" (its length is non-zero), so it never skips the block. It is
    # kept unchanged to preserve behavior - confirm whether "and" was meant.
    if not re.match("^none$", ScaffoldSpec, re.I) or len(ScaffoldSpec):
        if re.match("^(BySMARTS|BySMILES)$", CoreScaffold, re.I):
            # Spaces are dropped before splitting on commas...
            ScaffoldText = re.sub(" ", "", ScaffoldSpec)
            if not ScaffoldText:
                MiscUtil.PrintError(
                    "A non empty value must be specified for \"-s, --smartsOrSmilesCoreScaffold\" during %s value of \"-c, --coreScaffold\" option " % (CoreScaffold))
            ScaffoldList = ScaffoldText.split(",")
    OptionsInfo["SMARTSOrSMILESCoreScaffold"] = ScaffoldText
    OptionsInfo["SMARTSOrSMILESCoreScaffoldList"] = ScaffoldList
def ValidateOptions():
    """Validate option values.

    Checks the text options, the reaction names file (unless it is "auto"),
    the output file and every reactant file named by "-i, --infiles". All
    reactant files must share one file format: either SD ("sdf sd") or
    SMILES ("smi csv tsv txt"). Problems are reported through the MiscUtil
    validation helpers and MiscUtil.PrintError.
    """
    MiscUtil.ValidateOptionTextValue(
        "--compute2DCoords", Options["--compute2DCoords"], "yes no")

    MiscUtil.ValidateOptionTextValue(
        "-m, --mode", Options["--mode"], "RxnByName RxnBySMIRKS")
    MiscUtil.ValidateOptionTextValue(
        "-p, --prodMolNames", Options["--prodMolNames"],
        "UseReactants Sequential")

    RxnNamesFile = Options["--rxnNamesFile"]
    if not re.match("^auto$", RxnNamesFile, re.I):
        MiscUtil.ValidateOptionFilePath("--rxnNamesFile", RxnNamesFile)

    Outfile = Options["--outfile"]
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Outfile, "sdf sd smi")
    MiscUtil.ValidateOptionsOutputFileOverwrite(
        "-o, --outfile", Outfile, "--overwrite", Options["--overwrite"])

    # Spaces are dropped from the comma delimited list of reactant files...
    ReactantFiles = re.sub(" ", "", Options["--infiles"])
    if not ReactantFiles:
        MiscUtil.PrintError(
            "No reactant files specified for \"-i, --infiles\" option")
    ReactantFilesList = ReactantFiles.split(",")

    # Validate file extensions...
    for ReactantFile in ReactantFilesList:
        MiscUtil.ValidateOptionFilePath("-i, --infiles", ReactantFile)
        MiscUtil.ValidateOptionFileExt(
            "-i, --infiles", ReactantFile, "sdf sd smi csv tsv txt")
        MiscUtil.ValidateOptionsDistinctFileNames(
            "-i, --infiles", ReactantFile, "-o, --outfile", Outfile)

    # Match file formats: the first file sets the format that every other
    # file has to share...
    FirstFileFormat = None
    for ReactantFile in ReactantFilesList:
        if MiscUtil.CheckFileExt(ReactantFile, "sdf sd"):
            FileFormat = "SD"
        elif MiscUtil.CheckFileExt(ReactantFile, "smi csv tsv txt"):
            FileFormat = "SMILES"
        else:
            FileFormat = ""
            MiscUtil.PrintError(
                "The file name specified , %s, for option \"-i, --infiles\" is not valid. Supported file formats: sdf sd smi csv tsv txt\n" % ReactantFile)

        if FirstFileFormat is None:
            FirstFileFormat = FileFormat
            continue

        if FileFormat.lower() != FirstFileFormat.lower():
            MiscUtil.PrintError(
                "All reactant file names - %s - specified using option \"-i, --infiles\" must have the same file format.\n" % ReactantFiles)

    MiscUtil.ValidateOptionTextValue(
        "--sanitize", Options["--sanitize"], "yes no")