def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions() and then copies the values read from Options into
    OptionsInfo, converting "--maxIters" to an integer and deriving the output
    delimiter from the extension of the output file name.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Options are validated before any value is converted or stored...
    ValidateOptions()

    RMSDMode = Options["--calcRMSD"]
    OptionsInfo["CalcRMSD"] = RMSDMode
    OptionsInfo["UseBestRMSD"] = bool(re.match("^BestRMSD$", RMSDMode, re.I))

    OptionsInfo["MaxIters"] = int(Options["--maxIters"])
    OptionsInfo["Mode"] = Options["--mode"]

    OptionsInfo["RefFile"] = Options["--reffile"]
    OptionsInfo["ProbeFile"] = Options["--probefile"]

    # No RDKit specific --outfileParams are handled here; only --infileParams...
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"])

    OutfileName = Options["--outfile"]
    OptionsInfo["Outfile"] = OutfileName
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The output delimiter follows the output file extension...
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        OptionsInfo["OutDelim"] = ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        OptionsInfo["OutDelim"] = "\t"
    else:
        # A single space stays in place as the delimiter when the error is reported...
        OptionsInfo["OutDelim"] = " "
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n"
            % OutfileName)
Beispiel #2
0
def MoleculesWriter(FileName, **KeyWordArgs):
    """Set up a molecule writer.

    Arguments:
        FileName (str): Name of a file with complete path.
        **KeyWordArgs (dictionary) : Parameter name and value pairs for writing and
            processing molecules. Recognized names are Compute2DCoords, Kekulize,
            SMILESDelimiter, SMILESIsomeric, SMILESTitleLine and SMILESMolName;
            any other name is ignored.

    Returns:
        RDKit object : Molecule writer, or None after a warning when the file
            type is not supported.

    Notes:
        The file extension is used to determine type of the file and set up an appropriate
        file writer: SD writer for sdf/sd, PDB writer for pdb and SMILES writer for smi.
        Compute2DCoords is accepted but not used while setting up the writer.

    """

    # Set default values for possible arguments...
    WriterArgs = {
        "Compute2DCoords": False,
        "Kekulize": False,
        "SMILESDelimiter": ' ',
        "SMILESIsomeric": True,
        "SMILESTitleLine": True,
        "SMILESMolName": True
    }

    # Override defaults with the specified values for known arguments only...
    Overrides = {Arg: KeyWordArgs[Arg] for Arg in WriterArgs if Arg in KeyWordArgs}
    WriterArgs.update(Overrides)

    Writer = None
    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        Writer = Chem.SDWriter(FileName)
        if WriterArgs["Kekulize"]:
            Writer.SetKekulize(True)
    elif MiscUtil.CheckFileExt(FileName, "pdb"):
        Writer = Chem.PDBWriter(FileName)
    elif MiscUtil.CheckFileExt(FileName, "smi"):
        # Text for the name column in the title line. Blank indicates not to include name column
        # in the output file...
        NameHeader = 'Name' if WriterArgs["SMILESMolName"] else ''
        Writer = Chem.SmilesWriter(FileName,
                                   delimiter=WriterArgs["SMILESDelimiter"],
                                   nameHeader=NameHeader,
                                   includeHeader=WriterArgs["SMILESTitleLine"],
                                   isomericSmiles=WriterArgs["SMILESIsomeric"],
                                   kekuleSmiles=WriterArgs["Kekulize"])
    else:
        # Report the name of this function so the warning can be traced to its source...
        MiscUtil.PrintWarning(
            "RDKitUtil.MoleculesWriter: Non supported file type: %s" % FileName)

    return Writer
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions() and then fills OptionsInfo from Options: yes/no
    options become booleans, "--precision" becomes an integer, and the text
    output mode and delimiter are derived from the output file extension.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    # Yes/no switches are stored as booleans...
    OptionsInfo["Autocorr2DExclude"] = bool(
        re.match("^Yes$", Options["--autocorr2DExclude"], re.I))
    OptionsInfo["FragmentCount"] = bool(
        re.match("^Yes$", Options["--fragmentCount"], re.I))

    OptionsInfo["DescriptorNames"] = Options["--descriptorNames"]
    OptionsInfo["Mode"] = Options["--mode"]

    InfileName = Options["--infile"]
    OutfileName = Options["--outfile"]

    OptionsInfo["Infile"] = InfileName
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], InfileName)

    OptionsInfo["Outfile"] = OutfileName
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], InfileName, OutfileName)

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # Text output is used only for csv, tsv and txt output files...
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        TextMode, TextDelim = True, ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        TextMode, TextDelim = True, "\t"
    else:
        TextMode, TextDelim = False, ""
    OptionsInfo["TextOutFileMode"] = TextMode
    OptionsInfo["TextOutFileDelim"] = TextDelim

    OptionsInfo["MPMode"] = bool(re.match("^yes$", Options["--mp"], re.I))
    OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters(
        "--mpParams", Options["--mpParams"])

    OptionsInfo["Precision"] = int(Options["--precision"])

    OptionsInfo["SMILESOut"] = bool(
        re.match("^Yes$", Options["--smilesOut"], re.I))
Beispiel #4
0
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions(), stores the option values in OptionsInfo and calls
    the helper functions that process the keep selections, the specified
    property names, the input files and the chain IDs.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["Addhydrogens"] = bool(
        re.match("^Yes$", Options["--addHydrogens"], re.I))

    OptionsInfo["Infiles"] = Options["--infiles"]
    OptionsInfo["InfilesNames"] = Options["--infilesNames"]

    OutfileName = Options["--outfile"]
    OptionsInfo["Outfile"] = OutfileName
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The output delimiter follows the output file extension...
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        OptionsInfo["OutDelim"] = ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        OptionsInfo["OutDelim"] = "\t"
    else:
        # A single space stays in place as the delimiter when the error is reported...
        OptionsInfo["OutDelim"] = " "
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n"
            % OutfileName)

    # Keep switches are stored as booleans before the selections are processed...
    for InfoKey, OptionName in (("KeepInorganics", "--keepInorganics"),
                                ("KeepLigands", "--keepLigands"),
                                ("KeepSolvents", "--keepSolvents")):
        OptionsInfo[InfoKey] = bool(re.match("^Yes$", Options[OptionName], re.I))
    ProcessKeepSelectionOptions()

    # Overwrite is assigned a second time here, after the keep selections...
    OptionsInfo["Overwrite"] = Options["--overwrite"]
    OptionsInfo["Quiet"] = int(bool(re.match("^Yes$", Options["--quiet"], re.I)))

    OptionsInfo["Precision"] = int(Options["--precision"])

    OptionsInfo["Mode"] = Options["--mode"]
    ProcessSpecifiedPropertyNames()

    RetrieveInfilesInfo()

    ChainIDs = Options["--chainIDs"]
    OptionsInfo["ChainIDs"] = ChainIDs
    OptionsInfo["AllChains"] = bool(re.match("^All$", ChainIDs, re.I))
    ProcessChainIDs()
def DrawMolecules():
    """Read molecules from the input file and draw them to the output file.

    The input and output file names and all drawing switches are read from
    OptionsInfo. The output generator is chosen from the extension of the
    output file: SVG, HTML table, or a generic image file otherwise.
    """

    InputFile = OptionsInfo["Infile"]
    OutputFile = OptionsInfo["Outfile"]

    # Read and validate molecules...
    MiscUtil.PrintInfo("\nReading file %s..." % InputFile)
    Mols, TotalCount, ValidCount = RDKitUtil.ReadAndValidateMolecules(
        InputFile, **OptionsInfo["InfileParams"])

    MiscUtil.PrintInfo("Total number of molecules: %d" % TotalCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidCount)
    MiscUtil.PrintInfo("Number of ignored molecules: %d" %
                       (TotalCount - ValidCount))

    # Compute 2D coordinates only on request...
    if OptionsInfo["Compute2DCoords"]:
        MiscUtil.PrintInfo("\nComputing 2D coordinates...")
        for Mol in Mols:
            AllChem.Compute2DCoords(Mol)

    MiscUtil.PrintInfo("Generating image grid...")

    # Atom lists come from the highlighting setup; no bond lists are used...
    HighlightAtoms = SetupAtomListsToHighlight(Mols)
    HighlightBonds = None

    # Legends are molecule names, with a 1-based position passed along for each molecule...
    Legends = None
    if OptionsInfo["ShowMolName"]:
        Legends = [
            RDKitUtil.GetMolName(Mol, Position)
            for Position, Mol in enumerate(Mols, 1)
        ]

    # Perform alignment to a common template...
    PerformAlignment(Mols)

    # Pick the output generator from the output file extension...
    if MiscUtil.CheckFileExt(OutputFile, "svg"):
        Generate = GenerateSVGImageFile
    elif MiscUtil.CheckFileExt(OutputFile, "html htm"):
        Generate = GenerateHTMLTableFile
    else:
        Generate = GenerateImageFile
    Generate(Mols, Legends, HighlightAtoms, HighlightBonds)
def ValidateOptions():
    """Validate option values.

    Checks the text, integer and file options read from Options using the
    MiscUtil validation helpers. The two checks written out explicitly here
    report problems through MiscUtil.PrintError: missing descriptor names in
    "Specify" mode, and a SMILES input file in "3D" or "All" mode.
    """

    MiscUtil.ValidateOptionTextValue("-a, --autocorr2DExclude", Options["--autocorr2DExclude"], "yes no")
    MiscUtil.ValidateOptionTextValue("-f, --fragmentCount", Options["--fragmentCount"], "yes no")
    
    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "2D 3D All FragmentCountOnly Specify")
    
    # Descriptor names are mandatory in "Specify" mode...
    if re.match("^Specify$", Options["--mode"], re.I):
        if re.match("^none$", Options["--descriptorNames"], re.I):
            MiscUtil.PrintError("The name(s) of molecular descriptors must be specified using \"-d, --descriptorNames\" option during \"Specify\" value of \"-m, --mode\" option.")
    
    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
    
    # The alternation is grouped so that both anchors apply to each mode name;
    # an ungrouped "^3D|All$" would accept any value merely starting with "3D"...
    if re.match("^(3D|All)$", Options["--mode"], re.I):
        if MiscUtil.CheckFileExt(Options["--infile"], "smi"):
            # The file name is supplied for the %s placeholder in the message...
            MiscUtil.PrintError("The input SMILES file, %s, is not valid for  \"3D or All\" value of \"-m, --mode\" option." % Options["--infile"])
    
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd csv tsv txt")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
    
    MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
    MiscUtil.ValidateOptionTextValue("-s, --smilesOut", Options["--smilesOut"], "yes no")
def GenerateImageFile(ValidMols, MolNames, AtomLists, BondLists):
    """Generate a non SVG image file.

    Arguments:
        ValidMols: Molecules passed to the grid image generation.
        MolNames: Legends for the molecules, or None.
        AtomLists: Atom lists passed on for highlighting atoms.
        BondLists: Bond lists passed on for highlighting bonds.

    The output file name, grid layout, image size and drawing settings are
    read from OptionsInfo; the grid image is saved to the output file.
    """

    OutputFile = OptionsInfo["Outfile"]

    # Grid layout and size of the image for each molecule...
    GridParams = {
        "molsPerRow": OptionsInfo["NumOfMolsPerRow"],
        "subImgSize": (OptionsInfo["MolImageWidth"],
                       OptionsInfo["MolImageHeight"]),
    }

    # Setup drawing options...
    GridDrawingOptions = DrawingOptions()
    GridDrawingOptions.atomLabelFontSize = int(
        OptionsInfo["AtomLabelFontSize"])
    GridDrawingOptions.bondLineWidth = float(OptionsInfo["BondLineWidth"])

    GridImage = Draw.MolsToGridImage(ValidMols,
                                     legends=MolNames,
                                     highlightAtomLists=AtomLists,
                                     highlightBondLists=BondLists,
                                     useSVG=False,
                                     kekulize=OptionsInfo["Kekulize"],
                                     options=GridDrawingOptions,
                                     **GridParams)

    MiscUtil.PrintInfo("\nGenerating image file %s..." % OutputFile)

    # An RGBA image is converted to RGB before it is saved as a PDF file...
    if MiscUtil.CheckFileExt(OutputFile, "pdf") and GridImage.mode == 'RGBA':
        GridImage = GridImage.convert('RGB')

    GridImage.save(OutputFile)
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions(), stores the option values in OptionsInfo together
    with the root names of the input and output files, and then calls the
    helpers that retrieve input file information, process chain IDs and set
    up category output files.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    # Only the name part of each parsed file name is kept as its root...
    InfileName = Options["--infile"]
    OptionsInfo["Infile"] = InfileName
    _, InfileRoot, _ = MiscUtil.ParseFileName(InfileName)
    OptionsInfo["InfileRoot"] = InfileRoot

    OutfileName = Options["--outfile"]
    OptionsInfo["Outfile"] = OutfileName
    _, OutfileRoot, _ = MiscUtil.ParseFileName(OutfileName)
    OptionsInfo["OutfileRoot"] = OutfileRoot

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The output delimiter follows the output file extension...
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        OptionsInfo["OutDelim"] = ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        OptionsInfo["OutDelim"] = "\t"
    else:
        # A single space stays in place as the delimiter when the error is reported...
        OptionsInfo["OutDelim"] = " "
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n"
            % OutfileName)

    OutMode = Options["--outMode"]
    OptionsInfo["OutMode"] = OutMode
    OptionsInfo["MultipleOutFiles"] = bool(
        re.match("^MultipleFiles$", OutMode, re.I))

    OptionsInfo["OutChainID"] = bool(
        re.match("^Yes$", Options["--outChainID"], re.I))
    OptionsInfo["OutCategory"] = bool(
        re.match("^Yes$", Options["--outCategory"], re.I))

    # Overwrite is assigned a second time here with the same option value...
    OptionsInfo["Overwrite"] = Options["--overwrite"]
    OptionsInfo["Precision"] = int(Options["--precision"])

    RetrieveInfileInfo()

    OptionsInfo["ChainIDs"] = Options["--chainIDs"]
    ProcessChainIDs()

    SetupCategoryOutfiles()
def ProcessSpecifiedDescriptorNames():
    """Process and validate specified descriptor names.

    Sets OptionsInfo["SpecifiedDescriptorNames"] according to OptionsInfo["Mode"]:

        2D: 2D names, plus fragment count names when OptionsInfo["FragmentCount"] is set
        3D: 3D names
        All: 2D names, optional fragment count names, then 3D names
        FragmentCountOnly: fragment count names
        Specify: names parsed from OptionsInfo["DescriptorNames"], matched case
            insensitively against the keys of DescriptorNamesMap["ComputeFunction"]

    The names are always stored as a new list, so extending the result never
    changes the name lists held in DescriptorNamesMap. Problems are reported
    through MiscUtil.PrintError.
    """

    OptionsInfo["SpecifiedDescriptorNames"] = []

    Mode = OptionsInfo["Mode"]
    if not re.match("^(2D|3D|All|FragmentCountOnly|Specify)$", Mode, re.I):
        MiscUtil.PrintError("Mode value, %s, using \"-m, --mode\" option is not a valid value." % Mode)

    # Predefined modes: copy the name lists before combining them. Assigning the
    # lists from DescriptorNamesMap directly and extending them in place would
    # append the extra names to the shared map itself...
    if re.match("^(2D|All)$", Mode, re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        if re.match("^All$", Mode, re.I):
            Names.extend(DescriptorNamesMap["3D"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^3D$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["3D"]["Names"])
        return
    elif re.match("^FragmentCountOnly$", Mode, re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["FragmentCount"]["Names"])
        return

    # Set up a canonical descriptor names map for checking specified names...
    CanonicalNameMap = {Name.lower(): Name for Name in DescriptorNamesMap["ComputeFunction"]}

    # Parse and validate specified names...
    DescriptorNames = re.sub(" ", "", OptionsInfo["DescriptorNames"])
    if not DescriptorNames:
        MiscUtil.PrintError("No descriptor names specified for \"-d, --descriptorNames\" option")

    # 3D descriptor names are rejected for a SMILES input file...
    SMILESInfile = MiscUtil.CheckFileExt(Options["--infile"], "smi")
    Canonical3DNameMap = {}
    if SMILESInfile:
        Canonical3DNameMap = {Name.lower(): Name for Name in DescriptorNamesMap["3D"]["Names"]}

    SpecifiedDescriptorNames = []
    for Name in DescriptorNames.split(","):
        CanonicalName = Name.lower()
        if CanonicalName in CanonicalNameMap:
            SpecifiedDescriptorNames.append(CanonicalNameMap[CanonicalName])
        else:
            MiscUtil.PrintError("The descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid name." % (Name))
        if SMILESInfile and CanonicalName in Canonical3DNameMap:
            MiscUtil.PrintError("The 3D descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid for SMILES input file." % (Name))

    if not SpecifiedDescriptorNames:
        MiscUtil.PrintError("No valid descriptor name specified for \"-d, --descriptorNames\" option")

    OptionsInfo["SpecifiedDescriptorNames"] = SpecifiedDescriptorNames
Beispiel #10
0
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions(), stores the alignment, probe file, reference file
    and output options in OptionsInfo, retrieves information about the probe
    and reference files, and derives the output delimiter from the extension
    of the output file name.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    # The alignment method is stored in lower case...
    OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower()
    OptionsInfo["AlignMode"] = Options["--alignMode"]

    OptionsInfo["Mode"] = Options["--mode"]

    OptionsInfo["ProbeFiles"] = Options["--probefiles"]
    OptionsInfo["ProbeFilesNames"] = Options["--probeFilesNames"]

    OptionsInfo["RefFiles"] = Options["--reffiles"]
    OptionsInfo["RefFilesNames"] = Options["--refFilesNames"]

    # File information is retrieved once the file options are stored...
    RetrieveProbeFilesInfo()
    RetrieveRefFilesInfo()

    OutfileName = Options["--outfile"]
    OptionsInfo["Outfile"] = OutfileName
    OptionsInfo["OutMatrix"] = bool(
        re.match("^Yes$", Options["--outMatrix"], re.I))

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The output delimiter follows the output file extension...
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        OptionsInfo["OutDelim"] = ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        OptionsInfo["OutDelim"] = "\t"
    else:
        # A single space stays in place as the delimiter when the error is reported...
        OptionsInfo["OutDelim"] = " "
        MiscUtil.PrintError(
            "The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n"
            % OutfileName)
Beispiel #11
0
def ReadMolecules(FileName, **KeyWordArgs):
    """Read molecules from an input file by dispatching to a file type specific
    reader chosen from the file extension.

    Arguments:
        FileName (str): Name of a file with complete path.
        **KeyWordArgs (dictionary) : Parameter name and value pairs for reading and
            processing molecules. Recognized names are Sanitize, RemoveHydrogens,
            StrictParsing, SMILESDelimiter, SMILESColumn, SMILESNameColumn and
            SMILESTitleLine; any other name is ignored.

    Returns:
        list : Result of the file type specific reader (described as a list of RDKit
            molecule objects), or an empty list after a warning for a non supported
            file type.

    Notes:
        For smi, csv, tsv and txt files, Sanitize and SMILESTitleLine are passed on
        as 1 when their value is exactly True and as 0 otherwise. The 1-based SMILES
        column numbers are converted to 0-based indices.

    """

    # Default values for possible arguments...
    Defaults = {
        "Sanitize": True,
        "RemoveHydrogens": True,
        "StrictParsing": True,
        "SMILESDelimiter": ' ',
        "SMILESColumn": 1,
        "SMILESNameColumn": 2,
        "SMILESTitleLine": True,
    }

    # Specified values take precedence over defaults for known arguments only...
    ReaderArgs = {Arg: KeyWordArgs.get(Arg, Default) for Arg, Default in Defaults.items()}

    # SMILES and text files use integer flags instead of booleans...
    if MiscUtil.CheckFileExt(FileName, "smi csv tsv txt"):
        for Flag in ("Sanitize", "SMILESTitleLine"):
            ReaderArgs[Flag] = 1 if ReaderArgs[Flag] is True else 0

    Sanitize = ReaderArgs["Sanitize"]
    RemoveHydrogens = ReaderArgs["RemoveHydrogens"]

    if MiscUtil.CheckFileExt(FileName, "sdf sd"):
        return ReadMoleculesFromSDFile(FileName, Sanitize, RemoveHydrogens, ReaderArgs['StrictParsing'])

    if MiscUtil.CheckFileExt(FileName, "mol"):
        return ReadMoleculesFromMolFile(FileName, Sanitize, RemoveHydrogens, ReaderArgs['StrictParsing'])

    if MiscUtil.CheckFileExt(FileName, "mol2"):
        return ReadMoleculesFromMol2File(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "pdb"):
        return ReadMoleculesFromPDBFile(FileName, Sanitize, RemoveHydrogens)

    if MiscUtil.CheckFileExt(FileName, "smi txt csv tsv"):
        SMILESColumnIndex = ReaderArgs["SMILESColumn"] - 1
        SMILESNameColumnIndex = ReaderArgs["SMILESNameColumn"] - 1
        return ReadMoleculesFromSMILESFile(FileName, ReaderArgs["SMILESDelimiter"], SMILESColumnIndex, SMILESNameColumnIndex, ReaderArgs["SMILESTitleLine"], Sanitize)

    MiscUtil.PrintWarning("RDKitUtil.ReadMolecules: Non supported file type: %s" % FileName)

    return []
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions(), stores the clustering, fingerprints, input and
    output options in OptionsInfo, and calls the helpers that process the
    clustering method, similarity metric and fingerprints parameters.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["ButinaSimilarityCutoff"] = float(Options["--butinaSimilarityCutoff"])
    OptionsInfo["ButinaReordering"] = bool(
        re.match("^Yes$", Options["--butinaReordering"], re.I))

    OptionsInfo["Fingerprints"] = Options["--fingerprints"]
    OptionsInfo["FingerprintsType"] = Options["--fingerprintsType"]

    OptionsInfo["ClusteringMethod"] = Options["--clusteringMethod"]
    ProcessClusteringMethodParameter()

    OptionsInfo["NumClusters"] = int(Options["--numClusters"])

    InfileName = Options["--infile"]
    OutfileName = Options["--outfile"]

    OptionsInfo["Infile"] = InfileName
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], InfileName)

    OptionsInfo["Outfile"] = OutfileName
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], InfileName, OutfileName)

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OutfileMode = Options["--outfileMode"]
    OptionsInfo["OutFileMode"] = OutfileMode
    OptionsInfo["SingleOutFileMode"] = bool(
        re.match("^SingleFile$", OutfileMode, re.I))

    # Name and extension parts of the parsed output file name are kept...
    _, OutfileBasename, OutfileExt = MiscUtil.ParseFileName(OutfileName)
    OptionsInfo["OutfileBasename"] = OutfileBasename
    OptionsInfo["OutfileExt"] = OutfileExt

    # Text output settings follow the output file extension; a SMILES output
    # file takes its delimiter and title line from the output file parameters...
    TextMode, TextDelim, TextTitleLine = False, "", True
    if MiscUtil.CheckFileExt(OutfileName, "csv"):
        TextMode, TextDelim = True, ","
    elif MiscUtil.CheckFileExt(OutfileName, "tsv txt"):
        TextMode, TextDelim = True, "\t"
    elif MiscUtil.CheckFileExt(OutfileName, "smi"):
        SMILESParams = OptionsInfo["OutfileParams"]
        TextMode = True
        TextDelim = SMILESParams["SMILESDelimiter"]
        TextTitleLine = SMILESParams["SMILESTitleLine"]

    OptionsInfo["TextOutFileMode"] = TextMode
    OptionsInfo["TextOutFileDelim"] = TextDelim
    OptionsInfo["TextOutFileTitleLine"] = TextTitleLine

    OptionsInfo["SimilarityMetric"] = Options["--similarityMetric"]
    ProcessSimilarityMetricParameter()

    OptionsInfo["ParamsFingerprints"] = Options["--paramsFingerprints"]
    ProcessFingerprintsParameters()
def ProcessOptions():
    """Process and validate command line arguments and options.

    Runs ValidateOptions(), stores the input, output, quoting and unmatched
    molecule options in OptionsInfo, and calls the helpers that process the
    decomposition and MCS parameters.

    For a "BySMARTS" or "BySMILES" value of "-c, --coreScaffold", the value of
    "-s, --smartsOrSmilesCoreScaffold" is stripped of spaces and split on
    commas. An empty value or the placeholder "none" is reported through
    MiscUtil.PrintError. For any other core scaffold mode the option is
    ignored.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["CoreScaffold"] = Options["--coreScaffold"]

    OptionsInfo["Infile"] = Options["--infile"]
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
        "--infileParams", Options["--infileParams"], Options["--infile"])

    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
        "--outfileParams", Options["--outfileParams"], Options["--infile"],
        Options["--outfile"])

    # Text output is used only for csv, tsv and txt output files...
    TextOutFileMode = False
    TextOutFileDelim = ""

    if MiscUtil.CheckFileExt(Options["--outfile"], "csv"):
        TextOutFileMode = True
        TextOutFileDelim = ","
    elif MiscUtil.CheckFileExt(Options["--outfile"], "tsv txt"):
        TextOutFileMode = True
        TextOutFileDelim = "\t"

    OptionsInfo["TextOutFileMode"] = TextOutFileMode
    OptionsInfo["TextOutFileDelim"] = TextOutFileDelim

    # "auto" quotes values only for csv output; otherwise quoting follows yes/no...
    if re.match("^auto$", Options["--quote"], re.I):
        TextOutQuote = bool(MiscUtil.CheckFileExt(Options["--outfile"], "csv"))
    else:
        TextOutQuote = bool(re.match("^yes$", Options["--quote"], re.I))
    OptionsInfo["TextOutQuote"] = TextOutQuote

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The file for unmatched molecules is named from the name and extension
    # parts of the parsed output file name...
    RemoveUnmatchedMode = False
    UnmatchedOutfile = None
    if re.match("^yes$", Options["--removeUnmatched"], re.I):
        RemoveUnmatchedMode = True
        _, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Outfile"])
        UnmatchedOutfile = "%sUnmatched.%s" % (FileName, FileExt)
    OptionsInfo["RemoveUnmatchedMode"] = RemoveUnmatchedMode
    OptionsInfo["UnmatchedOutfile"] = UnmatchedOutfile

    OptionsInfo["SpecifiedDecompositionParams"] = Options[
        "--decompositionParams"]
    ProcessDecompositionParameters()

    OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
    ProcessMCSParameters()

    # A core scaffold is needed only for BySMARTS and BySMILES. The earlier
    # check, 'not re.match("^none$", ...) or len(...)', was true for every
    # value, so "none" was accepted as a scaffold. Both an empty value and
    # "none" are now reported as missing...
    SMARTSOrSMILESCoreScaffold = ""
    SMARTSOrSMILESCoreScaffoldList = []
    if re.match("^(BySMARTS|BySMILES)$", Options["--coreScaffold"], re.I):
        SMARTSOrSMILESCoreScaffold = re.sub(
            " ", "", Options["--smartsOrSmilesCoreScaffold"])
        if not SMARTSOrSMILESCoreScaffold or re.match(
                "^none$", SMARTSOrSMILESCoreScaffold, re.I):
            MiscUtil.PrintError(
                "A non empty value must be specified for \"-s, --smartsOrSmilesCoreScaffold\" during %s value of \"-c, --coreScaffold\" option "
                % (Options["--coreScaffold"]))
        SMARTSOrSMILESCoreScaffoldList = SMARTSOrSMILESCoreScaffold.split(",")
    OptionsInfo["SMARTSOrSMILESCoreScaffold"] = SMARTSOrSMILESCoreScaffold
    OptionsInfo[
        "SMARTSOrSMILESCoreScaffoldList"] = SMARTSOrSMILESCoreScaffoldList
Beispiel #14
0
def ValidateOptions():
    """Validate option values.

    Checks the text and file options read from Options using the MiscUtil
    validation helpers. The comma delimited reactant file names given for
    "-i, --infiles" are checked one by one and must all share the same file
    format: either SD (sdf, sd) or SMILES (smi, csv, tsv, txt). Problems
    found by the checks written out explicitly here are reported through
    MiscUtil.PrintError.
    """

    MiscUtil.ValidateOptionTextValue("--compute2DCoords",
                                     Options["--compute2DCoords"], "yes no")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"],
                                     "RxnByName RxnBySMIRKS")
    MiscUtil.ValidateOptionTextValue("-p, --prodMolNames",
                                     Options["--prodMolNames"],
                                     "UseReactants Sequential")

    # The reaction names file path is validated unless the value is "auto"...
    if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
        MiscUtil.ValidateOptionFilePath("--rxnNamesFile",
                                        Options["--rxnNamesFile"])

    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"],
                                   "sdf sd smi")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile",
                                                Options["--outfile"],
                                                "--overwrite",
                                                Options["--overwrite"])

    # Spaces are removed before the comma delimited file names are split...
    ReactantFiles = re.sub(" ", "", Options["--infiles"])
    if not ReactantFiles:
        MiscUtil.PrintError(
            "No reactant files specified for \"-i, --infiles\" option")

    # Validate path and extension of each reactant file, and make sure it is
    # distinct from the output file...
    for ReactantFile in ReactantFiles.split(","):
        MiscUtil.ValidateOptionFilePath("-i, --infiles", ReactantFile)
        MiscUtil.ValidateOptionFileExt("-i, --infiles", ReactantFile,
                                       "sdf sd smi csv tsv txt")
        MiscUtil.ValidateOptionsDistinctFileNames("-i, --infiles",
                                                  ReactantFile,
                                                  "-o, --outfile",
                                                  Options["--outfile"])

    # Match file formats: the format of the first file is the reference which
    # every following file is compared against...
    FirstFile = True
    FirstFileFormat = ""
    for ReactantFile in ReactantFiles.split(","):
        FileFormat = ""
        if MiscUtil.CheckFileExt(ReactantFile, "sdf sd"):
            FileFormat = "SD"
        elif MiscUtil.CheckFileExt(ReactantFile, "smi csv tsv txt"):
            FileFormat = "SMILES"
        else:
            MiscUtil.PrintError(
                "The file name specified , %s, for option \"-i, --infiles\" is not valid. Supported file formats: sdf sd smi csv tsv txt\n"
                % ReactantFile)

        # The first file only records the reference format...
        if FirstFile:
            FirstFile = False
            FirstFileFormat = FileFormat
            continue

        # Case insensitive comparison of this file format with the reference format...
        if not re.match("^%s$" % FirstFileFormat, FileFormat, re.IGNORECASE):
            MiscUtil.PrintError(
                "All reactant file names -  %s - specified using option \"-i, --infiles\" must have the same file format.\n"
                % ReactantFiles)

    MiscUtil.ValidateOptionTextValue("--sanitize", Options["--sanitize"],
                                     "yes no")