def ProcessSpecifiedPropertyNames():
    """Resolve the property names requested through the mode option.

    Spaces are removed from OptionsInfo["Mode"] and the result is interpreted
    as follows:

    - "All" (case-insensitive): OptionsInfo["SpecifiedPropertyNames"] is set to
      the list returned by RetrievePropertyNames().
    - Anything else: a comma delimited list of names. Each name is matched
      case-insensitively against the available names and appended to
      OptionsInfo["SpecifiedPropertyNames"] using the available spelling.

    Empty values and unknown names are reported through MiscUtil.PrintError().
    """

    AvailableNames = RetrievePropertyNames()

    SelectedNames = []
    OptionsInfo["SpecifiedPropertyNames"] = SelectedNames

    ModeValue = re.sub(" ", "", OptionsInfo["Mode"])
    if len(ModeValue) == 0:
        MiscUtil.PrintError("No valid property names specifed  using \"-m, --mode\" option")

    if re.match("^All$", ModeValue, re.I):
        OptionsInfo["SpecifiedPropertyNames"] = AvailableNames
        return

    # Map lower-cased names to the spelling used by RetrievePropertyNames()...
    NamesByLowerCase = {Available.lower(): Available for Available in AvailableNames}

    for RequestedName in ModeValue.split(","):
        LookupKey = RequestedName.lower()
        if LookupKey not in NamesByLowerCase:
            MiscUtil.PrintError("The property name specified, %s, using \"-m, --mode\" option is not a valid name." % RequestedName)

        SelectedNames.append(NamesByLowerCase[LookupKey])
def AlignMolecules():
    """Align probe molecules against reference molecules and write them out.

    The reference and probe files named in OptionsInfo are read with
    AllowEmptyMols turned off, a molecule writer is set up for the output
    file, and the alignment is delegated to PerformOneToOneAlignment() or
    PerformFirstToAllAlignment() depending on OptionsInfo["Mode"]. Summary
    counts are printed at the end.

    Unsupported modes and writer setup failures are reported through
    MiscUtil.PrintError().
    """

    Mode = OptionsInfo["Mode"]
    UnsupportedModeMsg = "Alignment couldn't be performed: Specified mode, %s, is not supported"

    if re.match("^(OneToOne|FirstToAll)$", Mode, re.I) is None:
        MiscUtil.PrintError(UnsupportedModeMsg % Mode)

    RefFile = OptionsInfo["RefFile"]
    ProbeFile = OptionsInfo["ProbeFile"]
    Outfile = OptionsInfo["Outfile"]

    # Read reference and probe molecules with the same input parameters...
    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = False

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    RefMols, RefTotalCount, RefValidCount = RDKitUtil.ReadAndValidateMolecules(
        RefFile, **InfileParams)

    MiscUtil.PrintInfo("Processing file %s..." % (ProbeFile))
    ProbeMols, ProbeTotalCount, ProbeValidCount = RDKitUtil.ReadAndValidateMolecules(
        ProbeFile, **InfileParams)

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    # Dispatch on the alignment mode...
    FailedCount = 0
    if re.match("^OneToOne$", Mode, re.I):
        FailedCount = PerformOneToOneAlignment(RefMols, ProbeMols, Writer)
    elif re.match("^FirstToAll$", Mode, re.I):
        FailedCount = PerformFirstToAllAlignment(RefMols, ProbeMols, Writer)
    else:
        MiscUtil.PrintError(UnsupportedModeMsg % Mode)

    if Writer is not None:
        Writer.close()

    # Report summary...
    MiscUtil.PrintInfo("\nTotal number of molecules: Reference - %d; Probe - %d" % (RefTotalCount, ProbeTotalCount))
    MiscUtil.PrintInfo("Number of valid molecules: Reference - %d; Probe - %d" % (RefValidCount, ProbeValidCount))
    MiscUtil.PrintInfo("Number of probe molecules failed during alignment: %d" % FailedCount)

    # Probe molecules that failed alignment are counted as ignored as well...
    RefIgnoredCount = RefTotalCount - RefValidCount
    ProbeIgnoredCount = ProbeTotalCount - ProbeValidCount + FailedCount
    MiscUtil.PrintInfo("Number of ignored molecules:  Reference - %d; Probe - %d" % (RefIgnoredCount, ProbeIgnoredCount))
def PerformHierarchicalClustering(Mols, MolsFingerprints):
    """Cluster molecules using a hierarchical clustering methodology.

    Arguments:
        Mols: Molecules; indexed by the member indices of each cluster.
        MolsFingerprints: Fingerprints handed to
            GenerateLowerTriangularDistanceMatrix() to build the distances.

    Returns:
        list: One list of molecules for each cluster returned by the picker.

    A missing numpy module, or a requested number of clusters larger than the
    number of molecules, is reported through MiscUtil.PrintError().
    """

    try:
        import numpy
    except ImportError:
        MiscUtil.PrintError("Failed to import numpy python module. This is required to cluster molecules using hierarchical clustering methodology.")

    RequestedClustersCount = OptionsInfo["NumClusters"]
    MolsCount = len(Mols)
    if RequestedClustersCount > MolsCount:
        MiscUtil.PrintError("The number of clusters, %d, specified using \"-n, --numClusters\" must be less than total number of valid molecules, %d" % (RequestedClustersCount, MolsCount))

    MiscUtil.PrintInfo("\nCluster molecules using %s hierarchical clustering methodology and %s similarity metric..." % (OptionsInfo["SpecifiedHierarchicalClusteringMethod"], OptionsInfo["SimilarityMetric"]))

    FingerprintsCount = len(MolsFingerprints)
    Distances = GenerateLowerTriangularDistanceMatrix(MolsFingerprints)

    Picker = HierarchicalClusterPicker(OptionsInfo["SpecifiedHierarchicalClusteringMethodID"])
    ClustersMemberIndices = Picker.Cluster(numpy.asarray(Distances), FingerprintsCount, RequestedClustersCount)

    # Translate member indices back into molecules...
    return [[Mols[MemberIndex] for MemberIndex in MemberIndices] for MemberIndices in ClustersMemberIndices]
Esempio n. 4
0
def SetupOutputFiles():
    """Open the combined output file and, optionally, per-category files.

    The handle for OptionsInfo["Outfile"] is stored in OptionsInfo["OutFH"]
    and OptionsInfo["OutfileResCount"] is reset to 0. When
    OptionsInfo["MultipleOutFiles"] is set, a file is also opened for each
    entry in OptionsInfo["Categories"] using the names in
    OptionsInfo["CategoriesOutfiles"]; the handles and zeroed counters are
    stored in OptionsInfo["CategoriesOutFHs"] and
    OptionsInfo["CategoriesResCount"], keyed by category.

    Failures to open a file are reported through MiscUtil.PrintError().
    """

    if OptionsInfo["MultipleOutFiles"]:
        MiscUtil.PrintInfo("\nGenerating output files: %s" % (", ".join(OptionsInfo["OutfilesList"])))
    else:
        MiscUtil.PrintInfo("\nGenerating output file %s..." % (OptionsInfo["Outfile"]))

    # Open combined output file...
    #
    # open() signals failure by raising, never by returning None, so the
    # error has to be caught in order to be reported.
    Outfile = OptionsInfo["Outfile"]
    try:
        OutFH = open(Outfile, "w")
    except (IOError, OSError):
        OutFH = None
        MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))
    OptionsInfo["OutFH"] = OutFH
    OptionsInfo["OutfileResCount"] = 0

    if not OptionsInfo["MultipleOutFiles"]:
        return

    # Open output files for different categories...
    OptionsInfo["CategoriesOutFHs"] = {}
    OptionsInfo["CategoriesResCount"] = {}
    for Category in OptionsInfo["Categories"]:
        CategoryOutfile = OptionsInfo["CategoriesOutfiles"][Category]
        try:
            CategoryOutFH = open(CategoryOutfile, "w")
        except (IOError, OSError):
            CategoryOutFH = None
            MiscUtil.PrintError("Couldn't open output file: %s.\n" % (CategoryOutfile))

        OptionsInfo["CategoriesOutFHs"][Category] = CategoryOutFH
        OptionsInfo["CategoriesResCount"][Category] = 0
def SetupMoleculeWriters(CombineMatchResults, Outfile, GroupsOutfiles):
    """Set up molecule writers for output files.

    Arguments:
        CombineMatchResults: When true, a single writer is created for Outfile;
            otherwise one writer is created for each name in GroupsOutfiles.
        Outfile: Name of the combined output file.
        GroupsOutfiles: Names of the per-group output files.

    Returns:
        tuple: (Writer, GroupOutfilesWriters). Writer is None when results are
            not combined; GroupOutfilesWriters is empty when they are.

    Writer setup failures are reported through MiscUtil.PrintError().
    """

    Writer = None
    GroupOutfilesWriters = []

    if CombineMatchResults:
        Writer = RDKitUtil.MoleculesWriter(Outfile,
                                           **OptionsInfo["OutfileParams"])
        if Writer is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " %
                                Outfile)
        MiscUtil.PrintInfo("Generating file %s..." % Outfile)
    else:
        for GroupOutfile in GroupsOutfiles:
            GroupOutfileWriter = RDKitUtil.MoleculesWriter(
                GroupOutfile, **OptionsInfo["OutfileParams"])
            if GroupOutfileWriter is None:
                # Name the file that failed; Writer is always None in this
                # branch and says nothing about which group file is at fault.
                MiscUtil.PrintError(
                    "Failed to setup a writer for output fie %s " % GroupOutfile)
            GroupOutfilesWriters.append(GroupOutfileWriter)

        # List individual file names only for a handful of files...
        GroupsCount = len(GroupsOutfiles)
        if GroupsCount > 4:
            MiscUtil.PrintInfo(
                "Generating %d output files with the following file name format: %s<GroupName>.%s"
                % (GroupsCount, OptionsInfo["OutfileBasename"],
                   OptionsInfo["OutfileExt"]))
        else:
            Delmiter = ', '
            OutfileNames = Delmiter.join(GroupsOutfiles)
            MiscUtil.PrintInfo("Generating %d output files: %s..." %
                               (GroupsCount, OutfileNames))

    return (Writer, GroupOutfilesWriters)
def ProcessSpecifiedFingerprintsType():
    """Determine the fingerprints vector type and store it in OptionsInfo.

    OptionsInfo["FingerprintsType"] is resolved to "BitVect" or "IntVect" and
    stored in OptionsInfo["SpecifiedFingerprintsType"]:

    - "auto": "BitVect" for MACCS166Keys and PathLength fingerprints;
      otherwise "IntVect" for Tanimoto and Dice similarity metrics and
      "BitVect" for any other metric.
    - "IntVect": rejected for MACCS166Keys and PathLength fingerprints and for
      similarity metrics other than Tanimoto and Dice.
    - "BitVect": used as is.

    Invalid combinations and unknown types are reported through
    MiscUtil.PrintError().
    """

    FingerprintsName = OptionsInfo["SpecifiedFingerprints"]
    RequestedType = OptionsInfo["FingerprintsType"]
    SimilarityName = OptionsInfo["SimilarityMetric"]

    BitVectOnlyFingerprintsPattern = "^(MACCS166Keys|PathLength)$"
    IntVectSimilarityPattern = "^(Tanimoto|Dice)$"

    if re.match("^auto$", RequestedType, re.I):
        if (re.match(BitVectOnlyFingerprintsPattern, FingerprintsName, re.I)
                or not re.match(IntVectSimilarityPattern, SimilarityName, re.I)):
            ResolvedType = "BitVect"
        else:
            ResolvedType = "IntVect"
    elif re.match("^IntVect$", RequestedType, re.I):
        ResolvedType = "IntVect"

        if re.match(BitVectOnlyFingerprintsPattern, FingerprintsName, re.I):
            MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for fingerprints %s." % (RequestedType, FingerprintsName))

        # RDKit similarity functions, besides Dice and Tanimoto, are not able to handle int bit vectors...
        if re.match(IntVectSimilarityPattern, SimilarityName, re.I) is None:
            MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for similarity metric %s.\nSupported similarity metrics: Tanimoto or Dice" % (RequestedType, SimilarityName))
    elif re.match("^BitVect$", RequestedType, re.I):
        ResolvedType = "BitVect"
    else:
        MiscUtil.PrintError("The fingerprints Type, %s, is not supported." % (RequestedType))

    OptionsInfo["SpecifiedFingerprintsType"] = ResolvedType
def SetupChainAndLigandOutfiles():
    """Set up output file names for specified chains and their ligands.

    For each chain ID in OptionsInfo["SpecifiedChainsAndLigandsInfo"], a PDB
    file name of the form <InfileRoot>_Chain<ChainID>.pdb is stored under
    "ChainOutfiles", and for each of its ligand IDs a file name of the form
    <InfileRoot>_Chain<ChainID>_<LigandID>.<LigandFileExt> is stored under
    "LigandOutfiles".

    An output file that already exists is reported through
    MiscUtil.PrintError() unless OptionsInfo["Overwrite"] is set.
    """

    ChainsAndLigandsInfo = OptionsInfo["SpecifiedChainsAndLigandsInfo"]

    ChainOutfiles = {}
    ChainsAndLigandsInfo["ChainOutfiles"] = ChainOutfiles
    LigandOutfiles = {}
    ChainsAndLigandsInfo["LigandOutfiles"] = LigandOutfiles

    InfileRoot = OptionsInfo["InfileRoot"]
    LigandFileExt = OptionsInfo["LigandFileExt"]

    for ChainID in ChainsAndLigandsInfo["ChainIDs"]:
        ChainRoot = "%s_Chain%s" % (InfileRoot, ChainID)
        ChainFile = "%s.pdb" % (ChainRoot)
        ChainOutfiles[ChainID] = ChainFile

        if os.path.exists(ChainFile) and not OptionsInfo["Overwrite"]:
            MiscUtil.PrintError("\nThe chain output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (ChainFile))

        # Ligand file names are derived from the chain file root...
        ChainLigandOutfiles = {}
        LigandOutfiles[ChainID] = ChainLigandOutfiles
        for LigandID in ChainsAndLigandsInfo["LigandIDs"][ChainID]:
            LigandFile = "%s_%s.%s" % (ChainRoot, LigandID, LigandFileExt)
            ChainLigandOutfiles[LigandID] = LigandFile

            if os.path.exists(LigandFile) and not OptionsInfo["Overwrite"]:
                MiscUtil.PrintError("\nThe ligand output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (LigandFile))
def RetrieveReferenceMolecule():
    """Read the reference file and return its first valid molecule.

    The file named by OptionsInfo["RefFile"] is read with AllowEmptyMols
    turned off. A warning is printed when it contains more than one valid
    molecule. When OptionsInfo["UseScaffoldSMARTS"] is set, the scaffold
    SMARTS pattern must be valid and must match the reference molecule.

    Returns:
        object: The first valid molecule from the reference file.

    A reference file without valid molecules, an invalid scaffold SMARTS, or
    a scaffold missing from the reference molecule is reported through
    MiscUtil.PrintError().
    """

    RefFile = OptionsInfo["RefFile"]

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = False
    RefMols, TotalCount, ValidCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **InfileParams)

    if ValidCount == 0:
        MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
    elif ValidCount > 1:
        MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidCount))

    FirstRefMol = RefMols[0]

    if not OptionsInfo["UseScaffoldSMARTS"]:
        return FirstRefMol

    # Validate the scaffold pattern against the reference molecule...
    PatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
    if PatternMol is None:
        MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))

    if not FirstRefMol.HasSubstructMatch(PatternMol):
        MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))

    return FirstRefMol
Esempio n. 9
0
def SetupMoleculeWriters():
    """Set up molecule writers for the output and filtered output files.

    Returns:
        tuple: (Writer, WriterFiltered). Both are None when
            OptionsInfo["CountMode"] is set; WriterFiltered is None unless
            OptionsInfo["OutfileFilteredMode"] is set.

    Writer setup failures are reported through MiscUtil.PrintError().
    """

    # No files are written during count mode...
    if OptionsInfo["CountMode"]:
        return (None, None)

    OutfileParams = OptionsInfo["OutfileParams"]

    Outfile = OptionsInfo["Outfile"]
    MainWriter = RDKitUtil.MoleculesWriter(Outfile, **OutfileParams)
    if MainWriter is None:
        MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
    MiscUtil.PrintInfo("Generating file %s..." % Outfile)

    FilteredWriter = None
    if OptionsInfo["OutfileFilteredMode"]:
        FilteredOutfile = OptionsInfo["OutfileFiltered"]
        FilteredWriter = RDKitUtil.MoleculesWriter(FilteredOutfile, **OutfileParams)
        if FilteredWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % FilteredOutfile)
        MiscUtil.PrintInfo("Generating file %s..." % FilteredOutfile)

    return (MainWriter, FilteredWriter)
Esempio n. 10
0
def CalculateRMSDValues():
    """Calculate RMSD between reference and probe files.

    The output file named by OptionsInfo["Outfile"] is opened, column labels
    are written using OptionsInfo["OutDelim"], PyMOL is reinitialized, and
    the calculation is delegated to the function matching
    OptionsInfo["Mode"]: OneToOne, AllToAll or FirstToAll (case-insensitive).

    A failure to open the output file, or an unsupported mode, is reported
    through MiscUtil.PrintError(). The output file is closed even when the
    calculation fails.
    """

    Outfile = OptionsInfo["Outfile"]
    OutDelim = OptionsInfo["OutDelim"]

    MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)

    # open() signals failure by raising, never by returning None, so the
    # error has to be caught in order to be reported.
    try:
        OutFH = open(Outfile, "w")
    except (IOError, OSError):
        MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))
        # NOTE(review): only reached if PrintError returns; there is no file
        # to write to in that case.
        return

    try:
        WriteColumnLabels(OutFH, OutDelim)

        pymol.cmd.reinitialize()
        if re.match("^OneToOne$", OptionsInfo["Mode"], re.I):
            CalculateOneToOneRMSDValues(OutFH, OutDelim)
        elif re.match("^AllToAll$", OptionsInfo["Mode"], re.I):
            CalculateAllToAllRMSDValues(OutFH, OutDelim)
        elif re.match("^FirstToAll$", OptionsInfo["Mode"], re.I):
            CalculateFirstToAllRMSDValues(OutFH, OutDelim)
        else:
            MiscUtil.PrintError(
                "RMSD couldn't be calculated: Specified mode, %s, is not supported"
                % OptionsInfo["Mode"])
    finally:
        # Flush and release the file handle regardless of how the
        # calculation ended...
        OutFH.close()
Esempio n. 11
0
def ProcessChEMBLAlertsMode():
    """Resolve the filter types requested through the alerts mode option.

    After calling RetrieveChEMBLFiltersInfo(), OptionsInfo["SpecifiedFilterTypes"]
    defaults to all filter types in OptionsInfo["ChEMBLFiltersMap"]. Unless
    OptionsInfo["AlertsMode"] is "All" (case-insensitive), it is treated as a
    comma delimited list of filter types, with spaces ignored, each matched
    case-insensitively against the available filter types.

    Empty values and unknown filter types are reported through
    MiscUtil.PrintError().
    """

    # Retrieve filters information...
    RetrieveChEMBLFiltersInfo()

    # Default to all available filter types...
    AvailableFilterTypes = OptionsInfo["ChEMBLFiltersMap"]["FilterTypes"]
    OptionsInfo["SpecifiedFilterTypes"] = AvailableFilterTypes
    if re.match("^All$", OptionsInfo["AlertsMode"], re.I):
        return

    RequestedModes = re.sub(" ", "", OptionsInfo["AlertsMode"])
    if len(RequestedModes) == 0:
        MiscUtil.PrintError("The alerts mode specified using \"-a, --alertsMode\" option are empty.")

    # Map lower-cased filter types to their available spelling...
    FilterTypesByLowerCase = {Available.lower(): Available for Available in AvailableFilterTypes}

    ResolvedFilterTypes = []
    for RequestedMode in RequestedModes.split(","):
        LookupKey = RequestedMode.lower()
        if LookupKey not in FilterTypesByLowerCase:
            MiscUtil.PrintError("The altert mode, %s, specified using \"-a, --alertsMode\" is not valid. Supported alert modes: %s" % (RequestedMode, ", ".join(AvailableFilterTypes)))

        ResolvedFilterTypes.append(FilterTypesByLowerCase[LookupKey])

    OptionsInfo["SpecifiedFilterTypes"] = ResolvedFilterTypes
def ValidateOptions():
    """Validate option values.

    Checks the values in Options for the yes/no flags, the mode, the input
    and output files, the precision and the SMILES output flag. In addition:

    - "Specify" mode requires descriptor names other than "none".
    - "3D" and "All" modes do not accept a SMILES input file.

    Invalid values are reported through the MiscUtil validation functions or
    MiscUtil.PrintError().
    """

    MiscUtil.ValidateOptionTextValue("-a, --autocorr2DExclude", Options["--autocorr2DExclude"], "yes no")
    MiscUtil.ValidateOptionTextValue("-f, --fragmentCount", Options["--fragmentCount"], "yes no")
    
    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "2D 3D All FragmentCountOnly Specify")
    
    if re.match("^Specify$", Options["--mode"], re.I):
        if re.match("^none$", Options["--descriptorNames"], re.I):
            MiscUtil.PrintError("The name(s) of molecular descriptors must be specified using \"-d, --descriptorNames\" option during \"Specify\" value of \"-m, --mode\" option.")
    
    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
    
    # The alternation is grouped so that both anchors apply to each mode name;
    # "^3D|All$" would match anything starting with 3D or ending with All.
    if re.match("^(3D|All)$", Options["--mode"], re.I):
        if MiscUtil.CheckFileExt(Options["--infile"], "smi"):
            MiscUtil.PrintError("The input SMILES file, %s, is not valid for  \"3D or All\" value of \"-m, --mode\" option." % Options["--infile"])
    
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd csv tsv txt")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
    
    MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
    MiscUtil.ValidateOptionTextValue("-s, --smilesOut", Options["--smilesOut"], "yes no")
Esempio n. 13
0
def ProcessSpecifiedFingerprintsParameters():
    """Process specified fingerprints parameters.

    Unless OptionsInfo["ParamsFingerprints"] is "auto" (case-insensitive), it
    is treated as a comma delimited list of alternating parameter names and
    values, with spaces ignored. Names are matched case-insensitively against
    the parameters available in OptionsInfo["FingerprintsParams"] for the
    fingerprints named by OptionsInfo["SpecifiedFingerprints"], and the parsed
    values are stored there:

    - UseChirality: Yes/True becomes True; No/False becomes False.
    - Any other parameter: an integer greater than zero.

    Empty lists, unpaired names, unknown names and invalid values, including
    values that are not integers, are reported through MiscUtil.PrintError().
    """

    if re.match("^auto$", OptionsInfo["ParamsFingerprints"], re.I):
        # Nothing to process...
        return

    SpecifiedFingerprintsName = OptionsInfo["SpecifiedFingerprints"]

    # Parse specified fingerprints parameters...
    ParamsFingerprints = re.sub(" ", "", OptionsInfo["ParamsFingerprints"])
    if not ParamsFingerprints:
        MiscUtil.PrintError(
            "No valid parameter name and value pairs specified using \"-p, --paramsFingerprints\" option corrresponding to fingerprints %s."
            % (SpecifiedFingerprintsName))

    ParamsFingerprintsWords = ParamsFingerprints.split(",")
    if len(ParamsFingerprintsWords) % 2:
        MiscUtil.PrintError(
            "The number of comma delimited paramater names and values, %d, specified using \"-p, --paramsFingerprints\" option must be an even number."
            % (len(ParamsFingerprintsWords)))

    # Setup canonical parameter names for specified fingerprints...
    ValidParamNames = []
    CanonicalParamNamesMap = {}
    for ParamName in sorted(
            OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName]):
        ValidParamNames.append(ParamName)
        CanonicalParamNamesMap[ParamName.lower()] = ParamName

    # Validate and set parameter names and values...
    for Index in range(0, len(ParamsFingerprintsWords), 2):
        Name = ParamsFingerprintsWords[Index]
        Value = ParamsFingerprintsWords[Index + 1]

        CanonicalName = Name.lower()
        if CanonicalName not in CanonicalParamNamesMap:
            MiscUtil.PrintError(
                "The parameter name, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid name. Supported parameter names: %s"
                % (Name, SpecifiedFingerprintsName, " ".join(ValidParamNames)))

        ParamName = CanonicalParamNamesMap[CanonicalName]
        if re.match("^UseChirality$", ParamName, re.I):
            if not re.match("^(Yes|No|True|False)$", Value, re.I):
                MiscUtil.PrintError(
                    "The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: Yes No True False"
                    % (Value, SpecifiedFingerprintsName))
            ParamValue = False
            if re.match("^(Yes|True)$", Value, re.I):
                ParamValue = True
        else:
            # Report empty or non-integer values the same way as out of range
            # values instead of letting int() raise ValueError...
            try:
                ParamValue = int(Value)
            except ValueError:
                MiscUtil.PrintError(
                    "The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: > 0"
                    % (Value, SpecifiedFingerprintsName))
                # NOTE(review): only reached if PrintError returns; there is
                # no parsed value to store in that case.
                continue

            if ParamValue <= 0:
                MiscUtil.PrintError(
                    "The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: > 0"
                    % (Value, SpecifiedFingerprintsName))

        # Set value...
        OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName][
            ParamName] = ParamValue
def ProcessSpecifiedDescriptorNames():
    """Process and validate specified descriptor names.

    OptionsInfo["SpecifiedDescriptorNames"] is set according to
    OptionsInfo["Mode"] (case-insensitive):

    - 2D: 2D names, plus fragment count names when OptionsInfo["FragmentCount"]
      is set.
    - 3D: 3D names.
    - All: 2D names, optional fragment count names, then 3D names.
    - FragmentCountOnly: fragment count names.
    - Specify: names from the comma delimited OptionsInfo["DescriptorNames"]
      value, with spaces ignored, matched case-insensitively against the
      names in DescriptorNamesMap["ComputeFunction"]. 3D descriptor names are
      rejected when the input file is a SMILES file.

    Invalid modes and names are reported through MiscUtil.PrintError().
    """

    OptionsInfo["SpecifiedDescriptorNames"] = []

    if not re.match("^(2D|3D|All|FragmentCountOnly|Specify)$", OptionsInfo["Mode"], re.I):
        MiscUtil.PrintError("Mode value, %s, using \"-m, --mode\" option is not a valid value." % OptionsInfo["Mode"])
    
    # The name lists are copied out of DescriptorNamesMap before being stored
    # or extended; extending the lists in place would add fragment count and
    # 3D names to the shared DescriptorNamesMap["2D"]["Names"] list.
    if re.match("^2D$", OptionsInfo["Mode"], re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^3D$", OptionsInfo["Mode"], re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["3D"]["Names"])
        return
    elif re.match("^All$", OptionsInfo["Mode"], re.I):
        Names = list(DescriptorNamesMap["2D"]["Names"])
        if OptionsInfo["FragmentCount"]:
            Names.extend(DescriptorNamesMap["FragmentCount"]["Names"])
        Names.extend(DescriptorNamesMap["3D"]["Names"])
        OptionsInfo["SpecifiedDescriptorNames"] = Names
        return
    elif re.match("^FragmentCountOnly$", OptionsInfo["Mode"], re.I):
        OptionsInfo["SpecifiedDescriptorNames"] = list(DescriptorNamesMap["FragmentCount"]["Names"])
        return

    # Set up a canonical descriptor names map for checking specified names...
    CanonicalNameMap = {}
    for Name in DescriptorNamesMap["ComputeFunction"]:
        CanonicalNameMap[Name.lower()] = Name
    
    # Parse and validate specified names...
    DescriptorNames = re.sub(" ", "", OptionsInfo["DescriptorNames"])
    if not DescriptorNames:
        MiscUtil.PrintError("No descriptor names specified for \"-d, --descriptorNames\" option")

    # 3D descriptor names are only looked up for SMILES input files, where
    # they are rejected...
    SMILESInfile = MiscUtil.CheckFileExt(Options["--infile"], "smi")
    Canonical3DNameMap = {}
    if SMILESInfile:
        for Name in DescriptorNamesMap["3D"]["Names"]:
            Canonical3DNameMap[Name.lower()] = Name
            
    SpecifiedDescriptorNames = []
    for Name in DescriptorNames.split(","):
        CanonicalName = Name.lower()
        if CanonicalName in CanonicalNameMap:
            SpecifiedDescriptorNames.append(CanonicalNameMap[CanonicalName])
        else:
            MiscUtil.PrintError("The descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid name." % (Name))
        if SMILESInfile:
            if CanonicalName in Canonical3DNameMap:
                MiscUtil.PrintError("The 3D descriptor name, %s, specified using \"-d, --descriptorNames\" option is not a valid for SMILES input file." % (Name))
                
    if not len(SpecifiedDescriptorNames):
        MiscUtil.PrintError("No valid descriptor name specified for \"-d, --descriptorNames\" option")
    
    OptionsInfo["SpecifiedDescriptorNames"] = SpecifiedDescriptorNames
Esempio n. 15
0
def SetupMoleculeWriters(ClustersCount):
    """Set up writers for SD or text output files.

    Arguments:
        ClustersCount: Number of per-cluster output files to set up when
            OptionsInfo["SingleOutFileMode"] is not set.

    Returns:
        tuple: (Writer, ClustersOutfilesWriters). Writer is None in multiple
            output files mode; ClustersOutfilesWriters is empty in single
            output file mode.

    In text output mode, writers are plain file handles and a header line is
    written when OptionsInfo["TextOutFileTitleLine"] is set; otherwise they
    are RDKitUtil molecule writers. Writer setup failures are reported
    through MiscUtil.PrintError().
    """

    TextMode = OptionsInfo["TextOutFileMode"]
    TextDelim = OptionsInfo["TextOutFileDelim"]
    TextTitleLine = OptionsInfo["TextOutFileTitleLine"]

    def OpenWriter(OutfileName):
        # Create a writer for a file and write out the optional header line...
        if TextMode:
            NewWriter = open(OutfileName, "w")
        else:
            NewWriter = RDKitUtil.MoleculesWriter(OutfileName, **OptionsInfo["OutfileParams"])
        if NewWriter is None:
            MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OutfileName)

        if TextMode and TextTitleLine:
            WriteTextFileHeaderLine(NewWriter, TextDelim)

        return NewWriter

    if OptionsInfo["SingleOutFileMode"]:
        SingleOutfile = OptionsInfo["Outfile"]
        SingleWriter = OpenWriter(SingleOutfile)
        MiscUtil.PrintInfo("Generating file %s..." % SingleOutfile)
        return (SingleWriter, [])

    # One writer for each cluster...
    ClustersWriters = []
    for ClusterNum in range(0, ClustersCount):
        ClustersWriters.append(OpenWriter(OptionsInfo["ClustersOutfiles"][ClusterNum]))

    # List individual file names only for a handful of files...
    if ClustersCount > 4:
        MiscUtil.PrintInfo("Generating %d output files with the following file name format: %s_Cluster<Num>.%s" % (ClustersCount, OptionsInfo["OutfileBasename"], OptionsInfo["OutfileExt"]))
    else:
        JoinedOutfileNames = ','.join(OptionsInfo["ClustersOutfiles"])
        MiscUtil.PrintInfo("Generating %d output files: %s..." % (ClustersCount, JoinedOutfileNames))

    return (None, ClustersWriters)
Esempio n. 16
0
def CompareMoleculeShapes():
    """Compare shapes of reference and probe molecules.

    The reference and probe files named in OptionsInfo are read with
    AllowEmptyMols turned off, a header line is written to the output file,
    and the comparison is delegated to the function matching
    OptionsInfo["Mode"]: OneToOne, AllToAll or FirstToAll (case-insensitive).
    Summary counts are printed at the end.

    Unsupported comparison or alignment modes are reported through
    MiscUtil.PrintError().
    """

    Mode = OptionsInfo["Mode"]
    if re.match("^(OneToOne|AllToAll|FirstToAll)$", Mode, re.I) is None:
        MiscUtil.PrintError("Shape comparison couldn't be performed: Specified mode, %s, is not supported" % Mode)

    Alignment = OptionsInfo["Alignment"]
    if re.match("^(Open3A|CrippenOpen3A)$", Alignment, re.I) is None:
        MiscUtil.PrintError("Shape couldn't be performed: Specified alignment mode, %s, is not supported" % Alignment)

    RefFile = OptionsInfo["RefFile"]
    ProbeFile = OptionsInfo["ProbeFile"]

    Outfile = OptionsInfo["Outfile"]
    OutDelim = OptionsInfo["OutDelim"]

    # Read reference and probe molecules with the same input parameters...
    InfileParams = OptionsInfo["InfileParams"]
    InfileParams["AllowEmptyMols"] = False

    MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
    RefMols, RefTotalCount, RefValidCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **InfileParams)

    MiscUtil.PrintInfo("Processing file %s..." % (ProbeFile))
    ProbeMols, ProbeTotalCount, ProbeValidCount = RDKitUtil.ReadAndValidateMolecules(ProbeFile, **InfileParams)

    # Set up output file...
    MiscUtil.PrintInfo("Generating file %s...\n" % Outfile)
    OutFH = open(Outfile, "w")
    if OutFH is None:
        MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))

    # Write out the header line; distance columns are optional...
    ScoreColumnsFormat = "RefMolID%sProbeMolID%sCrippenOpen3AScore" if OptionsInfo["UseCrippenOpen3A"] else "RefMolID%sProbeMolID%sOpen3AScore"
    HeaderLine = ScoreColumnsFormat % (OutDelim, OutDelim)

    if OptionsInfo["CalcTanimotoDistance"]:
        HeaderLine = "%s%sTanimotoDistance" % (HeaderLine, OutDelim)
    if OptionsInfo["CalcProtrudeDistance"]:
        HeaderLine = "%s%sProtrudeDistance" % (HeaderLine, OutDelim)
    OutFH.write("%s\n" % HeaderLine)

    # Dispatch on the comparison mode...
    if re.match("^OneToOne$", Mode, re.I):
        PerformOneToOneShapeComparison(RefMols, ProbeMols, OutFH, OutDelim)
    elif re.match("^AllToAll$", Mode, re.I):
        PerformAllToAllShapeComparison(RefMols, ProbeMols, OutFH, OutDelim)
    elif re.match("^FirstToAll$", Mode, re.I):
        PerformFirstToAllShapeComparison(RefMols, ProbeMols, OutFH, OutDelim)
    else:
        MiscUtil.PrintError("Shape comaprison couldn't be performed: Specified mode, %s, is not supported" % Mode)

    OutFH.close()

    # Report summary...
    MiscUtil.PrintInfo("\nTotal number of molecules: Reference - %d; Probe - %d" % (RefTotalCount, ProbeTotalCount))
    MiscUtil.PrintInfo("Number of valid molecules: Reference - %d; Probe - %d" % (RefValidCount, ProbeValidCount))
    MiscUtil.PrintInfo("Number of ignored molecules:  Reference - %d; Probe - %d" % ((RefTotalCount - RefValidCount), (ProbeTotalCount - ProbeValidCount)))
def SetupCoreScaffoldsByMCS(Mols):
    """Set up a core scaffold molecule using maximum common substructure.

    Arguments:
        Mols: Molecules passed to rdFMCS.FindMCS().

    Returns:
        list: A single-element list containing the molecule created from the
            SMARTS string of the MCS result.

    The search is configured from OptionsInfo["MCSParams"]. A canceled
    search, an empty SMARTS string, or a core with fewer atoms or bonds than
    the "MinNumAtoms" / "MinNumBonds" parameters is reported through
    MiscUtil.PrintError().
    """

    MiscUtil.PrintInfo("\nSetting up core scaffold using MCS...")

    Params = OptionsInfo["MCSParams"]

    # Collect the search options before running the search...
    SearchOptions = {
        "maximizeBonds": Params["MaximizeBonds"],
        "threshold": Params["Threshold"],
        "timeout": Params["TimeOut"],
        "verbose": Params["Verbose"],
        "matchValences": Params["MatchValences"],
        "ringMatchesRingOnly": Params["RingMatchesRingOnly"],
        "completeRingsOnly": Params["CompleteRingsOnly"],
        "matchChiralTag": Params["MatchChiralTag"],
        "atomCompare": Params["AtomCompare"],
        "bondCompare": Params["BondCompare"],
        "seedSmarts": Params["SeedSMARTS"],
    }
    MCSResult = rdFMCS.FindMCS(Mols, **SearchOptions)

    if MCSResult.canceled:
        MiscUtil.PrintError("MCS failed to identify a core scaffold. Specify a different set of parameters using \"-m, --mcsParams\" option and try again.")

    NumAtoms = MCSResult.numAtoms
    NumBonds = MCSResult.numBonds
    CoreSMARTS = MCSResult.smartsString

    if len(CoreSMARTS) == 0:
        MiscUtil.PrintError("MCS failed to identify a core scaffold. Specify a different set of parameters using \"-m, --mcsParams\" option and try again.")

    MiscUtil.PrintInfo("SMARTS core scaffold: %s\nNumber of atoms in core scaffold: %s\nNumber of bonds in core scaffold: %s" % (CoreSMARTS, NumAtoms, NumBonds))

    # Enforce the minimum size of the core scaffold...
    MinNumAtoms = Params["MinNumAtoms"]
    if NumAtoms < MinNumAtoms:
        MiscUtil.PrintError("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (NumAtoms, MinNumAtoms))

    MinNumBonds = Params["MinNumBonds"]
    if NumBonds < MinNumBonds:
        MiscUtil.PrintError("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (NumBonds, MinNumBonds))

    return [Chem.MolFromSmarts(CoreSMARTS)]
Esempio n. 18
0
def ProcessOptions():
    """Process and validate command line arguments and options.

    After calling ValidateOptions(), values from Options are copied into
    OptionsInfo: input and output files along with their parameters, the
    overwrite flag, count mode, multiprocessing settings, and the salts
    settings.

    Exactly one of OptionsInfo["SaltsByComponentsMode"],
    OptionsInfo["SaltsBySMARTSFileMode"] and OptionsInfo["SaltsBySMARTSMode"]
    is set to True based on the "--saltsMode" option. OptionsInfo["SaltsFile"]
    is set only for "BySMARTSFile" mode with a "--saltsFile" value other than
    "auto". OptionsInfo["SaltsSMARTS"] is set only for "BySMARTS" mode, with
    space delimited patterns converted to newline delimited patterns.

    Invalid salts modes and missing or empty salts SMARTS patterns are
    reported through MiscUtil.PrintError().
    """
    
    MiscUtil.PrintInfo("Processing options...")
    
    # Validate options...
    ValidateOptions()
    
    OptionsInfo["Infile"] = Options["--infile"]
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
    
    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OptionsInfo["CountMode"] = False
    if re.match("^count$", Options["--mode"], re.I):
        OptionsInfo["CountMode"] = True
        
    OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
    OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])

    # Set up salts mode flags...
    SaltsByComponentsMode = False
    SaltsBySMARTSFileMode = False
    SaltsBySMARTSMode = False
    if re.match("^ByComponent$", Options["--saltsMode"], re.I):
        SaltsByComponentsMode = True
    elif re.match("^BySMARTSFile$", Options["--saltsMode"], re.I):
        # Turn the flag on; leaving it False would make the "BySMARTSFile"
        # mode indistinguishable from no mode at all.
        SaltsBySMARTSFileMode = True
    elif re.match("^BySMARTS$", Options["--saltsMode"], re.I):
        SaltsBySMARTSMode = True
    else:
        MiscUtil.PrintError("The salts mode specified, %s, using \"--saltsMode\" option is not valid." % Options["--saltsMode"])
    OptionsInfo["SaltsByComponentsMode"]  = SaltsByComponentsMode
    OptionsInfo["SaltsBySMARTSFileMode"]  = SaltsBySMARTSFileMode
    OptionsInfo["SaltsBySMARTSMode"]  = SaltsBySMARTSMode

    # A salts file value of "auto" leaves the salts file as None...
    SaltsFile = None
    if re.match("^BySMARTSFile$", Options["--saltsMode"], re.I):
        if not re.match("^auto$", Options["--saltsFile"], re.I):
            SaltsFile = Options["--saltsFile"]
    OptionsInfo["SaltsFile"] = SaltsFile
    
    SaltsSMARTS = None
    if re.match("^BySMARTS$", Options["--saltsMode"], re.I):
        if not Options["--saltsSMARTS"]:
            MiscUtil.PrintError("No salts SMARTS pattern specified using \"--saltsSMARTS\" option during \"BySMARTS\" value of \"-s, --saltsMode\" option")
        SaltsSMARTS = Options["--saltsSMARTS"].strip(" ")
        if not len(SaltsSMARTS):
            MiscUtil.PrintError("Empty SMARTS pattern specified using \"--saltsSMARTS\" option during \"BySMARTS\" value of \"-s, --saltsMode\" option")
        # Space delimited patterns are converted to one pattern per line...
        if re.search(" ", SaltsSMARTS):
            SaltsSMARTS = re.sub('[ ]+', '\n', SaltsSMARTS)
        
    OptionsInfo["SaltsSMARTS"] = SaltsSMARTS
def ValidateOptions():
    """Validate option values.

    Checks the input/output file options, the optional SMARTS patterns, and
    the numeric and yes/no options stored in the global Options dictionary.
    Problems are reported through the MiscUtil validation helpers and
    MiscUtil.PrintError.
    """

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")

    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])

    # A value of "None" (any case) means no alignment pattern was supplied;
    # anything else must be parsable as SMARTS.
    if not re.match("^None$", Options["--alignmentSMARTS"], re.I):
        PatternMol = Chem.MolFromSmarts(Options["--alignmentSMARTS"])
        if PatternMol is None:
            MiscUtil.PrintError("The value specified, %s, using option \"--alignmentSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--alignmentSMARTS"])

    MiscUtil.ValidateOptionIntegerValue("--atomLabelFontSize", Options["--atomLabelFontSize"], {">": 0})
    MiscUtil.ValidateOptionFloatValue("-b, --bondLineWidth", Options["--bondLineWidth"], {">": 0.0})

    MiscUtil.ValidateOptionTextValue("--compute2DCoords", Options["--compute2DCoords"], "yes no auto")

    MiscUtil.ValidateOptionTextValue("--counterCol", Options["--counterCol"], "yes no")
    MiscUtil.ValidateOptionTextValue("--colVisibility", Options["--colVisibility"], "yes no")

    # The label follows the "-<short>, --<long>" form used for the other
    # options in this function (it previously read "--f, -fontBold").
    MiscUtil.ValidateOptionTextValue("-f, --fontBold", Options["--fontBold"], "yes no")

    # Same "None" convention as for the alignment pattern above.
    if not re.match("^None$", Options["--highlightSMARTS"], re.I):
        PatternMol = Chem.MolFromSmarts(Options["--highlightSMARTS"])
        if PatternMol is None:
            MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"])

    MiscUtil.ValidateOptionTextValue("--kekulize", Options["--kekulize"], "yes no")

    MiscUtil.ValidateOptionTextValue("-k, --keysNavigation", Options["--keysNavigation"], "yes no")

    MiscUtil.ValidateOptionNumberValues("-m, --molImageSize", Options["--molImageSize"], 2, ",", "integer", {">": 0})
    MiscUtil.ValidateOptionTextValue("--molImageEncoded", Options["--molImageEncoded"], "yes no")

    MiscUtil.ValidateOptionIntegerValue("--numOfMolsPerRow", Options["--numOfMolsPerRow"], {">": 0})

    MiscUtil.ValidateOptionTextValue("-p, --paging", Options["--paging"], "yes no")
    MiscUtil.ValidateOptionIntegerValue("--pageLength", Options["--pageLength"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--popover", Options["--popover"], "yes no")
    MiscUtil.ValidateOptionIntegerValue("--popoverDataCount", Options["--popoverDataCount"], {">": 0})
    MiscUtil.ValidateOptionIntegerValue("--popoverTextWidth", Options["--popoverTextWidth"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--showMolName", Options["--showMolName"], "yes no")

    MiscUtil.ValidateOptionTextValue("--scrollX", Options["--scrollX"], "yes no")
    MiscUtil.ValidateOptionTextValue("--scrollY", Options["--scrollY"], "yes no")
    # A size ending in "vh" (any case) is accepted without further checks;
    # any other value goes through the integer validation.
    if not re.search("vh$", Options["--scrollYSize"], re.I):
        MiscUtil.ValidateOptionIntegerValue("--scrollYSize", Options["--scrollYSize"], {">": 0})

    MiscUtil.ValidateOptionTextValue("--tableFooter", Options["--tableFooter"], "yes no")
    MiscUtil.ValidateOptionTextValue("--tableHeader", Options["--tableHeader"], "yes no")
# Example no. 20
# 0
def RetrievePAINSPatterns(PAINSFilterMode):
    """Retrieve PAINS SMARTS patterns for the specified PAINS filter mode.

    The patterns are read from the PAINSFilters.csv file in the directory
    returned by MiscUtil.GetMayaChemToolsLibDataPath(). Blank lines and lines
    starting with "#" are ignored; the first remaining line is treated as a
    header and skipped. Each data row is read as comma-delimited text whose
    first column is the filter type and whose third column is the SMARTS
    pattern.

    Arguments:
        PAINSFilterMode (str): Filter type to retrieve, compared
            case-insensitively against the first column. "All" (any case)
            retrieves the patterns for every filter type.

    Returns:
        list: SMARTS pattern strings for the matching rows, in file order.

    """

    MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
    PAINSFiltersFilePath = os.path.join(MayaChemToolsDataDir,
                                        "PAINSFilters.csv")

    MiscUtil.PrintInfo(
        "\nRetrieving PAINS SMARTS patterns for PAINS filter type, %s, from file %s"
        % (PAINSFilterMode, PAINSFiltersFilePath))

    if not os.path.exists(PAINSFiltersFilePath):
        MiscUtil.PrintError("The PAINS filters file, %s, doesn't exist.\n" %
                            (PAINSFiltersFilePath))

    # open() raises on failure rather than returning None, so report the
    # failure from the exception handler.
    try:
        FilterFile = open(PAINSFiltersFilePath, "r")
    except IOError:
        MiscUtil.PrintError("Couldn't open PAINS filter file: %s.\n" %
                            (PAINSFiltersFilePath))

    # Collect all PAINS filter lines; the with-block guarantees the file is
    # closed even when reading fails part way through.
    HeaderLine = True
    FiltersLines = []
    with FilterFile:
        for Line in FilterFile:
            Line = Line.rstrip()
            # Ignore blank lines (they would become empty csv rows) and
            # comments...
            if not Line or Line.startswith("#"):
                continue
            # Ignore header line...
            if HeaderLine:
                HeaderLine = False
                continue
            FiltersLines.append(Line)

    # Process PAINS filter lines using csv reader...
    RetrieveAll = bool(re.match("^All$", PAINSFilterMode, re.I))
    CanonicalFilterMode = PAINSFilterMode.lower()

    SMARTSPatterns = []
    FiltersReader = csv.reader(FiltersLines, delimiter=',', quotechar='"')
    for LineWords in FiltersReader:
        FilterType = LineWords[0]
        SMARTS = LineWords[2]

        if RetrieveAll or FilterType.lower() == CanonicalFilterMode:
            SMARTSPatterns.append(SMARTS)

    MiscUtil.PrintInfo("Total number of PAINS SMARTS patterns: %d" %
                       (len(SMARTSPatterns)))

    return SMARTSPatterns
# Example no. 21
# 0
def ProcessSpecifiedFunctionalGroups():
    """Process and validate specified functional groups.

    Reads the comma-delimited names in OptionsInfo["FunctionalGroups"] and
    stores two parallel lists in OptionsInfo: "SpecifiedFunctionalGroups"
    (names as spelled in FunctionalGroupsMap['Names']) and
    "SpecifiedFunctionalGroupsNegateMatch" (True for names given with a
    leading "!"). The value "All" (any case) selects every known group with
    no negation. Names are matched case-insensitively; unknown names produce
    a warning, and an error is reported when no valid name remains.
    """

    OptionsInfo["SpecifiedFunctionalGroups"] = []
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = []

    if re.match("^All$", OptionsInfo["FunctionalGroups"], re.I):
        AllGroupNames = FunctionalGroupsMap['Names']
        OptionsInfo["SpecifiedFunctionalGroups"] = AllGroupNames
        OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = [
            False for _ in AllGroupNames
        ]
        return

    # Lower-cased name -> name as listed in the functional groups map...
    ValidNamesByLowerCase = {
        KnownName.lower(): KnownName
        for KnownName in FunctionalGroupsMap['Names']
    }

    # Drop embedded spaces before splitting on commas...
    NamesText = OptionsInfo["FunctionalGroups"].replace(" ", "")
    if not NamesText:
        MiscUtil.PrintError(
            "No functional group name specified for \"-f, --functionalGroups\" option"
        )

    MatchedGroups = []
    NegateFlags = []

    for SpecifiedName in NamesText.split(","):
        LookupName = SpecifiedName.lower()

        # A single leading "!" requests a negated match for the group...
        Negate = LookupName.startswith("!")
        if Negate:
            LookupName = LookupName[1:]

        if LookupName not in ValidNamesByLowerCase:
            MiscUtil.PrintWarning(
                "The functional group name, %s, specified using \"-f, --functionalGroups\" option is not a valid name."
                % (SpecifiedName))
            continue

        MatchedGroups.append(ValidNamesByLowerCase[LookupName])
        NegateFlags.append(Negate)

    if len(MatchedGroups) == 0:
        MiscUtil.PrintError(
            "No valid functional group names specified for \"-f, --functionalGroups\" option"
        )

    OptionsInfo["SpecifiedFunctionalGroups"] = MatchedGroups
    OptionsInfo["SpecifiedFunctionalGroupsNegateMatch"] = NegateFlags
# Example no. 22
# 0
def ValidateOptions():
    """Validate option values.

    Checks the input and output file options, the "filter"/"count" mode, and
    the yes/no options stored in the global Options dictionary. The output
    file overwrite and distinct-name checks, and the requirement that an
    output file is specified, apply only in "filter" mode.
    """

    Infile = Options["--infile"]
    MiscUtil.ValidateOptionFilePath("-i, --infile", Infile)
    MiscUtil.ValidateOptionFileExt("-i, --infile", Infile,
                                   "sdf sd smi txt csv tsv")

    Outfile = Options["--outfile"]
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Outfile, "sdf sd smi")

    # The mode is matched here, before its own validation further down...
    FilterMode = bool(re.match("^filter$", Options["--mode"], re.I))
    if FilterMode:
        MiscUtil.ValidateOptionsOutputFileOverwrite(
            "-o, --outfile", Outfile, "--overwrite", Options["--overwrite"])
        MiscUtil.ValidateOptionsDistinctFileNames(
            "-i, --infile", Infile, "-o, --outfile", Outfile)

    MiscUtil.ValidateOptionTextValue("--outfileFiltered",
                                     Options["--outfileFiltered"], "yes no")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"],
                                     "filter count")
    if FilterMode and not Outfile:
        MiscUtil.PrintError(
            "The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option"
        )

    # Remaining yes/no options, validated in the same order as before...
    for OptionLabel, OptionKey in (("--mp", "--mp"),
                                   ("-n, --negate", "--negate")):
        MiscUtil.ValidateOptionTextValue(OptionLabel, Options[OptionKey],
                                         "yes no")
# Example no. 23
# 0
def RetrieveRefFileInfo():
    """Retrieve information for the alignment reference file.

    Stores a dictionary under OptionsInfo["RefFileInfo"] holding the
    reference file name, its root name as returned by
    MiscUtil.ParseFileName, a PyMOL object name of the form
    "AlignRef_<root>", and the chain IDs. When the reference is the first
    input file, the chain IDs are taken from the first entry of
    OptionsInfo["InfilesInfo"]["ChainIDs"]; otherwise they are retrieved
    with RetrieveChainIDs.
    """

    RefFile = OptionsInfo["RefFileName"]

    # Only the middle element (the file name) of the parsed result is used...
    _, RefFileRoot, _ = MiscUtil.ParseFileName(RefFile)

    UseFirstInputFile = re.match("^FirstInputFile$",
                                 OptionsInfo["AlignRefFile"], re.I)
    if UseFirstInputFile:
        ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0]
    else:
        MiscUtil.PrintInfo(
            "\nRetrieving chains information for alignment reference file %s..."
            % RefFile)
        ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot)

        # The first-chain align mode needs at least one chain ID...
        NoChainsFound = len(ChainIDs) == 0
        if NoChainsFound and re.match("^FirstChain$",
                                      OptionsInfo["AlignMode"], re.I):
            MiscUtil.PrintError(
                "The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file."
                % (OptionsInfo["AlignMode"]))

    OptionsInfo["RefFileInfo"] = {
        "RefFileName": RefFile,
        "RefFileRoot": RefFileRoot,
        "PyMOLObjectName": "AlignRef_%s" % RefFileRoot,
        "ChainIDs": ChainIDs,
    }
# Example no. 24
# 0
def AlignInputObject(FileIndex):
    """Align an input object to the reference object.

    Arguments:
        FileIndex (int): Index into
            OptionsInfo["InfilesInfo"]["PyMOLObjectNames"] identifying the
            object to align.

    The "FirstChain" align mode restricts both selections to the first chain
    of each object; any other mode uses the whole objects. The alignment is
    carried out by the PyMOL command named in OptionsInfo["AlignMethod"]
    ("align", "cealign" or "super"); other values are reported as an error.
    """

    RefObjectName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
    FitObjectName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]

    MiscUtil.PrintInfo("\nAligning %s to %s..." %
                       (FitObjectName, RefObjectName))

    # Default to whole objects; narrow to first chains when requested...
    RefSelection, FitSelection = RefObjectName, FitObjectName
    if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
        RefSelection = "(%s and chain %s)" % (
            RefObjectName, OptionsInfo["RefFileInfo"]["ChainIDs"][0])
        FitSelection = "(%s and chain %s)" % (
            FitObjectName, RetrieveFirstChainID(FileIndex))

    if re.match("^align$", OptionsInfo["AlignMethod"], re.I):
        pymol.cmd.align(FitSelection, RefSelection)
        return

    if re.match("^cealign$", OptionsInfo["AlignMethod"], re.I):
        # cealign receives the reference selection first, unlike align and
        # super. NOTE(review): presumably because PyMOL's cealign takes
        # (target, mobile) -- confirm against the PyMOL documentation.
        pymol.cmd.cealign(RefSelection, FitSelection)
        return

    if re.match("^super$", OptionsInfo["AlignMethod"], re.I):
        pymol.cmd.super(FitSelection, RefSelection)
        return

    MiscUtil.PrintError("Invalid alignment method: %s" %
                        OptionsInfo["AlignMethod"])
def GetInterfaceChainsAndResiduesInfo(MolName1, ChainIDs1, MolName2, ChainIDs2,
                                      Method, Cutoff):
    """Get interface chains and residues info using a specified methodology.

    Arguments:
        MolName1: Name of the first molecule, passed through to PyMOLUtil.
        ChainIDs1: Iterable of chain ID strings for the first molecule.
        MolName2: Name of the second molecule, passed through to PyMOLUtil.
        ChainIDs2: Iterable of chain ID strings for the second molecule.
        Method (str): One of "BySASAChange", "ByHeavyAtomsDistance" or
            "ByCAlphaAtomsDistance", matched case-insensitively.
        Cutoff: Cutoff value passed through to the PyMOLUtil function.

    Returns:
        tuple: The two values returned by the selected PyMOLUtil function,
            or (None, None) after an invalid method has been reported.

    """

    # The PyMOLUtil functions take the chain IDs as comma-joined strings...
    ChainNames1 = ",".join(ChainIDs1)
    ChainNames2 = ",".join(ChainIDs2)
    CallArgs = (MolName1, ChainNames1, MolName2, ChainNames2, Cutoff)

    if re.match("^BySASAChange$", Method, re.I):
        Info1, Info2 = PyMOLUtil.GetInterfaceChainsResiduesBySASAChange(
            *CallArgs)
        return Info1, Info2

    if re.match("^ByHeavyAtomsDistance$", Method, re.I):
        # NOTE(review): this function name has no "I" in "Interface", unlike
        # the other two -- confirm it matches PyMOLUtil before renaming.
        Info1, Info2 = PyMOLUtil.GetnterfaceChainsResiduesByHeavyAtomsDistance(
            *CallArgs)
        return Info1, Info2

    if re.match("^ByCAlphaAtomsDistance$", Method, re.I):
        Info1, Info2 = PyMOLUtil.GetInterfaceChainsResiduesByCAlphaAtomsDistance(
            *CallArgs)
        return Info1, Info2

    MiscUtil.PrintError(
        "Failed to retrieve interface residues information: Method %s is not valid..."
        % Method)

    return None, None
# Example no. 26
# 0
def ValidateOptions():
    """Validate option values.

    Checks the alignment method and mode, expands the "--infiles" option
    into individual file names and validates each of them, and validates the
    alignment reference file unless it is "FirstInputFile". The expanded
    file names are stored back into Options under "--infileNames".
    """

    MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"],
                                     "align cealign super")
    MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"],
                                     "FirstChain Complex")

    # Expand infiles to handle presence of multiple input files...
    InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")
    if len(InfileNames) < 2:
        # The check accepts exactly two files, so the message says
        # "at least 2" (it previously claimed "greater than 2").
        MiscUtil.PrintError(
            "Number of input files specified for \"-i, --infiles\" option, %d, must be at least 2..."
            % (len(InfileNames)))

    # Validate file paths and extensions...
    for Infile in InfileNames:
        MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
        MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
    Options["--infileNames"] = InfileNames

    # An explicit reference file is validated like the input files...
    if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
        AlignRefFile = Options["--alignRefFile"]
        MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile)
        MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile,
                                       "pdb cif")
def ProcessClusteringMethodParameter():
    """Process specified clustering method parameter.

    Resets OptionsInfo["SpecifiedHierarchicalClusteringMethod"] and
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] to empty strings.
    For the "Butina" method (any case) nothing else is done. Otherwise
    OptionsInfo["ClusteringMethod"] is matched case-insensitively against
    the names in rdSimDivPickers.ClusterMethod.names, and the matching name
    and its associated value are stored. An unknown name is reported as an
    error.
    """

    OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = ""
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = ""

    RequestedMethod = OptionsInfo["ClusteringMethod"]
    if re.match("^Butina$", RequestedMethod, re.I):
        # Nothing to process...
        return

    # Set up name -> ID and lower-cased name -> name maps...
    AvailableMethods = rdSimDivPickers.ClusterMethod.names
    MethodNames = sorted(AvailableMethods)
    MethodIDByName = {
        MethodName: AvailableMethods[MethodName] for MethodName in MethodNames
    }
    MethodNameByLowerCase = {
        MethodName.lower(): MethodName for MethodName in MethodNames
    }

    LookupKey = RequestedMethod.lower()
    if LookupKey not in MethodNameByLowerCase:
        MiscUtil.PrintError("The clustering method, %s, specified using \"-c, --clusteringMethod\" option is not a valid name." % (RequestedMethod))

    ResolvedMethodName = MethodNameByLowerCase[LookupKey]
    OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = ResolvedMethodName
    OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = MethodIDByName[ResolvedMethodName]
def GetEnergy(Mol, ConfID = None):
    """Calculate the energy of a molecule conformer.

    Arguments:
        Mol: Molecule passed to the AllChem forcefield functions.
        ConfID: Conformer ID; None is replaced by -1 before use.

    Returns:
        tuple: (Status, Energy). Status is False and Energy is None when no
            forcefield object could be set up for the molecule; otherwise
            Status is True and Energy is the value returned by the
            forcefield's CalcEnergy().

    """

    ConformerID = -1 if ConfID is None else ConfID

    if OptionsInfo["UseUFF"]:
        MoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConformerID)
    elif OptionsInfo["UseMMFF"]:
        MoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
        MoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MoleculeProperties, confId = ConformerID)
    else:
        MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
        # Same result as falling through with the initial status and energy...
        return (True, None)

    if MoleculeForcefield is None:
        return (False, None)

    return (True, MoleculeForcefield.CalcEnergy())
def PerformConstrainedMinimization():
    """Perform constrained minimization.

    Retrieves the reference molecule, reads the molecules from
    OptionsInfo["Infile"], sets up a writer for OptionsInfo["Outfile"], and
    hands all three to ProcessMolecules. The writer is closed even when
    processing raises, and the counts returned by ProcessMolecules are
    printed as a summary.
    """

    # Read and validate reference molecule...
    RefMol = RetrieveReferenceMolecule()

    # Setup a molecule reader for input file...
    MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
    OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
    Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])

    # Set up a molecule writer...
    Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
    if Writer is None:
        MiscUtil.PrintError("Failed to setup a writer for output file %s " % OptionsInfo["Outfile"])
    MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])

    # Close the writer regardless of how processing ends, so the output file
    # handle is not left open after a failure...
    try:
        MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
    finally:
        if Writer is not None:
            Writer.close()

    MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
    MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
    MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
    # Ignored = invalid molecules + missing core scaffold + failed minimization...
    MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount))
def SetupCoreScaffoldsBySMARTSOrSMILES():
    """Set up core scaffold molecule(s) using specified SMARTS or SMILES.

    Each string in OptionsInfo["SMARTSOrSMILESCoreScaffoldList"] is parsed
    as SMARTS when OptionsInfo["CoreScaffold"] is "BySMARTS" (any case) and
    as SMILES otherwise. A string that can't be converted into a molecule is
    reported as an error.

    Returns:
        list: Core scaffold molecules, in the order of the specified strings.

    """

    UseSMARTS = bool(re.match("^BySMARTS$", OptionsInfo["CoreScaffold"],
                              re.I))
    ScaffoldStrings = OptionsInfo["SMARTSOrSMILESCoreScaffoldList"]

    InfoTemplate = (
        "\nSetting up core scaffold(s) using SMARTS...\nSMARTS core scaffold(s): %s"
        if UseSMARTS else
        "\nSetting up core scaffold(s) using SMILES...\nSMILES core scaffold(s): %s"
    )
    MiscUtil.PrintInfo(InfoTemplate % " ".join(ScaffoldStrings))

    ScaffoldMols = []
    for ScaffoldString in ScaffoldStrings:
        ScaffoldMol = (Chem.MolFromSmarts(ScaffoldString) if UseSMARTS else
                       Chem.MolFromSmiles(ScaffoldString))
        if ScaffoldMol is None:
            MiscUtil.PrintError(
                "Failed to generate mol for core scaffold: %s" % (ScaffoldString))
        ScaffoldMols.append(ScaffoldMol)

    return ScaffoldMols