def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Process molecules and calculate descriptors using multiprocessing.

    Arguments:
        Mols: Molecules passed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the encoded input for the worker processes.
        Writer: Output writer handed to WriteDescriptorValues.

    Returns:
        tuple: (MolCount, ValidMolCount), the number of results received
            and the number of those carrying an encoded molecule.
    """

    DescriptorsCount = len(OptionsInfo["SpecifiedDescriptorNames"])
    MiscUtil.PrintInfo(
        "\nCalculating %d molecular %s for each molecule using multiprocessing......"
        % (DescriptorsCount,
           ("descriptors" if DescriptorsCount > 1 else "descriptor")))

    MPParams = OptionsInfo["MPParams"]

    # Setup data for initializing a worker process...
    MiscUtil.PrintInfo("Encoding options info...")
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Setup a encoded mols data iterable for a worker process...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Setup process pool along with data initialization for each process...
    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..." %
        ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"],
                                      re.I) else "mp.Pool.map()"))
    MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" %
                       (MPParams["NumProcesses"], MPParams["InputDataMode"],
                        ("automatic" if MPParams["ChunkSize"] is None else
                         MPParams["ChunkSize"])))

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess,
                          InitializeWorkerProcessArgs)

    # Start processing: imap() yields results lazily, map() collects them
    # all into a list before returning...
    if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable,
                                   MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable,
                                  MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError(
            "The value, %s, specified for \"--inputDataMode\" is not supported."
            % (MPParams["InputDataMode"]))

    (MolCount, ValidMolCount) = [0] * 2
    for Result in Results:
        MolCount += 1
        MolIndex, EncodedMol, CalculatedValues = Result

        # A None encoded molecule marks a result to be skipped...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

        # Write descriptor values...
        WriteDescriptorValues(Mol, MolCount, Writer, CalculatedValues)

    # All results have been consumed; release the worker processes...
    ProcessPool.close()
    ProcessPool.join()

    return (MolCount, ValidMolCount)
Beispiel #2
0
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Remove salts from molecules using a pool of worker processes.

    Returns a tuple containing the number of results received, the number
    of results carrying an encoded molecule, and the number of those
    flagged with a true salty status.
    """

    MiscUtil.PrintInfo("\nRemoving salts using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # Encoded options data handed to the initializer of each worker process...
    EncodedOptions = MiscUtil.ObjectToBase64EncodedString(Options)
    EncodedOptionsInfo = MiscUtil.ObjectToBase64EncodedString(OptionsInfo)
    InitializeWorkerProcessArgs = (EncodedOptions, EncodedOptionsInfo)

    # Encoded molecules consumed by the worker processes...
    EncodedMolsIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Report multiprocessing configuration...
    LazyModeMatch = re.match("^Lazy$", MPParams["InputDataMode"], re.I)
    PoolMethodName = "mp.Pool.imap()" if LazyModeMatch else "mp.Pool.map()"
    MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % PoolMethodName)

    ConfigValues = (
        MPParams["NumProcesses"],
        MPParams["InputDataMode"],
        "automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"],
    )
    MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % ConfigValues)

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)

    # Dispatch work using the method matching the input data mode...
    if LazyModeMatch:
        Results = ProcessPool.imap(WorkerProcess, EncodedMolsIterable, MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, EncodedMolsIterable, MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))

    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    MolCount = 0
    ValidMolCount = 0
    SaltsMolCount = 0
    WriterPropsPending = True

    for MolCount, Result in enumerate(Results, 1):
        MolIndex, EncodedMol, SaltyStatus = Result

        # Skip results without an encoded molecule...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

        # Writer properties are set at most once, using the first valid molecule...
        if WriterPropsPending:
            WriterPropsPending = False
            if SetSMILESMolProps:
                RDKitUtil.SetWriterMolProps(Writer, Mol)

        SaltsMolCount += 1 if SaltyStatus else 0

        WriteMolecule(Writer, Mol, Compute2DCoords)

    return (MolCount, ValidMolCount, SaltsMolCount)
Beispiel #3
0
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Process and calculate energy of molecules using multiprocessing.

    Arguments:
        Mols: Molecules passed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the encoded input for the worker processes.
        Writer: Output writer handed to WriteMolecule.

    Returns:
        tuple: (MolCount, ValidMolCount, EnergyFailedCount), the number of
            results received, the number of those carrying an encoded
            molecule, and the number of valid results whose calculation
            status was false.
    """

    MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]

    # Setup data for initializing a worker process...
    InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Setup a encoded mols data iterable for a worker process...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Setup process pool along with data initialization for each process...
    MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
    MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)

    # Start processing: imap() yields results lazily, map() collects them
    # all into a list before returning...
    if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))

    (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
    for Result in Results:
        MolCount += 1
        MolIndex, EncodedMol, CalcStatus, Energy = Result

        # A None encoded molecule marks a result to be skipped...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        # Decode before checking the calculation status: the failure warning
        # below needs the molecule to look up its name...
        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

        if not CalcStatus:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolCount)
                MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)

            EnergyFailedCount += 1
            continue

        # Energy is written out formatted to two decimal places...
        Energy = "%.2f" % Energy
        WriteMolecule(Writer, Mol, Energy)

    # All results have been consumed; release the worker processes...
    ProcessPool.close()
    ProcessPool.join()

    return (MolCount, ValidMolCount, EnergyFailedCount)
def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
    """Process and minimize molecules using multiprocessing.

    The encoded reference molecule is stored in OptionsInfo under the
    "EncodedRefMol" key before OptionsInfo is encoded for the workers.

    Arguments:
        RefMol: Reference molecule encoded using
            RDKitUtil.MolToBase64EncodedMolString.
        Mols: Molecules passed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the encoded input for the worker processes.
        Writer: Output writer handed to WriteMolecule.

    Returns:
        tuple: (MolCount, ValidMolCount, CoreScaffoldMissingCount,
            MinimizationFailedCount), the number of results received, the
            number carrying an encoded molecule, the number flagged with a
            true core scaffold missing status, and the number of remaining
            results whose calculation status was false.
    """

    MPParams = OptionsInfo["MPParams"]

    # Setup data for initializing a worker process...
    MiscUtil.PrintInfo("Encoding options info and reference molecule...")

    OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
    InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Setup a encoded mols data iterable for a worker process...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Setup process pool along with data initialization for each process...
    MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
    MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)

    # Start processing: imap() yields results lazily, map() collects them
    # all into a list before returning...
    if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))

    (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
    for Result in Results:
        MolCount += 1
        MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD  = Result

        # A None encoded molecule marks a result to be skipped...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        # Track the running count, not the per-result status flag...
        if CoreScaffoldMissingStatus:
            CoreScaffoldMissingCount += 1
            continue

        if not CalcStatus:
            MinimizationFailedCount += 1
            continue

        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
        WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)

    # All results have been consumed; release the worker processes...
    ProcessPool.close()
    ProcessPool.join()

    return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
Beispiel #5
0
def ProcessMoleculesUsingMultipleProcesses(Mols, GroupsPatternMols, Writer,
                                           GroupOutfilesWriters):
    """Search molecules for functional groups using a pool of worker processes.

    The encoded pattern molecules are stored in OptionsInfo under the
    "EncodedGroupPatternMols" key before OptionsInfo is encoded for the
    workers.

    Returns a tuple containing the number of results received, the number
    carrying an encoded molecule, the number of matched molecules written
    out, and a list of match counts per specified functional group.
    """

    MiscUtil.PrintInfo(
        "\nSearching functional groups  using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    OutfileParams = OptionsInfo["OutfileParams"]
    Compute2DCoords = OutfileParams["Compute2DCoords"]
    CombineMatchResults = OptionsInfo["CombineMatchResults"]
    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    # Encode pattern molecules and options data for worker initialization...
    MiscUtil.PrintInfo(
        "Encoding options info and functional groups pattern molecules...")
    EncodedPatternMols = []
    for PatternMol in GroupsPatternMols:
        EncodedPatternMols.append(
            RDKitUtil.MolToBase64EncodedMolString(PatternMol))
    OptionsInfo["EncodedGroupPatternMols"] = EncodedPatternMols

    EncodedOptions = MiscUtil.ObjectToBase64EncodedString(Options)
    EncodedOptionsInfo = MiscUtil.ObjectToBase64EncodedString(OptionsInfo)
    EncodedGroupsMap = MiscUtil.ObjectToBase64EncodedString(FunctionalGroupsMap)
    InitializeWorkerProcessArgs = (EncodedOptions, EncodedOptionsInfo,
                                   EncodedGroupsMap)

    # Encoded molecules consumed by the worker processes...
    EncodedMolsIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Report multiprocessing configuration...
    LazyModeMatch = re.match("^Lazy$", MPParams["InputDataMode"], re.I)
    PoolMethodName = "mp.Pool.imap()" if LazyModeMatch else "mp.Pool.map()"
    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..." % PoolMethodName)

    ConfigValues = (
        MPParams["NumProcesses"],
        MPParams["InputDataMode"],
        "automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"],
    )
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % ConfigValues)

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess,
                          InitializeWorkerProcessArgs)

    # Dispatch work using the method matching the input data mode...
    if LazyModeMatch:
        Results = ProcessPool.imap(WorkerProcess, EncodedMolsIterable,
                                   MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, EncodedMolsIterable,
                                  MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError(
            "The value, %s, specified for \"--inputDataMode\" is not supported."
            % (MPParams["InputDataMode"]))

    SpecifiedGroupsCount = len(OptionsInfo["SpecifiedFunctionalGroups"])
    GroupsPatternsMatchCountList = [0] * SpecifiedGroupsCount

    MolCount = 0
    ValidMolCount = 0
    RemainingMolCount = 0
    WriterPropsPending = True

    for MolCount, Result in enumerate(Results, 1):
        MolIndex, EncodedMol, MolMatched, GroupsPatternMatchStatusList = Result

        # Skip results without an encoded molecule...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

        # Writer properties are set at most once, using the first valid molecule...
        if WriterPropsPending:
            WriterPropsPending = False
            if SetSMILESMolProps:
                if Writer is not None:
                    RDKitUtil.SetWriterMolProps(Writer, Mol)
                for GroupOutfileWriter in GroupOutfilesWriters:
                    if GroupOutfileWriter is None:
                        continue
                    RDKitUtil.SetWriterMolProps(GroupOutfileWriter, Mol)

        # Tally matches for each functional group...
        for GroupIndex, MatchStatus in enumerate(GroupsPatternMatchStatusList):
            if not MatchStatus:
                continue
            GroupsPatternsMatchCountList[GroupIndex] += 1

        # Only matched molecules are counted and written out...
        if MolMatched:
            RemainingMolCount += 1
            WriteMolecule(Writer, GroupOutfilesWriters, Mol, Compute2DCoords,
                          CombineMatchResults, GroupsPatternMatchStatusList)

    return (MolCount, ValidMolCount, RemainingMolCount,
            GroupsPatternsMatchCountList)
Beispiel #6
0
def ProcessMoleculesUsingMultipleProcesses(Mols, ChEMBLPatternMols, Writer,
                                           WriterFiltered):
    """Filter molecules using a pool of worker processes.

    The encoded pattern molecules are stored in OptionsInfo under the
    "EncodedChEMBLPatternMols" key before OptionsInfo is encoded for the
    workers.

    Returns a tuple containing the number of results received, the number
    carrying an encoded molecule, and the number of molecules written to
    Writer.
    """

    MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    NegateMatch = OptionsInfo["NegateMatch"]
    OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
    OutfileParams = OptionsInfo["OutfileParams"]
    Compute2DCoords = OutfileParams["Compute2DCoords"]
    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    # Encode pattern molecules and options data for worker initialization...
    MiscUtil.PrintInfo(
        "Encoding options info and ChEMBL alert pattern molecules...")
    EncodedPatternMols = []
    for PatternMol in ChEMBLPatternMols:
        EncodedPatternMols.append(
            RDKitUtil.MolToBase64EncodedMolString(PatternMol))
    OptionsInfo["EncodedChEMBLPatternMols"] = EncodedPatternMols

    EncodedOptions = MiscUtil.ObjectToBase64EncodedString(Options)
    EncodedOptionsInfo = MiscUtil.ObjectToBase64EncodedString(OptionsInfo)
    InitializeWorkerProcessArgs = (EncodedOptions, EncodedOptionsInfo)

    # Encoded molecules consumed by the worker processes...
    EncodedMolsIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Report multiprocessing configuration...
    LazyModeMatch = re.match("^Lazy$", MPParams["InputDataMode"], re.I)
    PoolMethodName = "mp.Pool.imap()" if LazyModeMatch else "mp.Pool.map()"
    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..." % PoolMethodName)

    ConfigValues = (
        MPParams["NumProcesses"],
        MPParams["InputDataMode"],
        "automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"],
    )
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % ConfigValues)

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess,
                          InitializeWorkerProcessArgs)

    # Dispatch work using the method matching the input data mode...
    if LazyModeMatch:
        Results = ProcessPool.imap(WorkerProcess, EncodedMolsIterable,
                                   MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, EncodedMolsIterable,
                                  MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError(
            "The value, %s, specified for \"--inputDataMode\" is not supported."
            % (MPParams["InputDataMode"]))

    MolCount = 0
    ValidMolCount = 0
    RemainingMolCount = 0
    WriterPropsPending = True

    for MolCount, Result in enumerate(Results, 1):
        MolIndex, EncodedMol, MolMatched = Result

        # Skip results without an encoded molecule...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

        # Writer properties are set at most once, using the first valid molecule...
        if WriterPropsPending:
            WriterPropsPending = False
            if SetSMILESMolProps:
                for OutWriter in (Writer, WriterFiltered):
                    if OutWriter is not None:
                        RDKitUtil.SetWriterMolProps(OutWriter, Mol)

        # Molecules whose match status equals NegateMatch are kept; others
        # go to the filtered writer when that mode is enabled...
        if MolMatched == NegateMatch:
            RemainingMolCount += 1
            WriteMolecule(Writer, Mol, Compute2DCoords)
        elif OutfileFilteredMode:
            WriteMolecule(WriterFiltered, Mol, Compute2DCoords)

    return (MolCount, ValidMolCount, RemainingMolCount)
Beispiel #7
0
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Calculate partial atomic charges using a pool of worker processes.

    Returns a tuple containing the number of results received, the number
    carrying an encoded molecule, and the number of valid results whose
    calculation status was false.
    """

    MiscUtil.PrintInfo(
        "Calculating partial atomic charges using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # Encoded options data handed to the initializer of each worker process...
    EncodedOptions = MiscUtil.ObjectToBase64EncodedString(Options)
    EncodedOptionsInfo = MiscUtil.ObjectToBase64EncodedString(OptionsInfo)
    InitializeWorkerProcessArgs = (EncodedOptions, EncodedOptionsInfo)

    # Encoded molecules consumed by the worker processes...
    EncodedMolsIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    # Report multiprocessing configuration...
    LazyModeMatch = re.match("^Lazy$", MPParams["InputDataMode"], re.I)
    PoolMethodName = "mp.Pool.imap()" if LazyModeMatch else "mp.Pool.map()"
    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..." % PoolMethodName)

    ConfigValues = (
        MPParams["NumProcesses"],
        MPParams["InputDataMode"],
        "automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"],
    )
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % ConfigValues)

    ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess,
                          InitializeWorkerProcessArgs)

    # Dispatch work using the method matching the input data mode...
    if LazyModeMatch:
        Results = ProcessPool.imap(WorkerProcess, EncodedMolsIterable,
                                   MPParams["ChunkSize"])
    elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
        Results = ProcessPool.map(WorkerProcess, EncodedMolsIterable,
                                  MPParams["ChunkSize"])
    else:
        MiscUtil.PrintError(
            "The value, %s, specified for \"--inputDataMode\" is not supported."
            % (MPParams["InputDataMode"]))

    MolCount = 0
    ValidMolCount = 0
    CalcFailedCount = 0

    for MolCount, Result in enumerate(Results, 1):
        MolIndex, EncodedMol, CalcStatus, PartialCharges = Result

        # Skip results without an encoded molecule...
        if EncodedMol is None:
            continue
        ValidMolCount += 1

        if CalcStatus:
            # Write out charges...
            MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
            WriteMolPartialCharges(Writer, MolWithHs, PartialCharges,
                                   Compute2DCoords)
        else:
            CalcFailedCount += 1

    return (MolCount, ValidMolCount, CalcFailedCount)