def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Calculate descriptors for molecules using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        Writer: Output writer passed through to WriteDescriptorValues.

    Returns:
        tuple: (MolCount, ValidMolCount) - number of results received from
            the workers, and number of those carrying an encoded molecule.

    """

    DescriptorsCount = len(OptionsInfo["SpecifiedDescriptorNames"])
    MiscUtil.PrintInfo(
        "\nCalculating %d molecular %s for each molecule using multiprocessing......"
        % (DescriptorsCount,
           ("descriptors" if DescriptorsCount > 1 else "descriptor")))

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    # Options and OptionsInfo are encoded and handed to every worker through
    # the pool initializer...
    MiscUtil.PrintInfo("Encoding options info...")
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount) = [0] * 2
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, CalculatedValues = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            # Write descriptor values...
            WriteDescriptorValues(Mol, MolCount, Writer, CalculatedValues)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount)
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Remove salts from molecules using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        Writer: Output writer passed through to WriteMolecule.

    Returns:
        tuple: (MolCount, ValidMolCount, SaltsMolCount) - number of results
            received, number carrying an encoded molecule, and number whose
            salty status flag returned by the worker is true.

    """

    MiscUtil.PrintInfo("\nRemoving salts using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    OutfileParams = OptionsInfo["OutfileParams"]
    Compute2DCoords = OutfileParams["Compute2DCoords"]
    SetSMILESMolProps = OutfileParams["SetSMILESMolProps"]

    # Options and OptionsInfo are encoded and handed to every worker through
    # the pool initializer...
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount, SaltsMolCount) = [0] * 3
        FirstMol = True
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, SaltyStatus = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            # Writer properties are set up once, from the first valid molecule.
            if FirstMol:
                FirstMol = False
                if SetSMILESMolProps:
                    RDKitUtil.SetWriterMolProps(Writer, Mol)

            if SaltyStatus:
                SaltsMolCount += 1

            WriteMolecule(Writer, Mol, Compute2DCoords)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, SaltsMolCount)
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Calculate energy of molecules using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        Writer: Output writer passed through to WriteMolecule.

    Returns:
        tuple: (MolCount, ValidMolCount, EnergyFailedCount) - number of
            results received, number carrying an encoded molecule, and
            number whose calculation status returned by the worker is false.

    """

    MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    # Options and OptionsInfo are encoded and handed to every worker through
    # the pool initializer...
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, CalcStatus, Energy = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            # Decode before checking the status: the failure branch needs the
            # current molecule to report its name.
            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            if not CalcStatus:
                if not OptionsInfo["QuietMode"]:
                    MolName = RDKitUtil.GetMolName(Mol, MolCount)
                    MiscUtil.PrintWarning(
                        "Failed to calculate energy for molecule %s" % MolName)

                EnergyFailedCount += 1
                continue

            Energy = "%.2f" % Energy
            WriteMolecule(Writer, Mol, Energy)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, EnergyFailedCount)
def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
    """Minimize molecules using a pool of worker processes.

    Arguments:
        RefMol: Reference molecule; encoded and stored in
            OptionsInfo["EncodedRefMol"] so it reaches every worker.
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        Writer: Output writer passed through to WriteMolecule.

    Returns:
        tuple: (MolCount, ValidMolCount, CoreScaffoldMissingCount,
            MinimizationFailedCount) - number of results received, number
            carrying an encoded molecule, number flagged by the worker as
            missing the core scaffold, and number whose calculation status
            is false.

    """

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    # The encoded reference molecule travels inside OptionsInfo, so it must be
    # stored before OptionsInfo itself is encoded...
    MiscUtil.PrintInfo("Encoding options info and reference molecule...")
    OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount, CoreScaffoldMissingCount,
         MinimizationFailedCount) = [0] * 4
        for Result in Results:
            MolCount += 1
            (MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus,
             Energy, ScaffoldEmbedRMSD) = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            # Count the molecule against the returned total, not the
            # per-result status flag.
            if CoreScaffoldMissingStatus:
                CoreScaffoldMissingCount += 1
                continue

            if not CalcStatus:
                MinimizationFailedCount += 1
                continue

            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
            WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, CoreScaffoldMissingCount,
            MinimizationFailedCount)
def ProcessMoleculesUsingMultipleProcesses(Mols, GroupsPatternMols, Writer, GroupOutfilesWriters):
    """Search molecules for functional groups using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        GroupsPatternMols: Functional group pattern molecules; encoded and
            stored in OptionsInfo["EncodedGroupPatternMols"] so they reach
            every worker.
        Writer: Main output writer; may be None.
        GroupOutfilesWriters: Iterable of per-group writers; entries may be
            None.

    Returns:
        tuple: (MolCount, ValidMolCount, RemainingMolCount,
            GroupsPatternsMatchCountList) - number of results received,
            number carrying an encoded molecule, number flagged as matched
            by the worker, and per-group match counts with one entry per
            OptionsInfo["SpecifiedFunctionalGroups"] item.

    """

    MiscUtil.PrintInfo(
        "\nSearching functional groups using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    CombineMatchResults = OptionsInfo["CombineMatchResults"]
    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    # The encoded pattern molecules travel inside OptionsInfo, so they must be
    # stored before OptionsInfo itself is encoded...
    MiscUtil.PrintInfo(
        "Encoding options info and functional groups pattern molecules...")
    OptionsInfo["EncodedGroupPatternMols"] = [
        RDKitUtil.MolToBase64EncodedMolString(PatternMol)
        for PatternMol in GroupsPatternMols
    ]
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
        MiscUtil.ObjectToBase64EncodedString(FunctionalGroupsMap))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        GroupsPatternsMatchCountList = [0] * len(
            OptionsInfo["SpecifiedFunctionalGroups"])
        (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
        FirstMol = True
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, MolMatched, GroupsPatternMatchStatusList = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            # Writer properties are set up once, from the first valid molecule.
            if FirstMol:
                FirstMol = False
                if SetSMILESMolProps:
                    if Writer is not None:
                        RDKitUtil.SetWriterMolProps(Writer, Mol)
                    for GroupOutfileWriter in GroupOutfilesWriters:
                        if GroupOutfileWriter is not None:
                            RDKitUtil.SetWriterMolProps(GroupOutfileWriter, Mol)

            # Update functional group match count; this happens for every
            # valid molecule, whether or not it matched overall...
            for GroupIndex, MatchStatus in enumerate(GroupsPatternMatchStatusList):
                if MatchStatus:
                    GroupsPatternsMatchCountList[GroupIndex] += 1

            if not MolMatched:
                continue

            RemainingMolCount += 1
            WriteMolecule(Writer, GroupOutfilesWriters, Mol, Compute2DCoords,
                          CombineMatchResults, GroupsPatternMatchStatusList)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, RemainingMolCount,
            GroupsPatternsMatchCountList)
def ProcessMoleculesUsingMultipleProcesses(Mols, ChEMBLPatternMols, Writer, WriterFiltered):
    """Filter molecules using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        ChEMBLPatternMols: ChEMBL alert pattern molecules; encoded and stored
            in OptionsInfo["EncodedChEMBLPatternMols"] so they reach every
            worker.
        Writer: Writer for molecules that remain after filtering.
        WriterFiltered: Writer for filtered-out molecules; only used when
            OptionsInfo["OutfileFilteredMode"] is true.

    Returns:
        tuple: (MolCount, ValidMolCount, RemainingMolCount) - number of
            results received, number carrying an encoded molecule, and
            number whose match status equals OptionsInfo["NegateMatch"].

    """

    MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    NegateMatch = OptionsInfo["NegateMatch"]
    OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
    SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]

    # The encoded pattern molecules travel inside OptionsInfo, so they must be
    # stored before OptionsInfo itself is encoded...
    MiscUtil.PrintInfo(
        "Encoding options info and ChEMBL alert pattern molecules...")
    OptionsInfo["EncodedChEMBLPatternMols"] = [
        RDKitUtil.MolToBase64EncodedMolString(PatternMol)
        for PatternMol in ChEMBLPatternMols
    ]
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
        FirstMol = True
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, MolMatched = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            # Writer properties are set up once, from the first valid molecule.
            if FirstMol:
                FirstMol = False
                if SetSMILESMolProps:
                    if Writer is not None:
                        RDKitUtil.SetWriterMolProps(Writer, Mol)
                    if WriterFiltered is not None:
                        RDKitUtil.SetWriterMolProps(WriterFiltered, Mol)

            # A molecule remains when its match status equals NegateMatch;
            # everything else goes to the filtered writer, if enabled.
            if MolMatched == NegateMatch:
                RemainingMolCount += 1
                WriteMolecule(Writer, Mol, Compute2DCoords)
            elif OutfileFilteredMode:
                WriteMolecule(WriterFiltered, Mol, Compute2DCoords)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, RemainingMolCount)
def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
    """Calculate partial charges for molecules using a pool of worker processes.

    Arguments:
        Mols: Molecules handed to RDKitUtil.GenerateBase64EncodedMolStrings
            to build the per-worker input iterable.
        Writer: Output writer passed through to WriteMolPartialCharges.

    Returns:
        tuple: (MolCount, ValidMolCount, CalcFailedCount) - number of
            results received, number carrying an encoded molecule, and
            number whose calculation status returned by the worker is false.

    """

    MiscUtil.PrintInfo(
        "Calculating partial atomic charges using multiprocessing...")

    MPParams = OptionsInfo["MPParams"]
    InputDataMode = MPParams["InputDataMode"]
    ChunkSize = MPParams["ChunkSize"]
    # Evaluated once; drives both the info message and the dispatch below.
    LazyMode = re.match("^Lazy$", InputDataMode, re.I)

    Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]

    # Options and OptionsInfo are encoded and handed to every worker through
    # the pool initializer...
    InitializeWorkerProcessArgs = (
        MiscUtil.ObjectToBase64EncodedString(Options),
        MiscUtil.ObjectToBase64EncodedString(OptionsInfo))

    # Encoded molecules data iterable consumed by the workers...
    WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)

    MiscUtil.PrintInfo(
        "\nConfiguring multiprocessing using %s method..."
        % ("mp.Pool.imap()" if LazyMode else "mp.Pool.map()"))
    MiscUtil.PrintInfo(
        "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
        % (MPParams["NumProcesses"], InputDataMode,
           ("automatic" if ChunkSize is None else ChunkSize)))

    ProcessPool = mp.Pool(
        MPParams["NumProcesses"], InitializeWorkerProcess,
        InitializeWorkerProcessArgs)

    try:
        # Start processing...
        if LazyMode:
            Results = ProcessPool.imap(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        elif re.match("^InMemory$", InputDataMode, re.I):
            Results = ProcessPool.map(
                WorkerProcess, WorkerProcessDataIterable, ChunkSize)
        else:
            # NOTE(review): relies on MiscUtil.PrintError not returning;
            # otherwise Results is unbound below - confirm.
            MiscUtil.PrintError(
                "The value, %s, specified for \"--inputDataMode\" is not supported."
                % (InputDataMode))

        (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
        for Result in Results:
            MolCount += 1
            MolIndex, EncodedMol, CalcStatus, PartialCharges = Result

            # Results without an encoded molecule are counted but not written.
            if EncodedMol is None:
                continue
            ValidMolCount += 1

            if not CalcStatus:
                CalcFailedCount += 1
                continue

            MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)

            # Write out charges...
            WriteMolPartialCharges(Writer, MolWithHs, PartialCharges,
                                   Compute2DCoords)
    finally:
        # Release worker processes: all results are consumed on the normal
        # path, and outstanding work is abandoned on the error path.
        ProcessPool.terminate()
        ProcessPool.join()

    return (MolCount, ValidMolCount, CalcFailedCount)