def main(resname, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report):
    """
        Calculate the relative SASA values of the ligand

        :param resname: Ligand resname
        :type resname: str
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology (None if no topology is needed)
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    # the "%d" placeholder is kept in the name and handed over to process_folder
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating SASA with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    top_obj = getTopologyObject(top) if top is not None else None
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = process_folder(None, folder, trajName, reportName,
                               os.path.join(folder, outputFilename), top_obj)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(process_folder(epoch, folder, trajName, reportName,
                                    os.path.join(folder, epoch, outputFilename),
                                    top_obj))
    # The pool is only created once the list of files is known, so a failure
    # while discovering files does not leave worker processes behind
    pool = mp.Pool(nProcessors)
    try:
        results = [pool.apply_async(process_file,
                                    args=(info[0], info[2], resname, info[1],
                                          info[4], format_out, new_report,
                                          info[3]))
                   for info in files]
    finally:
        # always release the workers: no more tasks will be submitted, wait
        # for the ones already queued to finish
        pool.close()
        pool.join()
    for res in results:
        # get() re-raises any exception that happened in a worker
        res.get()
def main(col_energy, folder, out_report_name, format_out, nProcessors, output_folder, new_report, reportName, trajs_to_select):
    """
        Standardize the energy of the reports of a simulation, processing
        the report files in parallel

        :param col_energy: Column corresponding to the energy in the reports
        :type col_energy: int
        :param folder: Path the simulation
        :type folder: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
        :param reportName: Base name of the report files, "_%d" is appended
            to it (if None, "report_%d" is used)
        :type reportName: str
        :param trajs_to_select: Selection of trajectories, forwarded as is to
            analysis_utils.process_folder
    """
    # Build the name templates, the "%d" placeholder is left unformatted here
    if output_folder is None:
        report_stem = out_report_name
    else:
        report_stem = os.path.join(output_folder, out_report_name)
    outputFilename = report_stem + "_%d"
    trajName = "*traj*"
    reportName = "report_%d" if reportName is None else reportName + "_%d"

    n_workers = utilities.getCpuCount() if nProcessors is None else nProcessors
    n_workers = max(1, n_workers)
    print("Standarizing energy with %d processors" % n_workers)

    epochs = utilities.get_epoch_folders(folder)
    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            epoch_output = os.path.join(folder, epoch, outputFilename)
            files.extend(analysis_utils.process_folder(epoch, folder, trajName,
                                                       reportName, epoch_output,
                                                       None, trajs_to_select))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName,
                                              reportName,
                                              os.path.join(folder, outputFilename),
                                              None, trajs_to_select)

    pool = mp.Pool(n_workers)
    results = []
    for info in files:
        task_args = (info[1], info[4], format_out, new_report, info[3], col_energy)
        results.append(pool.apply_async(process_file, args=task_args))
    pool.close()
    pool.join()
    # get() re-raises any exception that happened in a worker
    for res in results:
        res.get()
def main(residues, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report, trajs_to_select):
    """
        Calculate the distances between pairs of atoms

        :param residues: Pairs of atoms to calculate distances
        :type residues: list
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology (None if no topology is needed)
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
        :param trajs_to_select: Number of the reports to read, if don't want
            to select all
        :type trajs_to_select: set
    """
    # Build the name templates, the "%d" placeholder is left unformatted here
    if output_folder is None:
        report_stem = out_report_name
    else:
        report_stem = os.path.join(output_folder, out_report_name)
    outputFilename = report_stem + "_%d"
    trajName = "*traj*"
    reportName = "*report*_%d"
    # the label is built from the selection as given, before it is parsed
    distances_label = "\t".join(residues)
    residues = parse_selection(residues)

    n_workers = utilities.getCpuCount() if nProcessors is None else nProcessors
    n_workers = max(1, n_workers)
    print("Calculating distances with %d processors" % n_workers)

    epochs = utilities.get_epoch_folders(folder)
    top_obj = None if top is None else utilities.getTopologyObject(top)

    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            epoch_output = os.path.join(folder, epoch, outputFilename)
            files.extend(analysis_utils.process_folder(epoch, folder, trajName,
                                                       reportName, epoch_output,
                                                       top_obj, trajs_to_select))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName,
                                              reportName,
                                              os.path.join(folder, outputFilename),
                                              top_obj, trajs_to_select)

    print("Starting to process files!")
    pool = mp.Pool(n_workers)
    results = []
    for info in files:
        task_args = (info[0], info[2], residues, info[1], info[4], format_out,
                     new_report, info[3], distances_label)
        results.append(pool.apply_async(process_file, args=task_args))
    pool.close()
    pool.join()
    # get() re-raises any exception that happened in a worker
    for res in results:
        res.get()
def main(controlFile, trajName, reportName, folder, top, outputFilename, nProcessors, output_folder, format_str, new_report, trajs_to_select):
    """
        Calculate the corrected rmsd values of conformation taking into
        account molecule symmetries

        :param controlFile: Control file
        :type controlFile: str
        :param trajName: Base name of the trajectory files, "_*" is appended
            to it (if None, "*traj*" is used)
        :type trajName: str
        :param reportName: Base name of the report files, "_%d" is appended
            to it (if None, "report_%d" is used)
        :type reportName: str
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology (None if no topology is needed)
        :type top: str
        :param outputFilename: Name of the output file
        :type outputFilename: str
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param format_str: String with the format of the report
        :type format_str: str
        :param new_report: Whether to write rmsd to a new report file
        :type new_report: bool
        :param trajs_to_select: Selection of trajectories, forwarded as is to
            analysis_utils.process_folder
    """
    # Build the name patterns, the "%d" placeholder is left unformatted here
    trajName = "*traj*" if trajName is None else trajName + "_*"
    reportName = "report_%d" if reportName is None else reportName + "_%d"
    if output_folder is not None:
        outputFilename = os.path.join(output_folder, outputFilename)
    outputFilename = outputFilename + "_%d"

    n_workers = utilities.getCpuCount() if nProcessors is None else nProcessors
    n_workers = max(1, n_workers)
    print("Calculating RMSDs with %d processors" % n_workers)

    epochs = utilities.get_epoch_folders(folder)
    top_obj = None if top is None else utilities.getTopologyObject(top)

    # reference structure against which the rmsd is computed
    resname, nativeFilename, symmetries, rmsdColInReport = readControlFile(controlFile)
    nativePDB = atomset.PDB()
    nativePDB.initialise(nativeFilename, resname=resname)

    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            epoch_output = os.path.join(folder, epoch, outputFilename)
            files.extend(analysis_utils.process_folder(epoch, folder, trajName,
                                                       reportName, epoch_output,
                                                       top_obj, trajs_to_select))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName,
                                              reportName,
                                              os.path.join(folder, outputFilename),
                                              top_obj, trajs_to_select)

    pool = mp.Pool(n_workers)
    results = []
    for info in files:
        task_args = (nativePDB, resname, symmetries, rmsdColInReport, info[0],
                     info[1], info[2], info[3], info[4], format_str, new_report)
        results.append(pool.apply_async(calculate_rmsd_traj, args=task_args))
    pool.close()
    pool.join()
    # get() re-raises any exception that happened in a worker
    for res in results:
        res.get()
def main(folder_name=".", atom_Ids="", lig_resname="", numtotalSteps=0, enforceSequential_run=0, writeLigandTrajectory=True, setNumber=0, protein_CA=0, non_Repeat=False, nProcessors=None, parallelize=True, topology=None, sidechains=False, sidechain_folder=".", cm=False, use_extra_atoms=False, CM_mode="p-lig", calc_dihedrals=False, dihedrals_projection=False):
    """
        Extract coordinates from the trajectories of a simulation, optionally
        repeat the extracted snapshots and gather the resulting files

        All the arguments are stored in a ParamsHandler object, and the
        values are read back from it afterwards. Only the arguments whose use
        is visible in this function are described, the rest are handed over
        to the helper functions through the ParamsHandler object.

        :param folder_name: Path to the folder with the trajectories
        :type folder_name: str
        :param numtotalSteps: Value passed to repeatExtractedSnapshotsInFolder
        :type numtotalSteps: int
        :param enforceSequential_run: If set, only folder_name itself is
            processed instead of looking for epoch folders
        :type enforceSequential_run: bool
        :param writeLigandTrajectory: If set, the ligand trajectory folder is
            created when it does not exist
        :type writeLigandTrajectory: bool
        :param setNumber: Value passed to gatherTrajs
        :type setNumber: int
        :param non_Repeat: If set, the extracted snapshots are not repeated
        :type non_Repeat: bool
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param parallelize: Whether to use a pool of processes (ignored when
            PARALELLIZATION is False)
        :type parallelize: bool
        :param topology: Path to the topology (None if no topology is needed)
        :type topology: str
        :param sidechains: Whether to extract the sidechain indexes
        :type sidechains: bool
    """
    params = ParamsHandler(folder_name, atom_Ids, lig_resname, numtotalSteps,
                           enforceSequential_run, writeLigandTrajectory,
                           setNumber, protein_CA, non_Repeat, nProcessors,
                           parallelize, topology, sidechains, sidechain_folder,
                           cm, use_extra_atoms, CM_mode, calc_dihedrals,
                           dihedrals_projection)
    constants = Constants()

    if params.topology is not None:
        params.topology = utilities.getTopologyObject(params.topology)

    params.lig_resname = parseResname(params.atomIds, params.lig_resname,
                                      params.contact_map, params.cm_mode,
                                      params.dihedrals)

    folderWithTrajs = params.folder_name

    makeGatheredTrajsFolder(constants)

    if params.enforceSequential_run:
        folders = ["."]
    else:
        folders = utilities.get_epoch_folders(folderWithTrajs)
        if not folders:
            # no epoch folders, work directly on the given folder
            folders = ["."]

    # if multiprocess is not available, turn off parallelization
    params.parallelize &= PARALELLIZATION

    if params.parallelize:
        if params.nProcessors is None:
            params.nProcessors = utilities.getCpuCount()
        params.nProcessors = max(1, params.nProcessors)
        print("Running extractCoords with %d cores" % (params.nProcessors))
        pool = mp.Pool(params.nProcessors)
    else:
        pool = None

    try:
        params.sidechains = extractSidechainIndexes(params, pool=pool) if params.sidechains else []

        for folder_it in folders:
            pathFolder = os.path.join(folderWithTrajs, folder_it)
            print("Extracting coords from folder %s" % folder_it)
            ligand_trajs_folder = os.path.join(pathFolder, constants.ligandTrajectoryFolder)
            if params.writeLigandTrajectory and not os.path.exists(ligand_trajs_folder):
                os.makedirs(ligand_trajs_folder)
            writeFilenamesExtractedCoordinates(pathFolder, params, constants, pool=pool)
            if not params.non_Repeat:
                print("Repeating snapshots from folder %s" % folder_it)
                # NOTE(review): the pool is not handed over to this step,
                # which looks deliberate; kept as in the original
                repeatExtractedSnapshotsInFolder(pathFolder, constants, params.numtotalSteps, pool=None)
            print("Gathering trajs in %s" % constants.gatherTrajsFolder)
            gatherTrajs(constants, folder_it, params.setNumber, params.non_Repeat)
    finally:
        # release the worker processes, also when one of the steps fails
        if pool is not None:
            pool.close()
            pool.join()
def main(folder_name=".", atom_Ids="", lig_resname="", numtotalSteps=0, enforceSequential_run=0, writeLigandTrajectory=True, setNumber=0, protein_CA=0, non_Repeat=False, nProcessors=None, parallelize=True, topology=None, sidechains=False, sidechain_folder="."):
    """
        Extract coordinates from the trajectories of a simulation, optionally
        repeat the extracted snapshots and gather the resulting files

        :param folder_name: Path to the folder with the trajectories, its
            subfolders with a numeric name are treated as epochs
        :type folder_name: str
        :param atom_Ids: Atoms selection, passed to parseResname and to
            writeFilenamesExtractedCoordinates
        :param lig_resname: Ligand resname, passed through parseResname
        :type lig_resname: str
        :param numtotalSteps: Value passed to repeatExtractedSnapshotsInFolder
        :type numtotalSteps: int
        :param enforceSequential_run: If set, only folder_name itself is
            processed instead of looking for epoch folders
        :type enforceSequential_run: bool
        :param writeLigandTrajectory: If set, the ligand trajectory folder is
            created when it does not exist
        :type writeLigandTrajectory: bool
        :param setNumber: Value passed to gatherTrajs
        :type setNumber: int
        :param protein_CA: Value passed to writeFilenamesExtractedCoordinates
        :param non_Repeat: If set, the extracted snapshots are not repeated
        :type non_Repeat: bool
        :param nProcessors: Number of processors to use, if None the value
            returned by utilities.getCpuCount() is used
        :type nProcessors: int
        :param parallelize: Whether to use a pool of processes (ignored when
            PARALELLIZATION is False)
        :type parallelize: bool
        :param topology: Path to the topology (None if no topology is needed)
        :type topology: str
        :param sidechains: Whether to extract the sidechain indexes
        :type sidechains: bool
        :param sidechain_folder: Glob pattern of the files used to extract
            the sidechain indexes
        :type sidechain_folder: str
    """
    constants = Constants()

    if topology is not None:
        topology = getTopologyObject(topology)

    lig_resname = parseResname(atom_Ids, lig_resname)

    folderWithTrajs = folder_name

    makeGatheredTrajsFolder(constants)

    if enforceSequential_run:
        folders = ["."]
    else:
        # os.listdir returns the entries in arbitrary order, sort the epochs
        # numerically so that they are always processed in the same order
        folders = sorted((epoch for epoch in os.listdir(folderWithTrajs)
                          if epoch.isdigit()), key=int)
        if not folders:
            # no epoch folders, work directly on the given folder
            folders = ["."]

    # if multiprocess is not available, turn off parallelization
    parallelize &= PARALELLIZATION

    if parallelize:
        if nProcessors is None:
            nProcessors = utilities.getCpuCount()
        nProcessors = max(1, nProcessors)
        print("Running extractCoords with %d cores" % (nProcessors))
        pool = mp.Pool(nProcessors)
    else:
        pool = None

    try:
        sidechains = extractSidechainIndexes(glob.glob(sidechain_folder), lig_resname, topology=topology, pool=pool) if sidechains else []

        for folder_it in folders:
            pathFolder = os.path.join(folderWithTrajs, folder_it)
            print("Extracting coords from folder %s" % folder_it)
            ligand_trajs_folder = os.path.join(pathFolder, constants.ligandTrajectoryFolder)
            if writeLigandTrajectory and not os.path.exists(ligand_trajs_folder):
                os.makedirs(ligand_trajs_folder)
            writeFilenamesExtractedCoordinates(pathFolder, lig_resname, atom_Ids,
                                               writeLigandTrajectory, constants,
                                               protein_CA, sidechains, pool=pool,
                                               topology=topology)
            if not non_Repeat:
                print("Repeating snapshots from folder %s" % folder_it)
                # NOTE(review): the pool is not handed over to this step,
                # which looks deliberate; kept as in the original
                repeatExtractedSnapshotsInFolder(pathFolder, constants, numtotalSteps, pool=None)
            print("Gathering trajs in %s" % constants.gatherTrajsFolder)
            gatherTrajs(constants, folder_it, setNumber, non_Repeat)
    finally:
        # release the worker processes, also when one of the steps fails
        if pool is not None:
            pool.close()
            pool.join()