Esempio n. 1
0
def cluster_traject(resname,
                    trajToDistribute,
                    columnToChoose,
                    distance_contact,
                    clusterThreshold,
                    path_to_cluster,
                    output_path,
                    mapping_out,
                    epsilon=0.5,
                    report_basename="report",
                    condition="min",
                    metricweights="linear",
                    nclusters=5):
    """
        Cluster a set of trajectories with a contact-map accumulative
        clustering (Jaccard similarity, constant threshold), compute the
        epsilon degeneracy of the resulting clusters and write the spawning
        initial structures together with the processor mapping.

        :param resname: Residue name handed to the clustering object
        :param trajToDistribute: Second argument of the spawning ``calculate``
            call (presumably the number of trajectories to distribute)
        :param columnToChoose: Column of the report file used by the clustering
        :param distance_contact: Contact threshold distance of the contact map
        :param clusterThreshold: Value of the constant clustering threshold
        :param path_to_cluster: Path passed (as a single-element list) to the
            clustering ``cluster`` method
        :param output_path: Output folder, which is also used as temporary folder
        :param mapping_out: Location passed to
            ``utilities.writeProcessorMappingToDisk`` for the mapping file
        :param epsilon: Epsilon value stored in the spawning parameters
        :param report_basename: Base name of the report files
        :param condition: Condition stored in the spawning parameters
        :param metricweights: Metric weights stored in the spawning parameters
        :param nclusters: Number of clusters stored in the spawning parameters
    """
    # The output folder doubles as the temporary folder
    pathConstants = constants.OutputPathConstants(output_path)
    pathConstants.tmpFolder = output_path
    pathConstants.buildTmpFolderConstants(pathConstants.tmpFolder)
    utilities.makeFolder(pathConstants.tmpFolder)

    # Contact-map clustering with a fixed threshold and Jaccard similarity
    clusteringOptions = {
        "resname": resname,
        "reportBaseFilename": report_basename,
        "columnOfReportFile": columnToChoose,
        "contactThresholdDistance": distance_contact,
        "altSelection": True,
    }
    clusterer = clustering.ContactMapAccumulativeClustering(
        thresholdcalculator.ThresholdCalculatorConstant(value=clusterThreshold),
        clustering.CMSimilarityEvaluator("Jaccard"),
        **clusteringOptions)
    clusterer.cluster([path_to_cluster], ignoreFirstRow=True)

    # Fill in the spawning parameters in the same order as always
    params = spawning.SpawningParams()
    for attribute, value in (("reportFilename", report_basename),
                             ("epsilon", epsilon),
                             ("nclusters", nclusters),
                             ("metricWeights", metricweights),
                             ("condition", condition)):
        setattr(params, attribute, value)

    nullDensity = densitycalculator.NullDensityCalculator()
    spawner = spawning.EpsilonDegeneracyCalculator(params, nullDensity)
    clusterDegeneracy = spawner.calculate(clusterer.clusters,
                                          trajToDistribute, params)
    spawner.log()

    # Only the processor mapping of the written structures is needed
    writtenStructures = spawner.writeSpawningInitialStructures(
        pathConstants, clusterDegeneracy, clusterer, 0)
    _, procMapping = writtenStructures
    utilities.writeProcessorMappingToDisk(mapping_out,
                                          "processorMapping.txt",
                                          procMapping)
Esempio n. 2
0
def buildNewClusteringAndWriteInitialStructuresInRestart(firstRun, outputPathConstants, clusteringBlock,
                                                         spawningParams, spawningCalculator, simulationRunner, topologies, processManager):
    """
        Obtain the clustering object to continue a restarted simulation
        (reclustering previous trajectories if needed), write the initial
        structures for the epoch to run and share the processor mapping
        between all the processes.

        The master process computes the degeneracy, writes the structures and
        stores the processor mapping on disk; the rest of processes read the
        mapping from disk after the synchronization barrier.

        :param firstRun: New epoch to run
        :type firstRun: int
        :param outputPathConstants: Contains outputPath-related constants
        :type outputPathConstants: :py:class:`.OutputPathConstants`
        :param clusteringBlock: Contains the new clustering block
        :type clusteringBlock: json
        :param spawningParams: Spawning params
        :type spawningParams: :py:class:`.SpawningParams`
        :param spawningCalculator: Spawning calculator object
        :type spawningCalculator: :py:class:`.SpawningCalculator`
        :param simulationRunner: Simulation runner object
        :type simulationRunner: :py:class:`.SimulationRunner`
        :param topologies: Topology object containing the set of topologies needed for the simulation
        :type topologies: :py:class:`.Topology`
        :param processManager: Object to synchronize the possibly multiple processes
        :type processManager: :py:class:`.ProcessesManager`

        :returns: :py:class:`.Clustering`, str -- The clustering method to use
            in the adaptive sampling simulation (None in the non-master
            processes) and the initial structures filenames
    """
    mappingFilename = "procMapping.txt"
    tmpFolder = outputPathConstants.tmpFolder
    clusteringMethod = getWorkingClusteringObjectAndReclusterIfNecessary(
        firstRun, outputPathConstants, clusteringBlock, spawningParams,
        simulationRunner, topologies, processManager)

    if processManager.isMaster():
        clustersForSpawning = clusteringMethod.getClusterListForSpawning()
        workingProcessors = simulationRunner.getWorkingProcessors()
        degeneracy = spawningCalculator.calculate(clustersForSpawning,
                                                  workingProcessors, firstRun)
        spawningCalculator.log()
        writtenStructures = spawningCalculator.writeSpawningInitialStructures(
            outputPathConstants, degeneracy, clusteringMethod, firstRun,
            topologies=topologies)
        _, procMapping = writtenStructures
        utilities.writeProcessorMappingToDisk(tmpFolder, mappingFilename,
                                              procMapping)
    else:
        # only the master keeps the clustering object
        clusteringMethod = None

    # wait until the master has written the mapping to disk
    processManager.barrier()
    if not processManager.isMaster():
        procMapping = utilities.readProcessorMappingFromDisk(tmpFolder,
                                                             mappingFilename)

    # for compatibility with old data, missing entries become (0, 0, 0)
    procMapping = [(0, 0, 0) if entry is None else entry
                   for entry in procMapping]
    topologies.mapEpochTopologies(firstRun, procMapping)
    simulationRunner.updateMappingProcessors(procMapping)
    processManager.barrier()

    initialStructuresAsString = simulationRunner.createMultipleComplexesFilenames(
        simulationRunner.getWorkingProcessors(),
        outputPathConstants.tmpInitialStructuresTemplate,
        firstRun)
    return clusteringMethod, initialStructuresAsString
Esempio n. 3
0
def main(jsonParams, clusteringHook=None):
    """
        Main body of the adaptive sampling program.

        Validates and loads the control file, prepares (or restarts) the
        simulation and then iterates over the epochs: run the simulation,
        cluster the trajectories, compute the spawning degeneracy and write
        the initial structures for the next epoch. Work that must only be done
        once is executed by the master process, and the processes are
        synchronized with barriers.

        :param jsonParams: A string with the name of the control file to use
        :type jsonParams: str
        :param clusteringHook: Optional callable, invoked by the master process
            after clustering each epoch as ``clusteringHook(clusteringMethod,
            outputPathConstants, simulationRunner, epoch + 1)``
        :type clusteringHook: callable or None

        :raises InitialStructuresError: If no initial structures are found or
            if there are more initial structures than working processors
    """

    # Validate the control file and split it into its parameter blocks
    controlFileValidator.validate(jsonParams)
    generalParams, spawningBlock, simulationrunnerBlock, clusteringBlock = loadParams(
        jsonParams)

    # Build the spawning calculator and the simulation runner from their blocks
    spawningAlgorithmBuilder = spawning.SpawningAlgorithmBuilder()
    spawningCalculator = spawningAlgorithmBuilder.build(spawningBlock)

    runnerbuilder = simulationrunner.RunnerBuilder()
    simulationRunner = runnerbuilder.build(simulationrunnerBlock)

    # General parameters: restart defaults to True, debug and writeAll to
    # False and the native structure to an empty string
    restart = generalParams.get(blockNames.GeneralParams.restart, True)
    debug = generalParams.get(blockNames.GeneralParams.debug, False)
    outputPath = generalParams[blockNames.GeneralParams.outputPath]
    initialStructuresWildcard = generalParams[
        blockNames.GeneralParams.initialStructures]
    writeAll = generalParams.get(blockNames.GeneralParams.writeAllClustering,
                                 False)
    nativeStructure = generalParams.get(
        blockNames.GeneralParams.nativeStructure, '')
    resname = clusteringBlock[blockNames.ClusteringTypes.params].get(
        blockNames.ClusteringTypes.ligandResname)
    if resname is None:
        # check if resname is provided in the simulation block
        resname = simulationRunner.getResname()

    initialStructures = expandInitialStructuresWildcard(
        initialStructuresWildcard)
    if not initialStructures:
        raise InitialStructuresError("No initial structures found!!!")

    # There cannot be more initial structures than working processors
    if len(initialStructures) > simulationRunner.getWorkingProcessors():
        raise InitialStructuresError(
            "Error: More initial structures than Working Processors found!!!")

    if resname is not None:
        checkSymmetryDict(clusteringBlock, initialStructures, resname)

    outputPathConstants = constants.OutputPathConstants(outputPath)

    if not debug:
        # outside debug mode, register utilities.cleanup on the temporary
        # folder to be called at interpreter exit
        atexit.register(utilities.cleanup, outputPathConstants.tmpFolder)
    simulationRunner.unifyReportNames(
        spawningCalculator.parameters.reportFilename)
    utilities.makeFolder(outputPath)
    utilities.makeFolder(outputPathConstants.tmpFolder)
    utilities.makeFolder(outputPathConstants.topologies)
    processManager = ProcessesManager(outputPath,
                                      simulationRunner.getNumReplicas())
    # firstRun is the epoch to start from; None is handled below as
    # "nothing to restart from"
    firstRun = findFirstRun(outputPath,
                            outputPathConstants.clusteringOutputObject,
                            simulationRunner, restart)
    if processManager.isMaster():
        printRunInfo(restart, debug, simulationRunner, spawningCalculator,
                     clusteringBlock, outputPath, initialStructuresWildcard)
        saveInitialControlFile(jsonParams,
                               outputPathConstants.originalControlFile)
    processManager.barrier()
    # once the replicas are properly synchronized there is no need for the
    # process files, and erasing them allows us to restart simulations
    cleanProcessesFiles(processManager.syncFolder)

    topologies = utilities.Topology(outputPathConstants.topologies)
    if restart and firstRun is not None:
        # Restart: load the topology files already present in the output,
        # sorted with utilities.getTrajNum as key
        topology_files = glob.glob(
            os.path.join(outputPathConstants.topologies, "topology*.pdb"))
        topology_files.sort(key=utilities.getTrajNum)
        topologies.setTopologies(topology_files)
        if firstRun == 0:
            # restarting at epoch 0: build the mapping and the clustering as
            # in a new simulation
            createMappingForFirstEpoch(initialStructures, topologies,
                                       simulationRunner.getWorkingProcessors())
            clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInNewSimulation(
                debug, jsonParams, outputPathConstants, clusteringBlock,
                spawningCalculator.parameters, initialStructures,
                simulationRunner, processManager)
        else:
            clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInRestart(
                firstRun, outputPathConstants, clusteringBlock,
                spawningCalculator.parameters, spawningCalculator,
                simulationRunner, topologies, processManager)
        if processManager.isMaster():
            checkMetricExitConditionMultipleTrajsinRestart(
                firstRun, outputPathConstants.epochOutputPathTempletized,
                simulationRunner)
        processManager.barrier()

    if firstRun is None or not restart:
        # New simulation: either there is nothing to restart from or the
        # restart has been disabled
        topologies.setTopologies(initialStructures)
        if processManager.isMaster():
            if not debug:
                cleanPreviousSimulation(outputPath,
                                        outputPathConstants.allTrajsPath)
            writeTopologyFiles(initialStructures,
                               outputPathConstants.topologies)
        processManager.barrier()
        firstRun = 0  # if restart false, but there were previous simulations

        if simulationRunner.parameters.runEquilibration:
            # the equilibration replaces the initial structures
            initialStructures = simulationRunner.equilibrate(
                initialStructures, outputPathConstants,
                spawningCalculator.parameters.reportFilename, outputPath,
                resname, processManager, topologies)
            # write the equilibration structures for each replica
            processManager.writeEquilibrationStructures(
                outputPathConstants.tmpFolder, initialStructures)
            if processManager.isMaster(
            ) and simulationRunner.parameters.constraints:
                # write the new constraints for synchronization
                utilities.writeNewConstraints(
                    outputPathConstants.topologies, "new_constraints.txt",
                    simulationRunner.parameters.constraints)
            processManager.barrier()

            # the rest of processes pick up the constraints written by the
            # master
            if not processManager.isMaster(
            ) and simulationRunner.parameters.constraints:
                simulationRunner.parameters.constraints = utilities.readConstraints(
                    outputPathConstants.topologies, "new_constraints.txt")
            # read all the equilibration structures
            initialStructures = processManager.readEquilibrationStructures(
                outputPathConstants.tmpFolder)
            topologies.setTopologies(initialStructures,
                                     cleanFiles=processManager.isMaster())
            if processManager.isMaster():
                writeTopologyFiles(initialStructures,
                                   outputPathConstants.topologies)
            # ensure that topologies are written
            processManager.barrier()
            # reload the topologies from the files just written
            topology_files = glob.glob(
                os.path.join(outputPathConstants.topologies, "topology*.pdb"))
            topology_files.sort(key=utilities.getTrajNum)
            topologies.setTopologies(topology_files, cleanFiles=False)
        createMappingForFirstEpoch(initialStructures, topologies,
                                   simulationRunner.getWorkingProcessors())

        clusteringMethod, initialStructuresAsString = buildNewClusteringAndWriteInitialStructuresInNewSimulation(
            debug, jsonParams, outputPathConstants, clusteringBlock,
            spawningCalculator.parameters, initialStructures, simulationRunner,
            processManager)

    # Only the master process configures the clustering object
    if processManager.isMaster():
        repeat, numSteps = simulationRunner.getClusteringInfo()
        clusteringMethod.updateRepeatParameters(repeat, numSteps)
        clusteringMethod.setProcessors(simulationRunner.getWorkingProcessors())
    # With a moving box and no box center set, select the initial center from
    # the initial structures
    if simulationRunner.parameters.modeMovingBox is not None and simulationRunner.parameters.boxCenter is None:
        simulationRunner.parameters.boxCenter = simulationRunner.selectInitialBoxCenter(
            initialStructuresAsString, resname)
    # Main loop over the epochs, starting at the first epoch to run
    for i in range(firstRun, simulationRunner.parameters.iterations):
        if processManager.isMaster():
            utilities.print_unbuffered("Iteration", i)
            outputDir = outputPathConstants.epochOutputPathTempletized % i
            utilities.makeFolder(outputDir)

            simulationRunner.writeMappingToDisk(
                outputPathConstants.epochOutputPathTempletized % i)
            topologies.writeMappingToDisk(
                outputPathConstants.epochOutputPathTempletized % i, i)
            if i == 0:
                # write the object to file at the start of the first epoch, so
                # the topologies can always be loaded
                topologies.writeTopologyObject()
        processManager.barrier()
        if processManager.isMaster():
            utilities.print_unbuffered("Production run...")
        # in debug mode the simulation itself is skipped
        if not debug:
            simulationRunner.runSimulation(
                i, outputPathConstants, initialStructuresAsString, topologies,
                spawningCalculator.parameters.reportFilename, processManager)
        processManager.barrier()

        # Postprocessing and clustering of the epoch (master only)
        if processManager.isMaster():
            if simulationRunner.parameters.postprocessing:
                simulationRunner.processTrajectories(
                    outputPathConstants.epochOutputPathTempletized % i,
                    topologies, i)
            utilities.print_unbuffered("Clustering...")
            startTime = time.time()
            clusterEpochTrajs(clusteringMethod, i,
                              outputPathConstants.epochOutputPathTempletized,
                              topologies, outputPathConstants)
            endTime = time.time()
            utilities.print_unbuffered("Clustering ligand: %s sec" %
                                       (endTime - startTime))

            if clusteringHook is not None:
                clusteringHook(clusteringMethod, outputPathConstants,
                               simulationRunner, i + 1)
            clustersList = clusteringMethod.getClusterListForSpawning()
            # one flag per cluster; presumably True marks a cluster as kept
            # (not filtered out) -- TODO confirm against
            # mergeFilteredClustersAccordingToBox
            clustersFiltered = [True for _ in clusteringMethod]

        if simulationRunner.parameters.modeMovingBox is not None:
            # every process updates the box, only the master filters clusters
            simulationRunner.getNextIterationBox(
                outputPathConstants.epochOutputPathTempletized % i, resname,
                topologies, i)
            if processManager.isMaster():
                clustersList, clustersFiltered = clusteringMethod.filterClustersAccordingToBox(
                    simulationRunner.parameters)

        # Spawning: degeneracy calculation and clustering output (master only)
        if processManager.isMaster():
            if spawningCalculator.parameters.filterByMetric:
                clustersList, clustersFiltered = clusteringMethod.filterClustersAccordingToMetric(
                    clustersFiltered,
                    spawningCalculator.parameters.filter_value,
                    spawningCalculator.parameters.condition,
                    spawningCalculator.parameters.filter_col)

            degeneracyOfRepresentatives = spawningCalculator.calculate(
                clustersList,
                simulationRunner.getWorkingProcessors(),
                i,
                outputPathConstants=outputPathConstants)
            spawningCalculator.log()
            # this method only works with MSM-based spawning methods,
            # creating a plot of the stationary distribution and the PMF, for
            # the rest of methods it does nothing
            spawningCalculator.createPlots(outputPathConstants, i,
                                           clusteringMethod)

            if degeneracyOfRepresentatives is not None:
                # when clusters were filtered, merge the degeneracy with the
                # filter flags; the assert below requires one entry per
                # cluster of the clustering object
                if simulationRunner.parameters.modeMovingBox is not None or spawningCalculator.parameters.filterByMetric:
                    degeneracyOfRepresentatives = mergeFilteredClustersAccordingToBox(
                        degeneracyOfRepresentatives, clustersFiltered)
                utilities.print_unbuffered("Degeneracy",
                                           degeneracyOfRepresentatives)
                assert len(degeneracyOfRepresentatives) == len(
                    clusteringMethod)
            else:
                # When using null or independent spawning the calculate method returns None
                assert spawningCalculator.type in spawningTypes.SPAWNING_NO_DEGENERACY_TYPES, "calculate returned None with spawning type %s" % spawningTypes.SPAWNING_TYPE_TO_STRING_DICTIONARY[
                    spawningCalculator.type]

            clusteringMethod.writeOutput(
                outputPathConstants.clusteringOutputDir % i,
                degeneracyOfRepresentatives,
                outputPathConstants.clusteringOutputObject % i, writeAll)
            simulationRunner.cleanCheckpointFiles(
                outputPathConstants.epochOutputPathTempletized % i)

            if i > 0:
                # Remove old clustering object, since we already have a newer one
                try:
                    os.remove(outputPathConstants.clusteringOutputObject %
                              (i - 1))
                except OSError:
                    # In case of restart
                    pass

        # Prepare for next pele iteration
        if i != simulationRunner.parameters.iterations - 1:
            # Differentiate between null spawning and the rest of spawning
            # methods
            if spawningCalculator.shouldWriteStructures():
                # the master writes the structures and the processor mapping,
                # the rest of processes read the mapping after the barrier
                if processManager.isMaster():
                    _, procMapping = spawningCalculator.writeSpawningInitialStructures(
                        outputPathConstants,
                        degeneracyOfRepresentatives,
                        clusteringMethod,
                        i + 1,
                        topologies=topologies)
                    utilities.writeProcessorMappingToDisk(
                        outputPathConstants.tmpFolder, "processMapping.txt",
                        procMapping)
                processManager.barrier()
                if not processManager.isMaster():
                    procMapping = utilities.readProcessorMappingFromDisk(
                        outputPathConstants.tmpFolder, "processMapping.txt")
                simulationRunner.updateMappingProcessors(procMapping)
                topologies.mapEpochTopologies(i + 1, procMapping)
                initialStructuresAsString = simulationRunner.createMultipleComplexesFilenames(
                    simulationRunner.getWorkingProcessors(),
                    outputPathConstants.tmpInitialStructuresTemplate, i + 1)

        if processManager.isMaster():
            topologies.writeTopologyObject()
            # the reports are only fixed when the clustering has symmetries
            # and a native structure was provided
            if clusteringMethod.symmetries and nativeStructure:
                fixReportsSymmetry(
                    outputPathConstants.epochOutputPathTempletized % i,
                    resname, nativeStructure, clusteringMethod.symmetries,
                    topologies)

            # check exit condition, if defined
            if simulationRunner.hasExitCondition():
                if simulationRunner.checkExitCondition(
                        clusteringMethod,
                        outputPathConstants.epochOutputPathTempletized % i):
                    utilities.print_unbuffered(
                        "Simulation exit condition met at iteration %d, stopping"
                        % i)
                    # send a signal to all possible adaptivePELE copies to stop
                    for pid in processManager.lockInfo:
                        if pid != processManager.pid:
                            os.kill(pid, signal.SIGTERM)
                    # the master leaves the loop without reaching the final
                    # barrier; the other copies have just been sent SIGTERM
                    break
                else:
                    utilities.print_unbuffered(
                        "Simulation exit condition not met at iteration %d, continuing..."
                        % i)
        processManager.barrier()