Example #1
    def load_memory_state(self, projectinfo_file):
        """
        Loads the 'memory state' from a serialized file on disk.

        Parameters
        ----------
        projectinfo_file : str
            The file on disk from which to read.

        Notes
        -----
        When reading the memory state, we have to decode from base16 and
        also remove the leading 'a' characters.

        See Also
        --------
        save_memory_state
        """

        logger.info("Loading memory state from: %s", projectinfo_file)

        project_info = Project.load_from_hdf(projectinfo_file)
        self.memory = cPickle.loads(project_info["Memory"])

        return
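
The Notes above mention a base16 decode and the stripping of leading 'a' characters, a step this method itself does not perform (presumably it happens wherever the individual keys are consumed). A minimal sketch of what such a decode helper could look like; the helper name is hypothetical, not part of the original API:

    import base64

    def _decode_memory_key(encoded_key):
        # Hypothetical helper: drop the leading 'a' sentinel, then decode the
        # base16 (hex) payload back into the original key, which may contain
        # '/' characters that the HDF5 serializer would otherwise reject.
        return base64.b16decode(encoded_key[1:], casefold=True)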
Example #2
    def save_memory_state(self):
        """
        Saves the 'memory state' to disk in a serialized format.

        Notes
        -----
        When saving, we encode the keys into base16 with a leading 'a',
        because the HDF5 serializer doesn't like '/' characters and is
        quite picky in general.

        See Also
        --------
        load_memory_state
        """

        project_info = Project.load_from_hdf(self.projectinfo_file)
        project_info["Memory"] = cPickle.dumps(self.memory)
        project_info.save_to_hdf(self.projectinfo_file, do_file_check=False)

        return
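
The encoding described in the Notes is the mirror image of the decode above: hex-encode the key, then prepend an 'a' sentinel. A minimal sketch under the same assumptions (hypothetical helper name; Python 2 string semantics, as the use of cPickle implies):

    import base64

    def _encode_memory_key(raw_key):
        # Hypothetical helper: base16-encode the key so that '/' and other
        # characters the HDF5 serializer rejects disappear, then prepend 'a'
        # so the result never begins with a digit.
        return 'a' + base64.b16encode(raw_key)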
Example #3
    def write_all_trajectories(self,
                               input_dir,
                               output_dir,
                               stride,
                               max_rmsd,
                               min_gens,
                               center_conformations,
                               num_proc,
                               input_style,
                               update=False):
        """
        Convert all of the trajectories in the FAH project in `input_dir` to
        lh5 trajectory files, which will be placed in `output_dir`.

        If the `update` flag is set, the memory object is used to check for
        previously converted data, and only new data are added (rather than
        reconverting everything). This functionality can be invoked more
        cleanly through the update_trajectories() method.

        Parameters
        ----------
        input_dir : str
            The directory to look for XTC/DCD files in.

        output_dir : str
            The place to write the converted lh5s

        stride : int
            The size of the stride to employ. E.g., if stride = 3, the script
            keeps every 3rd MD snapshot from the original data. Useful to throw
            away highly correlated data if snapshots were saved frequently.

        max_rmsd : float
            Throw away any data that is further than `max_rmsd` (in nm) from the
            pdb file associated with the project. This is used as a sanity check
            to prevent including, e.g., data from a simulation that is blowing up.

        min_gens : int
            Discard trajectories with fewer than `min_gens` generations.

        center_conformations : bool
            Whether to center the converted (lh5) conformations.

        num_proc : int
            Number of processors to employ. Note that this function is typically
            I/O limited, so parallelism is unlikely to yield much gain.

        input_style : {'FAH', 'FILE'}
            If you use input_style = 'FAH', this code uses knowledge of the
            RUN*/CLONE* directory structure to yield all the CLONE directories.
            If you use input_style = 'FILE', this code uses os.walk() which is
            A LOT slower because it has to stat every file, but is capable of
            recursively searching for xtc files to arbitrary depths.

        update : bool
            If `True`, tries to figure out what data has already been converted
            by reading the "memory state" in the provided ProjectInfo file, and
            only converts new data. If `False`, performs a fresh conversion.

        Notes
        -----
        Since a conversion sometimes fails, we collect all trajectories at the
        end and renumber them so that the numbering is contiguous.
        """

        if update:
            assert os.path.exists(output_dir)
        else:
            try:
                os.mkdir(output_dir)
            except OSError:
                logger.error('The directory %s already exists', output_dir)
                sys.exit(1)

        intermediate_filename_root = '_trj'  # A placeholder name

        # dtm does not play nice with OpenMP
        use_parallel_rmsd = (num_proc != 'use_dtm_instead')

        jobs = []
        for i, clone_dir in enumerate(
                self.yield_xtc_directories(input_dir, input_style)):

            job = {
                'clone_dir': clone_dir,
                'output_dir': output_dir,
                'pdb_file': self.pdb_topology,
                'trajectory_number': i,
                'stride': stride,
                'max_rmsd': max_rmsd,
                'min_gens': min_gens,
                'center_conformations': center_conformations,
                'memory_check': update,
                'omp_parallel_rmsd': use_parallel_rmsd
            }
            jobs.append(job)

        if len(jobs) == 0:
            raise RuntimeError('No conversion jobs found!')

        if num_proc == 'use_dtm_instead':
            # use DTM mpi parallel map
            dtm.map(self.write_trajectory_mapper, jobs)
        elif num_proc > 1:
            # use multiprocessing
            pool = Pool(processes=num_proc)
            pool.map(self.write_trajectory_mapper, jobs)
        else:
            # use regular serial execution
            map(self.write_trajectory_mapper, jobs)

        # Rename trajectory files such that they have contiguous numbering
        logger.info("Finished generating trajectories; renaming them now in "
                    "contiguous order")
        mapping = {}  # document the directory changes, allowing us to update memory
        for i, filename in enumerate(sorted(os.listdir(output_dir),
                                            key=keynat)):
            path = os.path.join(output_dir, filename)
            new_path = os.path.join(output_dir, "trj%d.lh5" % i)
            os.rename(path, new_path)
            mapping[path] = new_path

        # update the memory hash to account for our renumbering
        for key in self.memory.keys():
            if key not in ['convert_parameters', 'SerializerFilename']:
                logger.info("%s --> %s", self.memory[key][0],
                            mapping[self.memory[key][0]])
                self.memory[key][0] = mapping[self.memory[key][0]]

        # save the parameters used for this run in the memory file, and write to disk
        logger.info("Generating Project File: %s", self.projectinfo_file)
        if update:
            try:
                # if we are updating, just start with a fresh slate
                os.remove(self.projectinfo_file)
            except OSError:
                pass

        self.memory['convert_parameters'] = (input_dir, output_dir, stride,
                                             max_rmsd, min_gens,
                                             center_conformations, num_proc,
                                             self.projectinfo_file,
                                             input_style)

        Project.CreateProjectFromDir(Filename=self.projectinfo_file,
                                     TrajFilePath=output_dir,
                                     TrajFileBaseName='trj',
                                     TrajFileType='.lh5',
                                     ConfFilename=self.pdb_topology,
                                     initial_memory=cPickle.dumps(self.memory))

        logger.info("Data converted properly.")

        return
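
A minimal usage sketch for the converter above. The FahProject class name and its constructor arguments are illustrative assumptions; only the write_all_trajectories() signature comes from the example itself:

    # Assumed setup: a FahProject-like object exposing write_all_trajectories()
    fah_project = FahProject(pdb_topology='native.pdb',
                             projectinfo_file='ProjectInfo.h5')

    fah_project.write_all_trajectories(input_dir='PROJ1234',
                                       output_dir='Trajectories',
                                       stride=3,                # keep every 3rd snapshot
                                       max_rmsd=1.5,            # nm; sanity cutoff
                                       min_gens=1,
                                       center_conformations=True,
                                       num_proc=1,              # serial; conversion is I/O bound
                                       input_style='FAH')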
Example #4
to use GetRandomConfs.py""")
    parser.add_argument('project')
    parser.add_argument('assignments', default='Data/Assignments.Fixed.h5')
    parser.add_argument(
        'conformations_per_state',
        default=5,
        type=int,
        help='Number of conformations to sample from each state')
    parser.add_argument(
        'states',
        nargs='+',
        type=int,
        help='''Which states to sample from. Pass a list of integers, separated
        by whitespace. To specify ALL of the states (although the script
        GetRandomConfs.py is more efficient for this purpose), pass the
        integer -1.''')
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    if -1 in args.states:
        logger.info("Ripping PDBs for all states")
        args.states = 'all'

    try:
        assignments = io.loadh(args.assignments, 'arr_0')
    except KeyError:
        assignments = io.loadh(args.assignments, 'Data')
    project = Project.load_from(args.project)

    run(project, assignments, args.conformations_per_state, args.states,
        args.output_dir)
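
A possible command-line invocation for the script above (the script filename is hypothetical; the positional order follows the add_argument calls: project, assignments, conformations_per_state, states..., output_dir):

    python SaveStructuresByState.py ProjectInfo.h5 Data/Assignments.Fixed.h5 5 0 3 11 PDBs

Here argparse assigns states=[0, 3, 11] and output_dir='PDBs', since the trailing fixed positional is satisfied before the greedy nargs='+' group consumes the remaining arguments.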