Code example #1
    def check_filename(self, filename):
        """
        Check whether a filename has already been recorded in the wrapped
        HDF5 file.

        :param filename: the filename to look up
        :return: True if the filename is listed in processed_filenames
        """
        return six.b(filename) in self.file._handle.root.processed_filenames
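
The membership test above works because PyTables stores the filenames as fixed-width byte strings, so the query must be converted to bytes (via six.b) on Python 3. A minimal, self-contained sketch of the same pattern using PyTables directly (the file name demo.h5 and its contents are illustrative only):

import six
import tables

# Create an extendable array of fixed-width strings to track processed files.
with tables.open_file("demo.h5", mode="w") as handle:
    processed = handle.create_earray(where="/", name="processed_filenames",
                                     atom=tables.StringAtom(1024), shape=(0,))
    processed.append([b"results-000.tar.bz2"])

with tables.open_file("demo.h5", mode="r") as handle:
    # PyTables yields bytes, so encode the query string before the test.
    print(six.b("results-000.tar.bz2") in handle.root.processed_filenames)  # True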
Code example #2
def hdf5_concatenate_core17(job_tuple):
    """Concatenate tar bzipped XTC files created by Folding@Home Core17.

    Parameters
    ----------
    job_tuple : tuple
        A (proj_folder, top_folder, db_name, run, clone) tuple giving the
        project directory, topology directory, database name, RUN index,
        and CLONE index to process.

    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """

    proj_folder, top_folder, db_name, run, clone = job_tuple
    path = os.path.join(proj_folder, "RUN%d/CLONE%d/" % (run, clone))
    top = md.load(os.path.join(top_folder, "%d.pdb" % run))
    output_filename = os.path.join(proj_folder, "trajectories/%d_%d.hdf5" % (run, clone))

    glob_input = os.path.join(path, "results-*.tar.bz2")
    filenames = glob.glob(glob_input)
    filenames = sorted(filenames, key=keynat)

    if len(filenames) <= 0:
        return

    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    try:
        trj_file._create_earray(where='/', name='processed_filenames', atom=trj_file.tables.StringAtom(1024), shape=(0,))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        pass

    for filename in filenames:
        if six.b(filename) in trj_file._handle.root.processed_filenames:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            print("Already processed %s" % filename)
            continue
        with enter_temp_directory():
            print("Processing %s" % filename)
            archive = tarfile.open(filename, mode='r:bz2')
            try:
                archive.extract("positions.xtc")
                trj = md.load("positions.xtc", top=top)
                for frame in trj:
                    trj_file.write(coordinates=frame.xyz, cell_lengths=frame.unitcell_lengths, cell_angles=frame.unitcell_angles)
                # Record the packet once, after all of its frames have been written.
                trj_file._handle.root.processed_filenames.append([filename])
            except Exception:
                # Something is wrong with the current trajectory file; warn and return immediately.
                warnings.warn("Problem at %s. Stopping trajectory here." % filename)
                return
    return
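
The sort key keynat used above is not defined in these listings; from context it is a natural-sort key, so that results-10.tar.bz2 sorts after results-9.tar.bz2. A hedged sketch of such a key (the exact implementation is an assumption):

import re

def keynat(string):
    # Split into alternating text and integer chunks so numeric parts
    # compare as numbers; safe here because all inputs share the
    # "results-*.tar.bz2" pattern.
    return [int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split(r"(\d+)", string)]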
Code example #3
File: fah.py Project: jchodera/fahmunge
def concatenate_ocore(path, top_filename, output_filename):
    """Concatenate XTC files created by Siegetank OCore.

    Parameters
    ----------
    path : str
        Path to stream directory containing frame directories /0, /1, /2
        etc.
    top_filename : str
        Filepath to read Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.

    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """
    # Open topology file.
    top = md.load(top_filename % vars())

    sorted_folders = sorted(os.listdir(path), key=lambda value: int(value))
    sorted_folders = [os.path.join(path, folder) for folder in sorted_folders]

    if len(sorted_folders) <= 0:
        return

    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    try:
        trj_file._create_earray(where='/',
                                name='processed_folders',
                                atom=trj_file.tables.StringAtom(1024),
                                shape=(0, ))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        pass

    for folder in sorted_folders:
        if six.b(folder) in trj_file._handle.root.processed_folders:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            print("Already processed %s" % folder)
            continue
        print("Processing %s" % folder)
        xtc_filename = os.path.join(folder, "frames.xtc")
        trj = md.load(xtc_filename, top=top)

        for frame in trj:
            trj_file.write(coordinates=frame.xyz,
                           cell_lengths=frame.unitcell_lengths,
                           cell_angles=frame.unitcell_angles,
                           time=frame.time)

        trj_file._handle.root.processed_folders.append([folder])
Code example #4
File: fah.py Project: kyleabeauchamp/FAHMunge
def concatenate_core17(path, top, output_filename):
    """Concatenate tar bzipped XTC files created by Folding@Home Core17.
    
    Parameters
    ----------
    path : str
        Path to directory containing "results-*.tar.bz2".  E.g. a single CLONE directory.
    top : mdtraj.Topology
        Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.
    
    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """
    glob_input = os.path.join(path, "results-*.tar.bz2")
    filenames = glob.glob(glob_input)
    filenames = sorted(filenames, key=keynat)

    if len(filenames) <= 0:
        return

    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    try:
        trj_file._create_earray(where='/',
                                name='processed_filenames',
                                atom=trj_file.tables.StringAtom(1024),
                                shape=(0, ))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        pass

    for filename in filenames:
        if six.b(filename) in trj_file._handle.root.processed_filenames:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            print("Already processed %s" % filename)
            continue
        with enter_temp_directory():
            print("Processing %s" % filename)
            archive = tarfile.open(filename, mode='r:bz2')
            archive.extract("positions.xtc")
            trj = md.load("positions.xtc", top=top)

            for frame in trj:
                trj_file.write(coordinates=frame.xyz,
                               cell_lengths=frame.unitcell_lengths,
                               cell_angles=frame.unitcell_angles)

            trj_file._handle.root.processed_filenames.append([filename])
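
enter_temp_directory is used by several of these examples but never shown. A plausible sketch, assuming it is a context manager that switches into a fresh scratch directory and restores everything on exit (this is a guess at the helper, not the project's actual code):

import contextlib
import os
import shutil
import tempfile

@contextlib.contextmanager
def enter_temp_directory():
    # Create a scratch directory, cd into it, and clean up afterwards.
    original_dir = os.getcwd()
    temp_dir = tempfile.mkdtemp()
    os.chdir(temp_dir)
    try:
        yield temp_dir
    finally:
        os.chdir(original_dir)
        shutil.rmtree(temp_dir, ignore_errors=True)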
Code example #5
    def validate_filename(self, index, filename, filenames):
        """
        :param index: Index of the file we are working on
        :param filename: The filename
        :param filenames: List of filenames
        :return: True if the file at index-1 is already in
            processed_filenames. This is to ensure trajectory
            continuity.
        """
        if index == 0:
            return True
        else:
            f_path, fname = os.path.split(filename)
            exp1 = os.path.join(f_path, "results-%.3d.tar.bz2" % index)
            exp2 = os.path.join(f_path, "results%d" % index)

            exp1_min_1 = os.path.join(f_path, "results-%.3d.tar.bz2" % (index - 1))
            exp2_min_1 = os.path.join(f_path, "results%d" % (index - 1))
            return ((six.b(exp1_min_1) in
                     self.file._handle.root.processed_filenames and
                     exp1 == filename) or
                    (six.b(exp2_min_1) in
                     self.file._handle.root.processed_filenames and
                     exp2 == filename))
Code example #6
File: fah.py Project: choderalab/fahmunge
def concatenate_ocore(path, top_filename, output_filename):
    """Concatenate XTC files created by Siegetank OCore.

    Parameters
    ----------
    path : str
        Path to stream directory containing frame directories /0, /1, /2
        etc.
    top_filename : str
        Filepath to read Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.

    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """
    # Open topology file.
    top = md.load(top_filename % vars())

    sorted_folders = sorted(os.listdir(path), key=lambda value: int(value))
    sorted_folders = [os.path.join(path, folder) for folder in sorted_folders]

    if len(sorted_folders) <= 0:
        return

    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    try:
        trj_file._create_earray(where='/', name='processed_folders', atom=trj_file.tables.StringAtom(1024), shape=(0,))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        pass

    for folder in sorted_folders:
        if six.b(folder) in trj_file._handle.root.processed_folders:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            print("Already processed %s" % folder)
            continue
        print("Processing %s" % folder)
        xtc_filename = os.path.join(folder, "frames.xtc")
        trj = md.load(xtc_filename, top=top)

        for frame in trj:
            trj_file.write(coordinates=frame.xyz, cell_lengths=frame.unitcell_lengths, cell_angles=frame.unitcell_angles, time=frame.time)

        trj_file._handle.root.processed_folders.append([folder])
Code example #7
File: fah.py Project: dr-nate/FAHMunge
def concatenate_core17(path, top, output_filename):
    """Concatenate tar bzipped XTC files created by Folding@Home Core17.
    
    Parameters
    ----------
    path : str
        Path to directory containing "results-*.tar.bz2".  E.g. a single CLONE directory.
    top : mdtraj.Topology
        Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.
    
    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """
    glob_input = os.path.join(path, "results-*.tar.bz2")
    filenames = glob.glob(glob_input)
    filenames = sorted(filenames, key=keynat)
    
    if len(filenames) <= 0:
        return
    
    trj_file = HDF5TrajectoryFile(output_filename, mode='a')
    
    try:
        trj_file._create_earray(where='/', name='processed_filenames', atom=trj_file.tables.StringAtom(1024), shape=(0,))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        pass

    for filename in filenames:
        if six.b(filename) in trj_file._handle.root.processed_filenames:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            print("Already processed %s" % filename)
            continue
        with enter_temp_directory():
            print("Processing %s" % filename)
            archive = tarfile.open(filename, mode='r:bz2')
            archive.extract("positions.xtc")
            trj = md.load("positions.xtc", top=top)

            for frame in trj:
                trj_file.write(coordinates=frame.xyz, cell_lengths=frame.unitcell_lengths, cell_angles=frame.unitcell_angles, time=frame.time)
            
            trj_file._handle.root.processed_filenames.append([filename])
Code example #8
File: fah.py Project: choderalab/fahmunge
def concatenate_core17(path, top_filename, output_filename, maxtime=None, maxpackets=None):
    """Concatenate tar bzipped XTC files created by Folding@Home Core17.
    This version accepts only filenames and paths.

    Parameters
    ----------
    path : str
        Path to directory containing "results-*.tar.bz2".  E.g. a single CLONE directory.
    top_filename : str
        Filepath to read Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.
    maxpackets : int, optional, default=None
        If specified, will stop processing after `maxpackets` results packets have been processed
    maxtime : int, optional, default=None
        If specified, will stop processing after `maxtime` seconds have passed.

    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """

    # Open topology file.
    top = md.load(top_filename % vars())

    # Glob file paths and return result files in sequential order.
    glob_input = os.path.join(path, "results-*.tar.bz2")
    filenames = glob.glob(glob_input)
    filenames = natsorted(filenames)

    print("Concatenating XTC files from '%s' into '%s' [%d results packets found]" % (path, output_filename, len(filenames)))

    # If no result files are present, return.
    if len(filenames) <= 0:
        del top
        return

    # Check integrity of trajectory if it exists.
    delete_trajectory_if_broken(output_filename)

    # Open trajectory for appending.
    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    MAX_FILEPATH_LENGTH = 1024 # Is this large enough?
    try:
        # TODO: Store MD5 hashes instead of filenames?
        trj_file._create_earray(where='/', name='processed_filenames', atom=trj_file.tables.StringAtom(MAX_FILEPATH_LENGTH), shape=(0,))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        # Object already exists; skip ahead.
        pass

    result_packets_processed = 0
    initial_time = time.time()
    try:
        for filename in filenames:
            # Check that we haven't violated our filename length assumption
            if len(filename) > MAX_FILEPATH_LENGTH:
                msg = "Filename is longer than hard-coded MAX_FILEPATH_LENGTH limit (%d > %d). Increase MAX_FILEPATH_LENGTH and rebuild." % (len(filename), MAX_FILEPATH_LENGTH)
                print(msg)
                raise Exception(msg)
            # Check if we have already processed this file
            if six.b(filename) in trj_file._handle.root.processed_filenames:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
                print("Already processed %s" % filename)
                continue
            # Extract frames from trajectory in a temporary directory
            absfilename = os.path.abspath(filename)
            with enter_temp_directory():
                # Extract frames
                archive = tarfile.open(absfilename, mode='r:bz2')
                archive.extract("positions.xtc")
                trj = md.load("positions.xtc", top=top)
                print("   appending %d frames from '%s' to '%s'" % (trj.n_frames, filename, output_filename))
                for frame in trj:
                    trj_file.write(coordinates=frame.xyz, cell_lengths=frame.unitcell_lengths, cell_angles=frame.unitcell_angles, time=frame.time)
                os.unlink("positions.xtc")
                del archive, trj

                # Append list of processed files
                trj_file._handle.root.processed_filenames.append([filename])

                # Flush data
                trj_file.flush()

            # Track statistics on processed packets
            elapsed_time = time.time() - initial_time
            result_packets_processed += 1

            # Return if we have processed the requested number of results packets
            # or exceeded the allotted processing time.
            if maxpackets and (result_packets_processed >= maxpackets):
                break
            if maxtime and (elapsed_time >= maxtime):
                break

    except RuntimeError:
        print("Cannot munge %s due to damaged XTC %s or mismatch with topology file." % (path, filename))

    # Clean up.
    trj_file.close()
    del top, trj_file
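
delete_trajectory_if_broken is called above but not shown in any listing. A hedged sketch, assuming it merely verifies that the existing HDF5 trajectory can be opened and read, deleting it otherwise so it is rebuilt from scratch (not the project's actual implementation):

import os
from mdtraj.formats import HDF5TrajectoryFile

def delete_trajectory_if_broken(filename):
    if not os.path.exists(filename):
        return
    try:
        # Attempt to open and read a single frame as an integrity check.
        with HDF5TrajectoryFile(filename, mode='r') as f:
            f.read(n_frames=1)
    except Exception:
        print("Removing broken trajectory %s" % filename)
        os.unlink(filename)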
Code example #9
File: core21.py Project: choderalab/fahmunge
def process_core21_clone(clone_path, topology_filename, processed_trajectory_filename, atom_selection_string, terminate_event=None, delete_on_unpack=False, compress_xml=False, chunksize=10, signal_handler=None):
    """
    Process core21 result packets in a CLONE, concatenating to a specified trajectory.
    This will append to the specified trajectory if it already exists.

    Note
    ----
    * ws9 stores core21 result packets in an uncompressed directory. Original result packets are left untouched.
    * ws8 and earlier versions store result packets in compressed archives; this method will safely unpack them and remove the original compressed files.
    * An exception will be raised if something goes wrong with processing. The calling process will have to catch this and abort CLONE processing.

    Parameters
    ----------
    clone_path : str
        Source path to CLONE data directory
    topology_filename : str
        Path to PDB or other file containing topology information
    processed_trajectory_filename : str
        Path to concatenated stripped trajectory
    atom_selection_string : str
        MDTraj DSL specifying which atoms should be stripped from source WUs.
    terminate_event : multiprocessing.Event, optional, default=None
        If specified, will terminate early if terminate_event.is_set() is True
    delete_on_unpack : bool, optional, default=False
        If True, will delete old ws8-style .tar.bz2 files after they have been unpacked.
        WARNING: THIS COULD BE DANGEROUS
    compress_xml : bool, optional, default=False
        If True, will compress XML files after unpacking them.
    chunksize : int, optional, default=10
        Chunksize (in number of frames) to use for mdtraj.iterload reading of trajectory
    signal_handler : SignalHandler, optional, default=None
        If None, a new SignalHandler object will be created.

    TODO
    ----
    * Add unpacking step to support ws9
    * Include a safer way to substitute vars()

    """
    # Check for early termination since topology reading might take a while
    if terminate_event and terminate_event.is_set():
        return

    MAX_FILEPATH_LENGTH = 1024 # MAXIMUM FILEPATH LENGTH; this may be too short for some installations

    if not signal_handler:
        signal_handler = SignalHandler()

    # Read the topology for the source WU
    # TODO: Only read topology if we have not processed all the WU packets
    # TODO: Use LRU cache to cache work_unit_topology based on filename
    print('Reading topology from %s for clone %s...' % (topology_filename, clone_path))
    top = md.load(topology_filename)
    work_unit_topology = copy.deepcopy(top.topology) # extract topology
    del top # close file

    # Check for early termination since topology reading might take a while
    if terminate_event and terminate_event.is_set():
        return

    # Determine atoms that will be written to trajectory
    atom_indices = work_unit_topology.select(atom_selection_string)

    # Create a new Topology for the atom subset to be written to the trajectory
    trajectory_topology = work_unit_topology.subset(atom_indices)

    # Glob file paths and return result files in sequential order.
    result_packets = list_core21_result_packets(clone_path)

    # Return if there are no WUs to process
    if len(result_packets) <= 0:
        return

    # Open trajectory for appending
    trj_file = HDF5TrajectoryFile(processed_trajectory_filename, mode='a')

    # Initialize new trajectory with topology and list of processed WUs if they are absent
    try:
        # TODO: Switch from pytables StringAtom to arbitrary-length string
        # http://www.pytables.org/usersguide/datatypes.html
        trj_file._create_earray(where='/', name='processed_folders', atom=trj_file.tables.StringAtom(MAX_FILEPATH_LENGTH), shape=(0,))
        trj_file.topology = trajectory_topology # assign topology
    except trj_file.tables.NodeError:
        pass

    # Process each WU, checking whether signal has been received after each.
    for result_packet in result_packets:
        # Stop processing if signal handler indicates we should terminate
        if (signal_handler.terminate) or (terminate_event and terminate_event.is_set()):
            break

        # Skip this WU if we have already processed it
        if six.b(result_packet) in trj_file._handle.root.processed_folders:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            continue

        # If the result packet is compressed, decompress it and return the new directory name
        result_packet = ensure_result_packet_is_decompressed(result_packet, work_unit_topology, delete_on_unpack=delete_on_unpack, compress_xml=compress_xml)

        # Check that we haven't violated our filename length assumption
        if len(result_packet) > MAX_FILEPATH_LENGTH:
            msg = "Filename is longer than hard-coded MAX_FILEPATH_LENGTH limit (%d > %d). Increase MAX_FILEPATH_LENGTH and re-install." % (len(result_packet), MAX_FILEPATH_LENGTH)
            print(msg)
            raise Exception(msg)

        # Stop processing if signal handler indicates we should terminate
        if (signal_handler.terminate) or (terminate_event and terminate_event.is_set()):
            break

        # Process the work unit
        # TODO: Write to logger instead of printing to terminal
        # TODO: We could conceivably also check for early termination in the chunk loop if we carefully track the last chunk processed as well.
        print("   Processing %s" % result_packet)
        xtc_filename = os.path.join(result_packet, "positions.xtc")
        for chunk in md.iterload(xtc_filename, top=work_unit_topology, atom_indices=atom_indices, chunk=chunksize):
            trj_file.write(coordinates=chunk.xyz, cell_lengths=chunk.unitcell_lengths, cell_angles=chunk.unitcell_angles, time=chunk.time)
        # Record that we've processed the WU
        trj_file._handle.root.processed_folders.append([result_packet])

    # Sync the trajectory file to flush all data to disk
    trj_file.close()

    # Make sure we tell everyone to terminate if we are terminating
    if signal_handler.terminate and terminate_event:
        terminate_event.set()
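
SignalHandler is referenced but not defined here. A minimal sketch of the object the code expects, i.e. something that flips a terminate flag when SIGINT or SIGTERM arrives so the loop can finish the current work unit cleanly (assumed behavior, not the project's actual class):

import signal

class SignalHandler(object):
    """Sets self.terminate when SIGINT or SIGTERM is received."""

    def __init__(self):
        self.terminate = False
        signal.signal(signal.SIGINT, self._handle)
        signal.signal(signal.SIGTERM, self._handle)

    def _handle(self, signum, frame):
        print("Received signal %d; finishing current work unit..." % signum)
        self.terminate = True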
Code example #10
def process_core21_clone(clone_path,
                         topology_filename,
                         processed_trajectory_filename,
                         atom_selection_string,
                         terminate_event=None,
                         delete_on_unpack=False,
                         compress_xml=False,
                         chunksize=10,
                         signal_handler=None):
    """
    Process core21 result packets in a CLONE, concatenating to a specified trajectory.
    This will append to the specified trajectory if it already exists.

    Note
    ----
    * ws9 stores core21 result packets in an uncompressed directory. Original result packets are left untouched.
    * ws8 and earlier versions store result packets in compressed archives; this method will safely unpack them and remove the original compressed files.
    * An exception will be raised if something goes wrong with processing. The calling process will have to catch this and abort CLONE processing.

    Parameters
    ----------
    clone_path : str
        Source path to CLONE data directory
    topology_filename : str
        Path to PDB or other file containing topology information
    processed_trajectory_filename : str
        Path to concatenated stripped trajectory
    atom_selection_string : str
        MDTraj DSL specifying which atoms should be stripped from source WUs.
    terminate_event : multiprocessing.Event, optional, default=None
        If specified, will terminate early if terminate_event.is_set() is True
    delete_on_unpack : bool, optional, default=False
        If True, will delete old ws8-style .tar.bz2 files after they have been unpacked.
        WARNING: THIS COULD BE DANGEROUS
    compress_xml : bool, optional, default=False
        If True, will compress XML files after unpacking them.
    chunksize : int, optional, default=10
        Chunksize (in number of frames) to use for mdtraj.iterload reading of trajectory
    signal_handler : SignalHandler, optional, default=None
        If None, a new SignalHandler object will be created.

    TODO
    ----
    * Add unpacking step to support ws9
    * Include a safer way to substitute vars()

    """
    # Check for early termination since topology reading might take a while
    if terminate_event and terminate_event.is_set():
        return

    MAX_FILEPATH_LENGTH = 1024  # MAXIMUM FILEPATH LENGTH; this may be too short for some installations

    if not signal_handler:
        signal_handler = SignalHandler()

    # Read the topology for the source WU
    # TODO: Only read topology if we have not processed all the WU packets
    # TODO: Use LRU cache to cache work_unit_topology based on filename
    print('Reading topology from %s for clone %s...' %
          (topology_filename, clone_path))
    top = md.load(topology_filename)
    work_unit_topology = copy.deepcopy(top.topology)  # extract topology
    del top  # close file

    # Check for early termination since topology reading might take a while
    if terminate_event and terminate_event.is_set():
        return

    # Determine atoms that will be written to trajectory
    atom_indices = work_unit_topology.select(atom_selection_string)

    # Create a new Topology for the atom subset to be written to the trajectory
    trajectory_topology = work_unit_topology.subset(atom_indices)

    # Glob file paths and return result files in sequential order.
    result_packets = list_core21_result_packets(clone_path)

    # Return if there are no WUs to process
    if len(result_packets) <= 0:
        return

    # Open trajectory for appending
    trj_file = HDF5TrajectoryFile(processed_trajectory_filename, mode='a')

    # Initialize new trajectory with topology and list of processed WUs if they are absent
    try:
        # TODO: Switch from pytables StringAtom to arbitrary-length string
        # http://www.pytables.org/usersguide/datatypes.html
        trj_file._create_earray(
            where='/',
            name='processed_folders',
            atom=trj_file.tables.StringAtom(MAX_FILEPATH_LENGTH),
            shape=(0, ))
        trj_file.topology = trajectory_topology  # assign topology
    except trj_file.tables.NodeError:
        pass

    # Process each WU, checking whether signal has been received after each.
    for result_packet in result_packets:
        # Stop processing if signal handler indicates we should terminate
        if (signal_handler.terminate) or (terminate_event
                                          and terminate_event.is_set()):
            break

        # Skip this WU if we have already processed it
        if six.b(result_packet) in trj_file._handle.root.processed_folders:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
            continue

        # If the result packet is compressed, decompress it and return the new directory name
        result_packet = ensure_result_packet_is_decompressed(
            result_packet,
            work_unit_topology,
            delete_on_unpack=delete_on_unpack,
            compress_xml=compress_xml)

        # Check that we haven't violated our filename length assumption
        if len(result_packet) > MAX_FILEPATH_LENGTH:
            msg = "Filename is longer than hard-coded MAX_FILEPATH_LENGTH limit (%d > %d). Increase MAX_FILEPATH_LENGTH and re-install." % (
                len(result_packet), MAX_FILEPATH_LENGTH)
            print(msg)
            raise Exception(msg)

        # Stop processing if signal handler indicates we should terminate
        if (signal_handler.terminate) or (terminate_event
                                          and terminate_event.is_set()):
            break

        # Process the work unit
        # TODO: Write to logger instead of printing to terminal
        # TODO: We could conceivably also check for early termination in the chunk loop if we carefully track the last chunk processed as well.
        print("   Processing %s" % result_packet)
        xtc_filename = os.path.join(result_packet, "positions.xtc")
        for chunk in md.iterload(xtc_filename,
                                 top=work_unit_topology,
                                 atom_indices=atom_indices,
                                 chunk=chunksize):
            trj_file.write(coordinates=chunk.xyz,
                           cell_lengths=chunk.unitcell_lengths,
                           cell_angles=chunk.unitcell_angles,
                           time=chunk.time)
        # Record that we've processed the WU
        trj_file._handle.root.processed_folders.append([result_packet])

    # Sync the trajectory file to flush all data to disk
    trj_file.close()

    # Make sure we tell everyone to terminate if we are terminating
    if signal_handler.terminate and terminate_event:
        terminate_event.set()
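
list_core21_result_packets is likewise undefined in these listings; from its use it returns the result-packet directories under a CLONE path in sequential order. A speculative sketch (the "results*" naming is an assumption carried over from the Core17 examples):

import glob
import os
from natsort import natsorted  # third-party, also used by the Core17 examples

def list_core21_result_packets(clone_path):
    # Return result packet entries under the CLONE directory in
    # natural (numeric) order.
    return natsorted(glob.glob(os.path.join(clone_path, "results*")))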
Code example #11
File: fah.py Project: jchodera/fahmunge
def concatenate_core17(path,
                       top_filename,
                       output_filename,
                       maxtime=None,
                       maxpackets=None):
    """Concatenate tar bzipped XTC files created by Folding@Home Core17.
    This version accepts only filenames and paths.

    Parameters
    ----------
    path : str
        Path to directory containing "results-*.tar.bz2".  E.g. a single CLONE directory.
    top_filename : str
        Filepath to read Topology for system
    output_filename : str
        Filename of output HDF5 file to generate.
    maxpackets : int, optional, default=None
        If specified, will stop processing after `maxpackets` results packets have been processed
    maxtime : int, optional, default=None
        If specified, will stop processing after `maxtime` seconds have passed.

    Notes
    -----
    We use HDF5 because it provides an easy way to store the metadata associated
    with which files have already been processed.
    """

    # Open topology file.
    top = md.load(top_filename % vars())

    # Glob file paths and return result files in sequential order.
    glob_input = os.path.join(path, "results-*.tar.bz2")
    filenames = glob.glob(glob_input)
    filenames = natsorted(filenames)

    print(
        "Concatenating XTC files from '%s' into '%s' [%d results packets found]"
        % (path, output_filename, len(filenames)))

    # If no result files are present, return.
    if len(filenames) <= 0:
        del top
        return

    # Check integrity of trajectory if it exists.
    delete_trajectory_if_broken(output_filename)

    # Open trajectory for appending.
    trj_file = HDF5TrajectoryFile(output_filename, mode='a')

    MAX_FILEPATH_LENGTH = 1024  # Is this large enough?
    try:
        # TODO: Store MD5 hashes instead of filenames?
        trj_file._create_earray(
            where='/',
            name='processed_filenames',
            atom=trj_file.tables.StringAtom(MAX_FILEPATH_LENGTH),
            shape=(0, ))
        trj_file.topology = top.topology
    except trj_file.tables.NodeError:
        # Object already exists; skip ahead.
        pass

    result_packets_processed = 0
    initial_time = time.time()
    try:
        for filename in filenames:
            # Check that we haven't violated our filename length assumption
            if len(filename) > MAX_FILEPATH_LENGTH:
                msg = "Filename is longer than hard-coded MAX_FILEPATH_LENGTH limit (%d > %d). Increase MAX_FILEPATH_LENGTH and rebuild." % (
                    len(filename), MAX_FILEPATH_LENGTH)
                print(msg)
                raise Exception(msg)
            # Check if we have already processed this file
            if six.b(filename) in trj_file._handle.root.processed_filenames:  # On Py3, the pytables list of filenames has type byte (e.g. b"hey"), so we need to deal with this via six.
                print("Already processed %s" % filename)
                continue
            # Extract frames from trajectory in a temporary directory
            absfilename = os.path.abspath(filename)
            with enter_temp_directory():
                # Extract frames
                archive = tarfile.open(absfilename, mode='r:bz2')
                archive.extract("positions.xtc")
                trj = md.load("positions.xtc", top=top)
                print("   appending %d frames from '%s' to '%s'" %
                      (trj.n_frames, filename, output_filename))
                for frame in trj:
                    trj_file.write(coordinates=frame.xyz,
                                   cell_lengths=frame.unitcell_lengths,
                                   cell_angles=frame.unitcell_angles,
                                   time=frame.time)
                os.unlink("positions.xtc")
                del archive, trj

                # Append list of processed files
                trj_file._handle.root.processed_filenames.append([filename])

                # Flush data
                trj_file.flush()

            # Track statistics on processed packets
            elapsed_time = time.time() - initial_time
            result_packets_processed += 1

            # Return if we have processed the requested number of results packets
            # or exceeded the allotted processing time.
            if maxpackets and (result_packets_processed >= maxpackets):
                break
            if maxtime and (elapsed_time >= maxtime):
                break

    except RuntimeError:
        print(
            "Cannot munge %s due to damaged XTC %s or mismatch with topology file."
            % (path, filename))

    # Clean up.
    trj_file.close()
    del top, trj_file
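
Once any of these functions has produced a concatenated HDF5 trajectory, it can be read back with MDTraj directly, since the topology is stored inside the file. A short usage sketch (the path is illustrative):

import mdtraj as md

# No separate PDB is needed; the HDF5 file embeds the topology.
trj = md.load("trajectories/0_0.hdf5")
print(trj)  # e.g. <mdtraj.Trajectory with N frames, M atoms>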