Example #1
    def init(self, *args, **kwargs):

        # open and initialize the HDF5 file

        self.wepy_h5 = WepyHDF5(self.file_path, mode=self.mode,
                                topology=self._tmp_topology,
                                units=self.units,
                                sparse_fields=list(self._sparse_fields.keys()),
                                feature_shapes=self._feature_shapes,
                                feature_dtypes=self._feature_dtypes,
                                n_dims=self._n_dims,
                                main_rep_idxs=self.main_rep_idxs,
                                alt_reps=self.alt_reps_idxs)

        with self.wepy_h5:

            # if this is a continuation run of another run we want to
            # initialize it as such
            continue_run = None
            # get the run to continue if specified
            if "continue_run" in kwargs:
                if kwargs['continue_run'] is not None:
                    continue_run = kwargs['continue_run']

            # initialize a new run
            run_grp = self.wepy_h5.new_run(continue_run=continue_run)
            self.wepy_run_idx = run_grp.attrs['run_idx']

            # initialize the run record groups using their fields
            self.wepy_h5.init_run_fields_resampling(self.wepy_run_idx, self.resampling_fields)
            # the enumeration for the values of resampling
            self.wepy_h5.init_run_fields_resampling_decision(self.wepy_run_idx, self.decision_enum)
            self.wepy_h5.init_run_fields_resampler(self.wepy_run_idx, self.resampler_fields)
            # set the fields that are records for tables etc. unless
            # they are already set
            if 'resampling' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('resampling', self.resampling_records)
            if 'resampler' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('resampler', self.resampler_records)

            # if no warping fields were set then there are no boundary
            # conditions and we don't initialize them
            if self.warping_fields is not None:
                self.wepy_h5.init_run_fields_warping(self.wepy_run_idx, self.warping_fields)
                self.wepy_h5.init_run_fields_progress(self.wepy_run_idx, self.progress_fields)
                self.wepy_h5.init_run_fields_bc(self.wepy_run_idx, self.bc_fields)
                # table records
                if 'warping' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('warping', self.warping_records)
                if 'boundary_conditions' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('boundary_conditions', self.bc_records)
                if 'progress' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('progress', self.progress_records)

        # if this was opened in a truncation mode, we don't want to
        # overwrite old runs with future calls to init(). so we
        # change the mode to read/write 'r+'
        if self.mode == 'w':
            self.mode = 'r+'
Example #2
    def analyse(self, num_walkers):

        wepy_h5 = WepyHDF5(self.hdf5_reporter_path, mode='r')
        wepy_h5.open()

        max_x, max_range = self.find_max_range(wepy_h5, num_walkers)

        hd = wepy_h5.h5
        n_cycles = hd['/runs/0/trajectories/0/positions'].shape[0]
        dimension = hd['/runs/0/trajectories/0/positions'].shape[2]

        cycle_idxs = [i for i in range(n_cycles)]

        # set up the RunCycleSlice object for each run
        rcs = RunCycleSlice(0, cycle_idxs, wepy_h5)

        data_list = rcs.compute_observable(self.prob_of_cycle,
                                           ['positions', 'weights'],
                                           int(max_x),
                                           map_func=scoop.futures.map,
                                           debug_prints=True)

        cycles_sum = np.zeros(int(max_x))

        for data in data_list:
            cycles_sum += data

        prob_x = 1 / (n_cycles * dimension) * cycles_sum

        # calculate the accuracy of x
        acc = 0

        for x in range(int(max_x)):
            acc += self.accuracy(x, prob_x[x])

        # print test data
        print("Max x= {}".format(max_x))
        print("Max range = {}".format(max_range))
        print("Probability of x ={}".format(prob_x))
        print("accuracy = {}".format(acc))

        results = {
            "max_x": max_x,
            "max_range": max_range,
            "px": prob_x,
            "accuracy": acc
        }
        return results
Example #3
def load_chunk(chunk_spec):

    wepy_h5 = WepyHDF5(chunk_spec['wepy_h5_path'], mode='r')

    with wepy_h5:
        frame_fields = {}
        for field in chunk_spec['fields']:
            frame_fields[field] = wepy_h5.get_traj_field(
                chunk_spec['run_idx'],
                chunk_spec['traj_idx'],
                field,
                frames=chunk_spec['frame_idxs'])

    # combine the chunk spec with the traj_fields data
    chunk_spec['traj_fields'] = frame_fields

    return chunk_spec
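A minimal usage sketch (not part of the original example) showing how a chunk_spec might be built by hand and passed to load_chunk; the path, indices, and fields below are placeholders, and the dict keys mirror the ones load_chunk reads above.

# hypothetical chunk spec for illustration only
chunk_spec = {
    'wepy_h5_path': '../outputs/results.wepy.h5',  # placeholder path
    'run_idx': 0,
    'traj_idx': 0,
    'frame_idxs': list(range(10)),
    'fields': ['positions', 'weights'],
}

chunk = load_chunk(chunk_spec)
# the requested frames come back under the 'traj_fields' key
positions = chunk['traj_fields']['positions']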
Example #4
    def analyse(self, randomwalk_string):
        """Calculates all quality metrics for the random walk simulation
        including pridicted probabilities, accuracy, and maximum
        average range.

        Parameters
        ----------
        walker: object implementing the Walker interface
            The individual walker for which dynamics will be propagated.

        Returns
        -------
        results: dict of str/arraylike

        """

        wepy_h5 = WepyHDF5(self.hdf5_filename, mode='r')
        wepy_h5.open()

        # get the number of runs
        num_runs = wepy_h5.num_runs

        results = []
        for run_idx in range(num_runs):

            max_range, max_dim_ranges = self.get_max_range(wepy_h5, run_idx)

            predicted_probabilty = self.get_predicted_probability(
                wepy_h5, run_idx, int(max_range))

            accuracy_value = self.get_accuracy(predicted_probabilty)

            run_results = {
                'max_range': max_range,
                'max_dim_range': max_dim_ranges,
                'predicted_probabilty': predicted_probabilty,
                'accuracy': accuracy_value
            }

            results.append(run_results)

        self.write_report(randomwalk_string, results)

        return results
Example #5
def combine_orch_wepy_hdf5s(new_orch, new_hdf5_path, run_ids=None):
    """

    \b
    Parameters
    ----------
    new_orch :
        
    new_hdf5_path :
        

    \b
    Returns
    -------

    """

    if run_ids is None:
        run_ids = new_orch.run_hashes()

    # we assume that the run we are interested in is the only run in
    # the WepyHDF5 file so it is index 0
    singleton_run_idx = 0

    # a key-value for the paths for each run
    hdf5_paths = {}

    # go through each run in the new orchestrator
    for run_id in run_ids:

        # get the configuration used for this run
        run_config = new_orch.run_configuration(*run_id)

        # from that configuration find the WepyHDF5Reporters
        for reporter in run_config.reporters:

            if isinstance(reporter, WepyHDF5Reporter):

                # and save the path for that run
                hdf5_paths[run_id] = reporter.file_path

    click.echo("Combining these HDF5 files:")
    click.echo('\n'.join(hdf5_paths.values()))

    # now that we have the paths (or lack of paths) for all
    # the runs we need to start linking them all
    # together.

    # first we need a master linker HDF5 to do this with

    # so load a template WepyHDF5
    template_wepy_h5_path = hdf5_paths[run_ids[singleton_run_idx]]
    template_wepy_h5 = WepyHDF5(template_wepy_h5_path, mode='r')

    # clone it
    with template_wepy_h5:
        master_wepy_h5 = template_wepy_h5.clone(new_hdf5_path, mode='x')

    click.echo("Into a single master hdf5 file: {}".format(new_hdf5_path))

    # then link all the files to it
    run_mapping = {}
    for run_id, wepy_h5_path in hdf5_paths.items():

        # in the case where continuations were done from
        # checkpoints then the runs data will potentially (and
        # most likely) contain extra cycles since checkpoints are
        # typically produced on some interval of cycles. So, in
        # order for us to actually piece together contigs we need
        # to take care of this.

        # There are two ways to deal with this which can both be
        # done at the same time. The first is to keep the "nubs",
        # which are the small leftover pieces after the checkpoint
        # that ended up getting continued, and make a new run from
        # the last checkpoint to the end of the nub, in both the
        # WepyHDF5 and the orchestrator run collections.

        # The second is to generate a WepyHDF5 run that
        # corresponds to the run in the checkpoint orchestrator.

        # To avoid complexity (for now) we opt to simply dispose
        # of the nubs and assume that not much will be lost from
        # this. For the typical use case of making multiple
        # independent and linear contigs this is also the simplest
        # mode, since the addition of multiple nubs will introduce
        # an extra spanning contig in the contig tree.

        # Furthermore, the nubs are a source of problems: if runs were
        # abruptly stopped and data was not written, some of the frames
        # can be corrupted. So until we know how to prevent this
        # (SWMR mode will probably help), this is another reason not to
        # deal with nubs.

        # TODO: add option to keep nubs in HDF5, and deal with in
        # orch (you won't be able to have an end snapshot...).

        # to do this we simply check whether or not the number of
        # cycles for the run_id are less than the number of cycles
        # in the corresponding WepyHDF5 run dataset.
        orch_run_num_cycles = new_orch.run_last_cycle_idx(*run_id)

        # get the number of cycles that are in the data for the run in
        # the HDF5 to compare to the number in the orchestrator run
        # record
        wepy_h5 = WepyHDF5(wepy_h5_path, mode='r')
        with wepy_h5:
            h5_run_num_cycles = wepy_h5.num_run_cycles(singleton_run_idx)

        # sanity check for if the number of cycles in the
        # orchestrator is greater than the HDF5
        if orch_run_num_cycles > h5_run_num_cycles:
            raise ValueError("Number of cycles in orch run is more than HDF5."\
                             "This implies missing data")

        # copy the run (with the slice)
        with master_wepy_h5:

            # TODO: this was the old way of combining where we would
            # just link, however due to the above discussion this is
            # not tenable now. In the future there might be some more
            # complex options taking linking into account but for now
            # we just don't use it and all runs will be copied by this
            # operation

            # # we just link the whole file then sort out the
            # # continuations later since we aren't necessarily doing
            # # this in a logical order
            # new_run_idxs = master_wepy_h5.link_file_runs(wepy_h5_path)

            # extract the runs from the file (there should only be
            # one). This means copy the run, but if we only want a
            # truncation of it we will use the run slice to only get
            # part of it

            # so first we generate the run slices for this file using
            # the number of cycles recorded in the orchestrator
            run_slices = {singleton_run_idx : (0, orch_run_num_cycles)}

            click.echo("Extracting Run: {}".format(run_id))
            click.echo("Frames 0 to {} out of {}".format(
                orch_run_num_cycles, h5_run_num_cycles))

            # then perform the extraction, which will open the other
            # file on its own
            new_run_idxs = master_wepy_h5.extract_file_runs(wepy_h5_path,
                                                            run_slices=run_slices)

            # map the hash id to the new run idx created. There should
            # only be one run in an HDF5 if we are following the
            # orchestration workflow.
            assert len(new_run_idxs) < 2, \
                "Cannot be more than 1 run per HDF5 file in orchestration workflow"

            run_mapping[run_id] = new_run_idxs[0]

            click.echo("Set as run: {}".format(new_run_idxs[0]))

    click.echo("Done extracting runs, setting continuations")

    with master_wepy_h5:

        # now that they are all linked we need to add the snapshot
        # hashes identifying the runs as metadata. This is so we can
        # map the simple run indices in the HDF5 back to the
        # orchestrator defined runs. This will be saved as metadata on
        # the run. Also:

        # We need to set the continuations correctly between the runs
        # in different files, so for each run we find the run it
        # continues in the orchestrator
        for run_id, run_idx in run_mapping.items():

            # set the run snapshot hash metadata except for if we have
            # already done it
            try:
                master_wepy_h5.set_run_start_snapshot_hash(run_idx, run_id[0])
            except AttributeError:
                # it was already set so just move on
                pass
            try:
                master_wepy_h5.set_run_end_snapshot_hash(run_idx, run_id[1])
            except AttributeError:
                # it was already set so just move on
                pass

            # find the run_id that this one continues
            continued_run_id = new_orch.run_continues(*run_id)

            # if a None is returned then there was no continuation
            if continued_run_id is None:
                # so we go to the next run_id and don't log any
                # continuation
                continue

            # get the run_idx in the HDF5 that corresponds to this run
            continued_run_idx = run_mapping[continued_run_id]

            click.echo("Run {} continued by {}".format(continued_run_id, run_idx))

            # add the continuation
            master_wepy_h5.add_continuation(run_idx, continued_run_idx)
Example #6
import numpy as np
from shutil import copy2

from wepy.hdf5 import WepyHDF5

file1 = '../outputs/results.wepy.h5'
file2 = '../outputs/results_cont0_0.wepy.h5'

all_results_file = '../outputs/all_results.wepy.h5'

# make a copy of the result hdf5 file to use as a proxy for another
# run, first remove the copy so we can remake it
#os.remove(file2)
copy2(file1, file2)

# Load wepy hdf5 file into python script
wepy_1_h5 = WepyHDF5(file1, mode='r')
wepy_2_h5 = WepyHDF5(file2, mode='r')

# we make another WepyHDF5 that will contain both as external links,
# so we clone one of the ones we are linking from to get a WepyHDF5
# file with no runs in it, before it is opened
with wepy_1_h5:
    all_wepy_h5 = wepy_1_h5.clone(all_results_file, mode='w')

with all_wepy_h5:

    # link all the file1 runs together preserving continuations
    file_run_idxs = all_wepy_h5.link_file_runs(file1)

    # add the continuation run that is in another file
    run_idx = all_wepy_h5.link_run(file2, 0, continues=0)
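A small follow-up sketch (not part of the original script): reopen the combined file read-only and confirm that all of the linked runs are reachable through it.

check_h5 = WepyHDF5(all_results_file, mode='r')
with check_h5:
    # all runs linked from file1 plus the continuation run linked from file2
    print("number of runs:", check_h5.num_runs)
    print("run indices:", check_h5.run_idxs)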
Example #7
import numpy as np

from wepy.hdf5 import WepyHDF5
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.transitions import run_transition_probability_matrix
from wepy.analysis.network import MacroStateNetwork
from wepy.analysis.contig_tree import ContigTree

# Load wepy hdf5 file into python script
wepy_h5 = WepyHDF5('../outputs/results.wepy.h5', mode='r+')
run_idx = 0
assg_key = 'rand_assg_idx'
n_classifications = 50

# make random assignments


# observable function
def rand_assg(fields_d, *args, **kwargs):
    assignments = np.random.random_integers(0,
                                            n_classifications,
                                            size=fields_d['weights'].shape[0])
    return assignments


with wepy_h5:

    # compute this random assignment "observable"
    wepy_h5.compute_observable(rand_assg, ['weights'],
                               save_to_hdf5=assg_key)
Example #8
    def init(self, continue_run=None, init_walkers=None, **kwargs):

        # do the inherited stuff
        super().init(**kwargs)

        # open and initialize the HDF5 file
        logging.info("Initializing HDF5 file at {}".format(self.file_path))

        self.wepy_h5 = WepyHDF5(self.file_path,
                                mode=self.mode,
                                topology=self._tmp_topology,
                                units=self.units,
                                sparse_fields=list(self._sparse_fields.keys()),
                                feature_shapes=self._feature_shapes,
                                feature_dtypes=self._feature_dtypes,
                                n_dims=self._n_dims,
                                main_rep_idxs=self.main_rep_idxs,
                                alt_reps=self.alt_reps_idxs)

        # if we specify save fields only save these for the initial walkers
        if self.save_fields is not None:

            state_fields = list(init_walkers[0].state.dict().keys())

            # make sure all the save_fields are present in the state
            assert all([save_field in state_fields
                        for save_field in self.save_fields]), \
                            "Not all specified save_fields present in walker states"

            filtered_init_walkers = []
            for walker in init_walkers:
                # make a new state by filtering the attributes of the old ones
                state_d = {
                    k: v
                    for k, v in walker.state.dict().items()
                    if k in self.save_fields
                }

                # and saving alternate representations as we would
                # expect them

                # if there are any alternate representations set them
                for alt_rep_name, alt_rep_idxs in self.alt_reps_idxs.items():

                    alt_rep_path = 'alt_reps/{}'.format(alt_rep_name)

                    # if the idxs are None we want all of the atoms
                    if alt_rep_idxs is None:
                        state_d[alt_rep_path] = state_d['positions'][:]
                    # otherwise get only the atoms we want
                    else:
                        state_d[alt_rep_path] = state_d['positions'][
                            alt_rep_idxs]

                # if the main rep is different than the full state
                # positions set that
                if self.main_rep_idxs is not None:
                    state_d['positions'] = state_d['positions'][
                        self.main_rep_idxs]

                # then making the new state
                new_state = WalkerState(**state_d)

                filtered_init_walkers.append(Walker(new_state, walker.weight))
        # otherwise save the full state
        else:
            filtered_init_walkers = init_walkers

        self.wepy_h5.set_mode(mode='r+')
        with self.wepy_h5:

            # if this is a continuation run of another run we want to
            # initialize it as such

            # initialize a new run
            run_grp = self.wepy_h5.new_run(filtered_init_walkers,
                                           continue_run=continue_run)
            self.wepy_run_idx = run_grp.attrs['run_idx']

            # initialize the run record groups using their fields
            self.wepy_h5.init_run_fields_resampling(self.wepy_run_idx,
                                                    self.resampling_fields)
            # the enumeration for the values of resampling
            self.wepy_h5.init_run_fields_resampling_decision(
                self.wepy_run_idx, self.decision_enum)
            self.wepy_h5.init_run_fields_resampler(self.wepy_run_idx,
                                                   self.resampler_fields)
            # set the fields that are records for tables etc. unless
            # they are already set
            if 'resampling' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('resampling',
                                                self.resampling_records)
            if 'resampler' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('resampler',
                                                self.resampler_records)

            # if no warping fields were set then there are no boundary
            # conditions and we don't initialize them
            if self.warping_fields is not None:
                self.wepy_h5.init_run_fields_warping(self.wepy_run_idx,
                                                     self.warping_fields)
                self.wepy_h5.init_run_fields_progress(self.wepy_run_idx,
                                                      self.progress_fields)
                self.wepy_h5.init_run_fields_bc(self.wepy_run_idx,
                                                self.bc_fields)
                # table records
                if 'warping' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('warping',
                                                    self.warping_records)
                if 'boundary_conditions' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('boundary_conditions',
                                                    self.bc_records)
                if 'progress' not in self.wepy_h5.record_fields:
                    self.wepy_h5.init_record_fields('progress',
                                                    self.progress_records)

        # if this was opened in a truncation mode, we don't want to
        # overwrite old runs with future calls to init(). so we
        # change the mode to read/write 'r+'
        if self.mode == 'w':
            self.set_mode(0, 'r+')

Example #9
import numpy as np

def traj_field_lj_dist(traj_data):

    positions = traj_data['positions']

    # slice out positions for each LJ particle
    lj1 = positions[:, 0, :]
    lj2 = positions[:, 1, :]
    # compute distances
    distances = np.sqrt((lj1[:, 0] - lj2[:, 0])**2 +
                        (lj1[:, 1] - lj2[:, 1])**2 +
                        (lj1[:, 2] - lj2[:, 2])**2)
    return distances


if __name__ == "__main__":
    from wepy.hdf5 import WepyHDF5

    # load the HDF5 file in read/write so we can save data to the
    # observables
    wepy_hdf5_path = "../outputs/results.wepy.h5"
    wepy_h5 = WepyHDF5(wepy_hdf5_path, mode='r+')

    print('test')
    with wepy_h5:
        wepy_h5.compute_observable(traj_field_lj_dist, ['positions'],
                                   save_to_hdf5='rmsd',
                                   map_func=map,
                                   debug_prints=True)
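A follow-up sketch (not part of the original script) for reading the saved observable back out; this assumes observables saved with save_to_hdf5 appear under the 'observables/' field path of each trajectory.

    with wepy_h5:
        # field path is an assumption based on the save_to_hdf5 name above
        lj_dists = wepy_h5.get_traj_field(0, 0, 'observables/rmsd')
        print(lj_dists.shape)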
Example #10
import sys
import os.path as osp

from wepy.hdf5 import WepyHDF5
from wepy.analysis.parents import ancestors
from wepy.resampling.resamplers.wexplore import WExploreResampler

if sys.argv[1] == '-h' or sys.argv[1] == '--help':
    print("walker_lineage.py run_index walker_index output_DCD_path")
else:
    run_idx = int(sys.argv[1])
    walker_idx = int(sys.argv[2])
    dcd_path = sys.argv[3]

    outputs_dir = osp.realpath('../outputs')

    hdf5_filename = 'results.wepy.h5'

    hdf5_path = osp.join(outputs_dir, hdf5_filename)

    wepy_h5 = WepyHDF5(hdf5_path, mode='r')

    wepy_h5.open()

    cycle_idx = wepy_h5.traj(run_idx, walker_idx)['positions'].shape[0] - 1

    resampling_panel = wepy_h5.run_resampling_panel(run_idx)

    parent_panel = WExploreResampler.DECISION.parent_panel(resampling_panel)
    parent_table = WExploreResampler.DECISION.net_parent_table(parent_panel)

    lineage = ancestors(parent_table, cycle_idx, walker_idx)

    mdj_traj = wepy_h5.run_trace_to_mdtraj(run_idx, lineage)

    mdj_traj.save_dcd(dcd_path)
Example #11
def traj_fields_chunk_items(wepy_h5_path,
                            fields,
                            run_idxs=Ellipsis,
                            chunk_size=Ellipsis):
    """Generate items that can be used to create a dask.bag object.

    Parameters
    ----------

    wepy_h5_path : str
        The file path to the WepyHDF5 file that will be read from.

    fields : list of str
        The field names/paths for the data to be retrieved.

    run_idxs : list of int, optional
        The runs to generate chunks for. Defaults to all runs in the file.

    chunk_size : int, optional
        The size of the chunk (i.e. number of frames) that will be
        retrieved from each trajectory. This is the unit of data a
        single task will work on; Dask may also partition these
        chunks as it sees fit.

    Returns
    -------
    chunk_specs : list of dict of str : value

    """

    # open the HDF5
    try:
        wepy_h5 = WepyHDF5(wepy_h5_path, mode='r')
    except OSError:
        print("Failed to open HDF5")
        return None

    with wepy_h5:

        # choose the run idxs
        if run_idxs is not Ellipsis:
            assert all([run_idx in wepy_h5.run_idxs
                        for run_idx in run_idxs]), "run_idx not in runs"
        else:
            run_idxs = wepy_h5.run_idxs

        chunk_specs = []
        for run_idx in run_idxs:
            for traj_idx in wepy_h5.run_traj_idxs(run_idx):

                num_frames = wepy_h5.num_traj_frames(run_idx, traj_idx)

                # determine the specific frame indices in the chunks

                # if the chunk size is either larger than the
                # trajectory, or chunk size is Ellipsis we take the
                # whole trajectory
                if chunk_size is Ellipsis:
                    chunks = [range(num_frames)]
                elif chunk_size > num_frames:
                    chunks = [range(num_frames)]
                else:
                    # split it, allowing for unequal chunk sizes
                    chunks = np.array_split(range(num_frames),
                                            num_frames // chunk_size)

                for frame_idxs in chunks:
                    chunk_spec = {
                        'wepy_h5_path': wepy_h5_path,
                        'run_idx': run_idx,
                        'traj_idx': traj_idx,
                        'frame_idxs': frame_idxs,
                        'fields': fields,
                    }
                    chunk_specs.append(chunk_spec)

    return chunk_specs
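A sketch (not part of the original function) of how these chunk specs might feed a dask.bag pipeline, assuming dask is installed and a load_chunk function like the one in Example #3 is available; the path and chunk size below are placeholders.

import dask.bag as db

chunk_specs = traj_fields_chunk_items('../outputs/results.wepy.h5',
                                      ['positions', 'weights'],
                                      chunk_size=100)

# one task per chunk spec; each task reads its own slice of the HDF5
bag = db.from_sequence(chunk_specs)
loaded_chunks = bag.map(load_chunk).compute()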
Example #12
def fe_calc(file_list, n_bins, k=2000):
    # NUMERATOR
    # for every cycle, do the binning and averaging for all the walkers and sum up for all times

    # This calculates G0 in eq [8] from Hummer, Szabo PNAS 2001 98, 7
    #
    # G0(z) =  -1/beta ln[ (sum_t (term1 / term2)) / ( sum_t (term3 / term2)) ]
    #
    # where
    #
    # term1 =  <delta(z-z_t) exp(-beta w_t) >     # specific to z and t
    # term2 = <exp(-beta w_t)>                    # specific to t
    # term3 = exp[-beta u(z,t)]                   # specific to z and t
    #

    numer = np.zeros((n_bins))
    denom = np.zeros((n_bins))

    g0 = np.zeros((n_bins))
    n_g0 = np.zeros((n_bins))
    d_values = [(i + 0.5) * (d_max - d_min) / n_bins for i in range(n_bins)]

    # initialize variables
    term1 = np.zeros((n_bins, n_cycles))
    term2 = np.zeros((n_cycles))
    norm = np.zeros((n_cycles))

    n_part = 2
    n_dim = 3
    n_walkers = walkers

    positions = np.zeros((n_walkers, n_cycles, n_part, n_dim))
    work_values = np.zeros((n_walkers, n_cycles))
    weights = np.zeros((n_walkers, n_cycles))

    for index, value in enumerate(file_list):

        wepy_h5 = WepyHDF5(value, mode='r')
        wepy_h5.open()

        for j in range(n_walkers):
            positions[j] = np.array(wepy_h5.h5['runs/0/trajectories/' +
                                               str(j) + '/positions'])
            work_values[j] = np.array(
                wepy_h5.h5['runs/0/trajectories/' + str(j) +
                           '/activity']).reshape((n_cycles))
            weights[j] = np.array(wepy_h5.h5['runs/0/trajectories/' + str(j) +
                                             '/weights']).reshape((n_cycles))

        # done reading from this file, so close it
        wepy_h5.close()

        for cycle in range(n_cycles):
            # these lists have all the distances, work values, and weights for cycle i
            ds_cyc = []
            work_cyc = work_values[:, cycle]
            weight_cyc = weights[:, cycle]

            for j in range(n_walkers):
                # get distances
                p = positions[j, cycle]
                tmp = vec_dist(p[0], p[1], j, cycle)
                ds_cyc.append(tmp)

            e_mbwt = np.exp(-beta * np.array(work_cyc))

            for j, d in enumerate(ds_cyc):
                # find out which bin it's in
                bin_id = int((d - d_min) / (d_max - d_min) * n_bins)

                term1[bin_id][cycle] += weight_cyc[j] * e_mbwt[j]
                term2[cycle] += weight_cyc[j] * e_mbwt[j]
                norm[cycle] += weight_cyc[j]

            # end of loop over cycles
            # terms have been computed, add to the running sums over timepoints
    for b in range(n_bins):
        numer[b] = 0
        for cycle in range(n_cycles):
            numer[b] += term1[b][cycle] / term2[cycle]

            # Note: get_bias_value returns term3
            # need to use cycle+1 so the d0 matches the work values
            term3 = np.exp(-beta * get_bias_value(d_values[b], cycle + 1, k))
            denom[b] += term3 / (term2[cycle] / norm[cycle])


    g0_no_gaps = []
    d_values_no_gaps = []
    for b in range(n_bins):
        if numer[b] > 0 and denom[b] > 0:
            g0_no_gaps.append(-np.log(numer[b] / denom[b]) / beta)
            d_values_no_gaps.append(d_values[b])

    g0_arr = np.array(g0_no_gaps)
    d_val_arr = np.array(d_values_no_gaps)

    plt.plot(d_values_no_gaps, g0_arr - g0_arr.min(), label='FES')

    return d_values_no_gaps, g0_no_gaps, g0_arr, d_val_arr
Example #13
from pathlib import Path

import numpy as np

from wepy.hdf5 import WepyHDF5
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.transitions import run_transition_probability_matrix
from wepy.analysis.network import MacroStateNetwork
from wepy.analysis.contig_tree import ContigTree

output_dir = Path('_output')
sim_dir = output_dir / 'we'

# Load wepy hdf5 file into python script
wepy_h5 = WepyHDF5(sim_dir / 'results.wepy.h5', mode='r+')
run_idx = 0
assg_key = 'rand_assg_idx'
n_classifications = 4
random_seed = 1

np.random.seed(random_seed)

# make random assignments


# observable function
def rand_assg(fields_d, *args, **kwargs):
    assignments = np.random.randint(0,
                                    n_classifications,
                                    size=fields_d['weights'].shape)
    return assignments
Example #14
def combine_orch_wepy_hdf5s(new_orch, new_hdf5_path):
    """

    Parameters
    ----------
    new_orch :
        
    new_hdf5_path :
        

    Returns
    -------

    """

    # a key-value for the paths for each run
    hdf5_paths = {}

    # go through each run in the new orchestrator
    for run_id in new_orch.runs:

        # get the configuration used for this run
        run_config = new_orch.run_configuration(*run_id)

        # from that configuration find the WepyHDF5Reporters
        for reporter in run_config.reporters:

            if isinstance(reporter, WepyHDF5Reporter):

                # and save the path for that run
                hdf5_paths[run_id] = reporter.file_path

    # now that we have the paths (or lack of paths) for all
    # the runs we need to start linking them all
    # together.

    # first we need a master linker HDF5 to do this with

    # so load a template WepyHDF5
    template_wepy_h5_path = hdf5_paths[new_orch.runs[0]]
    template_wepy_h5 = WepyHDF5(template_wepy_h5_path, mode='r')

    # clone it
    with template_wepy_h5:
        master_wepy_h5 = template_wepy_h5.clone(new_hdf5_path, mode='x')

    with master_wepy_h5:
        # then link all the files to it
        run_mapping = {}
        for run_id, wepy_h5_path in hdf5_paths.items():

            # we just link the whole file then sort out the
            # continuations later since we aren't necessarily doing
            # this in a logical order
            new_run_idxs = master_wepy_h5.link_file_runs(wepy_h5_path)

            # map the hash id to the new run idx created. There should
            # only be one run in an HDF5 if we are following the
            # orchestration workflow.
            assert len(new_run_idxs) < 2, \
                "Cannot be more than 1 run per HDF5 file in orchestration workflow"

            run_mapping[run_id] = new_run_idxs[0]

        # now that they are all linked we need to add the snapshot
        # hashes identifying the runs as metadata. This is so we can
        # map the simple run indices in the HDF5 back to the
        # orchestrator defined runs. This will be saved as metadata on
        # the run. Also:

        # We need to set the continuations correctly between the runs
        # in different files, so for each run we find the run it
        # continues in the orchestrator
        for run_id, run_idx in run_mapping.items():

            # set the run snapshot hash metadata except for if we have
            # already done it
            try:
                master_wepy_h5.set_run_start_snapshot_hash(run_idx, run_id[0])
            except AttributeError:
                # it was already set so just move on
                pass
            try:
                master_wepy_h5.set_run_end_snapshot_hash(run_idx, run_id[1])
            except AttributeError:
                # it was already set so just move on
                pass

            # find the run_id that this one continues
            continued_run_id = new_orch.run_continues(*run_id)

            # if a None is returned then there was no continuation
            if continued_run_id is None:
                # so we go to the next run_id and don't log any
                # continuation
                continue

            # get the run_idx in the HDF5 that corresponds to this run
            continued_run_idx = run_mapping[continued_run_id]

            # add the continuation
            master_wepy_h5.add_continuation(run_idx, continued_run_idx)