def init(self, *args, **kwargs):

    # open and initialize the HDF5 file
    self.wepy_h5 = WepyHDF5(self.file_path, mode=self.mode,
                            topology=self._tmp_topology,
                            units=self.units,
                            sparse_fields=list(self._sparse_fields.keys()),
                            feature_shapes=self._feature_shapes,
                            feature_dtypes=self._feature_dtypes,
                            n_dims=self._n_dims,
                            main_rep_idxs=self.main_rep_idxs,
                            alt_reps=self.alt_reps_idxs)

    with self.wepy_h5:

        # if this is a continuation run of another run we want to
        # initialize it as such
        continue_run = None

        # get the run to continue if specified
        if "continue_run" in kwargs:
            if kwargs['continue_run'] is not None:
                continue_run = kwargs['continue_run']

        # initialize a new run
        run_grp = self.wepy_h5.new_run(continue_run=continue_run)
        self.wepy_run_idx = run_grp.attrs['run_idx']

        # initialize the run record groups using their fields
        self.wepy_h5.init_run_fields_resampling(self.wepy_run_idx, self.resampling_fields)

        # the enumeration for the values of resampling
        self.wepy_h5.init_run_fields_resampling_decision(self.wepy_run_idx, self.decision_enum)

        self.wepy_h5.init_run_fields_resampler(self.wepy_run_idx, self.resampler_fields)

        # set the fields that are records for tables etc., unless
        # they are already set
        if 'resampling' not in self.wepy_h5.record_fields:
            self.wepy_h5.init_record_fields('resampling', self.resampling_records)
        if 'resampler' not in self.wepy_h5.record_fields:
            self.wepy_h5.init_record_fields('resampler', self.resampler_records)

        # if there were no warping fields set there are no boundary
        # conditions and we don't initialize them
        if self.warping_fields is not None:

            self.wepy_h5.init_run_fields_warping(self.wepy_run_idx, self.warping_fields)
            self.wepy_h5.init_run_fields_progress(self.wepy_run_idx, self.progress_fields)
            self.wepy_h5.init_run_fields_bc(self.wepy_run_idx, self.bc_fields)

            # table records
            if 'warping' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('warping', self.warping_records)
            if 'boundary_conditions' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('boundary_conditions', self.bc_records)
            if 'progress' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('progress', self.progress_records)

    # if this was opened in a truncation mode, we don't want to
    # overwrite old runs with future calls to init(). so we
    # change the mode to read/write 'r+'
    if self.mode == 'w':
        self.mode = 'r+'

def analyse(self, num_walkers):

    wepy_h5 = WepyHDF5(self.hdf5_reporter_path, mode='r')
    wepy_h5.open()

    max_x, max_range = self.find_max_range(wepy_h5, num_walkers)

    hd = wepy_h5.h5
    n_cycles = hd['/runs/0/trajectories/0/positions'].shape[0]
    dimension = hd['/runs/0/trajectories/0/positions'].shape[2]

    cycle_idxs = [i for i in range(n_cycles)]

    # set up the RunCycleSlice object for each run
    rcs = RunCycleSlice(0, cycle_idxs, wepy_h5)

    data_list = rcs.compute_observable(self.prob_of_cycle, ['positions', 'weights'],
                                       int(max_x),
                                       map_func=scoop.futures.map,
                                       debug_prints=True)

    cycles_sum = np.zeros(int(max_x))
    for data in data_list:
        cycles_sum += data

    prob_x = 1 / (n_cycles * dimension) * cycles_sum

    # calculate the accuracy of x
    acc = 0
    for x in range(int(max_x)):
        acc += self.accuracy(x, prob_x[x])

    # print test data
    print("Max x= {}".format(max_x))
    print("Max range = {}".format(max_range))
    print("Probability of x ={}".format(prob_x))
    print("accuracy = {}".format(acc))

    results = {"max_x": max_x,
               "max_range": max_range,
               "px": prob_x,
               "accuracy": acc}

    return results

def load_chunk(chunk_spec):

    wepy_h5 = WepyHDF5(chunk_spec['wepy_h5_path'], mode='r')

    with wepy_h5:
        frame_fields = {}
        for field in chunk_spec['fields']:
            frame_fields[field] = wepy_h5.get_traj_field(chunk_spec['run_idx'],
                                                         chunk_spec['traj_idx'],
                                                         field,
                                                         frames=chunk_spec['frame_idxs'])

    # combine the chunk spec with the traj_fields data
    chunk_spec['traj_fields'] = frame_fields

    return chunk_spec

def analyse(self, randomwalk_string):
    """Calculate all quality metrics for the random walk simulation,
    including predicted probabilities, accuracy, and maximum average
    range.

    Parameters
    ----------
    randomwalk_string : str
        Header string describing the random walk simulation, passed
        through to the written report.

    Returns
    -------
    results : list of dict of str : arraylike
        One dictionary of metrics per run.

    """
    wepy_h5 = WepyHDF5(self.hdf5_filename, mode='r')
    wepy_h5.open()

    # get the number of runs
    num_runs = wepy_h5.num_runs

    results = []
    for run_idx in range(num_runs):

        max_range, max_dim_ranges = self.get_max_range(wepy_h5, run_idx)

        predicted_probabilty = self.get_predicted_probability(wepy_h5, run_idx,
                                                              int(max_range))

        accuracy_value = self.get_accuracy(predicted_probabilty)

        run_results = {'max_range': max_range,
                       'max_dim_range': max_dim_ranges,
                       'predicted_probabilty': predicted_probabilty,
                       'accuracy': accuracy_value}

        results.append(run_results)

    self.write_report(randomwalk_string, results)

    return results

def combine_orch_wepy_hdf5s(new_orch, new_hdf5_path, run_ids=None):
    """

    \b
    Parameters
    ----------
    new_orch :

    new_hdf5_path :

    \b
    Returns
    -------

    """

    if run_ids is None:
        run_ids = new_orch.run_hashes()

    # we assume that the run we are interested in is the only run in
    # the WepyHDF5 file so it is index 0
    singleton_run_idx = 0

    # a key-value for the paths for each run
    hdf5_paths = {}

    # go through each run in the new orchestrator
    for run_id in run_ids:

        # get the configuration used for this run
        run_config = new_orch.run_configuration(*run_id)

        # from that configuration find the WepyHDF5Reporters
        for reporter in run_config.reporters:

            if isinstance(reporter, WepyHDF5Reporter):

                # and save the path for that run
                hdf5_paths[run_id] = reporter.file_path

    click.echo("Combining these HDF5 files:")
    click.echo('\n'.join(hdf5_paths.values()))

    # now that we have the paths (or lack of paths) for all the runs
    # we need to start linking them all together.

    # first we need a master linker HDF5 to do this with, so load a
    # template WepyHDF5
    template_wepy_h5_path = hdf5_paths[run_ids[singleton_run_idx]]
    template_wepy_h5 = WepyHDF5(template_wepy_h5_path, mode='r')

    # clone it
    with template_wepy_h5:
        master_wepy_h5 = template_wepy_h5.clone(new_hdf5_path, mode='x')

    click.echo("Into a single master hdf5 file: {}".format(new_hdf5_path))

    # then link all the files to it
    run_mapping = {}
    for run_id, wepy_h5_path in hdf5_paths.items():

        # in the case where continuations were done from checkpoints,
        # the run data will potentially (and most likely) contain
        # extra cycles, since checkpoints are typically produced on
        # some interval of cycles. So, in order for us to actually
        # piece together contigs we need to take care of this.

        # There are two ways to deal with this, which can both be
        # done at the same time. The first is to keep the "nubs",
        # which are the small leftover pieces after the checkpoint
        # that ended up getting continued, and make a new run from
        # the last checkpoint to the end of the nub, in both the
        # WepyHDF5 and the orchestrator run collections.

        # The second is to generate a WepyHDF5 run that corresponds
        # to the run in the checkpoint orchestrator.

        # To avoid complexity (for now) we opt to simply dispose of
        # the nubs and assume that not much will be lost from this.
        # For the typical use case of making multiple independent and
        # linear contigs this is also the simplest mode, since the
        # addition of multiple nubs would introduce an extra spanning
        # contig into the contig tree.

        # Furthermore, the nubs are a source of problems: if runs
        # were abruptly stopped and data was not written, some of the
        # frames can be corrupted. So until we know how to prevent
        # this (SWMR mode will probably help) this is another reason
        # not to deal with nubs.

        # TODO: add option to keep nubs in HDF5, and deal with them
        # in the orch (you won't be able to have an end snapshot...).

        # to do this we simply check whether the number of cycles for
        # the run_id is less than the number of cycles in the
        # corresponding WepyHDF5 run dataset
        orch_run_num_cycles = new_orch.run_last_cycle_idx(*run_id)

        # get the number of cycles that are in the data for the run
        # in the HDF5 to compare to the number in the orchestrator
        # run record
        wepy_h5 = WepyHDF5(wepy_h5_path, mode='r')
        with wepy_h5:
            h5_run_num_cycles = wepy_h5.num_run_cycles(singleton_run_idx)

        # sanity check: the number of cycles in the orchestrator
        # should not be greater than in the HDF5
        if orch_run_num_cycles > h5_run_num_cycles:
            raise ValueError("Number of cycles in orch run is more than HDF5. "
                             "This implies missing data")

        # copy the run (with the slice)
        with master_wepy_h5:

            # TODO: this was the old way of combining, where we would
            # just link; however, due to the above discussion this is
            # not tenable now. In the future there might be some more
            # complex options taking linking into account, but for
            # now we just don't use it and all runs will be copied by
            # this operation
            #
            # # we just link the whole file then sort out the
            # # continuations later since we aren't necessarily doing
            # # this in a logical order
            # new_run_idxs = master_wepy_h5.link_file_runs(wepy_h5_path)

            # extract the runs from the file (there should only be
            # one). This means copying the run, but if we only want a
            # truncation of it we use the run slice to get only part
            # of it.

            # so first we generate the run slices for this file using
            # the number of cycles recorded in the orchestrator
            run_slices = {singleton_run_idx: (0, orch_run_num_cycles)}

            click.echo("Extracting Run: {}".format(run_id))
            click.echo("Frames 0 to {} out of {}".format(orch_run_num_cycles,
                                                         h5_run_num_cycles))

            # then perform the extraction, which will open the other
            # file on its own
            new_run_idxs = master_wepy_h5.extract_file_runs(wepy_h5_path,
                                                            run_slices=run_slices)

            # map the hash id to the new run idx created. There
            # should only be one run in an HDF5 if we are following
            # the orchestration workflow.
            assert len(new_run_idxs) < 2, \
                "Cannot be more than 1 run per HDF5 file in orchestration workflow"

            run_mapping[run_id] = new_run_idxs[0]

            click.echo("Set as run: {}".format(new_run_idxs[0]))

    click.echo("Done extracting runs, setting continuations")

    with master_wepy_h5:

        # now that they are all linked we need to add the snapshot
        # hashes identifying the runs as metadata. This is so we can
        # map the simple run indices in the HDF5 back to the
        # orchestrator-defined runs. This will be saved as metadata
        # on the run. Also:

        # We need to set the continuations correctly between the runs
        # in different files, so for each run we find the run it
        # continues in the orchestrator
        for run_id, run_idx in run_mapping.items():

            # set the run snapshot hash metadata, except if we have
            # already done it
            try:
                master_wepy_h5.set_run_start_snapshot_hash(run_idx, run_id[0])
            except AttributeError:
                # it was already set so just move on
                pass

            try:
                master_wepy_h5.set_run_end_snapshot_hash(run_idx, run_id[1])
            except AttributeError:
                # it was already set so just move on
                pass

            # find the run_id that this one continues
            continued_run_id = new_orch.run_continues(*run_id)

            # if a None is returned then there was no continuation
            if continued_run_id is None:
                # so we go to the next run_id and don't log any
                # continuation
                continue

            # get the run_idx in the HDF5 that corresponds to this run
            continued_run_idx = run_mapping[continued_run_id]

            click.echo("Run {} continued by {}".format(continued_run_id, run_idx))

            # add the continuation
            master_wepy_h5.add_continuation(run_idx, continued_run_idx)

import numpy as np
from shutil import copy2

from wepy.hdf5 import WepyHDF5

file1 = '../outputs/results.wepy.h5'
file2 = '../outputs/results_cont0_0.wepy.h5'

all_results_file = '../outputs/all_results.wepy.h5'

# make a copy of the result hdf5 file to use as a proxy for another
# run, first remove the copy so we can remake it
#os.remove(file2)
copy2(file1, file2)

# Load wepy hdf5 file into python script
wepy_1_h5 = WepyHDF5(file1, mode='r')
wepy_2_h5 = WepyHDF5(file2, mode='r')

# we make another WepyHDF5 that will contain both as external links,
# so we clone one of the ones we are linking from to get a WepyHDF5
# file with no runs in it, before it is opened
with wepy_1_h5:
    all_wepy_h5 = wepy_1_h5.clone(all_results_file, mode='w')

with all_wepy_h5:

    # link all the file1 runs together preserving continuations
    file_run_idxs = all_wepy_h5.link_file_runs(file1)

    # add the continuation run that is in another file
    run_idx = all_wepy_h5.link_run(file2, 0, continues=0)

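# A quick sanity check of the combined file (a minimal sketch, not part
# of the original example): reopen the master file read-only and list
# what got linked. It assumes the num_runs and run_idxs attributes used
# in the other examples here also report externally linked runs.
check_h5 = WepyHDF5(all_results_file, mode='r')
with check_h5:
    print("number of linked runs:", check_h5.num_runs)
    print("run indices:", check_h5.run_idxs)
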
import numpy as np

from wepy.hdf5 import WepyHDF5
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.transitions import run_transition_probability_matrix
from wepy.analysis.network import MacroStateNetwork
from wepy.analysis.contig_tree import ContigTree

# Load wepy hdf5 file into python script
wepy_h5 = WepyHDF5('../outputs/results.wepy.h5', mode='r+')

run_idx = 0
assg_key = 'rand_assg_idx'
n_classifications = 50


# make random assignments
# observable function
def rand_assg(fields_d, *args, **kwargs):
    # randint's upper bound is exclusive, giving class labels
    # 0 .. n_classifications - 1 (np.random.random_integers is deprecated)
    assignments = np.random.randint(0, n_classifications,
                                    size=fields_d['weights'].shape[0])
    return assignments


with wepy_h5:
    # compute this random assignment "observable"
    wepy_h5.compute_observable(rand_assg, ['weights'],
                               save_to_hdf5=assg_key,
                               map_func=map)  # assumed: serial map, matching the other compute_observable examples

def init(self, continue_run=None,
         init_walkers=None,
         **kwargs):

    # do the inherited stuff
    super().init(**kwargs)

    # open and initialize the HDF5 file
    logging.info("Initializing HDF5 file at {}".format(self.file_path))

    self.wepy_h5 = WepyHDF5(self.file_path, mode=self.mode,
                            topology=self._tmp_topology,
                            units=self.units,
                            sparse_fields=list(self._sparse_fields.keys()),
                            feature_shapes=self._feature_shapes,
                            feature_dtypes=self._feature_dtypes,
                            n_dims=self._n_dims,
                            main_rep_idxs=self.main_rep_idxs,
                            alt_reps=self.alt_reps_idxs)

    # if we specify save fields only save these for the initial walkers
    if self.save_fields is not None:

        state_fields = list(init_walkers[0].state.dict().keys())

        # make sure all the save_fields are present in the state
        assert all([save_field in state_fields
                    for save_field in self.save_fields]), \
            "Not all specified save_fields present in walker states"

        filtered_init_walkers = []
        for walker in init_walkers:

            # make a new state by filtering the attributes of the old ones
            state_d = {k: v for k, v in walker.state.dict().items()
                       if k in self.save_fields}

            # and saving alternate representations as we would
            # expect them

            # if there are any alternate representations set them
            for alt_rep_name, alt_rep_idxs in self.alt_reps_idxs.items():

                alt_rep_path = 'alt_reps/{}'.format(alt_rep_name)

                # if the idxs are None we want all of the atoms
                if alt_rep_idxs is None:
                    state_d[alt_rep_path] = state_d['positions'][:]
                # otherwise get only the atoms we want
                else:
                    state_d[alt_rep_path] = state_d['positions'][alt_rep_idxs]

            # if the main rep is different from the full state
            # positions set that
            if self.main_rep_idxs is not None:
                state_d['positions'] = state_d['positions'][self.main_rep_idxs]

            # then make the new state
            new_state = WalkerState(**state_d)

            filtered_init_walkers.append(Walker(new_state, walker.weight))

    # otherwise save the full state
    else:
        filtered_init_walkers = init_walkers

    self.wepy_h5.set_mode(mode='r+')
    with self.wepy_h5:

        # if this is a continuation run of another run we want to
        # initialize it as such

        # initialize a new run
        run_grp = self.wepy_h5.new_run(filtered_init_walkers,
                                       continue_run=continue_run)
        self.wepy_run_idx = run_grp.attrs['run_idx']

        # initialize the run record groups using their fields
        self.wepy_h5.init_run_fields_resampling(self.wepy_run_idx, self.resampling_fields)

        # the enumeration for the values of resampling
        self.wepy_h5.init_run_fields_resampling_decision(self.wepy_run_idx, self.decision_enum)

        self.wepy_h5.init_run_fields_resampler(self.wepy_run_idx, self.resampler_fields)

        # set the fields that are records for tables etc., unless
        # they are already set
        if 'resampling' not in self.wepy_h5.record_fields:
            self.wepy_h5.init_record_fields('resampling', self.resampling_records)
        if 'resampler' not in self.wepy_h5.record_fields:
            self.wepy_h5.init_record_fields('resampler', self.resampler_records)

        # if there were no warping fields set there are no boundary
        # conditions and we don't initialize them
        if self.warping_fields is not None:

            self.wepy_h5.init_run_fields_warping(self.wepy_run_idx, self.warping_fields)
            self.wepy_h5.init_run_fields_progress(self.wepy_run_idx, self.progress_fields)
            self.wepy_h5.init_run_fields_bc(self.wepy_run_idx, self.bc_fields)

            # table records
            if 'warping' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('warping', self.warping_records)
            if 'boundary_conditions' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('boundary_conditions', self.bc_records)
            if 'progress' not in self.wepy_h5.record_fields:
                self.wepy_h5.init_record_fields('progress', self.progress_records)

    # if this was opened in a truncation mode, we don't want to
    # overwrite old runs with future calls to init(). so we
    # change the mode to read/write 'r+'
    if self.mode == 'w':
        self.set_mode(0, 'r+')

import numpy as np


def traj_field_lj_dist(traj_data):

    positions = traj_data['positions']

    # slice out positions for each LJ particle
    lj1 = positions[:, 0, :]
    lj2 = positions[:, 1, :]

    # compute distances
    distances = np.sqrt((lj1[:, 0] - lj2[:, 0])**2 +
                        (lj1[:, 1] - lj2[:, 1])**2 +
                        (lj1[:, 2] - lj2[:, 2])**2)

    return distances


if __name__ == "__main__":

    from wepy.hdf5 import WepyHDF5

    # load the HDF5 file in read/write so we can save data to the
    # observables
    wepy_hdf5_path = "../outputs/results.wepy.h5"
    wepy_h5 = WepyHDF5(wepy_hdf5_path, mode='r+')

    with wepy_h5:
        wepy_h5.compute_observable(traj_field_lj_dist, ['positions'],
                                   save_to_hdf5='rmsd',
                                   map_func=map,
                                   debug_prints=True)

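    # Reading the computed values back out (a minimal sketch, not part
    # of the original script): compute_observable with save_to_hdf5='rmsd'
    # is assumed here to store the result as a per-trajectory field under
    # 'observables/rmsd', which get_traj_field can then retrieve.
    check_h5 = WepyHDF5(wepy_hdf5_path, mode='r')
    with check_h5:
        dists = check_h5.get_traj_field(0, 0, 'observables/rmsd')
        print("distances for run 0, traj 0:", dists.shape)
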
import sys
import os.path as osp

from wepy.hdf5 import WepyHDF5
from wepy.resampling.resamplers.wexplore import WExploreResampler
# the ancestors function is assumed to come from the parent-tracing
# analysis module
from wepy.analysis.parents import ancestors

if sys.argv[1] == '-h' or sys.argv[1] == '--help':
    print("walker_lineage.py run_index walker_index output_DCD_path")
else:
    run_idx = int(sys.argv[1])
    walker_idx = int(sys.argv[2])
    dcd_path = sys.argv[3]

    outputs_dir = osp.realpath('../outputs')
    hdf5_filename = 'results.wepy.h5'
    hdf5_path = osp.join(outputs_dir, hdf5_filename)

    wepy_h5 = WepyHDF5(hdf5_path, mode='r')
    wepy_h5.open()

    cycle_idx = wepy_h5.traj(run_idx, walker_idx)['positions'].shape[0] - 1

    resampling_panel = wepy_h5.run_resampling_panel(run_idx)

    parent_panel = WExploreResampler.DECISION.parent_panel(resampling_panel)
    parent_table = WExploreResampler.DECISION.net_parent_table(parent_panel)

    lineage = ancestors(parent_table, cycle_idx, walker_idx)

    mdj_traj = wepy_h5.run_trace_to_mdtraj(run_idx, lineage)

    mdj_traj.save_dcd(dcd_path)

def traj_fields_chunk_items(wepy_h5_path, fields, run_idxs=Ellipsis, chunk_size=Ellipsis):
    """Generate items that can be used to create a dask.bag object.

    Parameters
    ----------
    wepy_h5_path : str
        The file path to the WepyHDF5 file that will be read from.

    fields : list of str
        The field names/paths for the data to be retrieved.

    run_idxs : list of int or Ellipsis
        The runs to read trajectories from. Ellipsis (the default)
        selects all runs in the file.

    chunk_size : int
        This is the size of the chunk (i.e. number of frames) that
        will be retrieved from each trajectory. This is the unit of
        data a single task will work on. Dask will also partition
        these chunks as it sees fit.

    Returns
    -------
    chunk_specs : list of dict of str : value

    """

    # open the HDF5
    try:
        wepy_h5 = WepyHDF5(wepy_h5_path, mode='r')
    except OSError:
        print("Failed to open HDF5")
        return None

    with wepy_h5:

        # choose the run idxs
        if run_idxs is not Ellipsis:
            assert all([run_idx in wepy_h5.run_idxs for run_idx in run_idxs]), \
                "run_idx not in runs"
        else:
            run_idxs = wepy_h5.run_idxs

        chunk_specs = []
        for run_idx in run_idxs:
            for traj_idx in wepy_h5.run_traj_idxs(run_idx):

                num_frames = wepy_h5.num_traj_frames(run_idx, traj_idx)

                # determine the specific frame indices in the chunks

                # if the chunk size is larger than the trajectory, or
                # chunk_size is Ellipsis, we take the whole trajectory
                if chunk_size is Ellipsis:
                    chunks = [range(num_frames)]
                elif chunk_size > num_frames:
                    chunks = [range(num_frames)]
                else:
                    # split it, allowing for unequal chunk sizes
                    chunks = np.array_split(range(num_frames),
                                            num_frames // chunk_size)

                for frame_idxs in chunks:
                    chunk_spec = {
                        'wepy_h5_path': wepy_h5_path,
                        'run_idx': run_idx,
                        'traj_idx': traj_idx,
                        'frame_idxs': frame_idxs,
                        'fields': fields,
                    }
                    chunk_specs.append(chunk_spec)

    return chunk_specs

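# A minimal usage sketch (not from the original source) showing how the
# chunk specs can feed a dask.bag pipeline: each spec becomes a bag item
# and load_chunk (defined earlier) pulls the frame data for that chunk.
# The file path, field names, and chunk size below are illustrative
# placeholders.
import dask.bag as db

chunk_specs = traj_fields_chunk_items('results.wepy.h5',
                                      ['positions', 'weights'],
                                      chunk_size=100)

bag = db.from_sequence(chunk_specs)
loaded_chunks = bag.map(load_chunk).compute()
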
def fe_calc(file_list, n_bins, k=2000):

    # NUMERATOR
    # for every cycle, do the binning and averaging for all the
    # walkers and sum up over all times.
    # This calculates G0 in eq [8] from Hummer, Szabo PNAS 2001 98, 7
    #
    # G0(z) = -1/beta ln[ (sum_t (term1 / term2)) / ( sum_t (term3 / term2)) ]
    #
    # where
    #
    # term1 = <delta(z-z_t) exp(-beta w_t)>   # specific to z and t
    # term2 = <exp(-beta w_t)>                # specific to t
    # term3 = exp[-beta u(z,t)]               # specific to z and t

    numer = np.zeros((n_bins))
    denom = np.zeros((n_bins))
    g0 = np.zeros((n_bins))
    n_g0 = np.zeros((n_bins))

    d_values = [(i + 0.5) * (d_max - d_min) / n_bins for i in range(n_bins)]

    # initialize variables
    term1 = np.zeros((n_bins, n_cycles))
    term2 = np.zeros((n_cycles))
    norm = np.zeros((n_cycles))

    n_part = 2
    n_dim = 3
    n_walkers = walkers

    positions = np.zeros((n_walkers, n_cycles, n_part, n_dim))
    work_values = np.zeros((n_walkers, n_cycles))
    weights = np.zeros((n_walkers, n_cycles))

    for index, value in enumerate(file_list):

        wepy_h5 = WepyHDF5(value, mode='r')
        wepy_h5.open()

        for j in range(n_walkers):
            positions[j] = np.array(
                wepy_h5.h5['runs/0/trajectories/' + str(j) + '/positions'])
            work_values[j] = np.array(
                wepy_h5.h5['runs/0/trajectories/' + str(j) + '/activity']).reshape((n_cycles))
            weights[j] = np.array(
                wepy_h5.h5['runs/0/trajectories/' + str(j) + '/weights']).reshape((n_cycles))

        for cycle in range(n_cycles):

            # these lists have all the distances, work values, and
            # weights for cycle i
            ds_cyc = []
            work_cyc = work_values[:, cycle]
            weight_cyc = weights[:, cycle]

            for j in range(n_walkers):
                # get distances
                p = positions[j, cycle]
                tmp = vec_dist(p[0], p[1], j, cycle)
                ds_cyc.append(tmp)

            e_mbwt = np.exp(-beta * np.array(work_cyc))

            for j, d in enumerate(ds_cyc):
                # find out which bin it's in
                bin_id = int((d - d_min) / (d_max - d_min) * n_bins)

                term1[bin_id][cycle] += weight_cyc[j] * e_mbwt[j]
                term2[cycle] += weight_cyc[j] * e_mbwt[j]
                norm[cycle] += weight_cyc[j]

        # end of loop over cycles

        # terms have been computed, add to the running sums over timepoints
        for b in range(n_bins):
            numer[b] = 0
            for cycle in range(n_cycles):
                numer[b] += term1[b][cycle] / term2[cycle]

                # Note: get_bias_value returns term3
                # need to use cycle+1 so the d0 matches the work values
                term3 = np.exp(-beta * get_bias_value(d_values[b], cycle + 1, k))
                denom[b] += term3 / (term2[cycle] / norm[cycle])

        wepy_h5.close()

    g0_no_gaps = []
    d_values_no_gaps = []
    for b in range(n_bins):
        if numer[b] > 0 and denom[b] > 0:
            g0_no_gaps.append(-np.log(numer[b] / denom[b]) / beta)
            d_values_no_gaps.append(d_values[b])

    g0_arr = np.array(g0_no_gaps)
    d_val_arr = np.array(d_values_no_gaps)

    plt.plot(d_values_no_gaps, g0_arr - g0_arr.min(), label='FES')

    return d_values_no_gaps, g0_no_gaps, g0_arr, d_val_arr

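# Illustrative call (not from the original script): fe_calc relies on
# module-level names defined elsewhere in the analysis script (beta,
# d_min, d_max, n_cycles, walkers, plus the helpers vec_dist and
# get_bias_value), so those must be set first. The file name and bin
# count here are placeholders.
d_vals, g0_vals, g0_arr, d_val_arr = fe_calc(['results.wepy.h5'], n_bins=50)
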
from pathlib import Path

import numpy as np

from wepy.hdf5 import WepyHDF5
from wepy.resampling.decisions.clone_merge import MultiCloneMergeDecision
from wepy.boundary_conditions.unbinding import UnbindingBC
from wepy.analysis.transitions import run_transition_probability_matrix
from wepy.analysis.network import MacroStateNetwork
from wepy.analysis.contig_tree import ContigTree

output_dir = Path('_output')
sim_dir = output_dir / 'we'

# Load wepy hdf5 file into python script
wepy_h5 = WepyHDF5(sim_dir / 'results.wepy.h5', mode='r+')

run_idx = 0
assg_key = 'rand_assg_idx'
n_classifications = 4

random_seed = 1
np.random.seed(random_seed)


# make random assignments
# observable function
def rand_assg(fields_d, *args, **kwargs):
    assignments = np.random.randint(0, n_classifications,
                                    size=fields_d['weights'].shape)
    return assignments

def combine_orch_wepy_hdf5s(new_orch, new_hdf5_path):
    """

    Parameters
    ----------
    new_orch :

    new_hdf5_path :

    Returns
    -------

    """

    # a key-value for the paths for each run
    hdf5_paths = {}

    # go through each run in the new orchestrator
    for run_id in new_orch.runs:

        # get the configuration used for this run
        run_config = new_orch.run_configuration(*run_id)

        # from that configuration find the WepyHDF5Reporters
        for reporter in run_config.reporters:

            if isinstance(reporter, WepyHDF5Reporter):

                # and save the path for that run
                hdf5_paths[run_id] = reporter.file_path

    # now that we have the paths (or lack of paths) for all the runs
    # we need to start linking them all together.

    # first we need a master linker HDF5 to do this with, so load a
    # template WepyHDF5
    template_wepy_h5_path = hdf5_paths[new_orch.runs[0]]
    template_wepy_h5 = WepyHDF5(template_wepy_h5_path, mode='r')

    # clone it
    with template_wepy_h5:
        master_wepy_h5 = template_wepy_h5.clone(new_hdf5_path, mode='x')

    with master_wepy_h5:

        # then link all the files to it
        run_mapping = {}
        for run_id, wepy_h5_path in hdf5_paths.items():

            # we just link the whole file then sort out the
            # continuations later since we aren't necessarily doing
            # this in a logical order
            new_run_idxs = master_wepy_h5.link_file_runs(wepy_h5_path)

            # map the hash id to the new run idx created. There
            # should only be one run in an HDF5 if we are following
            # the orchestration workflow.
            assert len(new_run_idxs) < 2, \
                "Cannot be more than 1 run per HDF5 file in orchestration workflow"

            run_mapping[run_id] = new_run_idxs[0]

        # now that they are all linked we need to add the snapshot
        # hashes identifying the runs as metadata. This is so we can
        # map the simple run indices in the HDF5 back to the
        # orchestrator-defined runs. This will be saved as metadata
        # on the run. Also:

        # We need to set the continuations correctly between the runs
        # in different files, so for each run we find the run it
        # continues in the orchestrator
        for run_id, run_idx in run_mapping.items():

            # set the run snapshot hash metadata, except if we have
            # already done it
            try:
                master_wepy_h5.set_run_start_snapshot_hash(run_idx, run_id[0])
            except AttributeError:
                # it was already set so just move on
                pass

            try:
                master_wepy_h5.set_run_end_snapshot_hash(run_idx, run_id[1])
            except AttributeError:
                # it was already set so just move on
                pass

            # find the run_id that this one continues
            continued_run_id = new_orch.run_continues(*run_id)

            # if a None is returned then there was no continuation
            if continued_run_id is None:
                # so we go to the next run_id and don't log any
                # continuation
                continue

            # get the run_idx in the HDF5 that corresponds to this run
            continued_run_idx = run_mapping[continued_run_id]

            # add the continuation
            master_wepy_h5.add_continuation(run_idx, continued_run_idx)
