def asim_households(asim_store, households_sample_size, trace_hh_id):
    """
    Load the households table from the store, optionally down-sampled,
    register it with orca and the random-number generator, and (when
    tracing) register it as a traceable table.

    Returns the resulting households dataframe.
    """
    full_households = asim_store["households"]

    if trace_hh_id and households_sample_size == 1:
        # tracing one household exclusively: slice out just that id
        # (result may be empty if the id is absent from the full store)
        households = tracing.slice_ids(full_households, trace_hh_id)
    elif 0 < households_sample_size < len(full_households.index):
        # draw the requested random sample from the full table
        households = asim_simulate.random_rows(
            full_households, households_sample_size)

        trace_hh_missed = (
            trace_hh_id
            and trace_hh_id not in households.index
            and trace_hh_id in full_households.index)
        if trace_hh_missed:
            # traced household was not sampled but exists in the full
            # store: swap it in for the first household in the sample
            print(
                "replacing household %s with %s in household sample"
                % (households.index[0], trace_hh_id))
            trace_slice = tracing.slice_ids(full_households, trace_hh_id)
            households = pd.concat([trace_slice, households[1:]])
    else:
        households = full_households

    print("loaded households %s" % (households.shape,))

    # replace the table function with the materialized dataframe
    orca.add_table('asim_households', households)

    asim_utils.get_rn_generator().add_channel(households, 'asim_households')

    if trace_hh_id:
        tracing.register_traceable_table('asim_households', households)
        tracing.trace_df(households, "asim_households", warn_if_empty=True)

    return households
def asim_persons(asim_store, households_sample_size, asim_households,
                 trace_hh_id):
    """
    Load the persons table from the store, restricted to the sampled
    households when sampling is in effect, and register it with orca
    and the random-number generator (plus tracing when enabled).

    Returns the resulting persons dataframe.
    """
    persons = asim_store["persons"]

    if households_sample_size > 0:
        # keep only persons belonging to the sampled households
        in_sample = persons.household_id.isin(asim_households.index)
        persons = persons[in_sample]

    print("loaded asim asim_persons %s" % (persons.shape,))

    # replace the table function with the materialized dataframe
    orca.add_table('asim_persons', persons)

    asim_utils.get_rn_generator().add_channel(persons, 'asim_persons')

    if trace_hh_id:
        tracing.register_traceable_table('asim_persons', persons)
        tracing.trace_df(persons, "asim_persons", warn_if_empty=True)

    return persons
def load_checkpoint(checkpoint_name):
    """
    Load dataframes and restore random number channel state from
    pipeline hdf5 file. This restores the pipeline state that existed
    at the specified checkpoint in a prior simulation. This allows us
    to resume the simulation after the specified checkpoint

    Parameters
    ----------
    checkpoint_name : str
        model_name of checkpoint to load
        (resume_after argument to start_pipeline)

    Raises
    ------
    RuntimeError
        if checkpoint_name is not found in the checkpoint table
    """
    logger.info("load_checkpoint %s" % (checkpoint_name))

    checkpoints = read_df(_CHECKPOINT_TABLE_NAME)

    try:
        # truncate rows after target checkpoint
        i = checkpoints[checkpoints[_CHECKPOINT_NAME] ==
                        checkpoint_name].index[0]
        checkpoints = checkpoints.loc[:i]
    except IndexError:
        msg = "Couldn't find checkpoint '%s' in checkpoints" % (
            checkpoint_name, )
        logger.error(msg)
        raise RuntimeError(msg)

    # convert pandas dataframe back to array of checkpoint dicts
    checkpoints = checkpoints.to_dict(orient='records')

    # drop tables with empty names
    # BUGFIX: iterate over a snapshot of the keys -- deleting entries
    # while iterating the live dict key view raises RuntimeError on
    # Python 3
    for checkpoint in checkpoints:
        for key in list(checkpoint.keys()):
            if key not in _NON_TABLE_COLUMNS and not checkpoint[key]:
                del checkpoint[key]

    # patch _CHECKPOINTS array of dicts
    del _CHECKPOINTS[:]
    _CHECKPOINTS.extend(checkpoints)

    # patch _CHECKPOINTS dict with latest checkpoint info
    _LAST_CHECKPOINT.clear()
    _LAST_CHECKPOINT.update(_CHECKPOINTS[-1])

    logger.info("load_checkpoint %s timestamp %s"
                % (checkpoint_name, _LAST_CHECKPOINT['timestamp']))

    # table names in order that tracing.register_traceable_table
    # wants us to register them
    tables = tracing.sort_for_registration(checkpointed_tables())

    for table_name in tables:
        # read dataframe from pipeline store as of the checkpoint in
        # which this table was last saved
        df = read_df(table_name,
                     checkpoint_name=_LAST_CHECKPOINT[table_name])
        logger.info("load_checkpoint table %s %s" % (table_name, df.shape))
        # register it as an orca table
        rewrap(table_name, df)
        # register for tracing
        tracing.register_traceable_table(table_name, df)

    # set random state to pickled state at end of last checkpoint
    logger.debug("resetting random state")
    _PRNG.load_channels(cPickle.loads(_LAST_CHECKPOINT[_PRNG_CHANNELS]))