コード例 #1
0
def asim_households(asim_store, households_sample_size, trace_hh_id):
    """
    Load the households table, optionally reduce it to a sample, and register it.

    Three cases are handled:

    - tracing a single household exclusively (``trace_hh_id`` set and a sample
      size of exactly 1): only the traced household is kept
    - a positive sample size smaller than the full table: a random sample is
      drawn, and the traced household (if any, and if it exists in the full
      table) is swapped in for the first sampled row when the draw missed it
    - otherwise: the full table is used unchanged

    The resulting dataframe is registered as the orca table 'asim_households',
    added as a random number channel, and (when tracing) registered for tracing.

    Parameters
    ----------
    asim_store : mapping-like
        store from which the full table is read with the key "households"
    households_sample_size : int
        requested number of households; values <= 0 mean "no sampling"
    trace_hh_id : household id or falsy
        id of the household to trace; a falsy value disables tracing

    Returns
    -------
    the households dataframe that was registered
    """

    all_households = asim_store["households"]

    if trace_hh_id and households_sample_size == 1:
        # exclusive tracing: keep just the traced household
        # (result is empty if that household is not in the full store)
        households = tracing.slice_ids(all_households, trace_hh_id)

    elif 0 < households_sample_size < len(all_households.index):
        # the store holds more households than requested: draw a random sample
        households = asim_simulate.random_rows(
            all_households, households_sample_size)

        # the traced household exists in the full store but the draw missed it
        sample_missed_trace_hh = (
            trace_hh_id
            and trace_hh_id not in households.index
            and trace_hh_id in all_households.index
        )

        if sample_missed_trace_hh:
            # swap the first sampled household for the traced one,
            # which keeps the sample size unchanged
            message = "replacing household %s with %s in household sample" % (
                households.index[0], trace_hh_id)
            print(message)
            traced_household = tracing.slice_ids(all_households, trace_hh_id)
            households = pd.concat([traced_household, households[1:]])

    else:
        # no sampling requested, or the store is no larger than the sample
        households = all_households

    print("loaded households %s" % (households.shape,))

    # replace table function with dataframe
    orca.add_table('asim_households', households)

    rn_generator = asim_utils.get_rn_generator()
    rn_generator.add_channel(households, 'asim_households')

    if trace_hh_id:
        tracing.register_traceable_table('asim_households', households)
        tracing.trace_df(households, "asim_households", warn_if_empty=True)

    return households
コード例 #2
0
def asim_persons(asim_store, households_sample_size, asim_households,
                 trace_hh_id):
    """
    Load the persons table, restrict it to the loaded households, and register it.

    When a positive household sample size was requested, only persons whose
    ``household_id`` appears in the index of ``asim_households`` are kept.
    The resulting dataframe is registered as the orca table 'asim_persons',
    added as a random number channel, and (when tracing) registered for tracing.

    Parameters
    ----------
    asim_store : mapping-like
        store from which the full table is read with the key "persons"
    households_sample_size : int
        requested number of households; values <= 0 mean "no filtering"
    asim_households : table-like
        households table whose index holds the household ids to keep
    trace_hh_id : household id or falsy
        a falsy value disables tracing

    Returns
    -------
    the persons dataframe that was registered
    """

    persons = asim_store["persons"]

    if households_sample_size > 0:
        # households were sampled: keep every person living in a sampled household
        in_sampled_household = persons.household_id.isin(asim_households.index)
        persons = persons[in_sampled_household]

    print("loaded asim asim_persons %s" % (persons.shape,))

    # replace table function with dataframe
    orca.add_table('asim_persons', persons)

    rn_generator = asim_utils.get_rn_generator()
    rn_generator.add_channel(persons, 'asim_persons')

    if trace_hh_id:
        tracing.register_traceable_table('asim_persons', persons)
        tracing.trace_df(persons, "asim_persons", warn_if_empty=True)

    return persons
コード例 #3
0
def load_checkpoint(checkpoint_name):
    """
    Load dataframes and restore random number channel state from pipeline hdf5 file.
    This restores the pipeline state that existed at the specified checkpoint in a prior simulation.
    This allows us to resume the simulation after the specified checkpoint

    Side effects: replaces the contents of the module-level _CHECKPOINTS list and
    _LAST_CHECKPOINT dict, re-registers every checkpointed table (orca + tracing),
    and reloads the random number channels into _PRNG.

    Parameters
    ----------
    checkpoint_name : str
        model_name of checkpoint to load (resume_after argument to start_pipeline)

    Raises
    ------
    RuntimeError
        if no row of the checkpoints table is named checkpoint_name
    """

    logger.info("load_checkpoint %s", checkpoint_name)

    checkpoints = read_df(_CHECKPOINT_TABLE_NAME)

    # index label of the first row whose name matches the requested checkpoint
    matching = checkpoints[checkpoints[_CHECKPOINT_NAME] == checkpoint_name]
    try:
        # .index[0] raises IndexError when there is no matching row
        i = matching.index[0]
    except IndexError:
        msg = "Couldn't find checkpoint '%s' in checkpoints" % (
            checkpoint_name, )
        logger.error(msg)
        raise RuntimeError(msg)

    # truncate rows after target checkpoint (.loc label slicing includes row i)
    checkpoints = checkpoints.loc[:i]

    # convert pandas dataframe back to array of checkpoint dicts
    checkpoints = checkpoints.to_dict(orient='records')

    # drop tables with empty names
    for checkpoint in checkpoints:
        # iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live keys() view raises RuntimeError on Python 3
        for key in list(checkpoint.keys()):
            if key not in _NON_TABLE_COLUMNS and not checkpoint[key]:
                del checkpoint[key]

    # patch _CHECKPOINTS array of dicts
    # (mutated in place so existing references to the list stay valid)
    del _CHECKPOINTS[:]
    _CHECKPOINTS.extend(checkpoints)

    # patch _LAST_CHECKPOINT dict with latest checkpoint info
    _LAST_CHECKPOINT.clear()
    _LAST_CHECKPOINT.update(_CHECKPOINTS[-1])

    logger.info("load_checkpoint %s timestamp %s",
                checkpoint_name, _LAST_CHECKPOINT['timestamp'])

    # table names in order that tracing.register_traceable_table wants us to register them
    tables = tracing.sort_for_registration(checkpointed_tables())

    for table_name in tables:
        # read dataframe from pipeline store
        # (_LAST_CHECKPOINT[table_name] names the checkpoint to read the table from)
        df = read_df(table_name, checkpoint_name=_LAST_CHECKPOINT[table_name])
        logger.info("load_checkpoint table %s %s", table_name, df.shape)
        # register it as an orca table
        rewrap(table_name, df)
        # register for tracing
        tracing.register_traceable_table(table_name, df)

    # set random state to pickled state at end of last checkpoint
    logger.debug("resetting random state")
    # NOTE(review): unpickling executes arbitrary code if the pipeline file is
    # tampered with - only resume from pipeline files produced by a trusted run
    _PRNG.load_channels(cPickle.loads(_LAST_CHECKPOINT[_PRNG_CHANNELS]))