예제 #1
0
def setup_hmm(rg, EM_params):
    """Assemble an ``hmmconf.HMMConf`` model from ``rg``.

    Steps, in order:

    1. Convert ``rg`` to an undirected graph plus a node -> index map.
    2. Compute the start probabilities (invisible transitions still
       present), add a small epsilon to every entry and normalize
       along axis 1.
    3. Remove every invisible transition (``name is None``) from ``rg``.
    4. Derive the distance matrix, observation maps, transition cube,
       emission matrix and conformance matrix, then build the model.

    Args:
        rg: Graph object exposing ``transitions``; each transition has
            ``name``, ``from_state.outgoing`` and ``to_state.incoming``.
            NOTE: it is mutated in place -- invisible transitions are
            removed.
        EM_params: Mapping indexed by the module constants ``N_JOBS``,
            ``TOL``, ``N_ITER`` and ``RANDOM_SEED_PARAM``.

    Returns:
        The constructed ``hmmconf.HMMConf`` instance.
    """
    def is_inv(transition):
        # A transition without a name is treated as invisible.
        return transition.name is None

    G, node_map = hmmconf.rg_to_nx_undirected(rg, map_nodes=True)
    n_states = len(node_map)

    # Start probabilities are computed while invisible transitions are
    # still part of rg; every state then receives epsilon mass before
    # the normalization call.
    startprob = hmmconf.compute_startprob(rg, node_map, n_states, is_inv)
    startprob += 1e-5
    hmmconf.utils.normalize(startprob, axis=1)

    # Collect first, then detach: the transition container is not
    # modified while it is being iterated.
    invisible = [trans for trans in rg.transitions if is_inv(trans)]
    for trans in invisible:
        rg.transitions.remove(trans)
        trans.from_state.outgoing.remove(trans)
        trans.to_state.incoming.remove(trans)

    # G was built before the removal above, so the distances are based
    # on the graph as it was at conversion time.
    distmat = hmmconf.compute_distance_matrix(
        G, node_map, as_dataframe=True
    ).values

    # Remaining transition names are converted with int() to serve as
    # observation ids.
    obsmap = {trans.name: int(trans.name) for trans in rg.transitions}
    int2state = {index: state for state, index in node_map.items()}
    int2obs = {obs_id: obs_name for obs_name, obs_id in obsmap.items()}
    n_obs = len(obsmap)

    logger.info('No. of states: {}'.format(n_states))

    transcube = hmmconf.compute_state_trans_cube(
        rg, node_map, obsmap, n_obs, n_states
    )
    emitmat = hmmconf.compute_emission_mat(
        rg, node_map, obsmap, n_obs, n_states
    )
    confmat = hmmconf.compute_conformance_mat(emitmat)
    conform_f = hmmconf.conform

    # Keyword options for the model; the EM settings come from EM_params.
    model_options = {
        'params': 'to',
        'verbose': True,
        'n_jobs': EM_params[N_JOBS],
        'tol': EM_params[TOL],
        'n_iter': EM_params[N_ITER],
        'random_seed': EM_params[RANDOM_SEED_PARAM],
    }
    return hmmconf.HMMConf(
        conform_f, startprob, transcube, emitmat, confmat, distmat,
        int2state, int2obs, n_states, n_obs, **model_options
    )
예제 #2
0
    time_dict[TIME_CONFORM] = took_conform
    time_dict[TIME_N_CASES] = filtered_event_df["caseid"].unique().shape[0]
    time_dict[TIME_N_EVENTS] = filtered_event_df.shape[0]
    logger.info(
        f"Estimating conforming distribution params took: {took_conform:.3f}s")
    transcube, emitmat, conforming_caseids = params
    hmmconf_params = {
        'params': EXPERIMENT_CONFIGS[EM_PARAMS],
        'conf_tol': EXPERIMENT_CONFIGS[CONF_TOL],
        'n_iter': EXPERIMENT_CONFIGS[N_ITER],
        'tol': EXPERIMENT_CONFIGS[TOL],
        'verbose': True,
        'n_procs': EXPERIMENT_CONFIGS[N_JOBS],
        'random_seed': EXPERIMENT_CONFIGS[RANDOM_SEED_PARAM]
    }
    hmm = hmmconf.HMMConf(startprob, transcube, emitmat, confmat, int2state,
                          int2obs, **hmmconf_params)

    int2state_list = list(int2state.items())
    stateid_list, state_list = zip(*int2state_list)
    columns = ['state_id', 'state']
    state_id_df = pd.DataFrame({'state_id': stateid_list, 'state': state_list})
    logger.info(f"State id df: \n{state_id_df}")

    n_caseids = filtered_event_df["caseid"].unique().shape[0]
    filter_by_conforming_caseids = filtered_event_df["caseid"].isin(
        conforming_caseids)
    filtered_train_event_df = filtered_event_df.loc[
        ~filter_by_conforming_caseids, :]
    n_caseids_train = filtered_train_event_df["caseid"].unique().shape[0]
    logger.info(
        f"Filtered train event df shape: {filtered_train_event_df.shape}")