def setup_hmm(rg, EM_params):
    """Assemble an ``hmmconf.HMMConf`` model from ``rg``.

    The function derives every model ingredient (start probabilities,
    state-transition cube, emission matrix, conformance matrix and state
    distance matrix) through the ``hmmconf`` helpers and hands them to the
    ``HMMConf`` constructor.

    Side effect: ``rg`` is modified in place. Every transition whose
    ``name`` is ``None`` is removed from ``rg.transitions`` as well as from
    the ``outgoing`` / ``incoming`` collections of its end states.

    Args:
        rg: Object accepted by ``hmmconf.rg_to_nx_undirected``; presumably a
            reachability graph (TODO confirm). It must expose
            ``transitions``, and each transition must expose ``name``,
            ``from_state`` and ``to_state``. Names of the remaining
            transitions must be convertible with ``int()``.
        EM_params: Mapping indexed with the module constants ``N_JOBS``,
            ``TOL``, ``N_ITER`` and ``RANDOM_SEED_PARAM``.

    Returns:
        The constructed ``hmmconf.HMMConf`` instance.
    """
    def _is_invisible(transition):
        # A transition without a name is the one treated as invisible here.
        return transition.name is None

    graph, node_map = hmmconf.rg_to_nx_undirected(rg, map_nodes=True)
    n_states = len(node_map)

    # The start probabilities are computed while the invisible transitions
    # are still part of rg; they are only stripped further below.
    startprob = hmmconf.compute_startprob(
        rg, node_map, n_states, _is_invisible)
    # Give every state a small epsilon of mass.
    startprob += 1e-5
    # Called for its effect on startprob; the return value is not used.
    hmmconf.utils.normalize(startprob, axis=1)

    # Snapshot the invisible transitions first so rg.transitions is not
    # mutated while it is being iterated.
    invisible = [t for t in rg.transitions if _is_invisible(t)]
    for transition in invisible:
        rg.transitions.remove(transition)
        transition.from_state.outgoing.remove(transition)
        transition.to_state.incoming.remove(transition)

    distmat = hmmconf.compute_distance_matrix(
        graph, node_map, as_dataframe=True).values

    # Observation label -> integer id; labels are the transition names,
    # which are themselves parsed as integers.
    obsmap = {
        transition.name: int(transition.name)
        for transition in rg.transitions
    }
    # Reverse lookups: integer id -> state / observation label.
    int2state = {index: state for state, index in node_map.items()}
    int2obs = {index: label for label, index in obsmap.items()}
    n_obs = len(obsmap)

    logger.info('No. of states: {}'.format(n_states))

    transcube = hmmconf.compute_state_trans_cube(
        rg, node_map, obsmap, n_obs, n_states)
    emitmat = hmmconf.compute_emission_mat(
        rg, node_map, obsmap, n_obs, n_states)
    confmat = hmmconf.compute_conformance_mat(emitmat)

    return hmmconf.HMMConf(
        hmmconf.conform,
        startprob,
        transcube,
        emitmat,
        confmat,
        distmat,
        int2state,
        int2obs,
        n_states,
        n_obs,
        params='to',
        verbose=True,
        n_jobs=EM_params[N_JOBS],
        tol=EM_params[TOL],
        n_iter=EM_params[N_ITER],
        random_seed=EM_params[RANDOM_SEED_PARAM],
    )
# NOTE(review): this is a fragment of a larger routine; took_conform, params,
# filtered_event_df, startprob, confmat, int2state and int2obs are defined
# before the visible portion.

# Record how long the conforming-parameter estimation took, together with
# the number of distinct case ids and the number of rows in the event frame.
time_dict[TIME_CONFORM] = took_conform
time_dict[TIME_N_CASES] = filtered_event_df["caseid"].unique().shape[0]
time_dict[TIME_N_EVENTS] = filtered_event_df.shape[0]
logger.info(
    f"Estimating conforming distribution params took: {took_conform:.3f}s")

# params is unpacked into the two model arrays and a collection of case ids
# (presumably the cases judged conforming -- TODO confirm against producer).
transcube, emitmat, conforming_caseids = params

# Keyword arguments for HMMConf, all read from EXPERIMENT_CONFIGS except
# verbose, which is fixed to True.
hmmconf_params = {
    'params': EXPERIMENT_CONFIGS[EM_PARAMS],
    'conf_tol': EXPERIMENT_CONFIGS[CONF_TOL],
    'n_iter': EXPERIMENT_CONFIGS[N_ITER],
    'tol': EXPERIMENT_CONFIGS[TOL],
    'verbose': True,
    'n_procs': EXPERIMENT_CONFIGS[N_JOBS],
    'random_seed': EXPERIMENT_CONFIGS[RANDOM_SEED_PARAM]
}
hmm = hmmconf.HMMConf(startprob, transcube, emitmat, confmat, int2state,
                      int2obs, **hmmconf_params)

# Log the id -> state mapping as a two-column table.
int2state_list = list(int2state.items())
stateid_list, state_list = zip(*int2state_list)
# NOTE(review): columns is not used in the visible portion; the DataFrame
# below spells out its column keys itself.
columns = ['state_id', 'state']
state_id_df = pd.DataFrame({'state_id': stateid_list, 'state': state_list})
logger.info(f"State id df: \n{state_id_df}")

# NOTE(review): n_caseids and n_caseids_train are not read in the visible
# portion; they may be used further down.
n_caseids = filtered_event_df["caseid"].unique().shape[0]
# Boolean mask: True for rows whose case id is in conforming_caseids.
filter_by_conforming_caseids = filtered_event_df["caseid"].isin(
    conforming_caseids)
# The mask is negated, so the training frame keeps only the rows whose case
# id is NOT in conforming_caseids.
# NOTE(review): confirm that excluding those cases from training is intended.
filtered_train_event_df = filtered_event_df.loc[
    ~filter_by_conforming_caseids, :]
n_caseids_train = filtered_train_event_df["caseid"].unique().shape[0]
logger.info(
    f"Filtered train event df shape: {filtered_train_event_df.shape}")