# Number of distinct case ids in the filtered event log.
n_caseids = filtered_event_df["caseid"].unique().shape[0]

# Keep only events whose caseid is NOT in conforming_caseids; the model is
# fitted on these remaining ("non-conforming") cases.
filter_by_conforming_caseids = filtered_event_df["caseid"].isin(
    conforming_caseids)
filtered_train_event_df = filtered_event_df.loc[
    ~filter_by_conforming_caseids, :]
n_caseids_train = filtered_train_event_df["caseid"].unique().shape[0]

logger.info(
    f"Filtered train event df shape: {filtered_train_event_df.shape}")
logger.info(
    f'Fitting with {n_caseids_train}/{n_caseids} non-conforming cases')

# Convert the training events into the (observations, lengths) pair that
# is handed to hmm.fit below.
train_X, train_lengths = event_df_to_hmm_format(filtered_train_event_df)

logger.info("Starting training...")
# NOTE: the fit timer also covers construction of the tracker.
start_fit = time.time()
tracker = hmmconf.ConformanceTracker(
    hmm, max_n_case=EXPERIMENT_CONFIGS[MAX_N_CASE])
tracker.hmm.fit(train_X, train_lengths)
took_fit = time.time() - start_fit

time_dict[TIME_FIT] = took_fit
time_dict[TIME_N_TRAIN_CASES] = n_caseids_train
time_dict[TIME_N_TRAIN_EVENTS] = filtered_train_event_df.shape[0]

# Round to whole seconds once, then split with divmod. Formatting
# `took / 60` with ".0f" rounds the minutes (100s -> "2 mins 40 secs")
# and formatting `took % 60` with ".0f" can print "60 secs".
fit_mins, fit_secs = divmod(round(took_fit), 60)
info_msg = f"Training using {n_caseids_train} cases took: {took_fit:.3f}s"
info_msg += f" ({fit_mins} mins {fit_secs} secs)"
logger.info(info_msg)

# start_all is set outside this fragment; took_all is measured from it.
took_all = time.time() - start_all
all_mins, all_secs = divmod(round(took_all), 60)
logger.info(f"Took: {all_mins} mins {all_secs} secs")
time_dict[TIME_ALL] = took_all

time_fp = os.path.join(results_dir, "time_results.csv")
# index=[0] yields a one-row frame; presumably every time_dict value is a
# scalar, in which case pandas requires an explicit index -- TODO confirm.
time_df = pd.DataFrame(time_dict, index=[0])
def make_conformance_tracker(hmm, *, max_n_case=100000):
    """Build an ``hmmconf.ConformanceTracker`` around *hmm*.

    Args:
        hmm: Model object forwarded unchanged as the tracker's first
            argument.
        max_n_case: Value forwarded as the tracker's ``max_n_case``
            keyword. Defaults to 100000, the value that was previously
            hard-coded, so existing callers are unaffected.
            NOTE(review): presumably the cap on simultaneously tracked
            cases -- confirm against hmmconf.

    Returns:
        The newly constructed ``hmmconf.ConformanceTracker``.
    """
    return hmmconf.ConformanceTracker(hmm, max_n_case=max_n_case)