def setup_data(model_path: Path, log_path: Path):
    """Prepare the model graph, event log and settings for a model/log pair.

    A ``Configuration`` is created from both paths (each coerced to ``Path``)
    and its derived fields are filled in. The log is then read with
    ``LogReader`` and the model is parsed with ``BPMNGraph.from_bpmn_path``.

    Returns:
        A ``(graph, log, settings)`` tuple.
    """
    settings = Configuration(
        model_path=Path(model_path),
        log_path=Path(log_path),
    )
    settings.fill_in_derived_fields()

    # The reader and the graph parser receive the paths exactly as passed in.
    event_log = LogReader(log_path)
    bpmn_graph = BPMNGraph.from_bpmn_path(model_path)

    return bpmn_graph, event_log, settings
def simulate(settings: Configuration, log_data, evaluate_fn: Callable = None):
    """Run the configured simulator repeatedly and evaluate the simulated logs.

    Args:
        settings: Simulation configuration. A plain ``dict`` is also accepted
            and converted with ``Configuration(**settings)``.
        log_data: Reference event log. Its ``caseid`` attribute must support
            ``.unique()``; the number of unique values is used as the number
            of cases to simulate.
        evaluate_fn: Callable applied to each ``(settings, log_data, sim_log)``
            tuple; every call must return an iterable. Defaults to
            ``evaluate_logs``.

    Returns:
        A flat list with the evaluation results of all repetitions chained
        together.

    Raises:
        NotImplementedError: If the BIMP simulator is requested.
        ValueError: If the configured simulator kind is not recognised.
    """
    if evaluate_fn is None:
        evaluate_fn = evaluate_logs
    if isinstance(settings, dict):
        settings = Configuration(**settings)

    # Simulator choice based on configuration
    if settings.simulator is SimulatorKind.BIMP:
        raise NotImplementedError('BIMP simulator is not implemented')
    elif settings.simulator is SimulatorKind.CUSTOM:
        simulate_fn = diffresbp_simulator
        settings.read_options.column_names = {
            'CaseID': 'caseid',
            'Activity': 'task',
            'EnableTimestamp': 'enabled_timestamp',
            'StartTimestamp': 'start_timestamp',
            'EndTimestamp': 'end_timestamp',
            'Resource': 'user',
        }
    else:
        raise ValueError(f'Unknown simulator {settings.simulator}')

    # Number of cases to simulate
    n_cases = len(log_data.caseid.unique())
    settings.simulation_cases = n_cases

    # Never start more workers than there are repetitions or CPUs.
    reps = settings.repetitions
    worker_count = min(reps, multiprocessing.cpu_count())
    pool = multiprocessing.Pool(processes=worker_count)
    try:
        # Simulate
        # NOTE(review): the result of this map is never `.get()`-ed, so worker
        # exceptions are not re-raised here — confirm progress_bar_async
        # surfaces them.
        sim_args = [(settings, rep) for rep in range(reps)]
        pending = pool.map_async(simulate_fn, sim_args)
        progress_bar_async(pending, 'simulating', reps)

        # Read simulated logs
        pending = pool.map_async(_read_stats_alt, sim_args)
        progress_bar_async(pending, 'reading simulated logs', reps)

        # Evaluate
        eval_args = [(settings, log_data, log) for log in pending.get()]
        if n_cases > 1000:
            # Large logs are evaluated sequentially in this process; the pool
            # is no longer needed, so stop accepting work right away.
            pool.close()
            results = [
                evaluate_fn(arg)
                for arg in tqdm(eval_args, 'evaluating results')
            ]
        else:
            pending = pool.map_async(evaluate_fn, eval_args)
            progress_bar_async(pending, 'evaluating results', reps)
            results = pending.get()

        return list(itertools.chain.from_iterable(results))
    except BaseException:
        # Do not wait for queued work once something has gone wrong.
        pool.terminate()
        raise
    finally:
        # close() is a no-op if the pool was already closed or terminated;
        # join() reaps the worker processes so none are leaked.
        pool.close()
        pool.join()
def discover(ctx, config_path):
    """Run the discoverer with the configuration file at ``config_path``.

    The file contents are updated with ``ctx.params`` before being turned
    into a ``Configuration``, so ``ctx.params`` entries win on key clashes.
    """
    # Store the project-directory-anchored location of the configuration file.
    ctx.params['config_path'] = get_project_dir().joinpath(config_path)

    # NOTE(review): the file is read through the path as given, not through
    # the joined path stored above — confirm that this is intended.
    options = config_data_from_file(config_path)
    options.update(ctx.params)

    configuration = Configuration(**options)
    configuration.fill_in_derived_fields()

    Discoverer(configuration).run()
def test_AndPriorORemove_default(entry_point):
    """Check ``and_prior`` and ``or_rep`` built from the ``strc`` config section.

    Both are expected to equal ``[AndPriorORemove.FALSE]`` for
    ``optimize_debug_config_2.yml``.
    """
    raw_config = config_data_from_file(
        entry_point / 'optimize_debug_config_2.yml'
    )
    assert raw_config['strc'] is not None

    strc_settings = Configuration(**raw_config['strc'])
    expected = [AndPriorORemove.FALSE]
    assert strc_settings.and_prior == expected
    assert strc_settings.or_rep == expected
def test_remove_outliers(args):
    """Check the columns of the result of ``remove_outliers`` for each log.

    ``args`` is an iterable of mappings, each providing a ``log_path`` entry.
    For every log the result must not be ``None``, must contain ``caseid``
    and must not contain ``duration_seconds``.
    """
    for arg in args:
        # The previously constructed (and never used) Configuration instance
        # was dropped: nothing in this test reads it.
        log_path = arg['log_path']
        log = LogReader(log_path)
        print(f'Running test for {log_path}')

        result = remove_outliers(log.df)

        assert result is not None
        assert 'caseid' in result.keys()
        assert 'duration_seconds' not in result.keys()
def test_splitminer(entry_point, arg, tmp_path):
    """Export a log to XES under ``tmp_path`` and invoke the SM3 miner.

    The log is read from ``entry_point / arg['log_path']`` and written as
    ``<project_name>.xes`` into ``tmp_path``. ``StructureMiner._sm3_miner`` is
    then called with the original log path and the prepared configuration.
    """
    log_path = entry_point / arg['log_path']
    read_options = arg['read_options']

    event_log = LogReader(log_path)
    assert len(event_log.data) != 0

    settings = Configuration()
    # Project name is the file name up to its first dot.
    settings.project_name = os.path.basename(log_path).split('.')[0]
    settings.read_options = read_options
    settings.output = tmp_path

    xes_path = settings.output / f'{settings.project_name}.xes'
    write_xes(event_log, xes_path)
    print(tmp_path.absolute())

    # Miner parameters used for this run.
    miner_parameters = (
        ('epsilon', 0.5),
        ('eta', 0.5),
        ('and_prior', AndPriorORemove.FALSE),
        ('or_rep', AndPriorORemove.FALSE),
    )
    for field_name, field_value in miner_parameters:
        setattr(settings, field_name, field_value)

    # TODO: make _sm3_miner to return exit code
    exit_code = StructureMiner._sm3_miner(log_path, settings)
def test_diffresbp_simulator(entry_point, arg):
    """Run ``diffresbp_simulator`` once and check that the JSON file exists.

    The configuration points its output at the folder containing the model
    given by ``arg['qbp_path']``. After the run, a ``.json`` file with the
    same stem as the model must exist next to it. The ``sim_data`` directory
    under the output folder is removed afterwards, including when the
    simulator raises or the assertion fails, so a failed run does not leave
    stale data behind.
    """
    qbp_path = entry_point / arg['qbp_path']

    config = Configuration()
    config.output = qbp_path.parent
    config.project_name = os.path.splitext(qbp_path.name)[0]
    config.repetitions = 1
    config.simulation_cases = get_number_of_cases(qbp_path)

    try:
        diffresbp_simulator((config, config.repetitions))

        json_path = qbp_path.with_suffix('.json')
        assert json_path.exists()
    finally:
        output_dir = config.output / 'sim_data'
        # Only remove what exists, so a missing directory cannot mask the
        # original failure with a FileNotFoundError.
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
def optimize(ctx, config_path):
    """Run the optimizer with global, structure and time configurations.

    The configuration file is read and updated with ``ctx.params``. Its
    ``strc`` and ``tm`` sections are removed from the data; what remains is
    the global configuration. Each section is then updated with the global
    values before being turned into its own ``Configuration``. Model discovery
    is requested when the global configuration has no ``model_path``.
    """
    # Store the project-directory-anchored location of the configuration file.
    ctx.params['config_path'] = get_project_dir().joinpath(config_path)

    shared_options = config_data_from_file(config_path)
    shared_options.update(ctx.params)
    structure_options = shared_options.pop('strc')
    time_options = shared_options.pop('tm')

    def _build_configuration(options):
        # Create a Configuration and fill in its derived fields.
        built = Configuration(**options)
        built.fill_in_derived_fields()
        return built

    global_config = _build_configuration(shared_options)

    # Global values are written over the section's own entries.
    structure_options.update(shared_options)
    structure_optimizer_config = _build_configuration(structure_options)

    time_options.update(shared_options)
    time_optimizer_config = _build_configuration(time_options)

    optimizer = Optimizer({
        'gl': global_config,
        'strc': structure_optimizer_config,
        'tm': time_optimizer_config,
    })
    optimizer.run(discover_model=global_config.model_path is None)