# Imports assumed from the april package layout used throughout these notebooks.
from april.dataset import Dataset
from april.fs import get_event_log_files


def preprocess():
    # Export every event log as a CSV data/label pair for downstream use.
    for log in get_event_log_files():
        print(log.name)
        data = Dataset(log.name)
        data.event_log.save_csv("../Data/Nolle_Data/" + log.name + "_data.csv")
        # Binary labels: 0 = normal case, 1 = anomalous case.
        with open("../Data/Nolle_Data/" + log.name + "_labels.csv", "w") as fout:
            for label in data.text_labels:
                if label == "Normal":
                    fout.write("0\n")
                else:
                    fout.write("1\n")
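# A minimal consumption sketch (not part of the original pipeline): how the CSV
# pairs written by preprocess() could be read back. The pandas usage and the
# load_preprocessed name are assumptions for illustration only.
import pandas as pd


def load_preprocessed(log_name):
    data = pd.read_csv("../Data/Nolle_Data/" + log_name + "_data.csv")
    labels = pd.read_csv("../Data/Nolle_Data/" + log_name + "_labels.csv", header=None)[0]
    return data, labels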
    'bpic13-0.0-2.json.gz',
    'bpic13-0.0-3.json.gz',
    'bpic15-0.0-1.json.gz',
    'bpic15-0.0-2.json.gz',
    'bpic15-0.0-3.json.gz',
    'bpic15-0.0-4.json.gz',
    'bpic15-0.0-5.json.gz',
    'bpic12-0.0-0.json.gz',
    'bpic17-0.0-1.json.gz',
    'bpic17-0.0-2.json.gz'
]

np.random.seed(0)  # This will ensure reproducibility
ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]
combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = EventLog.from_json(event_log_path)
    anomalies = [
        ReplaceAnomaly(max_replacements=1),
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2)
    ]
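# Hedged sketch of the application step that follows a list like the one above:
# each case is corrupted with probability p. The cases attribute and the
# apply_to_case method are assumptions here, not confirmed april API.
import numpy as np


def inject(event_log, anomalies, p):
    for case in event_log.cases:
        if np.random.uniform() <= p:
            anomaly = np.random.choice(anomalies)
            anomaly.apply_to_case(case)  # assumed mutating call on one case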
# Continuation of the training helper (fit_and_save, used below): persist the
# training metadata, then clean up. The session.add wrapper is reconstructed
# from the trailing parentheses and the commit that follows.
session.add(Model(creation_date=end_time.datetime,
                  algorithm=ad.name,
                  training_duration=training_time,
                  file_name=model_file.file,
                  training_event_log_id=EventLog.get_id_by_name(dataset_name),
                  training_host=socket.gethostname(),
                  hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
session.commit()
session.close()

if isinstance(ad, NNAnomalyDetector):
    # Free Keras' global graph/session between successive trainings.
    from keras.backend import clear_session
    clear_session()

datasets = sorted([e.name for e in get_event_log_files() if e.p == 0.01])
print(datasets)

ads = [
    dict(ad=OneClassSVM),
    dict(ad=NaiveAnomalyDetector),
    dict(ad=SamplingAnomalyDetector),
    dict(ad=DAE, fit_kwargs=dict(epochs=50, batch_size=500)),
    dict(ad=BINetv1, fit_kwargs=dict(epochs=20, batch_size=500))
]
for ad in ads:
    for d in tqdm(datasets, desc=ad['ad'].name):
        fit_and_save(d, **ad)

#######################################################################################################

import itertools
from multiprocessing.pool import Pool
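# Sketch only: the Pool import above suggests the per-(log, p) work below could be
# parallelized. The worker name and body are hypothetical, not the original code.
def _inject_one(args):
    event_log_path, p = args
    ...  # build the anomaly list and corrupt one event log, as in the loop below

# with Pool() as pool:
#     pool.map(_inject_one, combinations)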
json_files = [
    'largelog-0.0-0.json.gz',
    'smalllog-0.0-0.json.gz'
]

# xes_files is defined in a preceding cell; each XES log is converted to JSON.
for xes_file, json_file in tqdm(list(zip(xes_files, json_files))):
    event_log = EventLog.from_xes(os.path.join(BPIC_DIR, xes_file))
    event_log.save_json(os.path.join(EVENTLOG_DIR, json_file))

# Add anomalies: ten independently seeded rounds, one injected variant per seed.
for k in range(10):
    np.random.seed(k)  # This will ensure reproducibility
    ps = [0.3]
    event_log_paths = [
        e.path for e in get_event_log_files(EVENTLOG_DIR)
        if 'log' in e.name and e.p == 0.0
    ]
    combinations = list(itertools.product(event_log_paths, ps))
    for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
        event_log_file = EventLogFile(event_log_path)
        event_log = EventLog.from_json(event_log_path)
        anomalies = [
            ReplaceAnomaly(max_replacements=1)
        ]
        for anomaly in anomalies:
            # This is necessary to initialize the likelihood graph correctly
            anomaly.activities = event_log.unique_activities
            # anomaly.attributes = [CategoricalAttributeGenerator(name=name, values=values)
            #                       for name, values in event_log.unique_attribute_values.items()
            #                       if name != 'name']
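# Why the per-round seed matters: numpy's global RNG drives the sampling above,
# so seeding each round makes every injected variant reproducible on its own.
# Self-contained illustration with plain numpy:
import numpy as np

np.random.seed(3)
a = np.random.uniform(size=4)
np.random.seed(3)
b = np.random.uniform(size=4)
assert (a == b).all()  # same seed, identical draws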
process_models = [
    m for m in get_process_model_files()
    if 'testing' not in m and 'paper' not in m
]
for process_model in tqdm(process_models, desc='Generate'):
    generate_for_process_model(process_model, size=5000, anomalies=anomalies,
                               num_attr=[1, 2, 3, 4], seed=1337)

## Add Anomalies
np.random.seed(0)  # This will ensure reproducibility
ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]
combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = april.processmining.log.EventLog.from_json(event_log_path)
    anomalies = [
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2),
    ]