Example #1
def preprocess():
    for log in get_event_log_files():
        print(log.name)
        data = Dataset(log.name)

        data.event_log.save_csv("../Data/Nolle_Data/" + log.name + "_data.csv")
        with open("../Data/Nolle_Data/" + log.name + "_labels.csv",
                  "w") as fout:
            for label in data.text_labels:
                if label == "Normal":
                    fout.write("0\n")
                else:
                    fout.write("1\n")
Example #2
    'bpic13-0.0-2.json.gz',
    'bpic13-0.0-3.json.gz',
    'bpic15-0.0-1.json.gz',
    'bpic15-0.0-2.json.gz',
    'bpic15-0.0-3.json.gz',
    'bpic15-0.0-4.json.gz',
    'bpic15-0.0-5.json.gz',
    'bpic12-0.0-0.json.gz',
    'bpic17-0.0-1.json.gz',
    'bpic17-0.0-2.json.gz'
]

np.random.seed(0)  # This will ensure reproducibility
ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]

combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = EventLog.from_json(event_log_path)

    anomalies = [
        ReplaceAnomaly(max_replacements=1),
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2)
    ]
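The listing is cut off at this point; as Example #4 further down shows, the loop typically continues by initializing each anomaly with the activities observed in the loaded log before injecting it. A minimal continuation sketch along those lines (mirroring Example #4, not taken from this excerpt):

    for anomaly in anomalies:
        # This is necessary to initialize the likelihood graph correctly
        anomaly.activities = event_log.unique_activities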
Example #3
        Model(creation_date=end_time.datetime,
              algorithm=ad.name,
              training_duration=training_time,
              file_name=model_file.file,
              training_event_log_id=EventLog.get_id_by_name(dataset_name),
              training_host=socket.gethostname(),
              hyperparameters=str(dict(**ad_kwargs, **fit_kwargs))))
    session.commit()
    session.close()

    if isinstance(ad, NNAnomalyDetector):
        from keras.backend import clear_session
        clear_session()


datasets = sorted([e.name for e in get_event_log_files() if e.p == 0.01])
print(datasets)
ads = [
    dict(ad=OneClassSVM),
    dict(ad=NaiveAnomalyDetector),
    dict(ad=SamplingAnomalyDetector),
    dict(ad=DAE, fit_kwargs=dict(epochs=50, batch_size=500)),
    dict(ad=BINetv1, fit_kwargs=dict(epochs=20, batch_size=500))
]
for ad in ads:
    for dataset in tqdm(datasets, desc=ad['ad'].name):
        fit_and_save(dataset, **ad)
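# Each dict in `ads` is unpacked via **ad, so e.g. the DAE entry above expands to
#     fit_and_save(dataset, ad=DAE, fit_kwargs=dict(epochs=50, batch_size=500));
# fit_and_save itself is defined earlier in this script and is not shown in this excerpt.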

#######################################################################################################

import itertools
from multiprocessing.pool import Pool
Example #4
json_files = [
    'largelog-0.0-0.json.gz',
    'smalllog-0.0-0.json.gz'
]

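# Convert each raw XES export into the JSON representation that the
# anomaly-injection loop below reads back via EventLog.from_json.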
for xes_file, json_file in tqdm(list(zip(xes_files, json_files))):
    event_log = EventLog.from_xes(os.path.join(BPIC_DIR, xes_file))
    event_log.save_json(os.path.join(EVENTLOG_DIR, json_file))
    

# Add anomalies
for k in range(0, 10):
    np.random.seed(k)  # This will ensure reproducibility
    ps = [0.3]
    event_log_paths = [
        e.path for e in get_event_log_files(EVENTLOG_DIR)
        if 'log' in e.name and e.p == 0.0
    ]

    combinations = list(itertools.product(event_log_paths, ps))
    for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
        event_log_file = EventLogFile(event_log_path)
        event_log = EventLog.from_json(event_log_path)

        anomalies = [
            ReplaceAnomaly(max_replacements=1)
        ]
        
        for anomaly in anomalies:
            # This is necessary to initialize the likelihood graph correctly
            anomaly.activities = event_log.unique_activities
            # anomaly.attributes = [CategoricalAttributeGenerator(name=name, values=values) for name, values in
            #                       event_log.unique_attribute_values.items() if name != 'name']
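The snippets on this page assume numpy, tqdm, and the april process-mining package are importable. A possible import header is sketched below; apart from april.processmining.log.EventLog, which Example #5 references by its full path, the april module paths are assumptions and may differ in the actual package layout:

import os
import itertools

import numpy as np
from tqdm import tqdm

from april.processmining.log import EventLog  # full path taken from Example #5
# Assumed locations for the remaining names used above:
# from april.fs import get_event_log_files, EventLogFile, EVENTLOG_DIR, BPIC_DIR
# from april.generation import (ReplaceAnomaly, SkipSequenceAnomaly, ReworkAnomaly,
#                               EarlyAnomaly, LateAnomaly, InsertAnomaly,
#                               CategoricalAttributeGenerator)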
Example #5
process_models = [
    m for m in get_process_model_files()
    if 'testing' not in m and 'paper' not in m
]
for process_model in tqdm(process_models, desc='Generate'):
    generate_for_process_model(process_model,
                               size=5000,
                               anomalies=anomalies,
                               num_attr=[1, 2, 3, 4],
                               seed=1337)

## Add Anomalies
np.random.seed(0)  # This will ensure reproducibility
ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]

combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = april.processmining.log.EventLog.from_json(event_log_path)

    anomalies = [
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2),
    ]
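Each script fixes the numpy seed before injecting anomalies, so repeated runs corrupt the logs in exactly the same way. A tiny standalone illustration of that mechanism (plain numpy, not part of the original scripts):

import numpy as np

np.random.seed(0)
first = np.random.choice(10, size=5)
np.random.seed(0)
second = np.random.choice(10, size=5)
assert (first == second).all()  # same seed -> identical random draws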