Example #1
    def load(self, dataset_name):
        """
        Load dataset from disk. If there exists a cached file, load from cache. If no cache file exists, load from
        Event Log and cache it.

        :param dataset_name:
        :return:
        """
        el_file = EventLogFile(dataset_name)
        self.dataset_name = el_file.name

        # Check for cache
        if el_file.cache_file.exists():
            self._load_dataset_from_cache(el_file.cache_file)

        # Else generate from event log
        elif el_file.path.exists():
            self._event_log = EventLog.load(el_file.path)
            self.from_event_log(self._event_log)
            self._cache_dataset(el_file.cache_file)
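
A minimal usage sketch for this method, assuming a `Dataset` class that exposes it (the class name is an assumption; it is not shown in the excerpt):

# Hypothetical usage; `Dataset` is an assumed wrapper class exposing load()
dataset = Dataset()
dataset.load('bpic12-0.0-0')  # served from cache if present, otherwise parsed from the event log and cached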
Example #2
import itertools

import numpy as np
from tqdm import tqdm

# EventLog, EventLogFile, get_event_log_files, EVENTLOG_DIR, and the anomaly
# classes come from the surrounding project; their exact module paths are not
# shown in this excerpt.

# The opening of this list is cut off in the excerpt; `excluded` is a
# hypothetical name added so the snippet parses.
excluded = [
    'bpic15-0.0-5.json.gz',
    'bpic12-0.0-0.json.gz',
    'bpic17-0.0-1.json.gz',
    'bpic17-0.0-2.json.gz'
]

np.random.seed(0)  # This will ensure reproducibility
ps = [0.3]
event_log_paths = [
    e.path for e in get_event_log_files(EVENTLOG_DIR)
    if 'bpic' in e.name and e.p == 0.0
]

combinations = list(itertools.product(event_log_paths, ps))
for event_log_path, p in tqdm(combinations, desc='Add anomalies'):
    event_log_file = EventLogFile(event_log_path)
    event_log = EventLog.from_json(event_log_path)

    anomalies = [
        ReplaceAnomaly(max_replacements=1),
        SkipSequenceAnomaly(max_sequence_size=2),
        ReworkAnomaly(max_distance=5, max_sequence_size=3),
        EarlyAnomaly(max_distance=5, max_sequence_size=2),
        LateAnomaly(max_distance=5, max_sequence_size=2),
        InsertAnomaly(max_inserts=2)
    ]

    # if event_log.num_event_attributes > 0:
    #     anomalies.append(AttributeAnomaly(max_events=3, max_attributes=min(2, event_log.num_activities)))

    for anomaly in anomalies:
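        # The loop body is cut off in the excerpt. A hypothetical sketch of the
        # usual pattern: apply the anomaly to a fraction p of the cases, then
        # persist the modified log. apply() and save_json() are assumed names,
        # not the project's confirmed API.
        modified_log = anomaly.apply(event_log, p=p)
        modified_log.save_json(f'{event_log_file.name}-{p}.json.gz')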