def dataframe(self):
    """
    Return pandas DataFrame containing the event log in matrix format.

    :return: pandas.DataFrame
    """
    start_event = Event(timestamp=None,
                        **dict((a, EventLog.start_symbol) for a in self.event_attribute_keys))
    end_event = Event(timestamp=None,
                      **dict((a, EventLog.end_symbol) for a in self.event_attribute_keys))

    frames = []
    for case_id, case in enumerate(self.cases):
        if case.id is not None:
            case_id = case.id
        for event_pos, event in enumerate([start_event] + case.events + [end_event]):
            frames.append({
                'case_id': case_id,
                'event_position': event_pos,
                'name': event.name,
                'timestamp': event.timestamp,
                **dict(i for i in event.attributes.items() if not i[0].startswith('_'))
            })

    return pd.DataFrame(frames)
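# Hedged usage sketch for dataframe(): assumes an EventLog loaded elsewhere
# (e.g. via from_json below); the resulting frame has one row per event plus a
# start- and end-symbol row per case, with columns case_id, event_position,
# name, timestamp and all non-underscore event attributes.
def _example_dataframe_usage(event_log):
    df = event_log.dataframe()
    # Inspect the matrix layout produced above.
    print(df[['case_id', 'event_position', 'name', 'timestamp']].head())
    return df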
def generate_random_event(self):
    if self.activities is None:
        raise RuntimeError('activities has not been set.')
    event = Event(name=f'Random activity {np.random.randint(1, len(self.activities))}')
    if self.attributes is not None:
        event.attributes = dict(
            (a.name, f'Random {a.name} {np.random.randint(1, len(a.values))}')
            for a in self.attributes)
    return event
def generate_random_event_v2(self, act):
    if self.activities is None:
        raise RuntimeError('activities has not been set.')
    actset = [x for x in self.activities if x not in act]
    event = Event(name=f'{np.random.choice(actset)}')
    if self.attributes is not None:
        event.attributes = dict(
            (a.name, f'Random {a.name} {np.random.randint(1, 3)}')
            for a in self.attributes)
    return event
def from_sql(server, database, resource, password, schema='pm'):
    import pyodbc
    conn = pyodbc.connect(f'DRIVER={{ODBC Driver 17 for SQL Server}};'
                          f'SERVER={{{server}}};'
                          f'DATABASE={{{database}}};'
                          f'UID={{{resource}}};'
                          f'PWD={{{password}}}')
    cursor = conn.cursor()
    cursor.execute(
        f'SELECT CaseId, ActivityName, Resource, Timestamp FROM {schema}.EventLog '
        f'ORDER BY CaseId, Timestamp, SortKey, ActivityName'
    )

    event_log = EventLog()
    case = None
    current_case_id = None
    for row in cursor.fetchall():
        case_id = row[0]
        timestamp = '' if row[3] is None else row[3].strftime('%Y-%m-%d %H:%M:%S')
        name = row[1]
        resource = row[2]
        if case_id != current_case_id:
            case = Case(id=case_id)
            event_log.add_case(case)
            current_case_id = case_id
        case.add_event(Event(name=name, timestamp=timestamp, resource=resource))

    return event_log
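# Hedged usage sketch for from_sql(): the query above assumes a table
# <schema>.EventLog with columns CaseId, ActivityName, Resource, Timestamp and
# SortKey. Server, database and credentials here are placeholders, and the
# call assumes from_sql is exposed on EventLog like from_csv/from_json.
def _example_from_sql_usage():
    log = EventLog.from_sql(server='localhost', database='process_db',
                            resource='pm_user', password='secret', schema='pm')
    print(len(log.cases))
    return log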
def from_csv(file_path):
    """
    Load an event log from a CSV file

    :param file_path: path to CSV file
    :return: EventLog object
    """
    # parse file as pandas dataframe
    df = pd.read_csv(file_path)

    # create event log
    event_log = EventLog()

    # iterate by distinct case id
    for case_id in np.unique(df['case_id']):
        _case = Case(id=case_id)
        # iterate over rows per case
        for index, row in df[df.case_id == case_id].iterrows():
            start_time = row['start_time']
            end_time = row['end_time']
            event_name = row['event']
            user = row['user']
            _event = Event(name=event_name, timestamp=start_time, end_time=end_time, user=user)
            _case.add_event(_event)
        event_log.add_case(_case)

    return event_log
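# Hedged usage sketch for from_csv(): the loader expects a flat CSV with the
# columns read above (case_id, start_time, end_time, event, user); the file
# name is a placeholder and the call assumes from_csv is a static method of
# EventLog.
def _example_from_csv_usage():
    log = EventLog.from_csv('event_log.csv')
    for case in log.cases:
        # Case length equals the number of events added per case above.
        print(case.id, len(case))
    return log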
class ReworkAnomaly(Anomaly):
    """Duplicate a sequence of n events from the case and insert it again later in the case."""

    def __init__(self, max_sequence_size=2, max_distance=0):
        self.max_sequence_size = max_sequence_size
        self.max_distance = max_distance
        super(ReworkAnomaly, self).__init__()

    def apply_to_case(self, case):
        if len(case) <= 1:
            return NoneAnomaly().apply_to_case(case)

        size = np.random.randint(2, min(len(case), self.max_sequence_size) + 1)
        start = np.random.randint(0, len(case) - size + 1)
        distance = np.random.randint(0, min(len(case) - (start + size), self.max_distance) + 1)

        t = case.events
        dupe_sequence = [Event.clone(e) for e in t[start:start + size]]
        inserted = [e.json for e in dupe_sequence]

        anomalous_trace = t[:start + size + distance] + dupe_sequence + t[start + size + distance:]
        case.events = anomalous_trace

        case.attributes['label'] = dict(
            anomaly=str(self),
            attr=dict(size=int(size), start=int(start + size + distance), inserted=inserted)
        )

        return case
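# Hedged usage sketch for ReworkAnomaly: apply_to_case() mutates the case in
# place, duplicating a random subsequence and recording the injected events
# under case.attributes['label']; `event_log` stands for any loaded EventLog.
def _example_rework_anomaly_usage(event_log):
    anomaly = ReworkAnomaly(max_sequence_size=3, max_distance=1)
    for case in event_log.cases:
        anomaly.apply_to_case(case)
        print(case.attributes.get('label'))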
def from_dict(log):
    event_log = EventLog(**log['attributes'])
    for case in log['cases']:
        _case = Case(id=case['id'], **case['attributes'])
        for e in case['events']:
            event = Event(name=e['name'], timestamp=e['timestamp'], **e['attributes'])
            _case.add_event(event)
        event_log.add_case(_case)
    return event_log
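# Hedged sketch of the dict layout from_dict() expects, inferred from the keys
# accessed above; the attribute contents are illustrative only.
def _example_from_dict_usage():
    log_dict = {
        'attributes': {},
        'cases': [{
            'id': 'case_1',
            'attributes': {},
            'events': [{'name': 'A', 'timestamp': None, 'attributes': {}}],
        }],
    }
    return from_dict(log_dict)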
def path_to_case(self, p, label=None):
    """
    Convert a given path of node identifiers into a case by traversing the graph.

    :param p: path of node identifiers
    :param label: is used to label the case
    :return: a case
    """
    g = self.graph
    case = Case(label=label)
    for i in range(0, len(p), self.event_len):
        event = Event(name=g.node[p[i]]['value'])
        for j in range(1, self.event_len):
            att = g.node[p[i + j]]['name']
            value = g.node[p[i + j]]['value']
            event.attributes[att] = value
        case.add_event(event)
    return case
def parse_event(event):
    attr = dict((attr.attrib['key'], attr.attrib['value']) for attr in event)

    timestamp = None
    # if 'time:timestamp' in global_attr['event'].keys():
    if 'time:timestamp' in attr:
        timestamp = attr['time:timestamp']

    name = ''
    if len(classifiers) > 0:
        keys = classifiers[0]['keys']
        check_keys = [key for key in keys if key not in attr]
        if len(check_keys) > 0:
            print(f'Classifier key(s) {", ".join(check_keys)} could not be found in event.')
            return None
        values = [attr[key] for key in keys]
        name = '+'.join(values)

    return Event(name=name, timestamp=timestamp, **attr)
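# Hedged sketch of the closure state parse_event() relies on, inferred from the
# accesses above: `classifiers` is a list of dicts whose 'keys' entry lists the
# XES attribute keys joined with '+' to form the event name. The keys shown are
# common XES attributes, not taken from this code.
#
#   classifiers = [{'name': 'Activity classifier',
#                   'keys': ['concept:name', 'lifecycle:transition']}]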
def from_json(file_path):
    """
    Parse event log from JSON. JSON can be gzipped

    :param file_path: path to json file
    :return:
    """
    if not isinstance(file_path, str):
        file_path = str(file_path)

    if file_path.endswith('gz'):
        import gzip
        open = gzip.open

    # Read the file
    with open(file_path, 'rb') as f:
        log = json.loads(f.read().decode('utf-8'))

    event_log = EventLog(**log['attributes'])

    # Compatibility: Check for traces in log
    if 'traces' in log:
        case_key = 'traces'
    else:
        case_key = 'cases'

    for case in log[case_key]:
        _case = Case(id=case['id'], **case['attributes'])
        for e in case['events']:
            event = Event(name=e['name'], timestamp=e['timestamp'], **e['attributes'])
            _case.add_event(event)
        event_log.add_case(_case)

    return event_log
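# Hedged usage sketch for from_json(): files may be gzipped, and older files
# that store cases under 'traces' are still accepted; the path is a
# placeholder, and the call assumes from_json is a static method of EventLog.
def _example_from_json_usage():
    log = EventLog.from_json('event_log.json.gz')
    # Combine with dataframe() above to get the matrix representation.
    return log.dataframe()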
def clone(trace):
    events = [Event.clone(event) for event in trace.events]
    return Case(id=trace.id, events=events, **dict(trace.attributes))