if row.transition == 'COMPLETE': if start == True: data[-1] = row.CompleteTimestamp else: data = list() data += [ temp_caseid, row.Activity, row.Resource, '', row.Amount, row.CompleteTimestamp ] start = False table.append(data) headers = [ 'CASE_ID', 'Activity', 'Resource', 'StartTimestamp', 'Amount', 'CompleteTimestamp' ] df = pd.DataFrame(table, columns=headers) eventlog = Eventlog(df) eventlog = eventlog.assign_timestamp(name='StartTimestamp', new_name='StartTimestamp', _format='%Y.%m.%d %H:%M:%S', errors='raise') eventlog = eventlog.assign_timestamp(name='CompleteTimestamp', new_name='CompleteTimestamp', _format='%Y/%m/%d %H:%M:%S', errors='raise') eventlog['Duration'] = (eventlog['CompleteTimestamp'] - eventlog['StartTimestamp']).apply(to_minute) eventlog.dropna(subset=['Resource', 'StartTimestamp', 'CompleteTimestamp'], inplace=True) eventlog.to_csv('../sample_data/BPIC2012.csv')
from PyProM.src.data.Eventlog import Eventlog
import pandas as pd


class Preprocessor(object):
    """Turn a raw CSV event table into an ``Eventlog`` object."""

    def load_eventlog(self, path, case, activity, timestamp, encoding=None, clear=True):
        """Read a comma-separated file and wrap it as an ``Eventlog``.

        The rows are sorted by the ``case`` and ``timestamp`` columns and
        re-indexed from zero before the frame is handed to ``Eventlog``.

        Args:
            path: Location of the CSV file; forwarded to ``pandas.read_csv``.
            case: Name of the column passed to ``Eventlog.assign_caseid``.
            activity: Name of the column passed to ``Eventlog.assign_activity``.
            timestamp: Name of the column passed to
                ``Eventlog.assign_timestamp``.
            encoding: Text encoding forwarded to ``pandas.read_csv``
                (``None`` leaves the pandas default in place).
            clear: When truthy, ``clear_columns()`` is applied to the event
                log before it is returned.

        Returns:
            The object produced by the last ``Eventlog`` call in the chain.
        """
        frame = pd.read_csv(path, sep=',', engine='python', encoding=encoding)

        # NOTE(review): the ordering is computed on the values exactly as
        # read_csv produced them, i.e. before assign_timestamp has touched the
        # timestamp column. If that column is still text at this point the
        # sort is lexicographic -- confirm that this matches chronological
        # order for the timestamp format used by callers.
        frame = frame.sort_values([case, timestamp]).reset_index(drop=True)

        eventlog = Eventlog(frame)
        eventlog = eventlog.assign_caseid(case)
        eventlog = eventlog.assign_activity(activity)
        eventlog = eventlog.assign_timestamp(timestamp)

        # Truthiness test instead of `== True`, so any truthy flag enables
        # the clean-up step rather than only values equal to True.
        if clear:
            eventlog = eventlog.clear_columns()
        return eventlog