def extract_sequence(self, path="./sample_data/testlog1_no_noise.csv"):
    """Count how often each activity sequence occurs in the log at *path*.

    The file is read as a comma-separated event log with ``'Case ID'`` as
    the case column and ``'Activity'`` as the activity column.  Every trace
    returned by ``get_event_trace`` is flattened into a single string by
    joining its items with ``'_'``.

    Args:
        path: Location of the comma-separated event log.

    Returns:
        A ``collections.Counter`` mapping each ``'_'``-joined trace string
        to the number of times it was produced.
    """
    eventlog = Eventlog.from_txt(path, sep=',')
    eventlog = eventlog.assign_caseid('Case ID')
    eventlog = eventlog.assign_activity('Activity')
    # Only the values of the returned mapping are used; each value must be
    # an iterable of strings for the join below to work.
    traces = eventlog.get_event_trace(workers=4, value='Activity')
    return collections.Counter('_'.join(steps) for steps in traces.values())
def preparation(path="../result/generatedlog1.csv"):
    """Load the event log at *path* and attach a ``Duration`` column.

    The file is read as comma-separated text; ``'CaseID'``, ``'Activity'``
    and ``'Resource'`` are registered as the case, activity and resource
    columns.  ``Duration`` is computed as ``Complete - Start``.

    Args:
        path: Location of the comma-separated event log.

    Returns:
        The event log with the extra ``Duration`` column.
    """
    log = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('CaseID')
        .assign_activity('Activity')
        .assign_resource('Resource')
    )
    log['Duration'] = log['Complete'] - log['Start']
    return log
def prod_eventlog(self, start_point, columns=None):
    """Build an ``Eventlog`` from ``self.eventlist`` with absolute timestamps.

    ``Start`` and ``Complete`` are interpreted as offsets in minutes that
    are added to *start_point* to obtain ``StartTimestamp`` and
    ``CompleteTimestamp``.  The rows are then sorted by ``CASE_ID`` and
    ``StartTimestamp``.

    Args:
        start_point: Reference moment; strings are parsed with the format
            ``'%Y-%m-%d %H:%M:%S'``.
        columns: Column names for the records in ``self.eventlist``.
            Defaults to ``['CASE_ID', 'Activity', 'Resource', 'Start',
            'Complete']``.

    Returns:
        An ``Eventlog`` wrapping the sorted frame.

    Raises:
        ValueError: If *start_point* cannot be parsed with the expected
            format.  (Previously ``errors='ignore'`` let the unparsed value
            through and the timedelta addition failed right afterwards.)
    """
    # A ``None`` sentinel avoids sharing one mutable list across calls.
    if columns is None:
        columns = ['CASE_ID', 'Activity', 'Resource', 'Start', 'Complete']

    eventlog = pd.DataFrame.from_records(self.eventlist, columns=columns)

    # Same recipe for both timestamps: broadcast the start point, parse it,
    # then shift it by the per-row offset expressed in minutes.
    for stamp_col, offset_col in (('StartTimestamp', 'Start'),
                                  ('CompleteTimestamp', 'Complete')):
        eventlog[stamp_col] = start_point
        eventlog[stamp_col] = pd.to_datetime(
            eventlog[stamp_col], format='%Y-%m-%d %H:%M:%S')
        eventlog[stamp_col] += pd.to_timedelta(eventlog[offset_col], unit='m')

    eventlog.sort_values(['CASE_ID', 'StartTimestamp'], inplace=True)
    return Eventlog(eventlog)
def load_real_data(self, path):
    """Load a real event log from *path* and derive a ``Start`` column.

    The file is read as comma-separated text.  ``'CASE_ID'``, ``'Activity'``
    and ``'Resource'`` are registered as case, activity and resource
    columns (``Resource`` is cast to ``int`` first), and ``StartTimestamp``
    is parsed with the format ``'%Y.%m.%d %H:%M:%S'``, raising on failure.

    Args:
        path: Location of the comma-separated event log.

    Returns:
        The event log with ``Start`` holding, for each row, the hour and
        minute of ``StartTimestamp`` expressed as ``hour * 60 + minute``.
    """
    def _minute_of_day(stamp):
        # Seconds are deliberately not part of the result.
        clock = stamp.time()
        return clock.hour * 60 + clock.minute

    log = Eventlog.from_txt(path, sep=',')
    log = log.assign_caseid('CASE_ID').assign_activity('Activity')
    log['Resource'] = log['Resource'].astype(int)
    log = log.assign_resource('Resource')
    log = log.assign_timestamp(
        name='StartTimestamp',
        new_name='StartTimestamp',
        _format='%Y.%m.%d %H:%M:%S',
        errors='raise',
    )
    log['Start'] = log['StartTimestamp'].apply(_minute_of_day)
    return log
from PyProM.src.data.xes_reader import XesReader from PyProM.src.mining.transition_matrix import TransitionMatrix from PyProM.src.mining.dependency_graph import DependencyGraph from PyProM.src.model.fsm import FSM_Miner from PyProM.src.visualization.svg_widget import Visualization from PyQt5 import QtSvg, QtCore, QtGui, Qt, QtWidgets import multiprocessing if __name__ == '__main__': #eventlog eventlog = Eventlog.from_txt( '/Users/GYUNAM/Documents/example/repairExample.txt') eventlog = eventlog.assign_caseid('Case ID') eventlog = eventlog.assign_activity('Activity') eventlog = eventlog.assign_resource('Resource') eventlog = eventlog.assign_timestamp('Complete Timestamp') eventlog = eventlog.clear_columns() #preprocessing #Transition Matrix TM = TransitionMatrix() transition_matrix = TM.get_transition_matrix(eventlog, 4, type='sequence', horizon=2) transition_matrix = TM.annotate_transition_matrix(eventlog, 4,
def load_data(self, path):
    """Read the comma-separated event log at *path*.

    ``'CASE_ID'``, ``'Activity'`` and ``'Resource'`` are registered as the
    case, activity and resource columns, in that order.

    Args:
        path: Location of the comma-separated event log.

    Returns:
        The configured event log.
    """
    return (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('CASE_ID')
        .assign_activity('Activity')
        .assign_resource('Resource')
    )
def remove_micro_seconds(x):
    """Return *x* cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert ``x.seconds`` to minutes, rounding up.

    When ``x.seconds`` is NaN, *x* itself is returned unchanged.
    """
    # NOTE(review): if x is a timedelta, ``.seconds`` excludes whole days --
    # confirm that durations of 24h or more cannot occur here.
    seconds = x.seconds
    if np.isnan(seconds):
        return x
    return math.ceil(seconds / 60)


if __name__ == '__main__':
    path = '../sample_data/BPI_Challenge_2012.csv'
    eventlog = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('Case ID')
        .assign_activity('Activity')
    )

    eventlog['transition'] = eventlog['lifecycle:transition']
    # Trim to 19 characters, then swap the '.' separators for '/'.
    eventlog['CompleteTimestamp'] = (
        eventlog['Complete Timestamp']
        .apply(remove_micro_seconds)
        .str.replace('.', '/', regex=False)
    )
    eventlog['Amount'] = eventlog['(case) AMOUNT_REQ']

    # Keep activities containing 'W_' but not 'SCHEDULE'.
    is_work_item = eventlog['Activity'].str.contains('W_', regex=False)
    is_schedule = eventlog['Activity'].str.contains('SCHEDULE')
    eventlog = eventlog.loc[is_work_item & ~is_schedule]
    caseid = ''
def remove_micro_seconds(x):
    """Return *x* cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert ``x.seconds`` to minutes, rounding up.

    When ``x.seconds`` is NaN, *x* itself is returned unchanged.
    """
    # NOTE(review): if x is a timedelta, ``.seconds`` excludes whole days --
    # confirm that durations of 24h or more cannot occur here.
    seconds = x.seconds
    if np.isnan(seconds):
        return x
    return math.ceil(seconds / 60)


if __name__ == '__main__':
    path = './sample_data/BPI_Challenge_2012.csv'
    eventlog = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('Case ID')
        .assign_activity('Activity')
    )

    eventlog['transition'] = eventlog['lifecycle:transition']
    # Trim to 19 characters, then swap the '.' separators for '/'.
    eventlog['CompleteTimestamp'] = (
        eventlog['Complete Timestamp']
        .apply(remove_micro_seconds)
        .str.replace('.', '/', regex=False)
    )
    eventlog['Amount'] = eventlog['(case) AMOUNT_REQ']

    # Keep activities containing 'W_' but not 'SCHEDULE'.
    is_work_item = eventlog['Activity'].str.contains('W_', regex=False)
    is_schedule = eventlog['Activity'].str.contains('SCHEDULE')
    eventlog = eventlog.loc[is_work_item & ~is_schedule]
    caseid = ''
from PyProM.src.data.Eventlog import Eventlog


def remove_micro_seconds(x):
    """Return *x* cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.
    """
    if len(x) > 19:
        x = x[:19]
    return x


def to_minute(x):
    """Convert ``x.seconds`` to minutes, rounding up.

    When ``x.seconds`` is NaN, *x* itself is returned unchanged.
    """
    if np.isnan(x.seconds):
        return x
    return math.ceil(x.seconds / 60)


if __name__ == '__main__':
    import datetime

    path = '../sample_data/BPIC15_1_unfiltered.csv'
    eventlog = Eventlog.from_txt(path, sep=',')
    eventlog = eventlog.assign_caseid('CASEOID')
    eventlog = eventlog.assign_activity('ACTIVITYOID')
    # The filter below reads a 'TIMESTAMP' column; presumably
    # assign_timestamp creates it from 'ENDAT' -- verify against Eventlog.
    eventlog = eventlog.assign_timestamp('ENDAT')

    # Bounds are datetime.datetime (midnight) rather than datetime.date:
    # pandas 2.x refuses ordering comparisons between datetime64 data and
    # plain date objects, while datetime bounds work on every version.
    date_after = datetime.datetime(2011, 1, 1)
    date_before = datetime.datetime(2014, 12, 31)
    eventlog = eventlog.loc[(eventlog['TIMESTAMP'] > date_after)
                            & (eventlog['TIMESTAMP'] < date_before)]
    print(eventlog)
    eventlog.to_csv('../sample_data/BPIC15_1.csv')
from PyProM.src.data.Eventlog import Eventlog
import pandas as pd


class Preprocessor(object):
    """Loads CSV files into ``Eventlog`` objects."""

    def load_eventlog(self, path, case, activity, timestamp, encoding=None, clear=True):
        """Read a comma-separated file and return it as a configured ``Eventlog``.

        The rows are sorted by the *case* and *timestamp* columns and the
        index is renumbered before the frame is wrapped in an ``Eventlog``.

        Args:
            path: Location of the CSV file.
            case: Name of the column holding the case identifier.
            activity: Name of the column holding the activity.
            timestamp: Name of the column holding the timestamp.
            encoding: Text encoding forwarded to ``pandas.read_csv``.
            clear: When truthy, ``clear_columns()`` is applied to the
                event log before it is returned.

        Returns:
            The configured ``Eventlog``.
        """
        frame = pd.read_csv(path, sep=',', engine='python', encoding=encoding)
        frame.sort_values([case, timestamp], inplace=True)
        frame.reset_index(drop=True, inplace=True)

        eventlog = Eventlog(frame)
        eventlog = eventlog.assign_caseid(case)
        eventlog = eventlog.assign_activity(activity)
        eventlog = eventlog.assign_timestamp(timestamp)
        # Truthiness test instead of ``== True``.
        if clear:
            eventlog = eventlog.clear_columns()
        return eventlog