def extract_sequence(self, path="./sample_data/testlog1_no_noise.csv"):
    """Count how often each activity sequence occurs in a CSV event log.

    Args:
        path: Comma-separated log file. Its ``Case ID`` and ``Activity``
            columns are registered as case identifier and activity.

    Returns:
        collections.Counter: maps every trace, rendered as its activity
        names joined with ``'_'``, to its number of occurrences.
    """
    eventlog = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('Case ID')
        .assign_activity('Activity')
    )
    # get_event_trace returns a mapping; only its values are used here
    # (presumably case id -> ordered activity names -- TODO confirm).
    traces = eventlog.get_event_trace(workers=4, value='Activity')
    return collections.Counter(
        '_'.join(activities) for activities in traces.values()
    )
def preparation(path="../result/generatedlog1.csv"):
    """Load a generated CSV event log and add a ``Duration`` column.

    Args:
        path: Comma-separated log file containing the ``CaseID``,
            ``Activity``, ``Resource``, ``Start`` and ``Complete`` columns.

    Returns:
        The Eventlog with case id, activity and resource assigned and with
        ``Duration`` set to ``Complete - Start``. No timestamp column is
        assigned here.
    """
    log = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('CaseID')
        .assign_activity('Activity')
        .assign_resource('Resource')
    )
    completed, started = log['Complete'], log['Start']
    log['Duration'] = completed - started
    return log
def load_real_data(self, path):
    """Load a CSV event log and derive a minute-of-day ``Start`` column.

    Args:
        path: Comma-separated log file containing the ``CASE_ID``,
            ``Activity``, ``Resource`` and ``StartTimestamp`` columns.

    Returns:
        The Eventlog with case id, activity, resource (cast to ``int``) and
        start timestamp assigned, plus a ``Start`` column holding
        ``hour * 60 + minute`` of each ``StartTimestamp`` value.
    """
    def minute_of_day(stamp):
        # Seconds are dropped on purpose: only hour and minute contribute.
        clock = stamp.time()
        return clock.hour * 60 + clock.minute

    log = Eventlog.from_txt(path, sep=',')
    log = log.assign_caseid('CASE_ID').assign_activity('Activity')

    # The resource column is converted to int before it is registered.
    log['Resource'] = log['Resource'].astype(int)
    log = log.assign_resource('Resource')

    # errors='raise' is passed through to assign_timestamp (presumably so
    # that unparsable timestamps fail loudly -- TODO confirm).
    log = log.assign_timestamp(
        name='StartTimestamp',
        new_name='StartTimestamp',
        _format='%Y.%m.%d %H:%M:%S',
        errors='raise',
    )

    log['Start'] = log['StartTimestamp'].apply(minute_of_day)
    return log
from PyProM.src.data.xes_reader import XesReader from PyProM.src.mining.transition_matrix import TransitionMatrix from PyProM.src.mining.dependency_graph import DependencyGraph from PyProM.src.model.fsm import FSM_Miner from PyProM.src.visualization.svg_widget import Visualization from PyQt5 import QtSvg, QtCore, QtGui, Qt, QtWidgets import multiprocessing if __name__ == '__main__': #eventlog eventlog = Eventlog.from_txt( '/Users/GYUNAM/Documents/example/repairExample.txt') eventlog = eventlog.assign_caseid('Case ID') eventlog = eventlog.assign_activity('Activity') eventlog = eventlog.assign_resource('Resource') eventlog = eventlog.assign_timestamp('Complete Timestamp') eventlog = eventlog.clear_columns() #preprocessing #Transition Matrix TM = TransitionMatrix() transition_matrix = TM.get_transition_matrix(eventlog, 4, type='sequence', horizon=2) transition_matrix = TM.annotate_transition_matrix(eventlog, 4,
def load_data(self, path):
    """Load a CSV event log and register its key columns.

    Args:
        path: Comma-separated log file containing the ``CASE_ID``,
            ``Activity`` and ``Resource`` columns.

    Returns:
        The Eventlog with case id, activity and resource assigned.
    """
    return (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('CASE_ID')
        .assign_activity('Activity')
        .assign_resource('Resource')
    )
def remove_micro_seconds(x):
    """Return *x* cut down to its first 19 characters.

    Values of length 19 or less are returned unchanged. Nineteen characters
    is the length of a ``YYYY/MM/DD HH:MM:SS`` style timestamp, so anything
    after the seconds field is dropped.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert the ``seconds`` attribute of *x* to whole minutes, rounded up.

    If ``x.seconds`` is NaN, *x* itself is returned unchanged. Only
    ``x.seconds`` is read; any other component of *x* is ignored.
    """
    secs = x.seconds
    if np.isnan(secs):
        return x
    return math.ceil(secs / 60)


if __name__ == '__main__':
    path = '../sample_data/BPI_Challenge_2012.csv'
    eventlog = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('Case ID')
        .assign_activity('Activity')
    )

    # Copy columns under simpler names.
    eventlog['transition'] = eventlog['lifecycle:transition']
    # Truncate each completion timestamp to 19 characters, then replace
    # every '.' with '/'.
    eventlog['CompleteTimestamp'] = (
        eventlog['Complete Timestamp']
        .apply(remove_micro_seconds)
        .str.replace('.', '/', regex=False)
    )
    eventlog['Amount'] = eventlog['(case) AMOUNT_REQ']

    # Keep rows whose activity contains 'W_' but not 'SCHEDULE'.
    has_w_prefix = eventlog['Activity'].str.contains('W_', regex=False)
    is_schedule = eventlog['Activity'].str.contains('SCHEDULE')
    eventlog = eventlog.loc[has_w_prefix & ~is_schedule]

    caseid = ''