def extract_sequence(self, path="./sample_data/testlog1_no_noise.csv"):
		"""Count how often each distinct activity sequence occurs in a CSV log.

		The file at ``path`` is read as a comma-separated event log with the
		columns ``'Case ID'`` and ``'Activity'``.  Every trace returned by
		``get_event_trace`` is flattened into one string by joining its
		entries with ``'_'``, and identical strings are tallied.

		Args:
			path: Location of the comma-separated event log file.

		Returns:
			A ``collections.Counter`` mapping each ``'_'``-joined trace string
			to the number of times it occurs.
		"""
		eventlog = Eventlog.from_txt(path, sep=',')
		eventlog = eventlog.assign_caseid('Case ID')
		eventlog = eventlog.assign_activity('Activity')
		# presumably a mapping of case id -> ordered activity names; only the
		# values are used here -- TODO confirm against Eventlog.get_event_trace
		traces = eventlog.get_event_trace(workers=4, value='Activity')
		# NOTE: activity names that themselves contain '_' produce ambiguous keys.
		return collections.Counter('_'.join(steps) for steps in traces.values())
def preparation(path="../result/generatedlog1.csv"):
    """Load a generated event log and attach a per-event ``'Duration'`` column.

    The comma-separated file at ``path`` is read, its ``'CaseID'``,
    ``'Activity'`` and ``'Resource'`` columns are registered on the log, and
    ``'Duration'`` is set to ``'Complete'`` minus ``'Start'`` for every row.

    Args:
        path: Location of the comma-separated event log file.

    Returns:
        The prepared event log, including the new ``'Duration'`` column.
    """
    log = (
        Eventlog.from_txt(path, sep=',')
        .assign_caseid('CaseID')
        .assign_activity('Activity')
        .assign_resource('Resource')
    )
    # No timestamp column is registered here; the duration is taken directly
    # from the 'Start' and 'Complete' columns of the file.
    log['Duration'] = log['Complete'] - log['Start']
    return log
	def prod_eventlog(self, start_point, columns=None):
		"""Build an ``Eventlog`` from ``self.eventlist`` with absolute timestamps.

		Each record's ``'Start'`` and ``'Complete'`` values are interpreted as
		offsets in minutes and added to ``start_point``, producing the
		``'StartTimestamp'`` and ``'CompleteTimestamp'`` columns.  Rows are then
		sorted by ``'CASE_ID'`` and ``'StartTimestamp'``.

		Args:
			start_point: Base timestamp applied to every row.  It is parsed
				with the format ``'%Y-%m-%d %H:%M:%S'``.
			columns: Column names for the records in ``self.eventlist``.
				Defaults to ``['CASE_ID', 'Activity', 'Resource', 'Start',
				'Complete']``.  The names ``'CASE_ID'``, ``'Start'`` and
				``'Complete'`` must be present because they are read below.

		Returns:
			An ``Eventlog`` wrapping the sorted frame.

		Raises:
			ValueError: If ``start_point`` cannot be parsed as a timestamp.
		"""
		# A None sentinel avoids sharing one mutable default list across calls.
		if columns is None:
			columns = ['CASE_ID', 'Activity', 'Resource', 'Start', 'Complete']
		eventlog = pd.DataFrame.from_records(self.eventlist, columns=columns)

		# Create both timestamp columns first so the column order is
		# 'StartTimestamp' followed by 'CompleteTimestamp'.
		eventlog['StartTimestamp'] = start_point
		eventlog['CompleteTimestamp'] = start_point

		stamp_format = '%Y-%m-%d %H:%M:%S'
		for stamp_column, offset_column in (('StartTimestamp', 'Start'),
		                                    ('CompleteTimestamp', 'Complete')):
			# errors='ignore' is deprecated in pandas and merely postponed a
			# parse failure to an opaque TypeError on the addition below, so an
			# unparseable start_point is now reported right here.
			eventlog[stamp_column] = pd.to_datetime(eventlog[stamp_column], format=stamp_format)
			eventlog[stamp_column] += pd.to_timedelta(eventlog[offset_column], unit='m')

		eventlog.sort_values(['CASE_ID', 'StartTimestamp'], inplace=True)
		return Eventlog(eventlog)
	def load_real_data(self,path):
		"""Load a comma-separated event log and add a minute-of-day ``'Start'`` column.

		The ``'CASE_ID'``, ``'Activity'`` and ``'Resource'`` columns are
		registered on the log (``'Resource'`` is cast to ``int`` first), and
		``'StartTimestamp'`` is registered as the timestamp using the format
		``'%Y.%m.%d %H:%M:%S'`` with ``errors='raise'``.

		Args:
			path: Location of the comma-separated event log file.

		Returns:
			The event log with an added ``'Start'`` column holding
			``hour * 60 + minute`` of each ``'StartTimestamp'`` value.
		"""
		def minute_of_day(stamp):
			# Seconds are dropped; only the hour and minute contribute.
			clock = stamp.time()
			return clock.hour * 60 + clock.minute

		log = Eventlog.from_txt(path, sep=',').assign_caseid('CASE_ID').assign_activity('Activity')
		log['Resource'] = log['Resource'].astype(int)
		log = log.assign_resource('Resource')
		log = log.assign_timestamp(
			name='StartTimestamp',
			new_name='StartTimestamp',
			_format='%Y.%m.%d %H:%M:%S',
			errors='raise',
		)
		log['Start'] = log['StartTimestamp'].apply(minute_of_day)
		return log
Beispiel #5
0
from PyProM.src.data.xes_reader import XesReader

from PyProM.src.mining.transition_matrix import TransitionMatrix
from PyProM.src.mining.dependency_graph import DependencyGraph

from PyProM.src.model.fsm import FSM_Miner

from PyProM.src.visualization.svg_widget import Visualization

from PyQt5 import QtSvg, QtCore, QtGui, Qt, QtWidgets

import multiprocessing

# Script entry point: load the repair example log and derive its transition matrix.
if __name__ == '__main__':
    # Load the event log from a hard-coded local path and register its
    # case id, activity, resource and timestamp columns.
    eventlog = Eventlog.from_txt(
        '/Users/GYUNAM/Documents/example/repairExample.txt')
    eventlog = eventlog.assign_caseid('Case ID')
    eventlog = eventlog.assign_activity('Activity')
    eventlog = eventlog.assign_resource('Resource')
    eventlog = eventlog.assign_timestamp('Complete Timestamp')
    # presumably drops the columns that were not registered above -- TODO confirm
    eventlog = eventlog.clear_columns()

    # Preprocessing: no preprocessing step is applied at this point.

    # Transition matrix built with type='sequence' and horizon=2.
    # NOTE(review): the positional 4 looks like a worker count -- confirm
    # against TransitionMatrix.get_transition_matrix.
    TM = TransitionMatrix()
    transition_matrix = TM.get_transition_matrix(eventlog,
                                                 4,
                                                 type='sequence',
                                                 horizon=2)
    transition_matrix = TM.annotate_transition_matrix(eventlog, 4,
	def load_data(self,path):
		"""Load a comma-separated event log and register its key columns.

		Args:
			path: Location of the comma-separated event log file.

		Returns:
			The event log after ``'CASE_ID'``, ``'Activity'`` and
			``'Resource'`` have been assigned as case id, activity and
			resource columns respectively.
		"""
		return (
			Eventlog.from_txt(path, sep=',')
			.assign_caseid('CASE_ID')
			.assign_activity('Activity')
			.assign_resource('Resource')
		)
Beispiel #7
0
def remove_micro_seconds(x):
    """Return ``x`` cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.  19 is the
    length of a ``YYYY/MM/DD HH:MM:SS`` style stamp, so anything after the
    seconds field (presumably a fractional-second suffix) is discarded.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert the ``seconds`` attribute of ``x`` to whole minutes, rounding up.

    If ``x.seconds`` is NaN, ``x`` itself is returned unchanged.

    NOTE: on ``datetime.timedelta`` and pandas ``Timedelta`` objects the
    ``seconds`` attribute excludes whole days, so only the sub-day part of a
    duration is converted.
    """
    secs = x.seconds
    if not np.isnan(secs):
        return math.ceil(secs / 60)
    return x


# Script entry point: load the BPI Challenge 2012 log, normalise a few columns
# and keep only the 'W_' activities that are not SCHEDULE events.
if __name__ == '__main__':
    path = '../sample_data/BPI_Challenge_2012.csv'
    eventlog = Eventlog.from_txt(path, sep=',')
    eventlog = eventlog.assign_caseid('Case ID')
    eventlog = eventlog.assign_activity('Activity')
    #eventlog = eventlog.assign_resource('Resource')
    # Copy source columns to names without spaces or punctuation.
    eventlog['transition'] = eventlog['lifecycle:transition']
    eventlog['CompleteTimestamp'] = eventlog['Complete Timestamp']
    # Truncate each stamp to 19 characters, then replace every literal '.'
    # with '/'.
    eventlog['CompleteTimestamp'] = eventlog['CompleteTimestamp'].apply(
        remove_micro_seconds)
    eventlog['CompleteTimestamp'] = eventlog['CompleteTimestamp'].str.replace(
        '.', '/', regex=False)
    eventlog['Amount'] = eventlog['(case) AMOUNT_REQ']
    #eventlog = eventlog.assign_timestamp('Start Timestamp', name='Timestamp', format = '%Y/%m/%d %H:%M:%S')
    # Keep rows whose Activity contains the literal 'W_' and does not contain
    # 'SCHEDULE' (the second test uses the default pattern matching of
    # str.contains; the pattern has no special characters).
    eventlog = eventlog.loc[
        (eventlog['Activity'].str.contains('W_', regex=False))
        & ~(eventlog['Activity'].str.contains('SCHEDULE'))]
    # Initial value only; how caseid is used afterwards is not visible here.
    caseid = ''
def remove_micro_seconds(x):
    """Return ``x`` cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.  19 is the
    length of a ``YYYY/MM/DD HH:MM:SS`` style stamp, so anything after the
    seconds field (presumably a fractional-second suffix) is discarded.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert the ``seconds`` attribute of ``x`` to whole minutes, rounding up.

    If ``x.seconds`` is NaN, ``x`` itself is returned unchanged.

    NOTE: on ``datetime.timedelta`` and pandas ``Timedelta`` objects the
    ``seconds`` attribute excludes whole days, so only the sub-day part of a
    duration is converted.
    """
    secs = x.seconds
    if not np.isnan(secs):
        return math.ceil(secs / 60)
    return x


# Script entry point: load the BPI Challenge 2012 log, normalise a few columns
# and keep only the 'W_' activities that are not SCHEDULE events.
if __name__ == '__main__':
    path = './sample_data/BPI_Challenge_2012.csv'
    eventlog = Eventlog.from_txt(path, sep=',')
    eventlog = eventlog.assign_caseid('Case ID')
    eventlog = eventlog.assign_activity('Activity')
    #eventlog = eventlog.assign_resource('Resource')
    # Copy source columns to names without spaces or punctuation.
    eventlog['transition'] = eventlog['lifecycle:transition']
    eventlog['CompleteTimestamp'] = eventlog['Complete Timestamp']
    # Truncate each stamp to 19 characters, then replace every literal '.'
    # with '/'.
    eventlog['CompleteTimestamp'] = eventlog['CompleteTimestamp'].apply(
        remove_micro_seconds)
    eventlog['CompleteTimestamp'] = eventlog['CompleteTimestamp'].str.replace(
        '.', '/', regex=False)
    eventlog['Amount'] = eventlog['(case) AMOUNT_REQ']
    #eventlog = eventlog.assign_timestamp('Start Timestamp', name='Timestamp', format = '%Y/%m/%d %H:%M:%S')
    # Keep rows whose Activity contains the literal 'W_' and does not contain
    # 'SCHEDULE' (the second test uses the default pattern matching of
    # str.contains; the pattern has no special characters).
    eventlog = eventlog.loc[
        (eventlog['Activity'].str.contains('W_', regex=False))
        & ~(eventlog['Activity'].str.contains('SCHEDULE'))]
    # Initial value only; how caseid is used afterwards is not visible here.
    caseid = ''
Beispiel #9
0
from PyProM.src.data.Eventlog import Eventlog


def remove_micro_seconds(x):
    """Return ``x`` cut down to its first 19 characters.

    Values of 19 characters or fewer are returned unchanged.  19 is the
    length of a ``YYYY/MM/DD HH:MM:SS`` style stamp, so anything after the
    seconds field (presumably a fractional-second suffix) is discarded.
    """
    return x[:19] if len(x) > 19 else x


def to_minute(x):
    """Convert the ``seconds`` attribute of ``x`` to whole minutes, rounding up.

    If ``x.seconds`` is NaN, ``x`` itself is returned unchanged.

    NOTE: on ``datetime.timedelta`` and pandas ``Timedelta`` objects the
    ``seconds`` attribute excludes whole days, so only the sub-day part of a
    duration is converted.
    """
    secs = x.seconds
    if not np.isnan(secs):
        return math.ceil(secs / 60)
    return x


# Script entry point: restrict the unfiltered BPIC15_1 log to a date window,
# print the result and write it to a new CSV file.
if __name__ == '__main__':
    path = '../sample_data/BPIC15_1_unfiltered.csv'
    eventlog = Eventlog.from_txt(path, sep=',')
    eventlog = eventlog.assign_caseid('CASEOID')
    eventlog = eventlog.assign_activity('ACTIVITYOID')
    eventlog = eventlog.assign_timestamp('ENDAT')
    #eventlog = eventlog.assign_resource('Resource')
    #eventlog['transition'] = eventlog['lifecycle:transition']
    import datetime
    date_after = datetime.date(2011, 1, 1)
    date_before = datetime.date(2014, 12, 31)
    # Both comparisons are strict, so rows equal to either bound are dropped.
    # NOTE(review): 'TIMESTAMP' is presumably the column produced by
    # assign_timestamp('ENDAT'); the bounds are datetime.date objects, so
    # confirm that comparing them with that column's dtype is supported by
    # the pandas version in use.
    eventlog = eventlog.loc[(eventlog['TIMESTAMP'] > date_after)
                            & (eventlog['TIMESTAMP'] < date_before)]
    print(eventlog)
    # Written next to the input file, without the '_unfiltered' suffix.
    eventlog.to_csv('../sample_data/BPIC15_1.csv')
Beispiel #10
0
from PyProM.src.data.Eventlog import Eventlog
import pandas as pd


class Preprocessor(object):
    """Loads CSV event logs into ``Eventlog`` objects."""

    def load_eventlog(self,
                      path,
                      case,
                      activity,
                      timestamp,
                      encoding=None,
                      clear=True):
        """Read a comma-separated file and return it as a sorted ``Eventlog``.

        Rows are sorted by the ``case`` column and then the ``timestamp``
        column, and the index is renumbered, before the frame is wrapped in
        an ``Eventlog`` and the three key columns are registered on it.

        Args:
            path: Location of the comma-separated file.
            case: Name of the column holding the case identifier.
            activity: Name of the column holding the activity.
            timestamp: Name of the column holding the timestamp.
            encoding: Text encoding forwarded to ``pandas.read_csv``.
            clear: When truthy, ``clear_columns()`` is called on the log
                before it is returned.

        Returns:
            The prepared ``Eventlog``.
        """
        frame = pd.read_csv(path,
                            sep=',',
                            engine='python',
                            encoding=encoding)
        # Sort before wrapping so the Eventlog is built from ordered rows
        # with a clean 0..n-1 index.
        frame = frame.sort_values([case, timestamp]).reset_index(drop=True)

        eventlog = Eventlog(frame)
        eventlog = eventlog.assign_caseid(case)
        eventlog = eventlog.assign_activity(activity)
        eventlog = eventlog.assign_timestamp(timestamp)
        # Truthiness test instead of `== True`, so any truthy flag enables it.
        if clear:
            eventlog = eventlog.clear_columns()
        return eventlog