예제 #1
0
def createFiles(logFile, kOptions):
    """Time two outlier detectors on one log and write the timings to disk.

    For every ``k`` in ``kOptions`` the distance-based detector
    (``outlierDistanceActivities.main``) and the R-tree pairwise scorer
    (``outlierPairWise.outlierScore``) are each run once. Every run adds a
    ``k,seconds`` line to ``tests/data/timeDistancesEvent.txt`` and
    ``tests/data/timeDistancesRtree.txt`` respectively. Both files are
    opened in ``"w"`` mode, so previous contents are overwritten.

    Args:
        logFile: Location of the XES log, handed to ``xes_factory.apply``.
        kOptions: The ``k`` values to benchmark. It is iterated twice, so it
            must be re-iterable (a list or tuple, not a one-shot iterator).

    Returns:
        None. The timings are only written to the two files.
    """
    log = xes_factory.apply(logFile)
    # NOTE: the 2012 preprocessing is applied whatever log is supplied.
    dataVectors, seq = preprocess.dataPreprocess2012(log)
    pairWiseData = outlierPairWise.preprocess(log)
    # The tree is built once and shared by every k; its construction time is
    # not part of the reported timings. (Original note: createRtree
    # "returns values ordered".)
    tree = outlierPairWise.createRtree(pairWiseData)

    with open("tests/data/timeDistancesEvent.txt", "w") as f:
        for k in kOptions:
            # main() reports its own execution time; the pairs are unused.
            _, executionTime = outlierDistanceActivities.main(
                dataVectors, seq, k)
            f.write("{},{}\n".format(k, executionTime))

    with open("tests/data/timeDistancesRtree.txt", "w") as f:
        for k in kOptions:
            # perf_counter is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments the way time.time() can.
            scoreTimeStart = time.perf_counter()
            outlierPairWise.outlierScore(k, tree, pairWiseData)
            elapsed = time.perf_counter() - scoreTimeStart
            f.write("{},{}\n".format(k, elapsed))
예제 #2
0
from algorithms import outlierDistanceActivities, preprocess, outlierPairsDistribution
import algorithms.outlierDistanceActivities as activities
import algorithms.outlierPairsDistribution as distribution
# `os` is needed for the os.remove() call below; the script did not import it.
import os

from pm4py.objects.log.importer.xes import factory as xes_factory

# k values passed to the distance-based detector, one timed run each.
kOptions = [250, 500, 750, 1000, 1250, 1500, 1750, 2000]
# Thresholds passed to the distribution-based detector, one timed run each.
thresholds = [0.0025, 0.005, 0.0075, 0.01, 0.0125, 0.0150, 0.0175, 0.02]

logFiles = ["../BPI_Challenge_2012.xes", "BPI Challenge 2017.xes"]

# For each log, time both detectors over their parameter grids and write one
# "parameter,time" line per run into tests/data/events-<identifier>-*.txt.
for logFile in logFiles:
    print("importing log")
    log = xes_factory.apply(logFile)
    # [trace,activity index,time]
    print("preprocess ...")
    # Any log other than the 2012 one is treated as the 2017 log.
    if logFile == "../BPI_Challenge_2012.xes":
        dataVectors, seq = preprocess.dataPreprocess2012(log)
        identifier = "2012"
    else:
        dataVectors, seq = preprocess.dataPreprocess2017(log)
        identifier = "2017"

    with open("tests/data/events-{}-distance.txt".format(identifier), "w") as f:
        for k in kOptions:
            # Only the reported execution time is recorded; pairs are unused.
            _, executionTime = outlierDistanceActivities.main(
                dataVectors, seq, k)
            f.write("{},{}\n".format(k, executionTime))

    with open("tests/data/events-{}-distribution.txt".format(identifier), "w") as f:
        for threshold in thresholds:
            _, executionTime = outlierPairsDistribution.main(
                log, dataVectors, seq, threshold)
            f.write("{},{}\n".format(threshold, executionTime))
            # Presumably main() leaves "distributions.txt" behind as a cache;
            # deleting it makes the next threshold start from scratch.
            # TODO confirm against outlierPairsDistribution.
            os.remove("distributions.txt")

#plot the times
예제 #3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from algorithms.outlierPairsCurveFitting import getDistributionsFitting
from pm4py.algo.filtering.log.attributes import attributes_filter as log_attributes_filter
from pm4py.objects.log.importer.xes import factory as xes_factory
from algorithms import preprocess
from statistics import mean
import numpy as np
import warnings, scipy
from sklearn.preprocessing import StandardScaler
# Layout of the preprocessed data, per the original author's notes:
#   time:     per activity [30]    => [trace index, activity index, time in seconds]
#   sequence: per trace  [10.000]  => [[activity index, time]]
logFile = "../BPM Temporal Anomalies/scala_trace_outlier/input/outliers_30_activities_10k_0.005.xes"
logFileResults = "../BPM Temporal Anomalies/scala_trace_outlier/input/results_30_activities_10k_0.005_description"

# Import the event log, then split it into the two views described above.
log = xes_factory.apply(logFile)
time, sequence = preprocess.dataPreprocess2012(log)

# Keep only the field at index 2 of every record, grouped exactly as in `time`.
times = [[record[2] for record in group] for group in time]
# One mean per group.
means = list(map(mean, times))
# The result is read further down via .iloc[...] with "R2" and "Distribution"
# keys, one row per entry of `times`.
distributionsDF = getDistributionsFitting(times, log)

#get the distributions in a array
thresholds = [0.020, 0.01, 0.0075, 0.005, 0.0025, 0.001]
for threshold in thresholds:
    warnings.filterwarnings("ignore")
    distributions = []
    for index in range(len(distributionsDF)):
        if distributionsDF.iloc[index]["R2"] >= 0.9:
            dist = getattr(scipy.stats,
                           distributionsDF.iloc[index]["Distribution"])
            param = dist.fit(times[index])