Example #1
def test_13(self):
    from pm4py.algo.filtering.pandas.timestamp import timestamp_filter
    dataframe = self.load_running_example_df()
    df_timest_events = timestamp_filter.apply_events(
        dataframe, "2011-03-09 00:00:00", "2012-01-18 23:59:59",
        parameters={timestamp_filter.Parameters.CASE_ID_KEY: "case:concept:name",
                    timestamp_filter.Parameters.TIMESTAMP_KEY: "time:timestamp"})
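The same pandas-based filter works outside the test harness on any DataFrame that carries the standard case and timestamp columns; a minimal sketch, assuming a local running-example.xes file as input:

import pm4py
from pm4py.algo.filtering.pandas.timestamp import timestamp_filter

# Hypothetical input file; any XES log with "case:concept:name" and "time:timestamp" columns works
df = pm4py.convert_to_dataframe(pm4py.read_xes("running-example.xes"))
filtered_df = timestamp_filter.apply_events(df, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
print(len(df), len(filtered_df))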
Example #2
def social_position(
        log: EventLog,
        t1_0: Union[datetime, str],
        t2_0: Union[datetime, str],
        r: str,
        parameters: Optional[Dict[Union[str, Parameters],
                                  Any]] = None) -> float:
    """
    The fraction of resources involved in the same cases with a given resource during a given time slot with
    respect to the total number of resources active during the time slot.

    Metric RBI 5.2 in Pika, Anastasiia, et al.
    "Mining resource profiles from event logs." ACM Transactions on Management Information Systems (TMIS) 8.1 (2017): 1-30.

    Parameters
    -----------------
    log
        Event log
    t1_0
        Left extreme of the time interval
    t2_0
        Right extreme of the time interval
    r
        Resource identifier
    parameters
        Parameters of the algorithm, including the resource key

    Returns
    ----------------
    metric
        Value of the metric
    """
    if parameters is None:
        parameters = {}

    resource_key = exec_utils.get_param_value(
        Parameters.RESOURCE_KEY, parameters,
        xes_constants.DEFAULT_RESOURCE_KEY)

    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = timestamp_filter.apply_events(log, t1_0, t2_0, parameters=parameters)

    from pm4py.algo.filtering.log.attributes import attributes_filter
    parameters_filter = {
        attributes_filter.Parameters.ATTRIBUTE_KEY: resource_key
    }

    filtered_log = attributes_filter.apply(log, [r],
                                           parameters=parameters_filter)

    q1 = float(len(filtered_log))
    q2 = float(len(log))

    return q1 / q2 if q2 > 0 else 0.0
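A usage sketch under stated assumptions: the surrounding pm4py module imports (exec_utils, xes_constants, Parameters) are already in place, the XES path is a placeholder, and "Resource01" is a hypothetical resource name:

from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("receipt.xes")  # placeholder path
value = social_position(log, "2011-03-09 00:00:00", "2012-01-18 23:59:59", "Resource01")
print(value)  # fraction in [0, 1]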
Example #3
# Imports assumed by this snippet
from datetime import timedelta
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.filtering.log.timestamp import timestamp_filter

def subdivide_log(path, start, end, windowsize):
    '''Subdivides a log for the given time interval and the given sublog size
    args:
        path: path to the log
        start: Datetime of the starting date
        end: Datetime of the end date
        windowsize: Window/sublog size in days
    returns:
        list of sublogs
    '''
    log = xes_importer.apply(path)
    logs = []
    # Calculate the number of windows
    n = (end - start).days // windowsize
    for i in range(n):
        logs.append(timestamp_filter.apply_events(
            log,
            (start + timedelta(days=i * windowsize)).strftime("%Y-%m-%d %H:%M:%S"),
            (start + timedelta(days=(i + 1) * windowsize)).strftime("%Y-%m-%d %H:%M:%S")))
    return logs
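A usage sketch with placeholder path and dates; each sublog covers a 7-day window between start and end:

from datetime import datetime

sublogs = subdivide_log("receipt.xes",          # placeholder path
                        datetime(2011, 3, 9),   # start
                        datetime(2012, 1, 18),  # end
                        7)                      # window size in days
print(len(sublogs), "sublogs")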
Example #4
# Imports assumed by this snippet
from datetime import datetime
from pm4py.algo.filtering.log.timestamp import timestamp_filter

def apply(log, filter, parameters=None):
    """
    Apply a timestamp filter to the log

    Parameters
    ------------
    log
        Log where the filter should be applied
    filter
        Filter; its second element holds two UNIX timestamps separated by @@@
    parameters
        Parameters of the algorithm
    """
    if parameters is None:
        parameters = {}

    timestamps = filter[1].split("@@@")
    dt1 = str(datetime.utcfromtimestamp(int(timestamps[0])))
    dt2 = str(datetime.utcfromtimestamp(int(timestamps[1])))

    return timestamp_filter.apply_events(log, dt1, dt2, parameters=parameters)
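A minimal sketch of the expected filter structure, assuming log is an already imported EventLog and that the second element carries two UNIX timestamps joined by "@@@" (the values below correspond to the 2011-03-09 to 2012-01-18 window used in the other examples):

f = ("timestamp", "1299628800@@@1326931199")  # hypothetical filter tuple
filtered_log = apply(log, f)
print(sum(len(trace) for trace in filtered_log))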
Example #5
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """Create the set of event timestamps, then send one replay_prediction
    request per growing log prefix (see the fuller variant in Example #8)."""

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))

    for t in times[2:]:
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))

        try:  #TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            requests_list.append(str(e))
            logger.warning(str(e))
    return requests_list
Example #6
def test_12(self):
    from pm4py.algo.filtering.log.timestamp import timestamp_filter
    log = self.load_running_example_xes()
    filtered_log_events = timestamp_filter.apply_events(log, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
Example #7
# Imports assumed by this snippet
import os
from pm4py.objects.log.importer.xes import importer as xes_importer

log_path = os.path.join(r"E:\data", "pm", "receipt.xes")
log = xes_importer.apply(log_path)

from pm4py.objects.log.util import sorting
log = sorting.sort_timestamp(log)

# Filtering on timeframe
from pm4py.algo.filtering.log.timestamp import timestamp_filter
filtered_log = timestamp_filter.filter_traces_contained(
    log, "2011-03-09 00:00:00", "2012-01-18 23:59:59")
print(len(log))
print(len(filtered_log))

from pm4py.algo.filtering.log.timestamp import timestamp_filter
filtered_log_events = timestamp_filter.apply_events(log, "2011-03-09 00:00:00",
                                                    "2012-01-18 23:59:59")

print(sum([len(trace) for trace in filtered_log]))
print(sum([len(trace) for trace in filtered_log_events]))

# Case performance: keep cases lasting between 1 day (86400 s) and 10 days (864000 s)
from pm4py.algo.filtering.log.cases import case_filter
filtered_log = case_filter.filter_on_case_performance(log, 86400, 864000)
print(len(filtered_log))

# Start activities
from pm4py.algo.filtering.log.start_activities import start_activities_filter

log_start = start_activities_filter.get_start_activities(log)
print(log_start)
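To go from the start-activity counts to an actual filter, start_activities_filter.apply keeps only the traces beginning with the admitted activities; a sketch with a hypothetical activity name:

# Keep only traces starting with the given activity (the name is a placeholder)
filtered_by_start = start_activities_filter.apply(log, ["Confirmation of receipt"])
print(len(filtered_by_start))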
Example #8
def replay_core(replay_job: Job, training_initial_job: Job) -> list:
    """The function create a set with timestamps of events, then create a list of requests
        simulating the log in the time passing

        :param replay_job: job dictionary
        :param training_initial_job: job dictionary
        :return: List of requests
    """

    split = replay_job.split
    log = get_log(split.train_log)
    requests_list = list()

    eventlog = EventLog()
    for key in log.attributes.keys():
        eventlog.attributes[key] = log.attributes[key]
    for trace in log:
        new_trace = Trace(trace)
        for key in trace.attributes:
            new_trace.attributes[key] = trace.attributes[key]
        eventlog.append(new_trace)

    times = sorted(
        set([event['time:timestamp'] for trace in eventlog
             for event in trace]))

    for t in times[2::int((len(times) - 2) / 5)]:
        filtered_eventlog = timestamp_filter.apply_events(
            eventlog, times[0].replace(tzinfo=None), t.replace(tzinfo=None))
        trace_list = list()
        event_number = dict()
        for trace in filtered_eventlog:
            trace_list.append(trace.attributes['concept:name'])
            event_number[trace.attributes['concept:name']] = len(trace)
        replay_job.case_id = trace_list
        replay_job.event_number = event_number
        replay_job.save()
        try:  #TODO check logger usage
            logger.info("Sending request for replay_prediction task.")
            r = requests.post(
                url="http://server:8000/runtime/replay_prediction/",
                data=export_log_as_string(filtered_eventlog),
                params={
                    'jobId': replay_job.id,
                    'training_job': training_initial_job.id
                },
                headers={
                    'Content-Type': 'text/plain',
                    'charset': 'UTF-8'
                })
            requests_list.append(str(r))
        except Exception as e:
            requests_list.append(str(e))
            logger.warning(str(e))

    training_log, test_log, additional_columns = get_train_test_log(
        replay_job.split)
    training_df, _ = encode_label_logs(training_log,
                                       test_log,
                                       replay_job,
                                       additional_columns=additional_columns)

    gold_values = dict(zip(training_df['trace_id'], training_df['label']))
    parent_id = replay_job.id
    # final_job = duplicate_orm_row(replay_job)  #todo: replace with simple CREATE
    final_job = Job.objects.create(
        created_date=replay_job.created_date,
        modified_date=replay_job.modified_date,
        error=replay_job.error,
        status=replay_job.status,
        type=replay_job.type,
        create_models=replay_job.create_models,
        case_id=replay_job.case_id,
        event_number=replay_job.event_number,
        gold_value=replay_job.gold_value,
        results=replay_job.results,
        parent_job=replay_job.parent_job,
        split=replay_job.split,
        encoding=replay_job.encoding,
        labelling=replay_job.labelling,
        clustering=replay_job.clustering,
        predictive_model=replay_job.predictive_model,
        evaluation=replay_job.evaluation,
        hyperparameter_optimizer=replay_job.hyperparameter_optimizer,
        incremental_train=replay_job.incremental_train)
    final_job.parent_job = Job.objects.filter(pk=parent_id)[0]
    final_job.gold_value = gold_values
    final_job.type = JobTypes.REPLAY_PREDICT.value
    final_job.save()
    return requests_list
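The slice times[2::int((len(times) - 2) / 5)] samples a handful of roughly evenly spaced snapshot timestamps instead of replaying every prefix; a standalone sketch of that sampling, using a synthetic timeline:

from datetime import datetime, timedelta

# Synthetic sorted timeline standing in for the event timestamps
times = [datetime(2011, 3, 9) + timedelta(hours=h) for h in range(50)]
step = int((len(times) - 2) / 5)
snapshots = times[2::step]
print(len(snapshots), snapshots[0], snapshots[-1])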