Example no. 1
def apply_events(log, values, parameters=None):
    """
    Filter log by keeping only events with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            Parameters.ATTRIBUTE_KEY -> Attribute to filter on
            Parameters.POSITIVE -> Indicates whether matching events should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(list(filter(lambda x: x[attribute_key] in values, stream)))
    else:
        stream = EventStream(list(filter(lambda x: x[attribute_key] not in values, stream)))

    filtered_log = log_converter.apply(stream)

    return filtered_log
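A minimal usage sketch (the importer module and file name below are assumptions for illustration, not part of the example):

# hypothetical usage; the importer module follows pm4py conventions
from pm4py.objects.log.importer.xes import importer as xes_importer

log = xes_importer.apply("running-example.xes")  # placeholder file
# keep only events whose "concept:name" is one of the allowed values
filtered = apply_events(log, ["register request", "pay compensation"],
                        parameters={Parameters.ATTRIBUTE_KEY: "concept:name"})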
Example no. 2
def sample_stream(event_log, no_events=100):
    """
    Randomly sample up to a fixed number of events from the original event log

    Parameters
    -----------
    event_log
        Event log
    no_events
        Number of events that the sample should have

    Returns
    -----------
    new_log
        Sampled event stream
    """
    new_log = EventStream(attributes=event_log.attributes,
                          extensions=event_log.extensions,
                          globals=event_log._omni,
                          classifiers=event_log.classifiers)
    set_events = set()
    for i in range(0, min(no_events, len(event_log._list))):
        set_events.add(random.randrange(0, len(event_log._list)))
    set_events = list(set_events)
    for event in set_events:
        new_log.append(copy(event_log._list[event]))
    return new_log
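Since random.randrange can draw the same index more than once and the draws are collected into a set, the sample may end up with fewer than no_events events. A sketch of a variant that always returns min(no_events, len(event_log)) events (assuming the same EventStream class is in scope):

import random
from copy import copy

def sample_stream_exact(event_log, no_events=100):
    # sample distinct indices without replacement, so the result
    # always contains exactly min(no_events, len(event_log)) events
    k = min(no_events, len(event_log._list))
    new_log = EventStream(attributes=event_log.attributes,
                          extensions=event_log.extensions,
                          globals=event_log._omni,
                          classifiers=event_log.classifiers)
    for idx in random.sample(range(len(event_log._list)), k):
        new_log.append(copy(event_log._list[idx]))
    return new_log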
Example no. 3
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    case_key = parameters[
        PARAMETER_CONSTANT_CASEID_KEY] if PARAMETER_CONSTANT_CASEID_KEY in parameters else xes.DEFAULT_TRACEID_KEY
    # the stream_filter parameters restrict the numeric filter to events that also match
    # given key/value pairs (e.g. events with a particular activity).
    stream_filter_key1 = parameters["stream_filter_key1"] if "stream_filter_key1" in parameters else None
    stream_filter_value1 = parameters["stream_filter_value1"] if "stream_filter_value1" in parameters else None
    stream_filter_key2 = parameters["stream_filter_key2"] if "stream_filter_key2" in parameters else None
    stream_filter_value2 = parameters["stream_filter_value2"] if "stream_filter_value2" in parameters else None

    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))

    if positive:
        stream = EventStream(list(filter(lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))

    all_cases_ids = set(x["case:" + case_key] for x in stream)

    filtered_log = EventLog()

    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)

    return filtered_log
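A usage sketch (the attribute and activity names are illustrative assumptions): keep the cases that contain a "payment" event whose "amount" falls within [100, 500].

parameters = {
    PARAMETER_CONSTANT_ATTRIBUTE_KEY: "amount",   # numeric attribute to test
    "stream_filter_key1": "concept:name",         # restrict the test to events...
    "stream_filter_value1": "payment",            # ...with this activity
    "positive": True,
}
filtered_log = apply_numeric(log, 100, 500, parameters=parameters)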
Example no. 4
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    case_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes.DEFAULT_TRACEID_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    # the stream_filter parameters restrict the numeric filter to events that also match
    # given key/value pairs (e.g. events with a particular activity).

    stream_filter_key1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY1, parameters, None)
    stream_filter_value1 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE1, parameters, None)
    stream_filter_key2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_KEY2, parameters, None)
    stream_filter_value2 = exec_utils.get_param_value(Parameters.STREAM_FILTER_VALUE2, parameters, None)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    if stream_filter_key1 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key1 in x and x[stream_filter_key1] == stream_filter_value1, stream)))
    if stream_filter_key2 is not None:
        stream = EventStream(
            list(filter(lambda x: stream_filter_key2 in x and x[stream_filter_key2] == stream_filter_value2, stream)))

    if positive:
        stream = EventStream(list(filter(lambda x: attribute_key in x and int1 <= x[attribute_key] <= int2, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: attribute_key in x and (x[attribute_key] < int1 or x[attribute_key] > int2), stream)))

    all_cases_ids = set(x["case:" + case_key] for x in stream)

    filtered_log = EventLog()

    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)

    return filtered_log
Example no. 5
def apply_numeric(log, int1, int2, parameters=None):
    """
    Apply a filter on cases (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    case_key = parameters[
        PARAMETER_CONSTANT_CASEID_KEY] if PARAMETER_CONSTANT_CASEID_KEY in parameters else xes.DEFAULT_TRACEID_KEY

    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and int1 <= x[attribute_key]
                    <= int2, stream)))
    else:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and
                    (x[attribute_key] < int1 or x[attribute_key] > int2),
                    stream)))

    all_cases_ids = set(x["case:" + case_key] for x in stream)

    filtered_log = EventLog()

    for case in log:
        if case.attributes[case_key] in all_cases_ids:
            filtered_log.append(case)

    return filtered_log
Example no. 6
def apply_events(log, values, parameters=None):
    """
    Filter log by keeping only events with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute to filter on
            positive -> Indicates whether matching events should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] in values, stream)))
    else:
        stream = EventStream(
            list(filter(lambda x: x[attribute_key] not in values, stream)))

    filtered_log = log_conv_fact.apply(stream)

    return filtered_log
Example no. 7
def preprocess_log(log, activities=None, parameters=None):
    """
    Preprocess a log to enable correlation mining

    Parameters
    --------------
    log
        Log object
    activities
        (if provided) list of activities of the log
    parameters
        Parameters of the algorithm

    Returns
    --------------
    transf_stream
        Transformed stream
    activities_grouped
        Grouped activities
    activities
        List of activities of the log
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters,
                                               xes_constants.DEFAULT_TIMESTAMP_KEY)
    start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
                                                     xes_constants.DEFAULT_TIMESTAMP_KEY)
    index_key = exec_utils.get_param_value(Parameters.INDEX_KEY, parameters, DEFAULT_INDEX_KEY)

    if type(log) is pd.DataFrame:
        # keep only the needed columns before conversion
        log = log[list(set([activity_key, timestamp_key, start_timestamp_key]))]

    log = converter.apply(log, variant=converter.TO_EVENT_STREAM, parameters=parameters)
    transf_stream = EventStream()
    for idx, ev in enumerate(log):
        transf_stream.append(
            Event({activity_key: ev[activity_key], timestamp_key: ev[timestamp_key].timestamp(),
                   start_timestamp_key: ev[start_timestamp_key].timestamp(), index_key: idx}))
    transf_stream = sorted(transf_stream, key=lambda x: (x[start_timestamp_key], x[timestamp_key], x[index_key]))

    if activities is None:
        activities = sorted(list(set(x[activity_key] for x in transf_stream)))

    activities_grouped = {x: [y for y in transf_stream if y[activity_key] == x] for x in activities}

    return transf_stream, activities_grouped, activities
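A short usage sketch (log is assumed to be an already imported pm4py log or dataframe):

transf_stream, activities_grouped, activities = preprocess_log(log)
# events of the first activity, already sorted by (start, complete, index)
first_activity_events = activities_grouped[activities[0]]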
Example no. 8
def apply_numeric_events(log, int1, int2, parameters=None):
    """
    Apply a filter on events (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm:
            Parameters.ATTRIBUTE_KEY => indicates which attribute to filter
            Parameters.POSITIVE => keep (True) or remove (False) the matching events

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY,
                                               parameters, DEFAULT_NAME_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters,
                                          True)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and int1 <= x[attribute_key]
                    <= int2, stream)))
    else:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and
                    (x[attribute_key] < int1 or x[attribute_key] > int2),
                    stream)))

    filtered_log = log_converter.apply(stream)

    return filtered_log
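For instance (the attribute name is an illustrative assumption), removing all events whose "amount" lies inside [0, 100]:

params = {Parameters.ATTRIBUTE_KEY: "amount", Parameters.POSITIVE: False}
filtered = apply_numeric_events(log, 0, 100, parameters=params)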
Example no. 9
def table_to_log(table, parameters=None):
    """
    Converts a Pyarrow table to an event log

    Parameters
    ------------
    table
        Pyarrow table
    parameters
        Possible parameters of the algorithm

    Returns
    ------------
    log
        Event log obtained from the table
    """
    if parameters is None:
        parameters = {}

    dict0 = table.to_pydict()
    keys = list(dict0.keys())
    # for legacy format support
    if LEGACY_PARQUET_CASECONCEPTNAME in keys:
        for key in keys:
            dict0[key.replace(LEGACY_PARQUET_TP_REPLACER,
                              ":")] = dict0.pop(key)

    stream = EventStream([dict(zip(dict0, i)) for i in zip(*dict0.values())])

    return log_conv_factory.apply(stream, parameters=parameters)
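A usage sketch reading a Parquet file with pyarrow (the file name is a placeholder):

import pyarrow.parquet as pq

table = pq.read_table("event_log.parquet")  # placeholder path
log = table_to_log(table)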
Example no. 10
def apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing all the events contained in the given interval

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound of the interval
    dt2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    filtered_stream = EventStream([x for x in stream if dt1 < x[timestamp_key].replace(tzinfo=None) < dt2])
    filtered_log = log_converter.apply(filtered_stream)

    return filtered_log
Example no. 11
def apply_numeric_events(log, int1, int2, parameters=None):
    """
    Apply a filter on events (numerical filter)

    Parameters
    --------------
    log
        Log
    int1
        Lower bound of the interval
    int2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY => indicates which attribute to filter
            positive => keep (True) or remove (False) the matching events

    Returns
    --------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and int1 <= x[attribute_key]
                    <= int2, stream)))
    else:
        stream = EventStream(
            list(
                filter(
                    lambda x: attribute_key in x and
                    (x[attribute_key] < int1 or x[attribute_key] > int2),
                    stream)))

    filtered_log = log_conv_fact.apply(stream)

    return filtered_log
Example no. 12
def apply(stream0, parameters=None):
    """
    Groups the events of a stream into sentences

    Parameters
    -------------
    stream0
        Event stream
    parameters
        Parameters of the algorithm, including:
            resource_group_column => The column associated to the resource, used to first
                group the stream
            equiv_columns => List of columns that shall be equal in order for two successive events to belong to
                the same group
            time_delay => The delay that is considered for grouping two successive events into the
                same group

    Returns
    -------------
    grouped_stream
        Grouped event stream
    """
    if parameters is None:
        parameters = {}

    res_group_column = parameters[
        RESOURCE_GROUP_COLUMN] if RESOURCE_GROUP_COLUMN in parameters else DEFAULT_RES_GROUP_COLUMN
    equiv_columns = set(
        parameters[EQUIV_COLUMNS]) if EQUIV_COLUMNS in parameters else set(
            DEFAULT_EQUIV_COLUMNS)
    time_delay = parameters[TIME_DELAY] if TIME_DELAY in parameters else 5
    timestamp_column = parameters[
        TIMESTAMP_COLUMN] if TIMESTAMP_COLUMN in parameters else DEFAULT_TIMEST_COLUMN

    resources = set(ev[res_group_column] for ev in stream0)

    grouped_stream = []

    for res in resources:
        stream = [x for x in stream0 if x[res_group_column] == res]
        for i in range(len(stream)):
            if i == 0:
                grouped_stream.append([stream[i]])
            else:
                e_col = set([
                    col for col in stream[i].keys()
                    if stream[i][col] == stream[i - 1][col]
                ])
                time_i = stream[i][timestamp_column]
                time_i1 = stream[i - 1][timestamp_column]
                if e_col == equiv_columns or (time_i - time_i1) < time_delay:
                    grouped_stream[-1].append(stream[i])
                else:
                    grouped_stream.append([stream[i]])

    return EventStream(grouped_stream)
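A usage sketch (stream0 is assumed to be a list or EventStream of event dicts carrying the default resource and timestamp columns):

# hypothetical: group a click stream per resource, merging successive
# events that are at most 5 time units apart
grouped = apply(stream0, parameters={TIME_DELAY: 5})
for sentence in grouped:
    print(len(sentence), "events in this group")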
Example no. 13
def get_log_obj_type(self, objtype):
    columns = [x for x in self.exploded_dataframe.columns if x.startswith("event_")] + [objtype]
    dataframe = self.exploded_dataframe[columns].dropna(how="any", subset=[objtype])
    dataframe = succint_mdl_to_exploded_mdl.apply(dataframe)
    dataframe = dataframe.rename(columns={"event_activity": "concept:name", "event_timestamp": "time:timestamp",
                                          objtype: "case:concept:name"})
    stream = EventStream(dataframe.to_dict('records'))
    log = log_conv_factory.apply(stream)
    log = sorting.sort_timestamp(log, "time:timestamp")
    exported_log = base64.b64encode(xes_exporter.export_log_as_string(log)).decode("utf-8")
    return self.name + "_" + objtype, "xes", exported_log
Example no. 14
def sample_stream(event_log, no_events=100):
    """
    Randomly sample a fixed number of events from the original event log

    Parameters
    -----------
    event_log
        Event log
    no_events
        Number of events that the sample should have

    Returns
    -----------
    new_log
        Sampled event stream
    """
    new_log = EventStream(attributes=event_log.attributes, extensions=event_log.extensions, globals=event_log._omni,
                          classifiers=event_log.classifiers)
    new_log._list = random.sample(event_log, min(no_events, len(event_log)))
    return new_log
Example no. 15
def filter_log_events_attr(log, values, parameters=None):
    """
    Filter log by keeping only events with an attribute value that belongs to the provided values list

    Parameters
    -----------
    log
        log
    values
        Allowed attribute values
    parameters
        Parameters of the algorithm, including:
            PARAMETER_CONSTANT_ATTRIBUTE_KEY -> Attribute to filter on
            positive -> Indicates whether matching events should be kept (True) or removed (False)

    Returns
    -----------
    filtered_log
        Filtered log
    """

    # CODE SAVING FROM FILTERS

    if parameters is None:
        parameters = {}

    attribute_key = parameters[
        PARAMETER_CONSTANT_ATTRIBUTE_KEY] if PARAMETER_CONSTANT_ATTRIBUTE_KEY in parameters else DEFAULT_NAME_KEY
    positive = parameters["positive"] if "positive" in parameters else True

    stream = log_conv_fact.apply(log, variant=log_conv_fact.TO_EVENT_STREAM)
    if positive:
        stream = EventStream(list(filter(lambda x: x[attribute_key] in values, stream)))
    else:
        stream = EventStream(list(filter(lambda x: x[attribute_key] not in values, stream)))

    filtered_log = log_conv_fact.apply(stream)

    return filtered_log
Example no. 16
def apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing all the events contained in the given interval

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound of the interval
    dt2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm, including:
            Parameters.TIMESTAMP_KEY -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY,
                                               parameters,
                                               DEFAULT_TIMESTAMP_KEY)
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    filtered_stream = EventStream([
        x
        for x in stream if dt1 <= x[timestamp_key].replace(tzinfo=None) <= dt2
    ],
                                  attributes=log.attributes,
                                  extensions=log.extensions,
                                  omni_present=log.omni_present,
                                  classifiers=log.classifiers)
    filtered_log = log_converter.apply(filtered_stream)

    return filtered_log
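For example (a sketch; the "YYYY-mm-dd HH:MM:SS" string format is the one commonly accepted by get_dt_from_string in pm4py):

# hypothetical bounds
filtered = apply_events(log, "2011-03-01 00:00:00", "2012-01-01 00:00:00")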
Example no. 17
def apply(bytes, parameters=None):
    """
    Apply the deserialization to the bytes produced by Pyarrow serialization

    Parameters
    --------------
    bytes
        Bytes
    parameters
        Parameters of the algorithm

    Returns
    --------------
    deser
        Deserialized object
    """
    if parameters is None:
        parameters = {}
    buffer = pyarrow.py_buffer(bytes)
    list_events = pyarrow.deserialize(buffer)
    for i in range(len(list_events)):
        list_events[i] = Event(list_events[i])
    return EventStream(list_events)
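The serializing counterpart would look roughly as follows (a sketch: pyarrow.serialize/deserialize have been deprecated in recent pyarrow releases, so both directions only work on versions that still ship them):

import pyarrow

def serialize(stream, parameters=None):
    # turn each Event back into a plain dict before handing it to pyarrow
    list_events = [dict(ev) for ev in stream]
    return pyarrow.serialize(list_events).to_buffer().to_pybytes()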
Example no. 18
def apply(df0, classifier_function=None, parameters=None):
    if parameters is None:
        parameters = {}

    if classifier_function is None:
        classifier_function = lambda x: x["event_activity"]

    min_acti_freq = parameters[
        "min_acti_freq"] if "min_acti_freq" in parameters else 0
    min_edge_freq = parameters[
        "min_edge_freq"] if "min_edge_freq" in parameters else 0

    df = df0.copy()
    df = general.preprocess(df, parameters=parameters)

    df = clean_frequency.apply(df, min_acti_freq=min_acti_freq)
    df = clean_arc_frequency.apply(df, min_freq=min_edge_freq)

    models = {}

    obj_types = [x for x in df.columns if not x.startswith("event_")]
    activities = set()
    activities_repeated = Counter()
    edges = Counter()
    start_activities = dict()
    end_activities = dict()
    acti_spec = Counter()

    for ot in obj_types:
        start_activities[ot] = set()
        end_activities[ot] = set()

        new_df = df[["event_id", "event_activity", "event_timestamp",
                     ot]].dropna(subset=[ot])
        new_df = new_df.rename(columns={
            ot: "case:concept:name",
            "event_timestamp": "time:timestamp"
        })
        log = new_df.to_dict("records")
        for ev in log:
            ev["event_objtype"] = ot
            ev["concept:name"] = classifier_function(ev)
            del ev["event_objtype"]
            del ev["event_activity"]
            activities.add((ev["event_id"], ev["concept:name"]))

        log = EventStream(log)
        this_activities = set(x["concept:name"] for x in log)
        for act in this_activities:
            activities_repeated[act] += 1
        log = log_conv_factory.apply(log,
                                     variant=log_conv_factory.TO_EVENT_LOG)

        for trace in log:
            if trace:
                start_activities[ot].add(trace[0]["concept:name"])
                end_activities[ot].add(trace[-1]["concept:name"])
                for i in range(len(trace) - 1):
                    ev0 = trace[i]
                    ev1 = trace[i + 1]
                    edges[(ot, ev0["concept:name"], ev1["concept:name"],
                           ev0["event_id"], ev1["event_id"],
                           trace.attributes["concept:name"],
                           ev0["time:timestamp"], ev1["time:timestamp"])] += 1
                    acti_spec[(ot, trace[i]["concept:name"],
                               trace[i]["event_id"],
                               trace.attributes["concept:name"],
                               trace[i]["time:timestamp"])] += 1
                if len(trace) > 0:
                    acti_spec[(ot, trace[-1]["concept:name"],
                               trace[-1]["event_id"],
                               trace.attributes["concept:name"],
                               trace[-1]["time:timestamp"])] += 1

        models[ot] = tsystem.apply(log)

    activities = dict(Counter(list(x[1] for x in activities)))
    activities_repeated = set(x for x in activities_repeated
                              if activities_repeated[x] > 1)

    return {
        "type": "trans_system",
        "models": models,
        "activities": activities,
        "activities_repeated": activities_repeated,
        "edges": edges,
        "acti_spec": acti_spec
    }
Example no. 19
def modi_apply_events(log, dt1, dt2, parameters=None):
    """
    Get a new log containing all the events contained in the given interval

    Parameters
    -----------
    log
        Log
    dt1
        Lower bound of the interval
    dt2
        Upper bound of the interval
    parameters
        Possible parameters of the algorithm, including:
            timestamp_key -> Attribute to use as timestamp

    Returns
    ------------
    filtered_log
        Filtered log
    """
    if parameters is None:
        parameters = {}
    timestamp_key = parameters[
        PARAMETER_CONSTANT_TIMESTAMP_KEY] if PARAMETER_CONSTANT_TIMESTAMP_KEY in parameters else DEFAULT_TIMESTAMP_KEY
    dt1 = get_dt_from_string(dt1)
    dt2 = get_dt_from_string(dt2)

    stream = log_converter.apply(log, variant=log_converter.TO_EVENT_STREAM)
    valid_stream = list()
    for i, x in enumerate(stream):
        if i < len(stream) - 1 and x['lifecycle:transition'] == 'start' and stream[i+1]['lifecycle:transition'] == 'complete':
            if x[timestamp_key].replace(tzinfo=None) < dt1 and stream[i+1][timestamp_key].replace(tzinfo=None) > dt1:
                x[timestamp_key] = dt1
                valid_stream.append(x)
            elif dt1 < x[timestamp_key].replace(tzinfo=None) < dt2:
                x[timestamp_key] = x[timestamp_key].replace(tzinfo=None)
                valid_stream.append(x)

        elif i != 0 and x['lifecycle:transition'] == 'complete' and stream[i-1]['lifecycle:transition'] == 'start':
            if x[timestamp_key].replace(tzinfo=None) > dt2 and stream[i-1][timestamp_key].replace(tzinfo=None) < dt2:
                x[timestamp_key] = dt2
                valid_stream.append(x)
            elif dt1 < x[timestamp_key].replace(tzinfo=None) < dt2:
                x[timestamp_key] = x[timestamp_key].replace(tzinfo=None)
                valid_stream.append(x)
        """
        if dt1 < x[timestamp_key].replace(tzinfo=None) < dt2:
            x[timestamp_key] = x[timestamp_key].replace(tzinfo=None)
            valid_stream.append(x)
        else:
            if i != len(stream) and x['lifecycle:transition'] == 'start' and stream[i+1]['lifecycle:transition'] == 'complete':
                if x[timestamp_key].replace(tzinfo=None) < dt1 and stream[i+1][timestamp_key].replace(tzinfo=None) < dt2:
                    x[timestamp_key] = dt1
                    valid_stream.append(x)
            elif i != 0 and x['lifecycle:transition'] == 'complete' and stream[i-1]['lifecycle:transition'] == 'start':
                if x[timestamp_key].replace(tzinfo=None) > dt2 and stream[i-1][timestamp_key].replace(tzinfo=None) > dt1:
                    x[timestamp_key] = dt2
                    valid_stream.append(x)
        """




    filtered_stream = EventStream(valid_stream)
    filtered_log = log_converter.apply(filtered_stream)

    return filtered_log
Example no. 20
def apply(grouped_stream, parameters=None):
    """
    Applies a grouping based on a set of equivalence columns and the distance between the clicks,
    in order to eventually assign a label to each click

    Parameters
    ---------------
    grouped_stream
        Stream of events grouped (possibly, by resource and temporal constraints)
    parameters
        Parameters of the algorithm, including:
            remove_duplicates => Boolean that tells if duplicates shall be removed from the stream
            equiv_columns => Columns that are used for grouping, keeping all the events having
                an equivalent set of values
            spatial_column => Column that contains the relative position of the mouse
            window_name => Column that contains the name of the window
            final_label_idx => Attribute that is going to store the index of the label
                associated to the event
            final_label => Attribute that is going to store the final event label
            dbscan_eps => Parameter of the clustering algorithm

    Returns
    ---------------
    new_grouped_stream
        New grouped stream
    all_labels
        All labels indexed
    """
    if parameters is None:
        parameters = {}

    equiv_columns = parameters[
        EQUIV_COLUMNS] if EQUIV_COLUMNS in parameters else DEFAULT_EQUIV_COLUMNS
    spatial_column = parameters[
        SPATIAL_COLUMN] if SPATIAL_COLUMN in parameters else DEFAULT_SPATIAL_COLUMN
    window_name = parameters[
        WINDOW_NAME] if WINDOW_NAME in parameters else DEFAULT_WINDOW_NAME
    final_label_idx = parameters[
        FINAL_LABEL_IDX] if FINAL_LABEL_IDX in parameters else DEFAULT_FINAL_LABEL_IDX
    final_label = parameters[
        FINAL_LABEL] if FINAL_LABEL in parameters else DEFAULT_FINAL_LABEL
    remove_duplicates = parameters[
        REMOVE_DUPLICATES] if REMOVE_DUPLICATES in parameters else True

    dbscan_eps = parameters[
        DBSCAN_EPS] if DBSCAN_EPS in parameters else DEFAULT_DBSCAN_EPS

    new_groups = {}

    for index, g in enumerate(grouped_stream):
        for index2, ev in enumerate(g):
            ev_features = "@@".join(ev[c] for c in equiv_columns)
            if ev_features not in new_groups:
                new_groups[ev_features] = []
            new_groups[ev_features].append((ev, index, index2))

    all_features = list(new_groups.keys())
    for fea in all_features:
        if len(new_groups[fea]) == 1:
            del new_groups[fea]

    all_labels = {}

    all_features = list(new_groups.keys())
    for fea in all_features:
        values = [eval(x[0][spatial_column]) for x in new_groups[fea]]
        db = DBSCAN(eps=dbscan_eps).fit(values)
        labels = db.labels_
        set_labels = list(set(labels))
        labels_corr = {}
        for x in set_labels:
            if not x == -1:
                labels_corr[x] = []
                for i in range(len(new_groups[fea])):
                    if labels[i] == x:
                        labels_corr[x].append(new_groups[fea][i])
        this_new_group = []
        for x in labels_corr:
            all_points = [eval(y[0][spatial_column]) for y in labels_corr[x]]
            centroid = " (%.1f, %.1f) " % (
                sum(x[0] for x in all_points) / len(all_points),
                sum(x[1] for x in all_points) / len(all_points))

            window_names = [y[0][window_name] for y in labels_corr[x]]
            wn_lp = os.path.commonprefix(window_names)
            wn_ls = longest_common_suffix(window_names)

            title = ""
            if len(wn_lp) > 0 or len(wn_ls) > 0:
                if len(wn_lp) >= len(wn_ls):
                    title = wn_lp
                else:
                    title = wn_ls

            e_cols_values = [
                " ".join(y[0][col] for col in equiv_columns)
                for y in labels_corr[x]
            ][0]
            new_label = e_cols_values + centroid + title

            this_label_idx = len(all_labels)
            all_labels[this_label_idx] = new_label

            for y in labels_corr[x]:
                y[0][final_label_idx] = this_label_idx
                y[0][final_label] = new_label
                this_new_group.append(y)
        if len(this_new_group) > 1:
            new_groups[fea] = this_new_group
        else:
            del new_groups[fea]

    indexed_events = {}
    all_features = list(new_groups.keys())
    for fea in all_features:
        for ev, index, index2 in new_groups[fea]:
            if index not in indexed_events:
                indexed_events[index] = []
            indexed_events[index].append((index2, ev))

    ret_grouped_stream = []
    for index in indexed_events:
        ret_grouped_stream.append(
            [y[1] for y in sorted(indexed_events[index], key=lambda x: x[0])])

    if remove_duplicates:
        for g in ret_grouped_stream:
            i = 1
            while i < len(g):
                if g[i][final_label] == g[i - 1][final_label]:
                    del g[i]
                    continue
                i = i + 1
        i = 0
        while i < len(ret_grouped_stream):
            g = ret_grouped_stream[i]
            if len(g) <= 1:
                del ret_grouped_stream[i]
                continue
            i = i + 1

    return EventStream(ret_grouped_stream), all_labels
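A sketch of the two-stage pipeline (group_clicks is a hypothetical placeholder for the resource/time grouping of Example no. 12):

# first group raw clicks by resource and time delay, then cluster
# equivalent clicks spatially and assign a label to each cluster
grouped_stream = group_clicks(stream0)
labeled_stream, all_labels = apply(grouped_stream, parameters={DBSCAN_EPS: 1.0})
print(all_labels)  # index -> human-readable click label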