def get_default_representation_with_attribute_names(log,
                                                    parameters=None,
                                                    feature_names=None):
    """
    Builds the default data representation of an event log and also returns
    the attribute names that were used to build it

    Parameters
    -------------
    log
        Trace log
    parameters
        Possible parameters of the algorithm. The keys read here are:
        - ENABLE_ACTIVITY_DEF_REPRESENTATION (default False): add the activity
          key to the string event attributes when it is not already there
        - ENABLE_SUCC_DEF_REPRESENTATION (default False): use the activity key
          as string "succession" attribute
        - constants.PARAMETER_CONSTANT_ACTIVITY_KEY (default xes.DEFAULT_NAME_KEY)
        - "blacklist" (default []): attribute names to exclude
    feature_names
        (If provided) Feature to use in the representation of the log

    Returns
    -------------
    data
        Data returned by get_representation
    feature_names
        Names of the features returned by get_representation
    str_tr_attr
        String trace attributes used (after applying the blacklist)
    str_ev_attr
        String event attributes used (after applying the blacklist)
    num_tr_attr
        Numeric trace attributes used (after applying the blacklist)
    num_ev_attr
        Numeric event attributes used (after applying the blacklist)
    """
    options = {} if parameters is None else parameters

    use_activity = options.get(ENABLE_ACTIVITY_DEF_REPRESENTATION, False)
    use_successions = options.get(ENABLE_SUCC_DEF_REPRESENTATION, False)
    activity_key = options.get(constants.PARAMETER_CONSTANT_ACTIVITY_KEY,
                               xes.DEFAULT_NAME_KEY)
    blacklist = options.get("blacklist", [])

    def _drop_blacklisted(names):
        # Keeps the original order, removing only the blacklisted names
        return [name for name in names if name not in blacklist]

    selection = select_attributes_from_log_for_tree(log)
    str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = selection

    # The activity key is added before the blacklist is applied, so a
    # blacklisted activity key is still removed below
    if use_activity and activity_key not in str_ev_attr:
        str_ev_attr.append(activity_key)

    str_evsucc_attr = None
    if use_successions:
        str_evsucc_attr = _drop_blacklisted([activity_key])

    str_tr_attr = _drop_blacklisted(str_tr_attr)
    str_ev_attr = _drop_blacklisted(str_ev_attr)
    num_tr_attr = _drop_blacklisted(num_tr_attr)
    num_ev_attr = _drop_blacklisted(num_ev_attr)

    data, feature_names = get_representation(log,
                                             str_tr_attr,
                                             str_ev_attr,
                                             num_tr_attr,
                                             num_ev_attr,
                                             str_evsucc_attr=str_evsucc_attr,
                                             feature_names=feature_names)

    return (data, feature_names, str_tr_attr, str_ev_attr, num_tr_attr,
            num_ev_attr)
Exemple #2
0
def apply(log,
          net,
          initial_marking,
          final_marking,
          decision_point=None,
          attributes=None,
          parameters=None):
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier

    Every selected attribute (except the activity key) is one-hot encoded
    through pandas.get_dummies, regardless of the type of its values.

    Parameters
    --------------
    log
        Event log (converted through log_converter.apply before being used)
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method raises an exception that carries the list of
          possible decision points
        - if specified, the method goes on and builds the features for that point
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed (string and numeric event attributes).
    parameters
        Parameters of the algorithm (Parameters.ACTIVITY_KEY is read here)

    Returns
    ---------------
    X
        features (pandas DataFrame)
    y
        Target class ('Target' column produced by encode_target)
    class_name
        Target class names (second value returned by encode_target)

    Raises
    ---------------
    Exception
        If decision_point is None
    """
    if parameters is None:
        parameters = {}
    log = log_converter.apply(log, parameters=parameters)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    if decision_point is None:
        decision_points_names = get_decision_points(net,
                                                    labels=True,
                                                    parameters=parameters)
        raise Exception(
            "please provide decision_point as argument of the method. Possible decision points: ",
            decision_points_names)
    if attributes is None:
        # Only the event-level attributes are used as candidate features
        _, str_ev_attr, _, num_ev_attr = select_attributes_from_log_for_tree(
            log)
        attributes = list(str_ev_attr) + list(num_ev_attr)
    decisions_table, _ = get_decisions_table(
        log,
        net,
        initial_marking,
        final_marking,
        attributes=attributes,
        pre_decision_points=[decision_point],
        parameters=parameters)
    x_attributes = [a for a in attributes if a != activity_key]
    selected = set(x_attributes)
    x = []
    y = []
    for el in decisions_table[decision_point]:
        x.append({a: v for a, v in el[0].items() if a in selected})
        y.append(el[1])
    X = pd.DataFrame(x)
    # An attribute that never shows up at this decision point has no column
    # in X; asking get_dummies to encode it would raise a KeyError, so only
    # the columns that actually exist are encoded (in the caller's order).
    present_attributes = [a for a in x_attributes if a in X.columns]
    if present_attributes:
        X = pd.get_dummies(data=X, columns=present_attributes)
    Y = pd.DataFrame(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']
    return X, y, targets
 def test_select_attributes(self):
     """Smoke-test the attribute selection helpers on the log from get_log()."""
     from pm4py.statistics.attributes.log import select as attribute_select
     event_log = self.get_log()
     # No assertions: the test passes as long as neither call raises
     attribute_select.get_trace_attribute_values(event_log, "concept:name")
     attribute_select.select_attributes_from_log_for_tree(event_log)
Exemple #4
0
def apply(
        log: Union[EventLog, pd.DataFrame],
        net: PetriNet,
        initial_marking: Marking,
        final_marking: Marking,
        decision_point=None,
        attributes=None,
        parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Any:
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier

    String-valued attributes are one-hot encoded through pandas.get_dummies;
    all the other values are appended to the feature table as they are.

    Parameters
    --------------
    log
        Event log (converted to an event log object before being used)
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method raises an exception that carries the list of
          possible decision points
        - if specified, the method goes on and builds the features for that point
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed (string and numeric event attributes).
    parameters
        Parameters of the algorithm (Parameters.LABELS and Parameters.ACTIVITY_KEY
        are read here)

    Returns
    ---------------
    X
        features (pandas DataFrame: one-hot columns first, then the non-string columns)
    y
        Target class ('Target' column produced by encode_target)
    class_name
        Target class names (second value returned by encode_target)

    Raises
    ---------------
    Exception
        If decision_point is None
    """
    import pandas as pd

    if parameters is None:
        parameters = {}

    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)

    log = log_converter.apply(log,
                              variant=log_converter.Variants.TO_EVENT_LOG,
                              parameters=parameters)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY,
                                              parameters,
                                              xes_constants.DEFAULT_NAME_KEY)
    if decision_point is None:
        decision_points_names = get_decision_points(net,
                                                    labels=labels,
                                                    parameters=parameters)
        raise Exception(
            "please provide decision_point as argument of the method. Possible decision points: ",
            decision_points_names)
    if attributes is None:
        # Only the event-level attributes are used as candidate features
        _, str_ev_attr, _, num_ev_attr = select_attributes_from_log_for_tree(
            log)
        attributes = list(str_ev_attr) + list(num_ev_attr)
    decisions_table, _ = get_decisions_table(
        log,
        net,
        initial_marking,
        final_marking,
        attributes=attributes,
        pre_decision_points=[decision_point],
        parameters=parameters)
    x_attributes = [a for a in attributes if a != activity_key]
    selected = set(x_attributes)
    str_attributes = set()
    str_rows = []
    other_rows = []
    y = []
    for el in decisions_table[decision_point]:
        # Single pass over the row: split the selected attributes into the
        # string-valued part (to be one-hot encoded) and the remaining part
        str_row = {}
        other_row = {}
        for a, v in el[0].items():
            if a not in selected:
                continue
            if isinstance(v, str):
                str_attributes.add(a)
                str_row[a] = v
            else:
                other_row[a] = v
        str_rows.append(str_row)
        other_rows.append(other_row)
        y.append(el[1])
    X = pd.DataFrame(str_rows)
    # Follow the order of the attribute list instead of iterating the set:
    # set iteration order for strings can differ between interpreter runs,
    # which would shuffle the one-hot feature columns from run to run.
    str_columns = [
        a for a in dict.fromkeys(x_attributes) if a in str_attributes
    ]
    if str_columns:
        # Skipped when there is nothing to encode (no string attribute seen)
        X = pd.get_dummies(data=X, columns=str_columns)
    X2 = pd.DataFrame(other_rows)
    X = pd.concat([X, X2], axis=1)
    Y = pd.DataFrame(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']
    return X, y, targets