def get_kde_numeric_attribute(log, attribute, parameters=None):
    """
    Computes the KDE estimation of the distribution of a numeric attribute,
    using the values found on the events of the log

    Parameters
    -------------
    log
        Event stream object; an object whose type is exactly EventLog is
        first converted to an event stream
    attribute
        Numeric attribute to analyse
    parameters
        Possible parameters of the algorithm, forwarded unchanged to the
        common KDE routine, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    stream = log
    # Exact type check: only a plain EventLog is converted, anything else is
    # iterated as it is.
    if type(log) is EventLog:
        stream = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)

    # Events that do not carry the attribute are skipped.
    collected = []
    for event in stream:
        if attribute in event:
            collected.append(event[attribute])

    return attributes_common.get_kde_numeric_attribute(collected, parameters=parameters)
def get_kde_numeric_attribute(df, attribute, parameters=None):
    """
    Computes the KDE estimation of the distribution of a numeric attribute,
    using the values of a dataframe column

    Parameters
    -------------
    df
        Pandas dataframe
    attribute
        Numeric attribute (column) to analyse
    parameters
        Possible parameters of the algorithm, including:
            graph_points -> number of points to include in the graph
            Parameters.MAX_NO_POINTS_SAMPLE -> maximum number of rows used
            for the estimation (default: 100000); when more rows have a
            value for the attribute, a random sample of this size is used

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    parameters = {} if parameters is None else parameters

    sample_cap = exec_utils.get_param_value(
        Parameters.MAX_NO_POINTS_SAMPLE, parameters, 100000)

    # Rows without a value for the attribute are discarded before sampling.
    filled_rows = df.dropna(subset=[attribute])
    if len(filled_rows) > sample_cap:
        # Random sub-sample, so that the estimation stays tractable on big
        # dataframes.
        filled_rows = filled_rows.sample(n=sample_cap)

    column_values = list(filled_rows[attribute])

    return attributes_common.get_kde_numeric_attribute(column_values, parameters=parameters)