def get_kde_date_attribute(log, attribute=DEFAULT_TIMESTAMP_KEY, parameters=None):
    """
    Gets the KDE estimation for the distribution of a date attribute values

    Events that do not carry the attribute are skipped. Timezone information
    is stripped from every collected value (``replace(tzinfo=None)``) before
    the values are handed to the common KDE routine.

    Parameters
    -------------
    log
        Event stream object (an EventLog, or any subclass of it, is first
        converted to an event stream)
    attribute
        Date attribute to analyse
    parameters
        Possible parameters of the algorithm, forwarded unchanged to
        attributes_common.get_kde_date_attribute, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    # isinstance (rather than an exact type comparison) so that subclasses of
    # EventLog are flattened to an event stream as well.
    if isinstance(log, EventLog):
        event_log = log_conversion.apply(log, variant=log_conversion.TO_EVENT_STREAM)
    else:
        event_log = log

    # Drop tzinfo so every value passed on is a naive datetime.
    values = [
        event[attribute].replace(tzinfo=None)
        for event in event_log
        if attribute in event
    ]

    return attributes_common.get_kde_date_attribute(values, parameters=parameters)
def get_kde_date_attribute(df, attribute=DEFAULT_TIMESTAMP_KEY, parameters=None):
    """
    Computes the KDE estimation of the distribution of a date column

    Rows whose value for the attribute is missing are discarded. When more
    rows remain than the configured maximum (Parameters.MAX_NO_POINTS_SAMPLE,
    defaulting to 100000), a sample of exactly that many rows is drawn with
    DataFrame.sample before the estimation.

    Parameters
    -------------
    df
        Pandas dataframe
    attribute
        Name of the date column to analyse
    parameters
        Possible parameters of the algorithm, forwarded to
        attributes_common.get_kde_date_attribute, including:
            graph_points -> number of points to include in the graph

    Returns
    --------------
    x
        X-axis values to represent
    y
        Y-axis values to represent
    """
    parameters = {} if parameters is None else parameters

    sample_cap = exec_utils.get_param_value(
        Parameters.MAX_NO_POINTS_SAMPLE, parameters, 100000
    )

    # Keep only the rows that actually have a value for the attribute.
    non_null_rows = df.dropna(subset=[attribute])

    # Bound the amount of data fed to the estimator.
    if len(non_null_rows) > sample_cap:
        non_null_rows = non_null_rows.sample(n=sample_cap)

    return attributes_common.get_kde_date_attribute(
        list(non_null_rows[attribute]), parameters=parameters
    )