def gradient(window_size, states_path, states_list):
    """Run the system flow with the gradient temporal-abstraction method.

    Either loads a pre-computed states dataframe from ``states_path`` or
    builds one from the whitespace-separated cutoff values in ``states_list``
    (one shared set of bins, replicated per property), then builds the
    temporal-abstraction params dataframe and delegates to ``system_flow``.

    :param window_size: int, gradient window size applied to every property
    :param states_path: str or falsy, path of a states CSV to load instead of
        building states from ``states_list``
    :param states_list: str, whitespace-separated numeric cutoffs
    """
    ctx = click.get_current_context()
    properties = ctx.meta['properties']
    if states_path:
        states = ctx.meta['files_manager'].read_csv(states_path)
    else:
        # split() with no argument tolerates repeated/irregular whitespace;
        # split(' ') yields empty strings there, and float('') raises.
        bins = [-inf] + [float(x) for x in states_list.split()] + [inf]
        bins_lows_per_property = bins[:-1]
        bins_highs_per_property = bins[1:]
        # Replicate the same bin edges for every property.
        bins_lows = np.tile(bins_lows_per_property, len(properties))
        bins_highs = np.tile(bins_highs_per_property, len(properties))
        states = DataframesGenerator.generate_states(
            np.arange(1, len(bins_lows) + 1),
            np.repeat(properties, len(bins_lows_per_property)),
            np.tile(np.arange(len(bins_lows_per_property)), len(properties)),
            bins_lows, bins_highs, np.full(len(bins_lows), np.nan))
    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        properties, ['gradient'] * len(properties),
        np.full(len(properties), nan),
        np.full(len(properties), window_size))
    system_flow(ctx.meta['files_manager'], ctx.meta['preprocessing_params_df'],
                params_df, states, kfold=ctx.meta['kfold'])
def discretize_property(self, prop_df):
    """Return this property's states and its knowledge-based symbolic series.

    :param prop_df: Dataframe, a time-point series of a single property
    :return: Tuple (Dataframe, Dataframe) — the rows of the pre-loaded states
        belonging to this property, and the symbolic time series produced by
        ``KnowledgeBased.knowledge_based``
    """
    if prop_df.empty:
        empty_states = DataframesGenerator.generate_empty_states()
        empty_series = DataframesGenerator.generate_empty_symbolic_time_series()
        return empty_states, empty_series
    current_prop = prop_df[DatasetColumns.TemporalPropertyID].values[0]
    prop_states = self.__states[
        self.__states[StatesColumns.TemporalPropertyID] == current_prop]
    return prop_states, KnowledgeBased.knowledge_based(self.__states, prop_df)
def create_symbolic_time_series(states, time_point_series):
    """
    Creates a symbolic time series from a given states dataframe and a time
    point series.

    :param states: Dataframe, a states dataframe
    :param time_point_series: Dataframe, a time point series
    :return: Dataframe, a symbolic time series so that each
        TemporalPropertyValue was replaced with its corresponding StateID
    """
    if states.empty or time_point_series.empty:
        return DataframesGenerator.generate_empty_symbolic_time_series()

    symbolic_df = time_point_series.copy()
    cutpoints = TemporalAbstraction.extract_cutpoints_from_states(states)

    # Bin each property's values against that property's own cutoffs.
    for prop_id, cutoffs in cutpoints.items():
        prop_mask = symbolic_df.TemporalPropertyID == prop_id
        symbolic_df.loc[prop_mask, StatesColumns.BinID] = \
            TemporalAbstraction.discretize_to_bins(
                cutoffs, symbolic_df[prop_mask].TemporalPropertyValue)

    # Map (property, bin) pairs to their StateID via an indexed lookup table.
    state_lookup = states[[StatesColumns.StateID,
                           StatesColumns.TemporalPropertyID,
                           StatesColumns.BinID]].set_index(
        [StatesColumns.TemporalPropertyID, StatesColumns.BinID])

    result = symbolic_df.drop(DatasetColumns.TemporalPropertyValue,
                              axis='columns')
    result = result.join(state_lookup, how='left',
                         on=[StatesColumns.TemporalPropertyID,
                             StatesColumns.BinID])
    return result.drop(StatesColumns.BinID, axis='columns')
def create_time_intervals(self, symbolic_time_series):
    """
    Convert the discretized dataset to time-intervals sorted by start-time,
    end-time, state-id.

    :param symbolic_time_series: Dataframe
    :return: Dataframe, symbolic time intervals
    """
    if symbolic_time_series.empty:
        return DataframesGenerator.generate_empty_symbolic_time_intervals()

    grouped = symbolic_time_series.groupby(
        by=[TimeIntervalsColumns.TemporalPropertyID,
            TimeIntervalsColumns.EntityID])
    intervals = grouped.apply(lambda df: self.__connect_timestamps(df))

    intervals.sort_values(
        by=[TimeIntervalsColumns.Start, TimeIntervalsColumns.End,
            TimeIntervalsColumns.StateID],
        inplace=True)
    intervals.reset_index(inplace=True)
    # 'level_2' is the residual inner index produced by groupby(...).apply;
    # it carries no information once the group keys become columns.
    intervals.drop('level_2', axis=1, inplace=True)

    column_order = [
        TimeIntervalsColumns.EntityID,
        TimeIntervalsColumns.TemporalPropertyID,
        TimeIntervalsColumns.StateID,
        TimeIntervalsColumns.Start,
        TimeIntervalsColumns.End,
    ]
    return intervals[column_order]
def discretize_property(self, prop_df):
    """Return this property's states and its gradient symbolic time series.

    :param prop_df: Dataframe, a time-point series of a single property
    :return: Tuple (Dataframe, Dataframe) — the rows of the pre-computed
        states belonging to this property, and the symbolic time series
        produced by ``Gradient.gradient``
    """
    if prop_df.empty:
        # Return EMPTY states — not self.__states — so an empty property
        # contributes no states. This matches the other discretize_property
        # implementations and keeps the downstream StateID re-indexing
        # (which counts each returned states df) correct.
        return DataframesGenerator.generate_empty_states(), \
            DataframesGenerator.generate_empty_symbolic_time_series()
    prop_id = prop_df[DatasetColumns.TemporalPropertyID].values[0]
    return self.__states[self.__states[StatesColumns.TemporalPropertyID] == prop_id], \
        Gradient.gradient(prop_df, self.__window_size, self.__states)
def temporal_abstraction_per_property(self, dataset, entity_class_relations=None, states=None):
    """Discretize every temporal property independently and combine the results.

    For each row of the temporal-abstraction params dataframe, the matching
    abstraction object is created and applied to that property's slice of the
    dataset. The per-property (states, symbolic series) pairs are then
    re-indexed so StateIDs do not collide across properties, and concatenated.

    :param dataset: Dataframe, a time-point series containing all properties
    :param entity_class_relations: optional, forwarded to the abstraction factory
    :param states: optional Dataframe of pre-computed states, forwarded to the factory
    :return: Tuple (combined states Dataframe, combined symbolic time series Dataframe)
    """
    if dataset.empty:
        return DataframesGenerator.generate_empty_states(
        ), DataframesGenerator.generate_empty_symbolic_time_series()
    # One (states, symbolic-time-series) pair per params row, in params order.
    symbolic_time_series = [
        AbstractionPerProperty.__generate_abstraction_by_name(
            row[1], entity_class_relations,
            states).discretize_property(dataset[dataset[
                DatasetColumns.TemporalPropertyID] == row[1][
                    TemporalAbstractionParamsColumns.TemporalPropertyID]])
        for row in self.__temporal_abstraction_params_df.iterrows()
    ]
    """ It counts the number of states and accumulates them to re-index the StateID column """
    # state_ids_ctr[i] is the total number of states in pairs 0..i-1
    # (a running prefix sum of each pair's states-row count, starting at 0).
    state_ids_ctr = reduce(
        lambda acc, new: acc + [acc[-1] + new[0].shape[0]],
        symbolic_time_series, [0])
    """ For each given states df, it re-indexes the StateID to start from the last max StateID + 1 to avoid collisions between states ids """
    # NOTE(review): assumes the StateID column is non-empty — min() on an
    # empty df yields NaN and would propagate; confirm empty properties
    # produce empty states upstream.
    def reindex_states(tmp_states, states_ctr):
        return {
            StatesColumns.StateID:
                tmp_states[StatesColumns.StateID] + states_ctr -
                (tmp_states[StatesColumns.StateID].min() - 1)
        }
    """ Just a map to update and re-assign the StateID column """
    symbolic_time_series = [
        (tmp_states.assign(**reindex_states(tmp_states, states_ctr)),
         sts.assign(**reindex_states(sts, states_ctr)))
        for ((tmp_states, sts), states_ctr) in zip(symbolic_time_series,
                                                   state_ids_ctr)
    ]
    """ Concatenating the data-frames one after the other """
    combined_symbolic_time_series = pd.concat(
        [p[1] for p in symbolic_time_series], ignore_index=True)
    combined_states = pd.concat([p[0] for p in symbolic_time_series],
                                ignore_index=True)
    return combined_states, combined_symbolic_time_series
def per_dataset(paa_window_size, std_coef, max_gap):
    """Build the dataset-wide preprocessing params and store them on the
    click context.

    The property ids are extracted (sorted, unique) from the dataset held in
    the context meta; every property receives the same PAA window size,
    std coefficient, and max gap.

    :param paa_window_size: PAA window size applied to all properties
    :param std_coef: std-deviation coefficient applied to all properties
    :param max_gap: maximal gap applied to all properties
    """
    ctx = click.get_current_context()
    prop_ids = np.sort(
        ctx.meta['dataset'][DatasetColumns.TemporalPropertyID].unique())
    ctx.meta['properties'] = prop_ids
    nb_props = len(prop_ids)
    ctx.meta['preprocessing_params_df'] = \
        DataframesGenerator.generate_preprocessing_params(
            prop_ids,
            np.full(nb_props, paa_window_size),
            np.full(nb_props, std_coef),
            np.full(nb_props, max_gap))
def discretization(method, nb_bins):
    """Run the system flow with an unsupervised discretization method.

    Builds a temporal-abstraction params dataframe applying ``method`` with
    ``nb_bins`` bins to every property, then delegates to ``system_flow``.

    :param method: str, the discretization method name
    :param nb_bins: int, number of bins for every property
    """
    ctx = click.get_current_context()
    prop_ids = ctx.meta['properties']
    nb_props = len(prop_ids)
    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        prop_ids,
        [method] * nb_props,
        np.full(nb_props, nb_bins),
        np.full(nb_props, nan))
    system_flow(ctx.meta['files_manager'],
                ctx.meta['preprocessing_params_df'],
                params_df,
                kfold=ctx.meta['kfold'])
def intervalization_per_property(self, symbolic_time_series):
    """Create time intervals separately for every property and concatenate
    the results.

    :param symbolic_time_series: Dataframe, a symbolic time series of all
        properties
    :return: Dataframe, the concatenated symbolic time intervals
    """
    if symbolic_time_series.empty:
        return DataframesGenerator.generate_empty_symbolic_time_intervals()
    per_property = []
    for _, params in self.__preprocessing_params_df.iterrows():
        adapter = TimeIntervalsAdapter(
            params[PreprocessingParamsColumns.MaxGap])
        prop_series = symbolic_time_series[
            symbolic_time_series[SymbolicTimeSeriesColumns.TemporalPropertyID]
            == params[PreprocessingParamsColumns.TemporalPropertyID]]
        per_property.append(adapter.create_time_intervals(prop_series))
    return pd.concat(per_property, ignore_index=True)
def discretize_property(self, prop_df):
    """
    A template method in which the only change between algorithms is the
    _generate_cutpoints method which is common to all child classes.
    Each discretization method creates cutpoints in a different way.

    :param prop_df: Dataframe, A time-point series of a single property
    :return: Tuple, (Dataframe, Dataframe) - states, and the
        symbolic-point-series created from prop_df
    """
    if prop_df.empty:
        return DataframesGenerator.generate_empty_states(), \
            DataframesGenerator.generate_empty_symbolic_time_series()
    cutpoints = self._generate_cutpoints(prop_df)
    # td4c / persist return a (cutpoints, scores) pair instead of a flat
    # cutpoints list. isinstance is the idiomatic check here.
    # NOTE(review): this detection is fragile — a flat result whose first
    # element is itself a list would be misread as a pair; confirm
    # _generate_cutpoints implementations never return nested lists.
    if len(cutpoints) == 2 and isinstance(cutpoints[0], list):
        cutpoints, scores = cutpoints
    else:
        scores = np.full(len(cutpoints), nan)
    states = Discretization.create_prop_states(
        prop_df[DatasetColumns.TemporalPropertyID].values[0],
        cutpoints, scores)
    symbolic_point_series = TemporalAbstraction.create_symbolic_time_series(
        states, prop_df)
    return states, symbolic_point_series
def knowledge_based(states_path):
    """Run the system flow with knowledge-based abstraction.

    Loads the states definitions from ``states_path`` and builds a params
    dataframe marking every property as 'knowledge-based' (bins count and
    window size are irrelevant and set to NaN), then delegates to
    ``system_flow``.

    :param states_path: str, path of the states CSV file
    """
    ctx = click.get_current_context()
    prop_ids = ctx.meta['properties']
    states = ctx.meta['files_manager'].read_csv(states_path)
    nb_props = len(prop_ids)
    params_df = DataframesGenerator.generate_temporal_abstraction_params(
        prop_ids,
        ['knowledge-based'] * nb_props,
        np.full(nb_props, nan),
        np.full(nb_props, nan),
    )
    system_flow(ctx.meta['files_manager'], ctx.meta['preprocessing_params_df'],
                params_df, states, kfold=ctx.meta['kfold'])
def create_prop_states(prop_id, cutpoints, scores=None):
    """
    Creates a states dataframe for a single property, containing the
    cutpoints and scores given as parameter.

    :param prop_id: int, the property id
    :param cutpoints: List, a list of cutpoints
    :param scores: List, a list of scores, one for each cut-point
    :return: Dataframe, a states dataframe
    """
    nb_states = len(cutpoints) + 1
    # n cutpoints delimit n+1 states: (-inf, c1], (c1, c2], ..., (cn, inf)
    lows = [-inf] + cutpoints
    highs = cutpoints + [inf]
    if scores is None:
        scores = np.full(nb_states - 1, np.nan)
    return DataframesGenerator.generate_states(
        np.arange(1, nb_states + 1),
        np.full(nb_states, prop_id),
        np.arange(nb_states),
        lows,
        highs,
        np.append([nan], scores),
    )