Example no. 1
    def discretize_property(self, prop_df):
        if prop_df.empty:
            return DataframesGenerator.generate_empty_states(
            ), DataframesGenerator.generate_empty_symbolic_time_series()
        # Knowledge-based case: the states of this property are taken from the
        # pre-supplied self.__states table, and the symbolic time-series is
        # built from them by KnowledgeBased.knowledge_based.
        prop_id = prop_df[DatasetColumns.TemporalPropertyID].values[0]
        return self.__states[self.__states[StatesColumns.TemporalPropertyID] == prop_id], \
               KnowledgeBased.knowledge_based(self.__states, prop_df)

    def temporal_abstraction_per_property(self,
                                          dataset,
                                          entity_class_relations=None,
                                          states=None):
        if dataset.empty:
            return DataframesGenerator.generate_empty_states(
            ), DataframesGenerator.generate_empty_symbolic_time_series()

        symbolic_time_series = [
            AbstractionPerProperty.__generate_abstraction_by_name(
                params_row, entity_class_relations, states
            ).discretize_property(dataset[
                dataset[DatasetColumns.TemporalPropertyID] ==
                params_row[TemporalAbstractionParamsColumns.TemporalPropertyID]])
            for _, params_row in self.__temporal_abstraction_params_df.iterrows()
        ]
        """ It counts the number of states and accumulates them to re-index the StateID column """
        state_ids_ctr = reduce(
            lambda acc, new: acc + [acc[-1] + new[0].shape[0]],
            symbolic_time_series, [0])
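        # Hypothetical illustration: if the per-property states frames hold
        # 3, 4 and 2 states respectively, the accumulator above becomes
        # [0, 3, 7, 9], i.e. each property's offset is the total number of
        # states produced by the properties before it.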
        """ For each given states df, it re-indexes the StateID to start from the last max StateID + 1
            to avoid collisions between states ids """
        def reindex_states(tmp_states, states_ctr):
            return {
                StatesColumns.StateID:
                tmp_states[StatesColumns.StateID] + states_ctr -
                (tmp_states[StatesColumns.StateID].min() - 1)
            }
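        # Illustrative example (values assumed): a frame whose StateIDs are
        # [1, 2, 3] combined with states_ctr == 3 is shifted to [4, 5, 6],
        # since 1 + 3 - (1 - 1) == 4; the subtraction normalises frames whose
        # StateIDs do not start at 1.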

        """ Just a map to update and re-assign the StateID column """
        symbolic_time_series = [
            (tmp_states.assign(**reindex_states(tmp_states, states_ctr)),
             sts.assign(**reindex_states(sts, states_ctr)))
            for ((tmp_states, sts),
                 states_ctr) in zip(symbolic_time_series, state_ids_ctr)
        ]
        """ Concatenating the data-frames one after the other """
        combined_symbolic_time_series = pd.concat(
            [p[1] for p in symbolic_time_series], ignore_index=True)
        combined_states = pd.concat([p[0] for p in symbolic_time_series],
                                    ignore_index=True)
        return combined_states, combined_symbolic_time_series
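
    # Hypothetical usage sketch (the instance and variable names below are
    # assumed, not part of the original code): given an instance of the
    # surrounding abstraction class and a raw time-point dataset, the
    # combined, collision-free frames would be obtained with:
    #
    #     states_df, symbolic_df = abstraction.temporal_abstraction_per_property(dataset_df)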
    def discretize_property(self, prop_df):
        """
        A template method in which the only change between algorithms is the _generate_cutpoints method
        which is common to all child classes.
        Each discretization method creates cutpoints in a different way
        :param prop_df: Dataframe, A time-point series of a single property
        :return: Tuple, (Dataframe , Dataframe) - states, , and the symbolic-point-series created from prop_df
        """
        if prop_df.empty:
            return DataframesGenerator.generate_empty_states(), DataframesGenerator.generate_empty_symbolic_time_series()

        cutpoints = self._generate_cutpoints(prop_df)

        # in case it is td4c or persist, they also return a list of bins scores
        if len(cutpoints) == 2 and isinstance(cutpoints[0], list):
            cutpoints, scores = cutpoints
        else:
            scores = np.full(len(cutpoints), np.nan)
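        # Illustrative shapes (values assumed): a score-producing method may return
        # ([0.5, 1.2], [0.9, 0.7]), which unpacks into cutpoints and scores above,
        # while the others return a flat list such as [0.5, 1.2, 3.0], whose scores
        # default to NaN.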

        states = Discretization.create_prop_states(prop_df[DatasetColumns.TemporalPropertyID].values[0],
                                                   cutpoints, scores)
        symbolic_point_series = TemporalAbstraction.create_symbolic_time_series(states, prop_df)

        return states, symbolic_point_series
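
To make the template-method pattern above concrete, the following is a minimal sketch of a hypothetical child class. It assumes the method above lives in a Discretization base class, that the raw measurement values sit in a DatasetColumns.TemporalPropertyValue column, and that the constructor takes only the number of bins; the class name, the column name and the equal-width rule are illustrative assumptions, not part of the original code.

class EqualWidthDiscretization(Discretization):
    """ Hypothetical child class: splits the observed value range of the
        property into bins_count equally wide intervals and returns the
        inner boundaries as cutpoints. """

    def __init__(self, bins_count):
        self.bins_count = bins_count

    def _generate_cutpoints(self, prop_df):
        # DatasetColumns.TemporalPropertyValue is an assumed column name for
        # the raw values of the property.
        values = prop_df[DatasetColumns.TemporalPropertyValue]
        width = (values.max() - values.min()) / self.bins_count
        # Return a flat list of inner boundaries, so discretize_property
        # assigns NaN scores to the resulting states.
        return [values.min() + i * width for i in range(1, self.bins_count)]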