Example #1
    def pre_compute(self):
        super().pre_compute()
        # rolling mean of net_main_inflows within each group of index level 1
        self.pipe_df = self.pipe_df.groupby(
            level=1)['net_main_inflows'].rolling(window=self.window).mean()
        # groupby().rolling() prepends the group key to the index; drop it,
        # flatten, and rebuild the normal index via normal_index_df
        self.pipe_df = self.pipe_df.reset_index(level=0, drop=True)
        self.pipe_df = self.pipe_df.reset_index()
        self.pipe_df = normal_index_df(self.pipe_df)
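
This method depends on instance state (self.pipe_df, self.window) and on zvt's normal_index_df helper, none of which are shown here. The standalone sketch below is only meant to reproduce the pandas mechanics on a toy (entity_id, timestamp) frame with a hypothetical net_main_inflows column; it groups by level 0 rather than level 1 (which level is right depends on how the index is laid out) and uses set_index as a stand-in for normal_index_df.

import pandas as pd

idx = pd.MultiIndex.from_product(
    [["stock_a", "stock_b"], pd.date_range("2023-01-01", periods=3)],
    names=["entity_id", "timestamp"],
)
df = pd.DataFrame({"net_main_inflows": [1.0, 2.0, 3.0, 10.0, 20.0, 30.0]},
                  index=idx)

# rolling mean per entity; groupby().rolling() prepends the group key,
# so the result index becomes (entity_id, entity_id, timestamp)
rolled = df.groupby(level=0)["net_main_inflows"].rolling(window=2).mean()
rolled = rolled.reset_index(level=0, drop=True)  # back to (entity_id, timestamp)

# flatten and rebuild the index, mirroring reset_index() + normal_index_df()
flat = rolled.reset_index()
normal = flat.set_index(["entity_id", "timestamp"])
print(normal)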
Example #2
    def score(self, input_df):
        # score levels sorted from high to low
        self.score_levels.sort(reverse=True)

        # cross-sectional quantiles of each factor column per timestamp
        quantile_df = input_df.groupby(level=1).quantile(self.score_levels)
        quantile_df.index.names = [self.time_field, 'score']

        self.logger.info('factor:{},quantile:\n{}'.format(
            self.factor_name, quantile_df))

        result_df = input_df.copy()
        result_df.reset_index(inplace=True, level='entity_id')
        result_df['quantile'] = None
        # attach each timestamp's quantile table to every row of that timestamp
        for timestamp in quantile_df.index.levels[0]:
            length = len(result_df.loc[result_df.index == timestamp,
                                       'quantile'])
            result_df.loc[result_df.index == timestamp,
                          'quantile'] = [quantile_df.loc[timestamp].to_dict()
                                         ] * length

        self.logger.info('factor:{},df with quantile:\n{}'.format(
            self.factor_name, result_df))

        # result_df = result_df.set_index(['entity_id'], append=True)
        # result_df = result_df.sort_index(level=[0, 1])
        #
        # self.logger.info(result_df)
        #
        def calculate_score(df, factor_name, quantile):
            # map a raw factor value to the highest score level whose quantile
            # threshold it reaches; values below the lowest threshold score 0,
            # values between the two lowest thresholds fall through to None
            original_value = df[factor_name]
            score_map = quantile.get(factor_name)
            min_score = self.score_levels[-1]

            if original_value < score_map.get(min_score):
                return 0

            for score in self.score_levels[:-1]:
                if original_value >= score_map.get(score):
                    return score

        # replace every factor value with its score
        for factor in input_df.columns.to_list():
            result_df[factor] = result_df.apply(
                lambda x: calculate_score(x, factor, x['quantile']), axis=1)

        # restore the normal index and keep only the factor columns
        result_df = result_df.reset_index()
        result_df = normal_index_df(result_df)
        result_df = result_df.loc[:, self.factors]

        # drop duplicated index entries, keeping the first occurrence
        result_df = result_df.loc[~result_df.index.duplicated(keep='first')]

        self.logger.info('factor:{},df:\n{}'.format(self.factor_name,
                                                    result_df))

        return result_df
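
score() leans on zvt helpers (normal_index_df) and instance state (self.score_levels, self.factors, self.logger), so it does not run in isolation. Below is a minimal, self-contained sketch of the same idea in plain pandas: compute cross-sectional quantiles per timestamp, then map each raw value to the highest score level whose quantile it reaches. The momentum column and the score levels are made up for the illustration, and unlike the method above the sketch returns the lowest level instead of None for values sitting between the two lowest quantiles.

import pandas as pd

score_levels = [0.8, 0.5, 0.2]   # sorted high to low, as score() does

idx = pd.MultiIndex.from_product(
    [["stock_a", "stock_b", "stock_c", "stock_d"],
     pd.to_datetime(["2023-01-01", "2023-01-02"])],
    names=["entity_id", "timestamp"],
)
input_df = pd.DataFrame(
    {"momentum": [0.9, 0.4, 0.1, 0.7, 0.2, 0.8, 0.5, 0.3]}, index=idx)

# cross-sectional quantiles of each factor column per timestamp
quantile_df = input_df.groupby(level=1).quantile(score_levels)
quantile_df.index.names = ["timestamp", "score"]

def to_score(value, thresholds):
    # map a raw value to the highest score level whose quantile it reaches
    if value < thresholds[score_levels[-1]]:
        return 0
    for level in score_levels[:-1]:
        if value >= thresholds[level]:
            return level
    return score_levels[-1]   # between the two lowest quantiles

scored = input_df.copy()
scored["momentum"] = [
    to_score(row["momentum"], quantile_df.loc[ts]["momentum"].to_dict())
    for (entity_id, ts), row in input_df.iterrows()
]
print(scored)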
Example #3
    def normalize(self):
        """
        normalize data_df to
                                    col1    col2    col3
        entity_id    timestamp

        """
        if pd_is_not_null(self.data_df):
            # make sure data_df uses the normal (entity_id, timestamp) MultiIndex
            if not is_normal_df(self.data_df):
                self.data_df = normal_index_df(self.data_df,
                                               self.category_field,
                                               self.time_field)

            self.entity_ids = self.data_df.index.levels[0].to_list()

            # split data_df per entity, keeping both a list and an entity_id -> df map
            for entity_id in self.entity_ids:
                df = self.data_df.loc[(entity_id, )]
                self.df_list.append(df)
                self.entity_map_df[entity_id] = df

            # align all per-entity dfs on the same index when requested
            if len(self.df_list) > 1 and self.fill_index:
                self.df_list = fill_with_same_index(df_list=self.df_list)
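
pd_is_not_null, is_normal_df, normal_index_df and fill_with_same_index are zvt helpers that are not reproduced here. The sketch below shows the same normalization flow with plain pandas stand-ins: build the (entity_id, timestamp) MultiIndex, split the frame per entity, and reindex every per-entity frame onto the union of timestamps as a rough substitute for fill_with_same_index. Column names and values are illustrative.

import pandas as pd

# flat data as it might look before normalize(); values are made up
flat_df = pd.DataFrame({
    "entity_id": ["stock_a", "stock_a", "stock_b"],
    "timestamp": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-01"]),
    "close": [10.0, 10.5, 20.0],
})

# stand-in for normal_index_df: (entity_id, timestamp) MultiIndex, sorted
data_df = flat_df.set_index(["entity_id", "timestamp"]).sort_index()

# split into one frame per entity, keyed by entity_id
entity_ids = data_df.index.levels[0].to_list()
df_list = []
entity_map_df = {}
for entity_id in entity_ids:
    df = data_df.loc[(entity_id,)]
    df_list.append(df)
    entity_map_df[entity_id] = df

# rough stand-in for fill_with_same_index: align every frame on the
# union of all timestamps so each entity covers the same dates
all_timestamps = df_list[0].index
for df in df_list[1:]:
    all_timestamps = all_timestamps.union(df.index)
df_list = [df.reindex(all_timestamps) for df in df_list]

print(entity_map_df["stock_a"])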