コード例 #1
0
    def test_transform(self, X, fe=None):
        """Attach the stored per-key value lists to ``X`` as numeric columns.

        For every value in the ``self.key`` column the list stored in
        ``self.cat2label`` is looked up (an empty list is used for values
        that are not present). The lists are laid out as ``self.max_win``
        columns, each column is written into the frame under a name
        produced by ``gen_feat_name`` after going through ``downcast``, and
        finally ``self.feat_expend`` is called on the frame.

        Args:
            X: A ``pandas.DataFrame``, or an object exposing one as
                ``X.data``. The frame is modified in place.
            fe: Collection forwarded to ``self.feat_expend`` as its second
                argument; a fresh empty set is used when omitted.

        Returns:
            None.
        """
        if fe is None:
            fe = set()
        frame = X if isinstance(X, pd.DataFrame) else X.data

        keys = frame[self.key].values
        rows = [
            self.cat2label[key] if key in self.cat2label else []
            for key in keys
        ]

        # Stretch the first non-empty row to ``max_win`` entries by repeating
        # its last value; every other row keeps its own length. Presumably
        # this guarantees the frame built below is ``max_win`` wide -- confirm.
        # NOTE(review): the row is the very list object held in
        # ``self.cat2label``, so the padding also changes the stored list.
        first_filled = next((row for row in rows if len(row) > 0), None)
        if first_filled is not None:
            missing = range(len(first_filled), self.max_win)
            first_filled.extend([first_filled[-1] for _ in missing])

        window_cols = [
            f'{self.key}_{self.primary_time}_{self.label}_{i}'
            for i in range(1, self.max_win + 1)
        ]
        table = pd.DataFrame(
            rows, columns=window_cols, index=keys, dtype='float32')

        cls_name = self.__class__.__name__
        for col in table.columns:
            feat_name = gen_feat_name(cls_name, col, 'num')
            frame[feat_name] = downcast(table[col].values)
        self.feat_expend(frame, fe)
コード例 #2
0
 def transform(self, ss):
     """Replace each column listed in ``self.cats`` with shifted codes.

     Every column is encoded with ``pandas.Categorical`` against the
     category list stored in ``self.cats2unique`` for that column,
     ``CAT_SHIFT`` is added to the integer codes, and the result is cast to
     float, passed through ``downcast(..., accuracy_loss=False)`` and
     written back under the same column name. pandas assigns code ``-1`` to
     values outside the given categories, so those end up as
     ``CAT_SHIFT - 1``.

     Args:
         ss: Column container indexable by the names in ``self.cats``;
             modified in place.

     Returns:
         None.
     """
     for col in self.cats:
         categorical = pd.Categorical(
             ss[col], categories=self.cats2unique[col])
         shifted = (categorical.codes + CAT_SHIFT).astype('float')
         ss[col] = downcast(shifted, accuracy_loss=False)
コード例 #3
0
 def transform(self, X):
     """Expand the primary timestamp column and turn it into integers.

     For every name in ``self.attrs`` the matching attribute of the
     column's ``.dt`` accessor is passed through
     ``downcast(..., accuracy_loss=False)`` and stored in a new column named
     ``<timestamp column>_<attribute>``. Afterwards the timestamp column
     itself is cast to ``int64`` and floor-divided by ``10**9``.

     Args:
         X: A ``pandas.DataFrame``, or an object exposing one as ``X.data``.
             The frame is modified in place.

     Returns:
         None.
     """
     frame = X.data if not isinstance(X, pd.DataFrame) else X
     ts_col = self.primary_timestamp
     for attr in self.attrs:
         part = getattr(frame[ts_col].dt, attr)
         frame['_'.join((ts_col, attr))] = downcast(part, accuracy_loss=False)
     # assumes nanosecond-resolution datetimes, so the quotient is whole
     # seconds -- TODO confirm
     frame[ts_col] = frame[ts_col].astype('int64') // 10**9
コード例 #4
0
 def func(shift):
     """Return the label column shifted by ``shift`` rows, blanked per key.

     The ``self.label`` column of ``self.record`` is shifted by ``shift``
     positions. Wherever the ``self.key`` value of a row differs from the
     key value shifted by the same amount, the result is set to NaN. The
     series is named ``<key>_<primary_time>_<label>_<shift>`` and returned
     after going through ``downcast``.

     Args:
         shift: Number of rows passed to ``Series.shift``.

     Returns:
         The value produced by ``downcast`` for the named, masked series.
     """
     records = self.record
     shifted = records[self.label].shift(shift)
     key_changed = records[self.key] != records[self.key].shift(shift)
     shifted[key_changed] = np.nan
     shifted.name = f'{self.key}_{self.primary_time}_{self.label}_{shift}'
     return downcast(shifted)
コード例 #5
0
 def train_transform(self, X):
     """Fold the ``self.cats`` columns into one combined-ID column.

     Starting from the first column cast to ``float64``, each further
     column ``i`` is merged with ``combined * self.cat_max[i] + column``.
     Entries equal to ``CAT_SHIFT - 1`` are then set to NaN, the result is
     passed through ``downcast(..., accuracy_loss=False)`` and stored in the
     frame under the name generated from ``'<cats joined by _>_combineID'``.

     Args:
         X: A ``pandas.DataFrame``, or an object exposing one as ``X.data``.
             The frame is modified in place.

     Returns:
         The name of the column that was written.
     """
     frame = X if isinstance(X, pd.DataFrame) else X.data

     combined = frame[self.cats[0]].astype('float64')
     for pos, col in enumerate(self.cats[1:], start=1):
         combined *= self.cat_max[pos]
         combined += frame[col]

     sentinel_hit = combined == (CAT_SHIFT - 1)
     combined[sentinel_hit] = np.nan

     feat_name = gen_feat_name(
         self.__class__.__name__, '_'.join(self.cats) + '_combineID', 'cat')
     frame[feat_name] = downcast(combined, accuracy_loss=False)
     return feat_name
コード例 #6
0
    def feat_expend(self, df, drop_fe=None):
        """Add derived columns computed from the ``self.new_cols`` columns.

        Three independent groups of features are written into ``df`` in
        place. A feature is skipped when its generated name is in
        ``drop_fe``:

        * when ``self.feat_exp`` is truthy: one ``slope`` feature for the
          first two columns, up to three ``div`` ratios between neighbouring
          columns, and an ``iszero`` flag for the first column;
        * when ``self.palm_size`` is positive: the row-wise mean of each
          consecutive, non-overlapping run of ``palm_size`` columns;
        * for each ``j`` in ``self.palt_list`` (stopping at the first
          ``j > self.max_win``): the row-wise mean, max and min of the first
          ``j`` columns.

        Args:
            df: ``pandas.DataFrame`` that already holds the columns named in
                ``self.new_cols``.
            drop_fe: Generated feature names to skip; defaults to an empty
                set.

        Returns:
            None.
        """
        if drop_fe is None:
            drop_fe = set()

        def without_inf(series):
            # Overwrite +/-inf with NaN in place and hand the series back.
            series[(series == np.inf) | (series == -np.inf)] = np.nan
            return series

        cls_name = self.__class__.__name__
        base = f'{self.key}_{self.primary_time}_{self.label}'

        if self.feat_exp:
            cols = self.new_cols

            # Only a single slope feature is produced (first column pair).
            # NOTE(review): unlike the ``div`` loop below, this indexes
            # ``cols[1]`` without checking that a second column exists.
            name = gen_feat_name(cls_name, f'{base}_1_slope', 'num')
            if name not in drop_fe:
                first, second = df[cols[0]], df[cols[1]]
                slope = without_inf((first - second) / (second + 1))
                df[name] = slope.values

            for j in range(min(3, len(cols) - 1)):
                name = gen_feat_name(cls_name, f'{base}_{j + 1}_div', 'num')
                if name in drop_fe:
                    continue
                ratio = without_inf(df[cols[j]] / (df[cols[j + 1]] + 1))
                df[name] = ratio.values

            name = gen_feat_name(cls_name, f'{base}_1_iszero', 'cat')
            if name not in drop_fe:
                df[name] = downcast((df[cols[0]] != 0).astype(int))

        if self.palm_size > 0:
            size = self.palm_size
            cols = self.new_cols
            group_count = len(cols) // size
            for start in range(0, group_count * size, size):
                name = gen_feat_name(
                    cls_name, f'{base}_{start + 1}_{start + size}', 'num')
                if name not in drop_fe:
                    df[name] = df[cols[start:start + size]].mean(axis=1)

        for width in self.palt_list:
            if width > self.max_win:
                break
            head = self.new_cols[:width]
            for stat in ('mean', 'max', 'min'):
                name = gen_feat_name(
                    cls_name, f'{base}_{width}_{stat}', 'num')
                if name not in drop_fe:
                    df[name] = getattr(df[head], stat)(axis=1)
コード例 #7
0
 def test_transform(self, X):
     """Write the combined-ID column for a frame split by ``self.judge``.

     Rows where the boolean mask ``self.judge`` is False are handed to
     ``self.train_transform``. Rows where it is True receive codes built
     from ``self.combine_cat`` encoded against ``self.unique``, offset by
     ``CAT_SHIFT + self.shift``; codes equal to ``CAT_SHIFT - 1`` become
     NaN. Both parts are then concatenated, sorted by index, and their
     values stored in the frame's combined-ID column.

     Args:
         X: A ``pandas.DataFrame``, or an object exposing one as ``X.data``.
             The frame is modified in place.

     Returns:
         None.
     """
     frame = X if isinstance(X, pd.DataFrame) else X.data

     # NOTE(review): ``rest`` is a selection of ``frame`` and gets a column
     # written onto it by ``train_transform`` -- confirm this does not
     # depend on pandas copy/view behaviour.
     rest = frame.loc[~self.judge, :]
     self.train_transform(rest)
     flagged = frame[self.judge]

     feat_name = gen_feat_name(
         self.__class__.__name__, '_'.join(self.cats) + '_combineID', 'cat')

     if flagged.empty:
         frame[feat_name] = rest[feat_name].values
         return

     codes = pd.Categorical(self.combine_cat, categories=self.unique).codes
     codes = (codes + CAT_SHIFT + self.shift).astype('float')
     codes[codes == (CAT_SHIFT - 1)] = np.nan
     flagged[feat_name] = downcast(codes, accuracy_loss=False)

     # assumes sorting by index restores the frame's original row order,
     # since the values are assigned positionally -- TODO confirm
     merged = pd.concat([rest, flagged], sort=False).sort_index()
     frame[feat_name] = merged[feat_name].values
コード例 #8
0
 def func(shift):
     """Return the label column of ``self.record`` shifted by ``shift`` rows.

     The shifted series is passed through ``downcast`` and then named
     ``<primary_time>_<label>_<shift>``.

     Args:
         shift: Number of rows passed to ``Series.shift``.

     Returns:
         The downcast, renamed series.
     """
     shifted = self.record[self.label].shift(shift)
     shifted = downcast(shifted)
     shifted.name = f'{self.primary_time}_{self.label}_{shift}'
     return shifted