Example #1
0
    def feat_expend(self, df, drop_fe=None):
        """Add row-wise aggregate features over ``self.new_cols`` to ``df``.

        ``df`` is modified in place and nothing is returned. Two families of
        columns are produced:

        * when ``self.palm_size > 0``: the mean of each consecutive,
          non-overlapping group of ``self.palm_size`` columns;
        * for every window ``w`` in ``self.palt_list``: the row-wise mean,
          max and min of the first ``w`` columns. Iteration stops at the
          first ``w`` that exceeds ``self.max_win``.

        Args:
            df: DataFrame already holding the columns named in
                ``self.new_cols``.
            drop_fe: Optional container of generated feature names that must
                not be created; ``None`` means nothing is skipped.
        """
        skipped = set() if drop_fe is None else drop_fe
        owner = self.__class__.__name__

        if self.palm_size > 0:
            group_count = len(self.new_cols) // self.palm_size
            for group in range(group_count):
                start = group * self.palm_size
                stop = start + self.palm_size
                raw_name = f'{self.key}_{self.primary_time}_{self.label}_{start}_{stop}'
                feat = gen_feat_name(owner, raw_name, 'num')
                if feat in skipped:
                    continue
                df[feat] = df[self.new_cols[start:stop]].mean(axis=1)

        prefix = f'{self.key}_{self.primary_time}_{self.label}'
        for window in self.palt_list:
            # The loop ends at the first oversized window instead of
            # skipping it, so later (smaller) entries are not visited.
            if window > self.max_win:
                break
            head_cols = self.new_cols[:window]
            for agg in ('mean', 'max', 'min'):
                feat = gen_feat_name(owner, f'{prefix}_{window}_{agg}', 'num')
                if feat in skipped:
                    continue
                df[feat] = getattr(df[head_cols], agg)(axis=1)
Example #2
0
    def feat_expend(self, df, drop_fe=None):
        """Add derived features built from ``self.new_cols`` to ``df`` in place.

        Nothing is returned. With ``c0, c1, ...`` denoting the columns named
        in ``self.new_cols``, the following features are produced:

        * if ``self.feat_exp`` is truthy:
            - ``..._1_slope``: ``(c0 - c1) / (c1 + 1)``;
            - ``..._{k}_div`` for up to the first three adjacent pairs:
              ``c[k-1] / (c[k] + 1)``;
            - ``..._1_iszero``: integer flag that is 1 where ``c0 != 0``
              (note the value marks non-zero rows despite the name);
          infinite ratios are replaced by NaN;
        * if ``self.palm_size > 0``: the mean of each consecutive,
          non-overlapping group of ``self.palm_size`` columns;
        * for every window ``w`` in ``self.palt_list`` (stopping at the first
          ``w`` greater than ``self.max_win``): row-wise mean, max and min of
          the first ``w`` columns.

        Features that need more columns than ``self.new_cols`` provides are
        skipped rather than raising ``IndexError``.

        Args:
            df: DataFrame already holding the columns named in
                ``self.new_cols``.
            drop_fe: Optional container of generated feature names that must
                not be created; ``None`` means nothing is skipped.
        """
        if drop_fe is None:
            drop_fe = set()
        cls_name = self.__class__.__name__
        prefix = f'{self.key}_{self.primary_time}_{self.label}'
        cols = self.new_cols
        n_cols = len(cols)

        if self.feat_exp:
            # Only the first pair of columns is used for the slope. It needs
            # two columns, so it is skipped when fewer are available (the
            # div loop below is bounded the same way).
            if n_cols > 1:
                new_col = gen_feat_name(cls_name, f'{prefix}_1_slope', 'num')
                if new_col not in drop_fe:
                    tmp = (df[cols[0]] - df[cols[1]]) / (df[cols[1]] + 1)
                    # A denominator value of -1 makes the division blow up.
                    tmp[(tmp == np.inf) | (tmp == -np.inf)] = np.nan
                    df[new_col] = tmp.values

            for j in range(min(3, n_cols - 1)):
                new_col = gen_feat_name(cls_name, f'{prefix}_{j + 1}_div',
                                        'num')
                if new_col not in drop_fe:
                    tmp = df[cols[j]] / (df[cols[j + 1]] + 1)
                    tmp[(tmp == np.inf) | (tmp == -np.inf)] = np.nan
                    df[new_col] = tmp.values

            # The flag reads the first column, so it needs at least one.
            if n_cols > 0:
                new_col = gen_feat_name(cls_name, f'{prefix}_1_iszero', 'cat')
                if new_col not in drop_fe:
                    df[new_col] = downcast((df[cols[0]] != 0).astype(int))

        if self.palm_size > 0:
            feat_num = n_cols // self.palm_size
            for i in range(feat_num):
                j = i * self.palm_size
                col = cols[j:j + self.palm_size]
                new_col = gen_feat_name(
                    cls_name, f'{prefix}_{j + 1}_{j + self.palm_size}', 'num')
                if new_col not in drop_fe:
                    df[new_col] = df[col].mean(axis=1)

        for j in self.palt_list:
            # Stops at the first oversized window instead of skipping it.
            if j > self.max_win:
                break
            col = cols[:j]
            for f in ('mean', 'max', 'min'):
                new_col = gen_feat_name(cls_name, f'{prefix}_{j}_{f}', 'num')
                if new_col not in drop_fe:
                    df[new_col] = getattr(df[col], f)(axis=1)
Example #3
0
    def train_transform(self, X):
        """Attach lagged ``self.label`` values from ``self.record`` to the input.

        For every shift ``s`` in ``1..self.max_win`` the label column of
        ``self.record`` is shifted by ``s`` rows, and positions whose key
        differs from the key ``s`` rows earlier are set to NaN. The shifted
        columns are left-joined onto the input frame by
        ``(self.primary_time, self.key)`` and written into it in place under
        generated feature names. ``self.reduce_memory()`` is called before
        returning.

        Args:
            X: A ``pandas.DataFrame`` or an object exposing one as ``.data``.

        Returns:
            List of the feature column names that were added (empty when no
            shift was computed).
        """
        def lagged(step):
            # One lag column: label value ``step`` rows back within the
            # same key, NaN where the key changed in between.
            keys = self.record[self.key]
            shifted = self.record[self.label].shift(step)
            shifted[keys != keys.shift(step)] = np.nan
            shifted.name = f'{self.key}_{self.primary_time}_{self.label}_{step}'
            return downcast(shifted)

        frame = X.data if not isinstance(X, pd.DataFrame) else X
        runner = Parallel(n_jobs=JOBS, require='sharedmem')
        lag_series = runner(
            delayed(lagged)(step) for step in range(1, self.max_win + 1))

        created = []
        if lag_series:
            join_keys = [self.primary_time, self.key]
            lag_frame = pd.concat(lag_series, sort=False, axis=1)
            lag_frame[self.primary_time] = self.record[self.primary_time]
            lag_frame[self.key] = self.record[self.key]
            # NOTE(review): the positional write-back below presumes the
            # (primary_time, key) pairs are unique in self.record so the
            # left join keeps one row per input row — confirm.
            aligned = frame[join_keys].merge(lag_frame, how='left',
                                             on=join_keys)
            # The first two columns are the join keys; the rest are lags.
            for raw_name in aligned.columns[2:]:
                feat = gen_feat_name(self.__class__.__name__, raw_name, 'num')
                created.append(feat)
                frame[feat] = aligned[raw_name].values
        self.reduce_memory()
        return created
Example #4
0
    def test_transform(self, X, fe=None):
        """Write per-key history values from ``self.cat2label`` into the input.

        Each row's ``self.key`` value is looked up in ``self.cat2label``;
        keys that are not present get an empty history. The histories are
        laid out as ``self.max_win`` columns, written into the frame in place
        under generated feature names, and ``self.feat_expend`` is then run
        on the frame.

        Unlike a plain lookup-and-pad, this method never modifies the lists
        stored in ``self.cat2label``, and it tolerates input in which no key
        has any history (all feature columns are NaN in that case).

        Args:
            X: A ``pandas.DataFrame`` or an object exposing one as ``.data``.
            fe: Optional container of feature names forwarded to
                ``self.feat_expend`` as the names to skip.
        """
        if fe is None:
            fe = set()
        df = X if isinstance(X, pd.DataFrame) else X.data

        cats = df[self.key].values
        vals = [
            self.cat2label[cat] if cat in self.cat2label else []
            for cat in cats
        ]

        # Pad the first non-empty history to ``max_win`` entries so the
        # DataFrame constructor produces the full set of columns. The padding
        # is done on a copy: extending the cached list itself would silently
        # change ``self.cat2label`` and make later calls depend on earlier
        # ones. Every row that shares that history gets the padded copy.
        # NOTE(review): only this one history is padded with its last value;
        # other short histories end up NaN-filled. Kept as in the previous
        # implementation — confirm this asymmetry is intended.
        first = next((val for val in vals if len(val) > 0), None)
        if first is not None:
            padded = list(first)
            padded.extend(first[-1] for _ in range(len(first), self.max_win))
            vals = [padded if val is first else val for val in vals]

        new_cols = [
            f'{self.key}_{self.primary_time}_{self.label}_{i}'
            for i in range(1, self.max_win + 1)
        ]
        if first is None:
            # No key has any history: rows of empty lists cannot be matched
            # against ``new_cols`` by the constructor, so build the all-NaN
            # frame directly.
            res = pd.DataFrame(index=cats, columns=new_cols, dtype='float32')
        else:
            res = pd.DataFrame(vals, columns=new_cols, index=cats,
                               dtype='float32')
        for col in res.columns:
            new_col = gen_feat_name(self.__class__.__name__, col, 'num')
            df[new_col] = downcast(res[col].values)
        self.feat_expend(df, fe)
Example #5
0
 def train_transform(self, X):
     """Fold the columns in ``self.cats`` into one combined-ID column.

     Starting from the first column cast to float64, each further column
     ``i`` is folded in as ``acc = acc * self.cat_max[i] + df[self.cats[i]]``.
     Entries equal to ``CAT_SHIFT - 1`` are replaced by NaN, the result is
     passed through ``downcast(..., accuracy_loss=False)`` and stored on the
     frame in place.

     Args:
         X: A ``pandas.DataFrame`` or an object exposing one as ``.data``.

     Returns:
         Name of the column that was added.
     """
     frame = X.data if not isinstance(X, pd.DataFrame) else X
     combined = frame[self.cats[0]].astype('float64')
     for pos in range(1, len(self.cats)):
         combined *= self.cat_max[pos]
         combined += frame[self.cats[pos]]
     sentinel_rows = combined == (CAT_SHIFT - 1)
     combined[sentinel_rows] = np.nan
     feat = gen_feat_name(self.__class__.__name__,
                          '_'.join(self.cats) + '_combineID', 'cat')
     frame[feat] = downcast(combined, accuracy_loss=False)
     return feat
Example #6
0
 def test_transform(self, X):
     """Add the combined-ID column to the input, handling two row groups.

     Rows where ``self.judge`` is false are encoded by
     ``self.train_transform``. Rows where it is true are encoded from
     ``self.combine_cat`` as categorical codes over ``self.unique``, offset
     by ``CAT_SHIFT + self.shift``; values equal to ``CAT_SHIFT - 1`` become
     NaN. Both parts are recombined and written into the frame in place.

     Args:
         X: A ``pandas.DataFrame`` or an object exposing one as ``.data``.
     """
     df = X if isinstance(X, pd.DataFrame) else X.data
     # Explicit copies: a new column is added to each part below, and
     # assigning onto a selection of ``df`` raises SettingWithCopyWarning.
     df_1 = df.loc[~self.judge, :].copy()
     self.train_transform(df_1)
     df_2 = df[self.judge].copy()
     new_col = '_'.join(self.cats) + '_combineID'
     new_col = gen_feat_name(self.__class__.__name__, new_col, 'cat')
     if not df_2.empty:
         codes = pd.Categorical(self.combine_cat,
                                categories=self.unique).codes
         # ``codes`` is a narrow integer array (int8/int16 for few
         # categories). Widen to float before adding the offsets so the
         # sum cannot wrap around in the narrow dtype.
         codes = codes.astype('float') + CAT_SHIFT + self.shift
         codes[codes == (CAT_SHIFT - 1)] = np.nan
         codes = downcast(codes, accuracy_loss=False)
         df_2[new_col] = codes
         df_1 = pd.concat([df_1, df_2], sort=False)
         # NOTE(review): restoring row order via sort_index presumes df's
         # index is unique and already sorted — confirm against callers.
         df_1 = df_1.sort_index()
     df[new_col] = df_1[new_col].values
Example #7
0
 def get_feat_name(self):
     """Return the generated name of the combined-ID feature.

     The raw name is the entries of ``self.cats`` joined by ``_`` with the
     suffix ``_combineID``; it is passed to ``gen_feat_name`` together with
     this class's name and the type tag ``'cat'``.
     """
     raw_name = '_'.join(self.cats) + '_combineID'
     return gen_feat_name(self.__class__.__name__, raw_name, 'cat')
Example #8
0
 def test_transform(self, X):
     """Write one column per shift from ``self.label_list`` into the input.

     For each shift ``s`` in ``1..self.max_win`` a generated feature column
     is set on the frame, in place, to ``self.label_list[s - 1]``.

     Args:
         X: A ``pandas.DataFrame`` or an object exposing one as ``.data``.
     """
     frame = X.data if not isinstance(X, pd.DataFrame) else X
     owner = self.__class__.__name__
     for offset in range(self.max_win):
         shift = offset + 1
         raw_name = f'{self.primary_time}_{self.label}_{shift}'
         feat = gen_feat_name(owner, raw_name, 'num')
         frame[feat] = self.label_list[offset]