예제 #1
0
파일: cyclic.py 프로젝트: mzoll/sklearnext
 def fit(self, X, y=None, **fit_params):
     """Remember the input column and derive the two output feature names.

     Args:
         X: DataFrame passed through ``assert_dfncol(X, 1)`` (presumably
             this enforces a single column -- helper is not visible here).
         y: Unused.
         **fit_params: Unused.

     Returns:
         self, with ``incols`` and ``feature_names`` set.
     """
     assert_dfncol(X, 1)
     self.incols = X.columns.values
     # One sine and one cosine feature are named after the source column.
     source_col = self.incols[0]
     self.feature_names = [
         source_col + suffix for suffix in ('_cyclicsin', '_cycliccos')
     ]
     return self
예제 #2
0
    def transform(self, X):
        """Spread the list-valued column of X over fixed-width categorical columns.

        Every entry of the first column is handed to ``_pad_priohead`` or
        ``_pad_priotail`` (module-level helpers not visible here; presumably
        each returns one padded/clipped row) depending on
        ``self.prioretize_head``. The resulting columns are cast to an ordered
        categorical over ``self.classes_``; values outside those classes become
        ``self.default_level``. With ``self.integerencode`` set, the category
        labels are replaced by their positional index.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``.

        Returns:
            The transformed frame with columns ``self.feature_names_``.
        """
        assert_isfitted(self)
        assert_dfncol(X, 1)

        # Both branches share the same call signature, so select the routine once.
        pad_func = _pad_priohead if self.prioretize_head else _pad_priotail
        Xt = X.iloc[:, 0].apply(pad_func,
                                maxentries=self.maxentries_,
                                padding_level=self.padding_level)
        Xt.columns = self.feature_names_

        # Casting to this dtype turns every value not listed in classes_ into
        # NaN, which is then replaced by the default level.
        level_dtype = CategoricalDtype(categories=self.classes_, ordered=True)

        def encode_column(column):
            encoded = column.astype(level_dtype).fillna(self.default_level)
            if not self.integerencode:
                return encoded
            n_levels = len(encoded.cat.categories)
            return encoded.cat.rename_categories(list(range(n_levels)))

        Xtt = Xt.apply(encode_column, axis=0)

        logger.info("transform done")
        return Xtt
예제 #3
0
    def fit(self, X, y=None, **fit_params):
        """Learn column width, category set and class ordering from X.

        Depending on ``self._fit_maxentries`` / ``self._fit_categories`` the
        longest entry length and the set of distinct values are taken from the
        first column of X; otherwise the already present ``maxentries_`` /
        ``categories_`` are used as they are.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``; its entries
                must support ``len()`` and iteration.
            y: Unused.
            **fit_params: Unused.

        Returns:
            self.

        Raises:
            Exception: If the padding level occurs among the fitted categories.
        """
        assert_dfncol(X, 1)
        self._incols = X.columns.values

        if self._fit_maxentries:
            self.maxentries_ = max(len(entry) for entry in X.iloc[:, 0].values)

        column_name = X.columns.values[0]
        self.feature_names_ = [
            "{}_{}".format(column_name, slot)
            for slot in range(self.maxentries_)
        ]

        if self._fit_categories:
            observed = set()
            for entry in X.iloc[:, 0].values:
                observed = observed | set(entry)
            self.categories_ = list(observed)

            # The padding level needs its own class slot, so it must not also
            # be a genuine category.
            if self.padding_level in self.categories_:
                raise Exception(
                    "Cannot currently handle if padding-level is contained in categories"
                )
            # The default level is appended separately below; avoid a duplicate.
            if self.default_level in self.categories_:
                self.categories_.remove(self.default_level)

        # Class order: padding first, fitted categories, default level last.
        head = [self.padding_level]
        tail = [self.default_level]
        self.classes_ = head + self.categories_ + tail
        logger.info("fit done")

        # Precompute the class -> position lookup for integer encoding.
        if self.integerencode:
            self._trans_enum_dict = dict(
                zip(self.classes_, range(len(self.classes_))))
        return self
예제 #4
0
    def transform(self, X):
        """Encode the list-valued column of X into fixed-width code columns.

        Each entry of the first column is clipped or padded to exactly
        ``self.maxentries_`` slots. Values found in ``self.translation_dict``
        are replaced by their code, unknown values by ``-1`` and padding slots
        by ``0``. With ``self.prioretize_head`` the head of an over-long entry
        is kept and padding is appended; otherwise the tail is kept and
        padding is prepended.

        When ``self.integerencode`` is falsy the code columns are additionally
        cast to an ordered categorical and the categories are renamed through
        ``self.translate_dict_rev``.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``.

        Returns:
            The transformed frame with columns ``self.feature_names_``.
        """
        assert_isfitted(self)
        assert_dfncol(X, 1)

        def _encode(entries):
            """Translate entries to their codes; unknown entries become -1."""
            codes = []
            for entry in entries:
                code = self.translation_dict.get(entry)
                codes.append(-1 if code is None else code)
            return codes

        def _pad_priohead(vec):
            """Clip and pad `vec` to exactly `maxentries_` codes, preserving its head."""
            if len(vec) > self.maxentries_:
                vec = vec[:self.maxentries_]
            outvec = _encode(vec)
            if len(outvec) < self.maxentries_:
                outvec.extend([0] * (self.maxentries_ - len(outvec)))
            return pd.Series(outvec)

        def _pad_priotail(vec):
            """Clip and pad `vec` to exactly `maxentries_` codes, preserving its tail."""
            if len(vec) > self.maxentries_:
                # Slice from an explicit start index: ``vec[-0:]`` would return
                # the whole vector when maxentries_ is 0.
                vec = vec[len(vec) - self.maxentries_:]
            outvec = []
            if len(vec) < self.maxentries_:
                outvec = [0] * (self.maxentries_ - len(vec))
            outvec.extend(_encode(vec))
            return pd.Series(outvec)

        if self.prioretize_head:
            Xt = X.iloc[:, 0].apply(_pad_priohead)
        else:
            Xt = X.iloc[:, 0].apply(_pad_priotail)
        Xt.columns = self.feature_names_

        # Map the integer codes back onto labelled, ordered categories.
        if not self.integerencode:
            cat_type = CategoricalDtype(
                categories=self.translation_dict.values(), ordered=True)

            def xt_col_helper(col):
                col = col.astype(cat_type)
                # NOTE(review): attribute is spelled `translate_dict_rev` while
                # the forward mapping is `translation_dict` -- confirm that the
                # attribute is set under this name elsewhere in the class.
                col = col.cat.rename_categories(self.translate_dict_rev)
                return col

            Xt = Xt.apply(xt_col_helper, axis=0)

        logger.info("transform done")
        return Xt
예제 #5
0
파일: cyclic.py 프로젝트: mzoll/sklearnext
    def transform(self, X):
        """Map the single column of X onto sine/cosine of its phase.

        Each value ``v`` is converted to the angle
        ``v / self.periodicity * 2 * pi`` and replaced by the pair
        ``(sin, cos)``. With ``self.pure_positive`` set, both components are
        rescaled from [-1, 1] to [0, 1] via ``0.5 * (t + 1)``.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``.

        Returns:
            A two-column frame with columns ``self.feature_names``.
        """
        assert_dfncol(X, 1)

        def xthelper(val):
            t = val / self.periodicity * 2. * math.pi
            return pd.Series([math.sin(t), math.cos(t)])

        Xt = X.iloc[:, 0].apply(xthelper)
        if self.pure_positive:
            # Element-wise rescale on the whole frame; a row-wise
            # ``apply(..., axis=1)`` computes the same values one Python call
            # per row.
            Xt = 0.5 * (Xt + 1.)
        Xt.columns = self.feature_names
        return Xt
예제 #6
0
    def transform(self, X):
        """Extract the ``self.nth`` element of every list in the input column.

        Entries that are not lists, or lists that cannot be indexed with
        ``self.nth``, yield ``self.default``.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``; the column named
                ``self._incols[0]`` is read.

        Returns:
            A single-column frame with columns ``self.feature_names_``.
        """
        assert_isfitted(self)
        assert_dfncol(X, 1)

        def xt_helper(val):
            if not isinstance(val, list):
                return self.default
            try:
                return val[self.nth]
            except (IndexError, TypeError):
                # Only indexing failures fall back to the default; a bare
                # ``except`` would also swallow KeyboardInterrupt/SystemExit.
                return self.default

        xt = X.loc[:, self._incols[0]].apply(xt_helper)
        Xt = pd.DataFrame(xt)
        Xt.columns = self.feature_names_
        return Xt
예제 #7
0
    def transform(self, X):
        """Turn each iterable entry of the input column into a row of boolean flags.

        For every distinct value in an entry the flag at position
        ``self.tick_dict[value]`` is set. Values missing from
        ``self.tick_dict`` set the flag of ``self.default_name`` when a default
        name is configured and are ignored otherwise.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``.

        Returns:
            A frame with columns ``self.feature_names_``.
        """
        assert_isfitted(self)
        assert_dfncol(X, 1)

        def tick_row(entry):
            # Start from a fresh copy of the template for every row.
            flags = copy.copy(self._dummy_checkbox)
            for value in set(entry):
                position = self.tick_dict.get(value)
                if position is not None:
                    flags[position] = True
                elif self.default_name is not None:
                    flags[self.tick_dict.get(self.default_name)] = True
            return pd.Series(flags)

        Xt = X.iloc[:, 0].apply(tick_row)
        Xt.columns = self.feature_names_

        logger.info("transform done")
        return Xt
예제 #8
0
    def transform(self, X):
        """Drop ``self.n_many`` elements from one end of every entry.

        With ``self.at_front`` the leading elements are removed, otherwise the
        trailing ones. Entries with at most ``self.n_many`` elements become an
        empty list.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``; the column named
                ``self._incols[0]`` is read. Entries must support ``len()``
                and slicing.

        Returns:
            A single-column frame with columns ``self.feature_names_``.
        """
        assert_isfitted(self)
        assert_dfncol(X, 1)

        if self.at_front:

            def xt_helper(val):
                if len(val) <= self.n_many:
                    return []
                return val[self.n_many:]
        else:

            def xt_helper(val):
                if len(val) <= self.n_many:
                    return []
                # Use an explicit stop index: ``val[:-n]`` with n == 0 is
                # ``val[:0]`` and would wrongly empty the entry.
                return val[:len(val) - self.n_many]

        xt = X.loc[:, self._incols[0]].apply(xt_helper)
        Xt = pd.DataFrame(xt)
        Xt.columns = self.feature_names_
        return Xt
예제 #9
0
    def fit(self, X, y=None, **fit_params):
        """Determine the classes and derive feature names and lookup tables.

        When ``self._fit_classes`` is set, the classes are the distinct values
        found across all entries of the first column; otherwise the existing
        ``self.classes_`` is used. A configured ``self.default_name`` is
        appended to the classes if it is not already among them.

        Args:
            X: DataFrame checked with ``assert_dfncol(X, 1)``; entries must be
                iterable (``set(entry)`` raises otherwise).
            y: Unused.
            **fit_params: Unused.

        Returns:
            self.
        """
        assert_dfncol(X, 1)
        self._incols = X.columns.values

        if self._fit_classes:
            observed = set()
            for entry in X.iloc[:, 0].values:
                observed = observed | set(entry)
            self.classes_ = list(observed)

        default_missing = (self.default_name is not None
                           and self.default_name not in self.classes_)
        if default_missing:
            self.classes_.append(self.default_name)

        self.feature_names_ = list(
            map(self._class_to_feature_name, self.classes_))
        # class -> flag position, plus an all-False row template for transform.
        self.tick_dict = dict(zip(self.classes_, range(len(self.classes_))))
        self._dummy_checkbox = [False for _ in self.classes_]

        logger.info("fit done")
        return self
예제 #10
0
 def testDfassert(self):
     """Call assert_dfncol on a one-column and a two-column frame with matching counts."""
     one_col = pd.DataFrame({'A': [0, 1]})
     assert_dfncol(one_col, 1)

     two_col = pd.DataFrame({'A': [0, 1], 'B': [0, 1]})
     assert_dfncol(two_col, 2)
예제 #11
0
 def fit(self, X, y=None, **fit_params):
     """Remember the input column and name the single output feature.

     Args:
         X: DataFrame checked with ``assert_dfncol(X, 1)``.
         y: Unused.
         **fit_params: Unused.

     Returns:
         self, with ``_incols`` and ``feature_names_`` set.
     """
     assert_dfncol(X, 1)
     self._incols = X.columns.values
     # The output feature is the source column name plus a '_mod' suffix.
     source_col = self._incols[0]
     self.feature_names_ = [source_col + '_mod']
     return self
예제 #12
0
 def transform(self, X):
     """Replace every entry of the input column by its length.

     Args:
         X: DataFrame checked with ``assert_dfncol(X, 1)``; entries must
             support ``len()``.

     Returns:
         A single-column frame with columns ``self.feature_names_``.
     """
     assert_dfncol(X, 1)
     lengths = X.iloc[:, 0].apply(len)
     Xt = pd.DataFrame(lengths)
     Xt.columns = self.feature_names_
     return Xt