def fit(self, X, y=None, **fit_params):
    """Remember the input column and derive the two output feature names.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)`` (presumably
            a "exactly one column" check -- confirm against the helper).
        y: Not used.
        **fit_params: Not used.

    Returns:
        The transformer itself, so calls can be chained.
    """
    assert_dfncol(X, 1)
    self.incols = X.columns.values
    base_name = self.incols[0]
    # One output name per trigonometric component, sine first.
    self.feature_names = [
        base_name + suffix for suffix in ('_cyclicsin', '_cycliccos')
    ]
    return self
def transform(self, X):
    """Spread each list entry over aligned columns and encode them.

    The single input column is expanded by the module-level
    ``_pad_priohead`` / ``_pad_priotail`` helper (selected by
    ``self.prioretize_head``), then every resulting column is cast to an
    ordered categorical over ``self.classes_``.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``.

    Returns:
        DataFrame whose columns are named after ``self.feature_names_``.
    """
    assert_isfitted(self)
    assert_dfncol(X, 1)

    # Transform to an aligned multi-column layout.
    pad_func = _pad_priohead if self.prioretize_head else _pad_priotail
    padded = X.iloc[:, 0].apply(
        pad_func,
        maxentries=self.maxentries_,
        padding_level=self.padding_level)
    padded.columns = self.feature_names_

    # Entries that are not among the known classes are missing after the
    # categorical cast; they are filled with the default level.
    ordered_dtype = CategoricalDtype(categories=self.classes_, ordered=True)

    def encode_column(column):
        encoded = column.astype(ordered_dtype).fillna(self.default_level)
        if not self.integerencode:
            return encoded
        # Replace the category labels by their positional integer codes.
        n_categories = len(encoded.cat.categories)
        return encoded.cat.rename_categories(list(range(n_categories)))

    result = padded.apply(encode_column, axis=0)
    logger.info("transform done")
    return result
def fit(self, X, y=None, **fit_params):
    """Learn the padded width, output names and class vocabulary.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; each cell of
            its first column is iterated and measured with ``len``.
        y: Not used.
        **fit_params: Not used.

    Returns:
        The transformer itself.

    Raises:
        ValueError: If ``self.padding_level`` occurs among the
            categories. ``ValueError`` derives from ``Exception``, so
            handlers written for the previously raised bare
            ``Exception`` keep working.
    """
    assert_dfncol(X, 1)
    self._incols = X.columns.values
    entries = X.iloc[:, 0].values

    if self._fit_maxentries:
        self.maxentries_ = max(len(vec) for vec in entries)
    self.feature_names_ = [
        "{}_{}".format(X.columns.values[0], i)
        for i in range(self.maxentries_)
    ]

    if self._fit_categories:
        # Grow one set in place instead of building a new union per row.
        seen = set()
        for vec in entries:
            seen.update(vec)
        self.categories_ = list(seen)

    # Check for problems.
    if self.padding_level in self.categories_:
        raise ValueError(
            "Cannot currently handle if padding-level is contained in categories"
        )
    # NOTE(review): when categories are not fitted here, this removes the
    # default level from the already existing ``categories_`` list in place.
    if self.default_level in self.categories_:
        self.categories_.remove(self.default_level)
    # Padding level first, default level last.
    self.classes_ = (
        [self.padding_level] + self.categories_ + [self.default_level])
    logger.info("fit done")

    # A bit of preparation, for speed later.
    if self.integerencode:
        self._trans_enum_dict = {k: i for i, k in enumerate(self.classes_)}
    return self
def transform(self, X):
    """Clip/pad each list to ``maxentries_`` codes, one column per slot.

    Every entry is translated through ``self.translation_dict``; entries
    missing from that dict get the code ``-1`` and padding slots get the
    code ``0``. Unless ``self.integerencode`` is set, the integer columns
    are afterwards cast to an ordered categorical and their categories
    renamed via ``self.translate_dict_rev``.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; each cell of
            its first column must support ``len`` and slicing.

    Returns:
        DataFrame whose columns are named after ``self.feature_names_``.
    """
    assert_isfitted(self)
    assert_dfncol(X, 1)
    width = self.maxentries_
    lookup = self.translation_dict.get

    def _encode(vec):
        """Translate entries to codes; unknown entries become -1."""
        codes = []
        for v in vec:
            code = lookup(v)
            codes.append(-1 if code is None else code)
        return codes

    def _pad_priohead(vec):
        """Clip and pad to exactly ``width`` codes, keeping the head."""
        if len(vec) > width:
            vec = vec[:width]
        codes = _encode(vec)
        # A non-positive repeat count yields an empty list.
        codes.extend([0] * (width - len(codes)))
        return pd.Series(codes)

    def _pad_priotail(vec):
        """Clip and pad to exactly ``width`` codes, keeping the tail."""
        if len(vec) > width:
            # Explicit start index: ``vec[-width:]`` would return the
            # whole list when ``width`` is 0.
            vec = vec[len(vec) - width:]
        codes = [0] * (width - len(vec))
        codes.extend(_encode(vec))
        return pd.Series(codes)

    pad = _pad_priohead if self.prioretize_head else _pad_priotail
    Xt = X.iloc[:, 0].apply(pad)
    Xt.columns = self.feature_names_

    # Now resolve the integer codes back to category labels.
    if not self.integerencode:
        cat_type = CategoricalDtype(
            categories=list(self.translation_dict.values()), ordered=True)

        def xt_col_helper(col):
            # NOTE(review): ``translate_dict_rev`` is spelled differently
            # from ``translation_dict`` -- confirm the attribute exists.
            return col.astype(cat_type).cat.rename_categories(
                self.translate_dict_rev)

        Xt = Xt.apply(xt_col_helper, axis=0)
    logger.info("transform done")
    return Xt
def transform(self, X):
    """Encode the input column as a sine/cosine pair.

    Each value is divided by ``self.periodicity`` and scaled by two pi
    before taking the sine and cosine. With ``self.pure_positive`` set,
    the result is mapped through ``0.5 * (t + 1)``.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``.

    Returns:
        DataFrame with the two columns named by ``self.feature_names``.
    """
    assert_dfncol(X, 1)

    def to_sin_cos(value):
        # Fraction of a full period, expressed in radians.
        angle = value / self.periodicity * 2. * math.pi
        return pd.Series([math.sin(angle), math.cos(angle)])

    def shift_to_unit_interval(row):
        # Moves values from [-1, 1] into [0, 1].
        return 0.5 * (row + 1.)

    encoded = X.iloc[:, 0].apply(to_sin_cos)
    if self.pure_positive:
        encoded = encoded.apply(shift_to_unit_interval, axis=1)
    encoded.columns = self.feature_names
    return encoded
def transform(self, X):
    """Pick element ``self.nth`` from every list in the input column.

    Cells that are not ``list`` instances, or whose lookup fails, yield
    ``self.default``.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; the column
            named ``self._incols[0]`` is read.

    Returns:
        Single-column DataFrame named after ``self.feature_names_``.
    """
    assert_isfitted(self)
    assert_dfncol(X, 1)

    def xt_helper(val):
        if not isinstance(val, list):
            return self.default
        try:
            return val[self.nth]
        # Only the failures list indexing can produce are treated as
        # "no such element"; a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        except (IndexError, TypeError):
            return self.default

    xt = X.loc[:, self._incols[0]].apply(xt_helper)
    Xt = pd.DataFrame(xt)
    Xt.columns = self.feature_names_
    return Xt
def transform(self, X):
    """Turn each list in the input column into a row of boolean flags.

    One flag exists per slot of ``self._dummy_checkbox``; the slot for an
    entry is looked up in ``self.tick_dict``. Entries without a slot set
    the slot of ``self.default_name`` when that name is not ``None`` and
    are ignored otherwise.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; each cell of
            its first column must be usable as ``set(cell)``.

    Returns:
        DataFrame whose columns are named after ``self.feature_names_``.
    """
    assert_isfitted(self)
    assert_dfncol(X, 1)

    def tick_boxes(entries):
        # Start from a private copy of the all-False template.
        ticks = list(self._dummy_checkbox)
        for entry in set(entries):
            slot = self.tick_dict.get(entry)
            if slot is not None:
                ticks[slot] = True
            elif self.default_name is not None:
                ticks[self.tick_dict.get(self.default_name)] = True
        return pd.Series(ticks)

    ticked = X.iloc[:, 0].apply(tick_boxes)
    ticked.columns = self.feature_names_
    logger.info("transform done")
    return ticked
def transform(self, X):
    """Drop ``self.n_many`` elements from each list in the input column.

    Elements are removed from the front when ``self.at_front`` is truthy
    and from the back otherwise. A cell with at most ``n_many`` elements
    becomes an empty list.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; the column
            named ``self._incols[0]`` is read and its cells must support
            ``len`` and slicing.

    Returns:
        Single-column DataFrame named after ``self.feature_names_``.
    """
    assert_isfitted(self)
    assert_dfncol(X, 1)
    n_drop = self.n_many
    drop_front = self.at_front

    def xt_helper(val):
        if len(val) <= n_drop:
            return []
        if drop_front:
            return val[n_drop:]
        # Explicit end index: ``val[:-n_drop]`` is ``val[:0]`` for
        # ``n_drop == 0`` and would discard every element.
        return val[:len(val) - n_drop]

    xt = X.loc[:, self._incols[0]].apply(xt_helper)
    Xt = pd.DataFrame(xt)
    Xt.columns = self.feature_names_
    return Xt
def fit(self, X, y=None, **fit_params):
    """Collect the classes and prepare the lookup structures.

    Sets ``classes_`` (only when ``self._fit_classes`` is truthy),
    ``feature_names_``, ``tick_dict`` (class -> position) and
    ``_dummy_checkbox`` (an all-False list with one slot per class).

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; each cell of
            its first column is iterated.
        y: Not used.
        **fit_params: Not used.

    Returns:
        The transformer itself.
    """
    assert_dfncol(X, 1)
    self._incols = X.columns.values

    if self._fit_classes:
        # Grow one set in place instead of building a new union per row.
        seen = set()
        for vec in X.iloc[:, 0].values:
            seen.update(vec)
        self.classes_ = list(seen)

    # NOTE(review): when classes are not fitted here, the default name is
    # appended to the already existing ``classes_`` list in place.
    if (self.default_name is not None
            and self.default_name not in self.classes_):
        self.classes_.append(self.default_name)

    self.feature_names_ = [
        self._class_to_feature_name(c) for c in self.classes_
    ]
    self.tick_dict = {k: e for e, k in enumerate(self.classes_)}
    self._dummy_checkbox = [False] * len(self.classes_)
    logger.info("fit done")
    return self
def testDfassert(self):
    """Call ``assert_dfncol`` with matching column counts of 1 and 2."""
    one_column = pd.DataFrame({'A': [0, 1]})
    assert_dfncol(one_column, 1)

    two_columns = pd.DataFrame({'A': [0, 1], 'B': [0, 1]})
    assert_dfncol(two_columns, 2)
def fit(self, X, y=None, **fit_params):
    """Remember the input column and name the single output feature.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``.
        y: Not used.
        **fit_params: Not used.

    Returns:
        The transformer itself.
    """
    assert_dfncol(X, 1)
    self._incols = X.columns.values
    source_name = self._incols[0]
    # The output column is the input column name plus a fixed suffix.
    self.feature_names_ = [source_name + '_mod']
    return self
def transform(self, X):
    """Replace every cell of the input column by its ``len``.

    Args:
        X: DataFrame passed through ``assert_dfncol(X, 1)``; each cell of
            its first column must support ``len``.

    Returns:
        Single-column DataFrame named after ``self.feature_names_``.
    """
    assert_dfncol(X, 1)
    lengths = X.iloc[:, 0].apply(len)
    result = pd.DataFrame(lengths)
    result.columns = self.feature_names_
    return result