def compute(self, frame=None, report=None, train=None):
    """Run the cached after-split feature constructors, discarding results.

    Args:
        frame: Frame to run on. When omitted, ``self.train_frame`` or
            ``self.valid_frame`` is used, chosen by ``train``.
        report: Report object passed to ``run_manager.run`` and
            ``run_manager.supervise``. A ``SilentFeatureComputingReport``
            is created when omitted.
        train: Must not be ``None`` when ``frame`` is omitted (asserted).
            Overridden with ``False`` whenever an explicit ``frame`` is
            given.
    """
    if report is None:
        report = SilentFeatureComputingReport()

    if frame is not None:
        # An explicitly supplied frame is always run with train=False.
        train = False
    else:
        assert train is not None
        frame = self.train_frame if train else self.valid_frame

    def _launch(constructors):
        # Every run shares the same frame, fold and report; ret=False
        # because nothing is collected here.
        run_manager.run(constructors, frame, train=train, fold=self.fold_id, ret=False, report=report)

    cacheable = [fc for fc in self.after_split if fc.cache]

    _launch([fc for fc in cacheable if fc.parallel])
    run_manager.supervise(report)
    # The non-parallel group is filtered only after supervise() returns.
    _launch([fc for fc in cacheable if not fc.parallel])
def __call__(self, frame: AnyFrame) -> np.ndarray:
    """Compute the feature matrix for an arbitrary frame (inference).

    Runs the before-split constructors (``fold=self.train_id``) and then
    the after-split constructors (``fold=self.fold_id``) on ``frame``, both
    with ``train=False``, concatenates the returned results and returns the
    ``.values`` of the concatenated frame.

    Args:
        frame: Frame to compute features for.

    Returns:
        ``result_frame.values`` of the concatenated results.

    Raises:
        UserWarning: If ``self.columns`` is ``None``. Per the message,
            ``.train`` is expected to have been called first. Raised
            before any feature computation is started.
        AssertionError: If the resulting columns differ from
            ``self.columns``.
    """
    # Fail fast: the precondition does not depend on the computation, so
    # check it before spending time in run_manager instead of afterwards.
    if self.columns is None:
        raise UserWarning(".train should be called before inference")

    results = dict()
    results.update(
        run_manager.run(self.before_split, frame, train=False, fold=self.train_id, ret=True))
    results.update(
        run_manager.run(self.after_split, frame, train=False, fold=self.fold_id, ret=True))
    result_frame = concat(results.values())

    # The column layout must match the one stored in self.columns.
    assert all(self.columns == result_frame.columns)
    return result_frame.values
def __getitem__(self, key) -> PreviewDataFrame:
    """Compute all features on a slice of the train frame for preview.

    ``key`` is applied to ``self.train_frame`` as-is. ``self.features`` are
    then run on the selection with ``train=True`` and ``fold='preview'``,
    scheduled work is merged via ``run_manager.merge_scheduled()``, and the
    concatenated result is wrapped in a ``PreviewDataFrame``.
    """
    preview_report = FeatureComputingReport()
    selection = self.train_frame[key]
    computed = run_manager.run(
        self.features,
        selection,
        train=True,
        fold='preview',
        ret=True,
        report=preview_report,
    )
    combined = concat(computed.values())
    # Merge only after the results have been concatenated.
    run_manager.merge_scheduled()
    return PreviewDataFrame(combined)
def compute(self, frame=None, report=None):
    """Run the before-split feature constructors, discarding results.

    Three passes are issued in order: ``Stacker`` instances, constructors
    flagged ``parallel`` (followed by ``run_manager.supervise``), and then
    the remaining non-parallel constructors. Finally the report is
    finished and scheduled work is merged.

    Args:
        frame: Frame to run on. When omitted, ``self.train_frame`` is used
            and the run is marked ``train=True``; otherwise ``train=False``.
        report: Report object passed to ``run_manager``. A
            ``SilentFeatureComputingReport`` is created when omitted.
    """
    if report is None:
        report = SilentFeatureComputingReport()

    # Training mode is implied solely by the absence of an explicit frame.
    train = frame is None
    if train:
        frame = self.train_frame

    def _launch(constructors):
        run_manager.run(constructors, frame, train=train, fold=self.train_id, ret=False, report=report)

    _launch([fc for fc in self.before_split if isinstance(fc, Stacker)])

    _launch([fc for fc in self.before_split if fc.parallel])
    run_manager.supervise(report)

    # The non-parallel group is filtered only after supervise() returns.
    _launch([fc for fc in self.before_split if not fc.parallel])

    report.finish()
    run_manager.merge_scheduled()
def valid(self) -> np.ndarray:
    """Compute the feature matrix for this fold's validation frame.

    Runs the before-split constructors on ``self.feature_set.train_frame``
    (``train=True``, ``fold=self.train_id``) and the after-split
    constructors on ``self.valid_frame`` (``train=False``,
    ``fold=self.fold_id``). An empty column selection of the validation
    frame is added under the key ``'_'`` before everything is concatenated.

    Returns:
        ``result_frame.values`` of the concatenated results.

    Raises:
        UserWarning: If ``self.columns`` is ``None``. Per the message,
            ``.train`` is expected to have been called first. Raised
            before any feature computation is started.
        AssertionError: If the resulting columns differ from
            ``self.columns``.
    """
    # Fail fast: the precondition does not depend on the computation, so
    # check it before spending time in run_manager instead of afterwards.
    if self.columns is None:
        raise UserWarning(".train should be called before .valid")

    results = dict()
    frame = self.feature_set.train_frame
    results.update(
        run_manager.run(self.before_split, frame, train=True, fold=self.train_id, ret=True))
    frame = self.valid_frame
    results.update(
        run_manager.run(self.after_split, frame, train=False, fold=self.fold_id, ret=True))
    # Zero-column view of the validation frame.
    # NOTE(review): presumably this makes concat() align on the validation
    # index - confirm against concat's implementation.
    results.update({'_': self.valid_frame[[]]})
    result_frame = concat(results.values())

    # The column layout must match the one stored in self.columns.
    assert all(self.columns == result_frame.columns)
    return result_frame.values
def train(self) -> np.ndarray:
    """Compute the feature matrix for this fold's training frame.

    Runs the before-split constructors on ``self.feature_set.train_frame``
    (``fold=self.train_id``) and the after-split constructors on
    ``self.train_frame`` (``fold=self.fold_id``), both with ``train=True``.
    An empty column selection of ``self.train_frame`` is stored under the
    key ``'_'`` before concatenation.

    On the first call the resulting column names are recorded in
    ``self.columns``; later calls assert that the columns are unchanged.

    Returns:
        ``result_frame.values`` of the concatenated results.
    """
    source = self.feature_set.train_frame
    shared = run_manager.run(self.before_split, source, train=True, fold=self.train_id, ret=True)

    source = self.train_frame
    per_fold = run_manager.run(self.after_split, source, train=True, fold=self.fold_id, ret=True)

    results = {}
    results.update(shared)
    results.update(per_fold)
    # Zero-column view of the fold's training frame.
    results['_'] = self.train_frame[[]]

    result_frame = concat(results.values())

    if self.columns is not None:
        assert all(self.columns == result_frame.columns)
    else:
        # First call: remember the column layout for later checks.
        self.columns = list(result_frame.columns)
    return result_frame.values
def aux(self) -> Union[PreviewDataFrame, AnyFrame]:
    """Collect the columns listed in ``self.auxiliary``.

    After ``self.compute()``, the before-split constructors whose
    ``columns`` share at least one name with ``self.auxiliary`` are run on
    ``self.train_frame``. From each result, and from the train frame
    itself, only the columns present in ``self.auxiliary`` are kept, and
    the pieces are concatenated.

    Returns:
        The concatenated frame, wrapped in ``PreviewDataFrame`` when
        ``cfg.preview_mode`` is truthy.
    """
    train_frame = self.train_frame
    self.compute()

    providers = [
        fc for fc in self.before_split
        if set(self.auxiliary) & set(fc.columns)
    ]
    computed = run_manager.run(providers, train_frame, train=True, fold=self.train_id, ret=True)

    # The raw train frame goes last so its own auxiliary columns are
    # picked up too.
    candidates = [*computed.values(), train_frame]
    selected = [
        part[part.columns.intersection(self.auxiliary)]
        for part in candidates
    ]

    res = concat(selected)
    return PreviewDataFrame(res) if cfg.preview_mode else res
def _preview(obj):
    """Run ``obj`` as a feature constructor on head slices and display results.

    ``feature_list``, ``parallel``, ``frame``, ``train`` and ``sizes`` are
    not defined here; they are expected to come from the enclosing scope
    (not visible in this block).

    ``obj`` is normalised to a constructor instance, its ``parallel`` flag
    is overwritten, and for every entry of ``sizes`` the constructor is run
    on ``frame.head(size)`` with ``fold='preview'``. After each run the
    report is finished and the result stored under the constructor's name
    is displayed. ``cfg.feature_computing_report`` holds the report for the
    duration of the runs; it is reset and scheduled work is merged even if
    a run fails.
    """
    report = FeatureComputingReport(feature_list)

    if isinstance(obj, GenericFeatureConstructor):
        constructor = obj()
    elif isinstance(obj, BaseFeatureConstructor):
        constructor = obj
    else:
        # in case of stl preview
        constructor = FeatureConstructor(obj)
    constructor.parallel = parallel

    try:
        cfg.feature_computing_report = report
        for size in sizes:
            results = run_manager.run(
                [constructor],
                frame=frame.head(size),
                train=train,
                fold='preview',
                ret=True,
                report=report,
            )
            report.finish()
            display(results[constructor.name])
    finally:
        run_manager.merge_scheduled()
        cfg.feature_computing_report = None