Beispiel #1
0
 def compute(self, frame=None, report=None, train=None):
     """Run the cache-enabled ``after_split`` feature constructors.

     Nothing is returned; every ``run_manager.run`` call uses ``ret=False``.
     Constructors flagged ``parallel`` are submitted first, then
     ``run_manager.supervise(report)`` is called, and finally the
     non-parallel constructors are run.

     Args:
         frame: Frame to compute on. When omitted, ``self.train_frame`` or
             ``self.valid_frame`` is picked according to ``train``.
         report: Report object forwarded to ``run_manager``. A
             ``SilentFeatureComputingReport`` is created when omitted.
         train: Required (not ``None``) when ``frame`` is omitted; selects
             the stored train or validation frame. When ``frame`` is passed
             explicitly this argument is ignored and ``False`` is used.
     """
     if report is None:
         report = SilentFeatureComputingReport()
     if frame is not None:
         # An explicitly supplied frame is always processed with train=False.
         train = False
     else:
         assert train is not None
         frame = self.train_frame if train else self.valid_frame

     def _run(constructors):
         # Shared call shape for both batches below.
         run_manager.run(constructors,
                         frame,
                         train=train,
                         fold=self.fold_id,
                         ret=False,
                         report=report)

     cacheable = [fc for fc in self.after_split if fc.cache]
     _run([fc for fc in cacheable if fc.parallel])
     run_manager.supervise(report)
     _run([fc for fc in cacheable if not fc.parallel])
Beispiel #2
0
 def __call__(self, frame: AnyFrame) -> np.ndarray:
     """Compute the feature matrix for ``frame`` with ``train=False``.

     Both ``self.before_split`` (fold ``self.train_id``) and
     ``self.after_split`` (fold ``self.fold_id``) are run on ``frame``,
     their results are concatenated, and the underlying values are returned.

     Args:
         frame: Frame to compute features for.

     Returns:
         ``.values`` of the concatenated result frame.

     Raises:
         UserWarning: If ``self.columns`` is ``None``; per the message,
             ``.train`` has to be called first.
         AssertionError: If the computed columns differ from
             ``self.columns``.
     """
     # Fail fast: the column layout is a precondition, so check it before
     # starting any feature computation instead of after it has finished.
     if self.columns is None:
         raise UserWarning(".train should be called before inference")
     results = dict()
     results.update(
         run_manager.run(self.before_split,
                         frame,
                         train=False,
                         fold=self.train_id,
                         ret=True))
     results.update(
         run_manager.run(self.after_split,
                         frame,
                         train=False,
                         fold=self.fold_id,
                         ret=True))
     result_frame = concat(results.values())
     # The output must have exactly the stored column layout.
     assert all(self.columns == result_frame.columns)
     return result_frame.values
Beispiel #3
0
 def __getitem__(self, key) -> PreviewDataFrame:
     """Compute ``self.features`` on ``self.train_frame[key]`` for preview.

     The features are run with ``train=True`` under the ``'preview'`` fold,
     the results are concatenated, ``run_manager.merge_scheduled()`` is
     called, and the concatenated frame is wrapped in ``PreviewDataFrame``.

     Args:
         key: Indexer applied to ``self.train_frame``.
     """
     progress = FeatureComputingReport()
     subset = self.train_frame[key]
     computed = run_manager.run(
         self.features,
         subset,
         train=True,
         fold='preview',
         ret=True,
         report=progress,
     )
     combined = concat(computed.values())
     run_manager.merge_scheduled()
     return PreviewDataFrame(combined)
Beispiel #4
0
 def compute(self, frame=None, report=None):
     """Run the ``before_split`` feature constructors without returning results.

     Order of execution: ``Stacker`` instances, then constructors flagged
     ``parallel``, then ``run_manager.supervise(report)``, then the
     non-parallel constructors. Afterwards ``report.finish()`` and
     ``run_manager.merge_scheduled()`` are called.

     Args:
         frame: Frame to compute on. When omitted, ``self.train_frame`` is
             used with ``train=True``; an explicit frame is processed with
             ``train=False``.
         report: Report object forwarded to ``run_manager``. A
             ``SilentFeatureComputingReport`` is created when omitted.
     """
     if report is None:
         report = SilentFeatureComputingReport()
     # Training mode is implied by the absence of an explicit frame.
     train = frame is None
     if train:
         frame = self.train_frame

     def _run(constructors):
         # Shared call shape for the three batches below.
         run_manager.run(constructors,
                         frame,
                         train=train,
                         fold=self.train_id,
                         ret=False,
                         report=report)

     _run([fc for fc in self.before_split if isinstance(fc, Stacker)])
     _run([fc for fc in self.before_split if fc.parallel])
     run_manager.supervise(report)
     _run([fc for fc in self.before_split if not fc.parallel])
     report.finish()
     run_manager.merge_scheduled()
Beispiel #5
0
 def valid(self) -> np.ndarray:
     """Compute the validation feature matrix.

     ``self.before_split`` is run on ``self.feature_set.train_frame`` with
     ``train=True`` (fold ``self.train_id``); ``self.after_split`` is run on
     ``self.valid_frame`` with ``train=False`` (fold ``self.fold_id``). The
     results are concatenated together with a column-less selection of
     ``self.valid_frame`` and the resulting values are returned.

     Returns:
         ``.values`` of the concatenated result frame.

     Raises:
         UserWarning: If ``self.columns`` is ``None``; per the message,
             ``.train`` has to be called first.
         AssertionError: If the computed columns differ from
             ``self.columns``.
     """
     # Fail fast: the column layout is a precondition, so check it before
     # starting any feature computation instead of after it has finished.
     if self.columns is None:
         raise UserWarning(".train should be called before .valid")
     results = dict()
     frame = self.feature_set.train_frame
     results.update(
         run_manager.run(self.before_split,
                         frame,
                         train=True,
                         fold=self.train_id,
                         ret=True))
     frame = self.valid_frame
     results.update(
         run_manager.run(self.after_split,
                         frame,
                         train=False,
                         fold=self.fold_id,
                         ret=True))
     # Column-less selection of the validation frame; presumably included so
     # the concatenation follows the validation index -- TODO confirm.
     results.update({'_': self.valid_frame[[]]})
     result_frame = concat(results.values())
     # The output must have exactly the stored column layout.
     assert all(self.columns == result_frame.columns)
     return result_frame.values
Beispiel #6
0
 def train(self) -> np.ndarray:
     """Compute the training feature matrix and record its column layout.

     ``self.before_split`` is run on ``self.feature_set.train_frame`` (fold
     ``self.train_id``) and ``self.after_split`` on ``self.train_frame``
     (fold ``self.fold_id``), both with ``train=True``. The results are
     concatenated together with a column-less selection of
     ``self.train_frame``.

     On the first call the resulting column list is stored in
     ``self.columns``; on later calls the columns are asserted to match it.

     Returns:
         ``.values`` of the concatenated result frame.
     """
     source = self.feature_set.train_frame
     collected = dict(
         run_manager.run(self.before_split,
                         source,
                         train=True,
                         fold=self.train_id,
                         ret=True))
     source = self.train_frame
     collected.update(
         run_manager.run(self.after_split,
                         source,
                         train=True,
                         fold=self.fold_id,
                         ret=True))
     # Column-less selection of the training frame; presumably included so
     # the concatenation follows the training index -- TODO confirm.
     collected['_'] = self.train_frame[[]]
     combined = concat(collected.values())
     if self.columns is not None:
         assert all(self.columns == combined.columns)
     else:
         # First call: remember the layout for later consistency checks.
         self.columns = list(combined.columns)
     return combined.values
Beispiel #7
0
 def aux(self) -> Union[PreviewDataFrame, AnyFrame]:
     """Collect the columns named in ``self.auxiliary``.

     After calling ``self.compute()``, every ``before_split`` constructor
     whose ``columns`` overlap ``self.auxiliary`` is run on
     ``self.train_frame`` (``train=True``, fold ``self.train_id``). Each
     result, plus the train frame itself, is restricted to the auxiliary
     columns it contains and the pieces are concatenated.

     Returns:
         The concatenated frame, wrapped in ``PreviewDataFrame`` when
         ``cfg.preview_mode`` is truthy.
     """
     base = self.train_frame
     self.compute()
     providers = []
     for fc in self.before_split:
         # Keep only constructors that yield at least one auxiliary column.
         if set(self.auxiliary) & set(fc.columns):
             providers.append(fc)
     computed = run_manager.run(providers,
                                base,
                                train=True,
                                fold=self.train_id,
                                ret=True)
     pieces = [*computed.values(), base]
     selected = [
         piece[piece.columns.intersection(self.auxiliary)]
         for piece in pieces
     ]
     merged = concat(selected)
     return PreviewDataFrame(merged) if cfg.preview_mode else merged
Beispiel #8
0
 def _preview(obj):
     """Run ``obj`` as a feature constructor on frame heads and display results.

     ``obj`` is normalised to a constructor instance, its ``parallel``
     attribute is set from the enclosing ``parallel`` value, and for every
     entry of ``sizes`` it is run on ``frame.head(size)`` under the
     ``'preview'`` fold; the result stored under the constructor's name is
     passed to ``display``. ``feature_list``, ``parallel``, ``sizes``,
     ``frame`` and ``train`` come from the enclosing scope.

     Regardless of errors, ``run_manager.merge_scheduled()`` is called and
     ``cfg.feature_computing_report`` is reset to ``None`` at the end.
     """
     report = FeatureComputingReport(feature_list)
     if isinstance(obj, GenericFeatureConstructor):
         # Generic constructors are called to obtain the actual constructor.
         constructor = obj()
     elif isinstance(obj, BaseFeatureConstructor):
         constructor = obj
     else:
         # in case of stl preview
         constructor = FeatureConstructor(obj)
     constructor.parallel = parallel
     try:
         cfg.feature_computing_report = report
         for size in sizes:
             outputs = run_manager.run(
                 [constructor],
                 frame=frame.head(size),
                 train=train,
                 fold='preview',
                 ret=True,
                 report=report,
             )
             report.finish()
             display(outputs[constructor.name])
     finally:
         run_manager.merge_scheduled()
         cfg.feature_computing_report = None