コード例 #1
0
ファイル: parallel.py プロジェクト: konodyuk/kts
 def get_futures(self, kf: KTSFrame) -> Tuple[Dict[Tuple, ObjectID], Dict[Tuple, AnyFrame]]:
     """Compute or schedule every split of ``kf`` and collect the outcomes.

     For each argument tuple yielded by ``self.split(kf)`` the method first
     asks ``self.request_resource`` for an already available result. When
     none is returned, the run is handed to ``self.schedule`` and the
     outputs are passed to ``self.sync``.

     Returns:
         A pair ``(pending, ready)``: ``pending`` maps argument tuples to
         whatever ``self.schedule`` returned in parallel mode, ``ready``
         maps argument tuples to results that are available right away
         (found via ``request_resource`` or computed in non-parallel mode).
     """
     pending = {}
     ready = {}
     for call_args in self.split(kf):
         scope = self.get_scope(*call_args)
         run_id = RunID(scope, kf._fold, kf.hash())

         # Reuse an existing result when the resource lookup provides one.
         known_df = self.request_resource(run_id, kf)
         if known_df is not None:
             ready[call_args] = known_df
             continue

         saved_state = self.request_resource(run_id.state_id, kf)
         # Parallel runs work on the frame returned by clear_states();
         # local runs operate on the caller's frame directly.
         frame = kf.clear_states() if self.parallel else kf
         if saved_state is not None:
             frame.set_scope(scope)
             frame.__states__[frame._state_key] = saved_state

         run_id, out_df, out_state, stats = self.schedule(*call_args, scope=scope, kf=frame)
         self.sync(run_id, out_df, out_state, stats, kf)

         destination = pending if self.parallel else ready
         destination[call_args] = out_df
     return pending, ready
コード例 #2
0
    def local_worker(self, *args, kf: KTSFrame):
        """Run ``self.compute`` on ``kf`` in the current process.

        Progress is reported through ``rs.send`` when ``in_worker()`` is true
        and through the report stored in ``kf.__meta__['report']`` otherwise;
        nothing is reported when ``self.verbose`` is false.

        Args:
            *args: Positional arguments forwarded to ``self.compute``.
            kf: Frame to compute on; its scope, fold and hash form the run id.

        Returns:
            A tuple ``(res_kf, res_state, stats.data)`` where ``res_state`` is
            ``kf._state`` or ``None``, depending on the rules below.
        """
        run_id = RunID(kf._scope, kf._fold, kf.hash())
        return_state = kf._train  # default for cached FCs or first calls of not cached FCs
        if not self.cache and bool(kf._state):
            # second call of not cached FC does not return state, as it is saved previously
            # refer to https://github.com/konodyuk/kts/tree/master/kts/core#caching-policy
            return_state = False
        stats = Stats(kf)
        # Pick the IO context and emit the "0 of 1" progress event for this run.
        if in_worker() and self.verbose:
            report = None
            io = self.remote_io(run_id)
            rs.send(ProgressSignal(0, 1, None, None, None, run_id))
        elif not in_worker() and self.verbose:
            report = kf.__meta__['report']
            io = self.local_io(report, run_id)
            report.update(run_id, 0, 1)
        else:
            # Not verbose: no report object and output is suppressed.
            report = None
            io = self.suppress_io()
        with stats, io, self.suppress_stderr(), pbar.local_mode(report, run_id):
            res_kf = self.compute(*args, kf)

        # Remember the result's column names in the state the first time they are seen.
        if 'columns' in dir(res_kf) and '__columns' not in kf._state:
            kf._state['__columns'] = list(res_kf.columns)

        if return_state:
            res_state = kf._state
        else:
            res_state = None
        # Emit the "1 of 1" progress event together with the measured duration.
        if in_worker() and self.verbose:
            rs.send(ProgressSignal(1, 1, stats.data['took'], None, None, run_id))
        elif not in_worker() and self.verbose:
            report = kf.__meta__['report']
            report.update(run_id, 1, 1, stats.data['took'])
        return res_kf, res_state, stats.data
コード例 #3
0
ファイル: worker.py プロジェクト: konodyuk/kts
def worker(self, *args, df: pd.DataFrame, meta: Dict):
    """Run ``self.compute`` on ``df`` and report progress via ``rs``.

    NOTE(review): callers in this file invoke it as ``worker.remote(...)``
    and unpack three results, so it is presumably registered as a remote
    task outside this view -- confirm.

    Args:
        self: The feature constructor whose ``compute`` is executed.
        *args: Positional arguments forwarded to ``self.compute``.
        df: Input data, wrapped into a ``KTSFrame`` together with ``meta``.
        meta: Frame metadata. Must contain ``'pid'`` and must not contain
            ``'run_manager'`` or ``'report'``.

    Returns:
        ``(res_kf, res_state, stats.data)`` on success, where ``res_state``
        is ``kf._state`` when ``kf._train`` is truthy and ``None`` otherwise.
        ``(None, None, None)`` when ``self.compute`` raises; the formatted
        traceback is sent as an ``ErrorSignal`` in that case.
    """
    assert 'run_manager' not in meta
    assert 'report' not in meta
    assert 'pid' in meta
    signal.pid = meta['pid']
    address_manager.pid = meta['pid']
    kf = KTSFrame(df, meta=meta)
    kf.__meta__['remote'] = True
    return_state = kf._train
    if self.verbose:
        rs.send(ProgressSignal(0, 1, None, None, None))
        io = self.remote_io()
    else:
        io = self.suppress_io()
    rs.send(RunPID(os.getpid()))

    stats = Stats(df)
    with stats, io, self.suppress_stderr():
        try:
            res_kf = self.compute(*args, kf)
        except Exception:
            # Only ordinary errors are reported as compute failures.
            # KeyboardInterrupt/SystemExit are left to propagate so that an
            # interrupt or shutdown is not disguised as a feature error.
            rs.send(rs.ErrorSignal(traceback.format_exc()))
            return None, None, None

    # Remember the result's column names in the state the first time they are seen.
    if 'columns' in dir(res_kf) and '__columns' not in kf._state:
        kf._state['__columns'] = list(res_kf.columns)

    if return_state:
        res_state = kf._state
    else:
        res_state = None
    if self.verbose:
        rs.send(ProgressSignal(1, 1, stats.data['took'], None, None))
    return res_kf, res_state, stats.data
コード例 #4
0
ファイル: parallel.py プロジェクト: konodyuk/kts
 def assemble_futures(self, scheduled_dfs: Dict[Tuple, ObjectID], result_dfs: Dict[Tuple, AnyFrame], kf: KTSFrame) -> KTSFrame:
     """Resolve scheduled results and merge all parts into a single frame.

     Every entry of ``scheduled_dfs`` is fetched with ``ray.get`` and stored
     into ``result_dfs`` (which is modified in place). The parts are then
     ordered as ``self.split(kf)`` yields their keys, combined with
     ``self.reduce`` and wrapped into a ``KTSFrame`` that shares
     ``kf.__meta__``.
     """
     for key, future in scheduled_dfs.items():
         result_dfs[key] = ray.get(future)

     ordered_parts = [result_dfs[call_args] for call_args in self.split(kf)]
     assembled = KTSFrame(self.reduce(ordered_parts))
     assembled.__meta__ = kf.__meta__
     return assembled
コード例 #5
0
def safe_put(kf: KTSFrame):
    """Return an object id for ``kf``, reusing a registered one if present.

    The address manager is queried by ``kf.hash()``. If it already has an
    entry, that entry is returned; otherwise the frame is stored with
    ``ray.put`` and the new id is registered with the address manager.
    """
    manager = get_address_manager()
    frame_hash = kf.hash()

    already_registered = ray.get(manager.has.remote(frame_hash))
    if already_registered:
        return ray.get(manager.get.remote(frame_hash))

    object_id = ray.put(kf)
    # The registration call is issued without waiting for its result.
    manager.put.remote((frame_hash, object_id, False))
    return object_id
コード例 #6
0
 def schedule(self, *args, scope: str, kf: KTSFrame) -> Tuple[RunID, Union[ObjectID, AnyFrame], Union[ObjectID, Dict], Union[ObjectID, Dict]]:
     """Execute one run under ``scope``, remotely or in-process.

     In parallel mode the frame is stored via ``safe_put`` and passed to
     ``worker.remote``; otherwise ``self.local_worker`` is called directly.

     Returns:
         ``(run_id, res_df, res_state, stats)`` where the last three items
         are whatever the chosen worker produced.
     """
     run_id = RunID(scope, kf._fold, kf.hash())
     with self.set_scope(kf, scope):
         if not self.parallel:
             outputs = self.local_worker(*args, kf=kf)
         else:
             frame_meta = kf.__meta__
             frame_oid = safe_put(kf)
             outputs = worker.remote(self, *args, df=frame_oid, meta=frame_meta)
         res_df, res_state, stats = outputs
     return run_id, res_df, res_state, stats
コード例 #7
0
ファイル: run_manager.py プロジェクト: konodyuk/kts
 def run(self,
         feature_constructors: List[BaseFeatureConstructor],
         frame: AnyFrame,
         *,
         train: bool,
         fold: str,
         ret: bool = False,
         report=None) -> Optional[Dict[str, AnyFrame]]:
     """Apply each feature constructor to ``frame``.

     Args:
         feature_constructors: Constructors to call, in order.
         frame: Input data; wrapped into a ``KTSFrame``.
         train: Stored in the frame metadata under ``'train'``.
         fold: Stored in the frame metadata under ``'fold'``.
         ret: When true, the collected results are returned.
         report: Progress report; a ``SilentFeatureComputingReport`` is
             created when omitted.

     Returns:
         A dict mapping constructor names to their results when ``ret`` is
         true, otherwise ``None``.
     """
     ensure_ray()
     report = SilentFeatureComputingReport() if report is None else report
     frame = KTSFrame(frame)
     results = {}
     for constructor in feature_constructors:
         # Metadata is (re)written before every constructor call.
         meta = frame.__meta__
         meta['train'] = train
         meta['fold'] = fold
         meta['run_manager'] = self
         meta['report'] = report
         meta['pid'] = os.getpid()
         run_id = RunID(constructor.name, frame._fold, frame.hash())
         with pbar.local_mode(report, run_id):
             results[constructor.name] = constructor(frame, ret=ret)
     return results if ret else None
コード例 #8
0
ファイル: user_defined.py プロジェクト: konodyuk/kts
 def compute(self, kf: KTSFrame):
     """Call the wrapped function on ``kf`` and normalise its output.

     Dependencies listed in ``self.dependencies`` are resolved through
     ``self.request_resource`` and passed to ``self.func`` as keyword
     arguments. A non-DataFrame result is wrapped into a DataFrame indexed
     like ``kf`` with columns named ``"<name>_<i>"``.

     When ``kf.train`` is false and the state already holds ``'__columns'``,
     a result whose columns differ is extended with ``None``-filled missing
     columns and reordered to the stored list. Otherwise the column list is
     stored in the state if it is not there yet.
     """
     resolved = {}
     for key, dependency in self.dependencies.items():
         resolved[key] = self.request_resource(dependency, kf)

     result = self.func(kf, **resolved)
     assert result.shape[0] == kf.shape[0]

     if not isinstance(result, pd.DataFrame):
         frame_index = kf.index
         generated_names = [f"{self.name}_{i}" for i in range(result.shape[1])]
         result = pd.DataFrame(data=result, index=frame_index, columns=generated_names)
     else:
         assert all(result.index == kf.index)

     if not kf.train and '__columns' in kf._state:
         fixed_columns = kf._state['__columns']
         # Compare element-wise only when the lengths agree.
         columns_match = (len(result.columns) == len(fixed_columns)
                          and all(result.columns == fixed_columns))
         if not columns_match:
             for col in set(fixed_columns) - set(result.columns):
                 result[col] = None
             return result[fixed_columns]

     if '__columns' not in kf._state:
         kf._state['__columns'] = list(result.columns)
     return result
コード例 #9
0
 def set_scope(self, kf: KTSFrame, scope: str):
     """Temporarily replace ``kf.__meta__['scope']`` with ``scope``.

     This is a generator that yields exactly once; it is used as a ``with``
     context elsewhere in this file, so it is presumably wrapped with
     ``contextlib.contextmanager`` outside this view -- confirm.

     The previous scope is restored when the generator resumes after the
     ``yield``, including when an exception is thrown into it, so a failing
     body cannot leave the frame with the temporary scope.

     Args:
         kf: Frame whose metadata is modified.
         scope: Scope value to set while the managed body runs.
     """
     previous_scope = kf.__meta__['scope']
     kf.__meta__['scope'] = scope
     try:
         yield
     finally:
         # Restore even if the managed body raised.
         kf.__meta__['scope'] = previous_scope