def _train_ii():
    """Fit an item-item k-NN model on the MovieLens test ratings and
    return a transferred handle to the persisted model."""
    model = knn.ItemItem(20, min_sim=1.0e-6)
    watch = Stopwatch()
    _log.info('training %s on ml data', model)
    model.fit(lktu.ml_test.ratings)
    _log.info('trained in %s', watch)
    # persist the fitted model and hand ownership to the receiving process
    return persist(model).transfer()
def __init__(self, model, func, n_jobs, persist_method):
    """Start a ``multiprocessing.Pool`` whose workers are each initialized
    with the persisted model, the operation function, a per-worker thread
    count, and the shared log queue."""
    # re-use a model the caller already persisted; otherwise persist it now
    if isinstance(model, PersistedModel):
        key = model
    else:
        key = persist(model, method=persist_method)
    context = LKContext.INSTANCE
    # how many compute threads each child process may use
    worker_threads = proc_count(level=1)
    _log.info('setting up multiprocessing.Pool w/ %d workers', n_jobs)
    self.pool = context.Pool(n_jobs, _initialize_mp_worker,
                             (key, func, worker_threads, log_queue()))
def __init__(self, model, func, n_jobs, persist_method):
    """Start a ``ProcessPoolExecutor`` whose workers are each initialized
    with the persisted model, the operation function, a per-worker thread
    count, and the shared log queue."""
    # re-use a model the caller already persisted; otherwise persist it now
    key = model if isinstance(model, PersistedModel) \
        else persist(model, method=persist_method)
    context = LKContext.INSTANCE
    _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs)
    # how many compute threads each child process may use
    worker_threads = proc_count(level=1)
    self.executor = ProcessPoolExecutor(
        n_jobs, context, _initialize_mp_worker,
        (key, func, worker_threads, log_queue()))
def test_persist():
    "Test default persistence"
    data = np.random.randn(1000, 100)
    handle = lks.persist(data)
    try:
        # the shared copy must be a distinct object with equal contents
        restored = handle.get()
        assert restored is not data
        assert np.all(restored == data)
        del restored
    finally:
        handle.close()
def __init__(self, model, func, n_jobs, persist_method):
    """Start a ``multiprocessing.Pool`` whose workers are each initialized
    with the persisted model, the (pickled) operation function, a
    per-worker thread count, the log queue, and the root RNG seed."""
    # re-use a model the caller already persisted; otherwise persist it now
    if isinstance(model, PersistedModel):
        key = model
    else:
        key = persist(model, method=persist_method)
    # pickle the function eagerly so it travels with the initializer args
    func = pickle.dumps(func)
    context = LKContext.INSTANCE
    # how many compute threads each child process may use
    worker_threads = proc_count(level=1)
    # flag for child processes that they run under multiprocessing
    os.environ['_LK_IN_MP'] = 'yes'
    _log.info('setting up multiprocessing.Pool w/ %d workers', n_jobs)
    self.pool = context.Pool(n_jobs, _initialize_mp_worker,
                             (key, func, worker_threads, log_queue(), get_root_seed()))
def test_persist_method():
    "Test persistence with a specified method"
    data = np.random.randn(1000, 100)
    handle = lks.persist(data, method='binpickle')
    # requesting 'binpickle' must yield the binpickle-backed handle type
    assert isinstance(handle, lks.BPKPersisted)
    try:
        restored = handle.get()
        assert restored is not data
        assert np.all(restored == data)
        del restored
    finally:
        handle.close()
def test_persist_dir(tmp_path):
    "Test persistence with a configured directory"
    data = np.random.randn(1000, 100)
    # pointing LK_TEMP_DIR at a directory should select binpickle persistence
    with lktu.set_env_var('LK_TEMP_DIR', os.fspath(tmp_path)):
        handle = lks.persist(data)
        assert isinstance(handle, lks.BPKPersisted)
    try:
        restored = handle.get()
        assert restored is not data
        assert np.all(restored == data)
        del restored
    finally:
        handle.close()
def __init__(self, model, func, n_jobs, persist_method):
    """Start a ``ProcessPoolExecutor`` whose workers are each initialized
    with the persisted model, the pickled operation function, a per-worker
    thread count, the log queue, and the root RNG seed.

    Fix: only record a ``_close_key`` for a persisted model we create
    here.  A ``PersistedModel`` supplied by the caller remains the
    caller's responsibility to close; previously it was recorded
    unconditionally, so cleanup would close a model this object did not
    own.
    """
    self._close_key = None  # always defined, so cleanup code can test it safely
    if isinstance(model, PersistedModel):
        _log.debug('model already persisted')
        key = model
    else:
        _log.debug('persisting model with method %s', persist_method)
        key = persist(model, method=persist_method)
        # we persisted this copy, so we own it and must close it later
        self._close_key = key
    _log.debug('persisting function')
    # pickle the function eagerly so it travels with the initializer args
    func = pickle.dumps(func)
    ctx = LKContext.INSTANCE
    _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs)
    # flag for child processes that they run under multiprocessing
    os.environ['_LK_IN_MP'] = 'yes'
    kid_tc = proc_count(level=1)
    self.executor = ProcessPoolExecutor(
        n_jobs, ctx, _initialize_mp_worker,
        (key, func, kid_tc, log_queue(), get_root_seed()))
def test_store_als():
    """Persist a trained ALS model, round-trip the handle through pickle,
    and check the unpickled handle reconstitutes an equivalent model.

    Fix: close the unpickled handle in a ``finally`` block so it is
    released even when one of the assertions fails (previously
    ``k2.close()`` ran only on the success path).
    """
    algo = BiasedMF(10)
    algo.fit(lktu.ml_test.ratings)
    shared = lks.persist(algo)
    try:
        # the persisted handle itself must survive pickling
        k2 = pickle.loads(pickle.dumps(shared))
        try:
            a2 = k2.get()
            # distinct objects, equal contents
            assert a2 is not algo
            assert a2.item_features_ is not algo.item_features_
            assert np.all(a2.item_features_ == algo.item_features_)
            assert a2.user_features_ is not algo.user_features_
            assert np.all(a2.user_features_ == algo.user_features_)
            del a2
        finally:
            k2.close()
            del k2
    finally:
        shared.close()
def _sp_matmul_p(a1, a2, *, method=None, fail=False):
    """Multiply two matrices in a worker process and transfer the persisted
    product back to the parent.

    ``fail`` is accepted but not used in this variant; it is kept for
    signature compatibility with callers.
    """
    _log.info('in worker process')
    product = a1 @ a2
    return persist(product, method=method).transfer()