def test():
    import logging
    logging.basicConfig(level=logging.DEBUG)

    from dpark.context import DparkContext
    dc = DparkContext("local")
    dc.start()
    nums = dc.parallelize(range(100), 10)

    # CacheTracker is defined in this module; register the RDD with its split count.
    tracker = CacheTracker(True)
    tracker.registerRDD(nums.id, len(nums))
    split = nums.splits[0]
    # The first call computes and caches the split; the second should hit the cache.
    print(list(tracker.getOrCompute(nums, split)))
    print(list(tracker.getOrCompute(nums, split)))
    print(tracker.getLocationsSnapshot())
    tracker.stop()
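# The pattern the test above exercises is a cache-aside lookup: serve a split
# from cache when present, otherwise compute it once and record its location.
# A minimal sketch of that pattern; SimpleTracker is a hypothetical dict-backed
# stand-in, not DPark's CacheTracker implementation.
class SimpleTracker:
    def __init__(self):
        self.cache = {}       # (rdd_id, split_index) -> materialized values
        self.locations = {}   # rdd_id -> hosts holding a cached copy

    def get_or_compute(self, rdd, split):
        key = (rdd.id, split.index)
        if key not in self.cache:
            # Miss: compute the split once and remember where it lives.
            self.cache[key] = list(rdd.compute(split))
            self.locations.setdefault(rdd.id, set()).add("localhost")
        return iter(self.cache[key])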
def __init__(self, batchDuration, sc=None, graph=None, batchCallback=None):
    # sc may be an existing DparkContext, a master string, or None (local default).
    if isinstance(sc, str) or not sc:
        sc = DparkContext(sc)
    self.sc = sc
    sc.is_dstream = True
    batchDuration = int(batchDuration)
    self.batchDuration = batchDuration
    self.graph = graph or DStreamGraph(batchDuration)
    self.checkpointDir = None
    self.checkpointDuration = None
    self.scheduler = None
    self.lastCheckpointTime = 0
    self.batchCallback = batchCallback
def __init__(self, batchDuration, sc=None):
    if isinstance(sc, str) or not sc:  # None
        sc = DparkContext(sc)
    self.sc = sc
    batchDuration = int(batchDuration)
    self.batchDuration = batchDuration
    self.graph = DStreamGraph(batchDuration)
    self.checkpointDir = None
    self.checkpointDuration = None
    self.scheduler = None
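# Both constructor variants above accept a live context, a master string, or
# None. A hypothetical usage sketch: it assumes the enclosing class is
# StreamingContext in dpark.dstream; that name and import path are not shown
# above, so treat them as assumptions.
from dpark.dstream import StreamingContext

# Batch every 2 seconds on a local context; a master string such as
# "mesos://host:5050" or an existing DparkContext would also be accepted.
ssc = StreamingContext(2, "local")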
def test():
    import logging
    import multiprocessing
    logging.basicConfig(level=logging.DEBUG)

    # Exercise the shared cache across worker processes; set_cache and
    # get_cache are module-level helpers (a sketch follows this function).
    cache = mmapCache
    pool = multiprocessing.Pool(2)
    assert pool.apply(set_cache) == True
    assert pool.apply(get_cache) == 'b'
    pool.close()
    pool.join()
    # A value written in a child process is visible in the parent.
    assert cache.get('a') == 'b'

    from dpark.context import DparkContext
    dc = DparkContext("local")
    nums = dc.parallelize(range(100), 10)
    tracker = CacheTracker(True)
    tracker.registerRDD(nums.id, len(nums))
    split = nums.splits[0]
    print(list(tracker.getOrCompute(nums, split)))
    print(list(tracker.getOrCompute(nums, split)))
    print(tracker.getLocationsSnapshot())
    tracker.stop()
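# set_cache and get_cache must be module-level functions so multiprocessing
# can pickle them. Their bodies are not shown above; a plausible sketch, in
# which the put(key, value) call on mmapCache is an assumption (only get()
# appears in the test itself):
def set_cache():
    mmapCache.put('a', 'b')
    return True

def get_cache():
    return mmapCache.get('a')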