Esempio n. 1
0
def test():
    """Smoke-test CacheTracker against a small local dpark RDD.

    Creates a local DparkContext, builds an RDD with
    ``dc.parallelize(range(100), 10)``, registers it with a
    ``CacheTracker(True)``, requests the first split twice and prints both
    results followed by the tracker's locations snapshot.

    The tracker is stopped in a ``finally`` clause so it is shut down even
    when one of the tracker calls raises.
    """
    logging.basicConfig(level=logging.DEBUG)
    from dpark.context import DparkContext
    dc = DparkContext("local")
    dc.start()
    nums = dc.parallelize(range(100), 10)
    tracker = CacheTracker(True)
    try:
        tracker.registerRDD(nums.id, len(nums))
        split = nums.splits[0]
        # The same split is requested twice; presumably the second call is
        # served from the cache -- confirm against CacheTracker.getOrCompute.
        # A parenthesised single-argument print works both as the Python 2
        # statement and as the Python 3 function.
        print(list(tracker.getOrCompute(nums, split)))
        print(list(tracker.getOrCompute(nums, split)))
        print(tracker.getLocationsSnapshot())
    finally:
        # Always release the tracker, including on failure.
        tracker.stop()
Esempio n. 2
0
def test():
    """Smoke-test CacheTracker against a small local dpark RDD.

    Creates a local DparkContext, builds an RDD with
    ``dc.parallelize(range(100), 10)``, registers it with a
    ``CacheTracker(True)``, requests the first split twice and prints both
    results followed by the tracker's locations snapshot.

    The tracker is stopped in a ``finally`` clause so it is shut down even
    when one of the tracker calls raises.
    """
    logging.basicConfig(level=logging.DEBUG)
    from dpark.context import DparkContext
    dc = DparkContext("local")
    dc.start()
    nums = dc.parallelize(range(100), 10)
    tracker = CacheTracker(True)
    try:
        tracker.registerRDD(nums.id, len(nums))
        split = nums.splits[0]
        # The same split is requested twice; presumably the second call is
        # served from the cache -- confirm against CacheTracker.getOrCompute.
        # A parenthesised single-argument print works both as the Python 2
        # statement and as the Python 3 function.
        print(list(tracker.getOrCompute(nums, split)))
        print(list(tracker.getOrCompute(nums, split)))
        print(tracker.getLocationsSnapshot())
    finally:
        # Always release the tracker, including on failure.
        tracker.stop()
Esempio n. 3
0
    def __init__(self, batchDuration, sc=None, graph=None, batchCallback=None):
        """Initialise the streaming context state.

        batchDuration -- converted with ``int()`` and stored; also handed to
            ``DStreamGraph`` when no graph is supplied.
        sc -- an existing context object. A string or any falsy value
            (including None) is instead passed to ``DparkContext`` to build
            a new context.
        graph -- graph object to use; a falsy value means a new
            ``DStreamGraph(batchDuration)`` is created.
        batchCallback -- stored unchanged on the instance.
        """
        # Strings and falsy values are constructor arguments, not contexts.
        build_context = isinstance(sc, str) or not sc
        if build_context:
            sc = DparkContext(sc)
        self.sc = sc
        # Flag the context as being driven by a streaming context.
        sc.is_dstream = True

        interval = int(batchDuration)
        self.batchDuration = interval
        if graph:
            self.graph = graph
        else:
            self.graph = DStreamGraph(interval)

        # Checkpointing and scheduling start out unset.
        self.checkpointDir = self.checkpointDuration = self.scheduler = None
        self.lastCheckpointTime = 0
        self.batchCallback = batchCallback
Esempio n. 4
0
    def __init__(self, batchDuration, sc=None, graph=None, batchCallback=None):
        """Initialise the streaming context state.

        batchDuration -- converted with ``int()`` and stored; also handed to
            ``DStreamGraph`` when no graph is supplied.
        sc -- an existing context object. A string or any falsy value
            (including None) is instead passed to ``DparkContext`` to build
            a new context.
        graph -- graph object to use; a falsy value means a new
            ``DStreamGraph(batchDuration)`` is created.
        batchCallback -- stored unchanged on the instance.
        """
        # Strings and falsy values are constructor arguments, not contexts.
        build_context = isinstance(sc, str) or not sc
        if build_context:
            sc = DparkContext(sc)
        self.sc = sc
        # Flag the context as being driven by a streaming context.
        sc.is_dstream = True

        interval = int(batchDuration)
        self.batchDuration = interval
        if graph:
            self.graph = graph
        else:
            self.graph = DStreamGraph(interval)

        # Checkpointing and scheduling start out unset.
        self.checkpointDir = self.checkpointDuration = self.scheduler = None
        self.lastCheckpointTime = 0
        self.batchCallback = batchCallback
Esempio n. 5
0
 def __init__(self, batchDuration, sc=None):
     """Initialise the streaming context state.

     batchDuration -- converted with ``int()``, stored, and passed to
         ``DStreamGraph`` to build the graph.
     sc -- an existing context object. A string or any falsy value
         (including None) is instead passed to ``DparkContext`` to build
         a new context.
     """
     # Strings and falsy values are constructor arguments, not contexts.
     build_context = isinstance(sc, str) or not sc
     if build_context:
         sc = DparkContext(sc)
     self.sc = sc

     interval = int(batchDuration)
     self.batchDuration = interval
     self.graph = DStreamGraph(interval)

     # Checkpointing and scheduling start out unset.
     self.checkpointDir = self.checkpointDuration = self.scheduler = None
Esempio n. 6
0
def test():
    """Smoke-test mmapCache across processes, then CacheTracker locally.

    Part 1 runs ``set_cache`` and ``get_cache`` through a two-worker
    ``multiprocessing.Pool`` and then checks that ``mmapCache.get('a')``
    returns ``'b'`` in this process. Presumably ``set_cache`` stores
    ``'a' -> 'b'`` -- confirm against its definition.

    Part 2 builds an RDD with ``dc.parallelize(range(100), 10)``, registers
    it with a ``CacheTracker(True)``, requests the first split twice and
    prints both results followed by the tracker's locations snapshot.

    The pool and the tracker are both released in ``finally`` clauses so a
    failing assertion or call does not leave them running.
    """
    logging.basicConfig(level=logging.DEBUG)
    cache = mmapCache
    pool = multiprocessing.Pool(2)
    try:
        # NOTE(review): the strict ``== True`` comparison is kept because
        # the return type of set_cache is not visible here.
        assert pool.apply(set_cache) == True
        assert pool.apply(get_cache) == 'b'
    finally:
        # Shut the workers down even when an assertion above fails.
        pool.close()
        pool.join()
    assert cache.get('a') == 'b'

    from dpark.context import DparkContext
    dc = DparkContext("local")
    nums = dc.parallelize(range(100), 10)
    tracker = CacheTracker(True)
    try:
        tracker.registerRDD(nums.id, len(nums))
        split = nums.splits[0]
        # A parenthesised single-argument print works both as the Python 2
        # statement and as the Python 3 function.
        print(tracker.getOrCompute(nums, split))
        print(tracker.getOrCompute(nums, split))
        print(tracker.getLocationsSnapshot())
    finally:
        # Always release the tracker, including on failure.
        tracker.stop()