Python VectorDataManager Examples, ufora.FORA.VectorDataManager.VectorDataManager Python Examples

Example #1

0

Show file

 def createCumulusGateway(self, callbackScheduler, vdm=None):
     """Create a CumulusGatewayRemote.RemoteGateway for this cluster.

     The gateway receives this object's callback scheduler, a newly
     constructed VDM sized by self.remoteGatewayCacheSize, a channel factory
     obtained from self.channelManager, and a CumulusActiveMachines built on
     self.sharedStateViewFactory.

     Neither 'callbackScheduler' nor 'vdm' is read here: the scheduler stored
     on self is used and a fresh VDM is always constructed.
     """
     logging.info("InMemoryCluster creating a RemoteGateway")

     # Collaborators are built in the same order the gateway takes them.
     scheduler = self.callbackScheduler
     gatewayVdm = VectorDataManager.constructVDM(
         scheduler,
         self.remoteGatewayCacheSize
         )
     channelFactory = self.channelManager.createChannelFactory()
     viewFactory = self.sharedStateViewFactory
     activeMachines = CumulusActiveMachines.CumulusActiveMachines(viewFactory)

     return CumulusGatewayRemote.RemoteGateway(
         scheduler,
         gatewayVdm,
         channelFactory,
         activeMachines,
         viewFactory
         )

Example #2

0

Show file

File: ComputedValueGateway.py Project: nkhuyu/ufora

    def __init__(self, callbackSchedulerFactory, callbackScheduler, ramCacheSize = None):
        """Set up the gateway's bookkeeping and its VectorDataManager.

        A VDM is constructed from 'callbackScheduler' and 'ramCacheSize',
        setDropUnreferencedPagesWhenFull(True) is applied to it, and a
        CumulusNative.TrackingOfflineStorage is installed as its offline cache.

        'callbackSchedulerFactory' is accepted but not read by this constructor.
        """
        self.callbackScheduler = callbackScheduler
        self.lock_ = threading.RLock()

        # both tables start out empty
        self.vectorDataIDRequestCount_ = dict()
        self.vectorDataIDToVectorSlices_ = dict()

        vdm = VectorDataManager.constructVDM(callbackScheduler, ramCacheSize)
        self.vdm = vdm
        vdm.setDropUnreferencedPagesWhenFull(True)

        offloadRecorder = CumulusNative.TrackingOfflineStorage(self.callbackScheduler)
        self.ramCacheOffloadRecorder = offloadRecorder
        vdm.setOfflineCache(offloadRecorder)

Example #3

0

Show file

File: ComputedValueGateway.py Project: vishnur/ufora

    def __init__(self, callbackSchedulerFactory, callbackScheduler, ramCacheSize = None):
        """Initialize gateway state, then build and configure the VDM.

        The VDM comes from VectorDataManager.constructVDM(callbackScheduler,
        ramCacheSize). It has setDropUnreferencedPagesWhenFull(True) applied
        and is given a TrackingOfflineStorage as its offline cache; that
        storage object is also kept on self.ramCacheOffloadRecorder.

        The 'callbackSchedulerFactory' argument is not used here.
        """
        self.callbackScheduler = callbackScheduler
        self.lock_ = threading.RLock()
        self.vectorDataIDRequestCount_, self.vectorDataIDToVectorSlices_ = {}, {}

        self.vdm = VectorDataManager.constructVDM(
            callbackScheduler,
            ramCacheSize
            )
        self.vdm.setDropUnreferencedPagesWhenFull(True)

        # keep a handle on the offline storage as well as installing it
        self.ramCacheOffloadRecorder = CumulusNative.TrackingOfflineStorage(
            self.callbackScheduler
            )
        self.vdm.setOfflineCache(self.ramCacheOffloadRecorder)

Example #4

0

Show file

File: Layout_test.py Project: Sandy4321/ufora

    def runSimpleEvaluation(self, inputType, outputType):
        """Check that a compiled inputType->outputType call preserves refcounts.

        Compiles the callable produced by self.generateSimpleCallable and runs
        it PASS_COUNT times against one string-valued ImplValContainer, each
        time in a fresh ExecutionContext. Every pass asserts that the context
        finished without an exception, that the result equals the input value,
        that the value's string refcount is 1, and that the global string and
        ImplVal totals are unchanged.
        """
        import gc

        runtime = Runtime.getMainRuntime()
        compiler = runtime.getTypedForaCompiler()

        # poll until the compiler reports nothing compiling or pending
        while compiler.anyCompilingOrPending():
            time.sleep(.01)

        stringValue = ForaNative.ImplValContainer(aBigString)
        simpleCallable = self.generateSimpleCallable(inputType, outputType)
        target = compiler.compile(simpleCallable)

        gc.collect()

        for _ in range(PASS_COUNT):
            #type values are memoized, so we can't assume that the value has a refcount
            # of exactly one; snapshot the global totals and compare afterwards
            countsBefore = (
                ForaNative.totalStringCount(),
                ForaNative.totalImplvalCount()
                )

            # the VDM is deliberately not bound to a local, so only the
            # context refers to it
            context = ExecutionContext.ExecutionContext(
                dataManager=VectorDataManager.constructVDM(self.callbackScheduler)
                )
            context.evaluateFunctionPointer(target, stringValue)
            self.assertTrue(context.isFinished())

            finished = context.getFinishedResult()
            self.assertTrue(not finished.isException())
            self.assertEqual(finished.asResult.result, stringValue)

            context.teardown()

            # let go of the result before inspecting reference counts
            finished = None

            #verify final refcounts
            self.assertEqual(
                stringValue.getStringObjectRefcount(),
                1,
                "refcounts weren't maintained in %s->%s. %s != 1" % (
                    inputType, outputType, stringValue.getStringObjectRefcount()
                    )
                )
            self.assertEqual(
                countsBefore,
                (ForaNative.totalStringCount(), ForaNative.totalImplvalCount()),
                "refcounts weren't maintained in " + str(inputType) + "->" + str(outputType)
                )

Example #5

0

Show file

File: ClusterSimulation.py Project: ufora/ufora

    def createCumulusGateway(self, callbackScheduler, vdm=None):
        """Return a RemoteGateway that uses TCP string channels.

        If no 'vdm' is supplied, one is constructed from 'callbackScheduler'.
        Either way, setDropUnreferencedPagesWhenFull(True) is applied to it
        before it is handed to the gateway. The gateway and its channel
        factory are given self.callbackScheduler rather than the argument.
        """
        gatewayVdm = (
            vdm if vdm is not None
            else VectorDataManager.constructVDM(callbackScheduler)
            )
        gatewayVdm.setDropUnreferencedPagesWhenFull(True)

        viewFactory = self.getViewFactory()

        scheduler = self.callbackScheduler
        channelFactory = TcpChannelFactory.TcpStringChannelFactory(self.callbackScheduler)
        activeMachines = CumulusActiveMachines.CumulusActiveMachines(viewFactory)

        return CumulusGatewayRemote.RemoteGateway(
            scheduler,
            gatewayVdm,
            channelFactory,
            activeMachines,
            viewFactory
            )

Example #6

0

Show file

    def runSimpleEvaluation(self, inputType, outputType):
        """Run one compiled conversion repeatedly and verify refcounts hold.

        The callable from self.generateSimpleCallable(inputType, outputType)
        is compiled once. It is then evaluated PASS_COUNT times on the same
        string ImplValContainer, using a new ExecutionContext per pass. Each
        pass must finish, must not produce an exception, must return a value
        equal to the input, and must leave the string's refcount at 1 and the
        global string/ImplVal totals where they started.
        """
        theRuntime = Runtime.getMainRuntime()
        typedCompiler = theRuntime.getTypedForaCompiler()

        # spin until nothing is compiling or queued
        while True:
            if not typedCompiler.anyCompilingOrPending():
                break
            time.sleep(.01)

        theString = ForaNative.ImplValContainer(aBigString)

        toCompile = self.generateSimpleCallable(inputType, outputType)

        entryPoint = typedCompiler.compile(toCompile)

        import gc
        gc.collect()

        for _ in range(PASS_COUNT):
            #type values are memoized, so we can't assume that the value has a refcount
            # of exactly one
            stringsBefore = ForaNative.totalStringCount()
            implvalsBefore = ForaNative.totalImplvalCount()

            # construct the VDM inline so no extra local keeps it alive
            ctx = ExecutionContext.ExecutionContext(
                dataManager=VectorDataManager.constructVDM(self.callbackScheduler)
                )

            ctx.evaluateFunctionPointer(entryPoint, theString)

            self.assertTrue(ctx.isFinished())

            outcome = ctx.getFinishedResult()

            self.assertTrue(not outcome.isException())
            self.assertEqual(outcome.asResult.result, theString)

            ctx.teardown()

            # release the result so it does not show up in the counts below
            del outcome

            #verify final refcounts
            self.assertEqual(
                theString.getStringObjectRefcount(),
                1,
                "refcounts weren't maintained in %s->%s. %s != 1" % (
                    inputType,
                    outputType,
                    theString.getStringObjectRefcount(),
                    )
                )

            self.assertEqual(
                (stringsBefore, implvalsBefore),
                (ForaNative.totalStringCount(), ForaNative.totalImplvalCount()),
                "refcounts weren't maintained in " + str(inputType) + "->" + str(outputType)
                )

Example #7

0

Show file

File: ClusterSimulation.py Project: ufora/ufora

    def createCumulusGateway(self, callbackScheduler, vdm=None):
        """Build the RemoteGateway used by this simulation.

        A VDM is constructed from 'callbackScheduler' only when the caller
        did not pass one. setDropUnreferencedPagesWhenFull(True) is applied to
        whichever VDM ends up being used. The remaining gateway arguments are
        derived from self.callbackScheduler and self.getViewFactory().
        """
        if vdm is None:
            vdm = VectorDataManager.constructVDM(callbackScheduler)
        vdm.setDropUnreferencedPagesWhenFull(True)

        sharedViewFactory = self.getViewFactory()

        # positional arguments, in the order RemoteGateway receives them
        gatewayArgs = (
            self.callbackScheduler,
            vdm,
            TcpChannelFactory.TcpStringChannelFactory(self.callbackScheduler),
            CumulusActiveMachines.CumulusActiveMachines(sharedViewFactory),
            sharedViewFactory,
            )
        return CumulusGatewayRemote.RemoteGateway(*gatewayArgs)

Example #8

0

Show file

File: CumulusEvaluator.py Project: Sandy4321/ufora

    def __init__(self, callbackScheduler, cumulusGatewayFactory):
        """Create the evaluator's VDM and gateway and hook up its callbacks.

        'cumulusGatewayFactory' is called with (callbackScheduler, vdm) and
        must return the gateway object. This evaluator's onCPUCountChanged,
        onCacheLoad and onComputationResult methods are then installed as the
        gateway's attributes of the same names.
        """
        vdm = VectorDataManager.constructVDM(callbackScheduler)
        self.vdm = vdm
        vdm.setDropUnreferencedPagesWhenFull(True)

        gateway = cumulusGatewayFactory(callbackScheduler, vdm)
        self.cumulusGateway = gateway
        self.lock_ = threading.RLock()
        self.curPriorityIndex = 0

        # route gateway notifications back into this evaluator
        gateway.onCPUCountChanged = self.onCPUCountChanged
        gateway.onCacheLoad = self.onCacheLoad
        gateway.onComputationResult = self.onComputationResult
        self.currentComputationId = None

        self.results_ = Queue.Queue()

Example #9

0

Show file

File: InMemoryCluster.py Project: WantonSoup/ufora

 def createCumulusGateway(self, callbackScheduler, vdm=None):
     """Return a new RemoteGateway backed by this in-memory cluster.

     The gateway is constructed from self.callbackScheduler, a VDM built
     with self.remoteGatewayCacheSize, a channel factory created by
     self.channelManager, a CumulusActiveMachines over
     self.sharedStateViewFactory, and that view factory itself.

     The 'callbackScheduler' and 'vdm' parameters are not consulted.
     """
     logging.info("InMemoryCluster creating a RemoteGateway")

     # accumulate the positional arguments in call order
     gatewayArgs = [self.callbackScheduler]
     gatewayArgs.append(
         VectorDataManager.constructVDM(self.callbackScheduler, self.remoteGatewayCacheSize)
         )
     gatewayArgs.append(self.channelManager.createChannelFactory())
     gatewayArgs.append(
         CumulusActiveMachines.CumulusActiveMachines(self.sharedStateViewFactory)
         )
     gatewayArgs.append(self.sharedStateViewFactory)

     return CumulusGatewayRemote.RemoteGateway(*gatewayArgs)

Example #10

0

Show file

File: CumulusEvaluator.py Project: vishnur/ufora

    def __init__(self, callbackScheduler, cumulusGatewayFactory):
        """Construct the VDM, obtain a gateway from the factory, and wire callbacks.

        The factory is invoked as cumulusGatewayFactory(callbackScheduler,
        self.vdm). Three handlers on this object (onCPUCountChanged,
        onCacheLoad, onComputationResult) are assigned to the gateway under
        the same attribute names, in that order.
        """
        self.vdm = VectorDataManager.constructVDM(callbackScheduler)
        self.vdm.setDropUnreferencedPagesWhenFull(True)

        self.cumulusGateway = cumulusGatewayFactory(callbackScheduler, self.vdm)
        self.lock_ = threading.RLock()
        self.curPriorityIndex = 0

        # same-named handler on self for each gateway callback slot
        for callbackName in ("onCPUCountChanged", "onCacheLoad", "onComputationResult"):
            setattr(self.cumulusGateway, callbackName, getattr(self, callbackName))
        self.currentComputationId = None

        self.results_ = Queue.Queue()

Example #11

0

Show file

def simpleEval(callbackScheduler, *args):
    """Convert 'args' to FORA ImplValContainers, evaluate them, and return the result.

    Each element of 'args' is wrapped in a FORANative.ImplValContainer and the
    wrapped values are evaluated in a new ExecutionContext whose data manager
    is a freshly constructed VDM.

    Assumes you don't use cache or vector loads.

    Returns the result's 'pyval' when it can be read; otherwise the
    ImplValContainer result itself.

    Raises AssertionError if the finished result reports a failure or an
    exception.
    """
    e = ExecutionContext(
        dataManager=VectorDataManager.constructVDM(callbackScheduler))
    e.evaluate(*[FORANative.ImplValContainer(x) for x in args])
    tr = e.getFinishedResult()

    # Raise explicitly instead of 'assert False': assert statements are
    # removed under 'python -O', which would let a failed evaluation fall
    # through to the result handling below.
    if tr.isFailure():
        raise AssertionError("simpleEval: evaluation finished with a failure")
    if tr.isException():
        raise AssertionError("simpleEval: evaluation finished with an exception")

    result = tr.asResult.result
    try:
        return result.pyval
    except Exception:
        # Reading 'pyval' failed, so hand back the container itself.
        # 'except Exception' (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return result

Example #12

0

Show file

File: ExecutionContext.py Project: WantonSoup/ufora

def simpleEval(callbackScheduler, *args):
    """Evaluate 'args' as FORA values and return a python value when possible.

    Every argument is wrapped in a FORANative.ImplValContainer; the wrapped
    values are passed to ExecutionContext.evaluate on a context created with
    a new VDM built from 'callbackScheduler'.

    Assumes you don't use cache or vector loads.

    Returns:
        The 'pyval' attribute of the result if reading it succeeds, otherwise
        the ImplValContainer result.

    Raises:
        AssertionError: the finished result is a failure or an exception.
    """
    context = ExecutionContext(
        dataManager = VectorDataManager.constructVDM(callbackScheduler)
        )
    context.evaluate(*[FORANative.ImplValContainer(x) for x in args])
    finished = context.getFinishedResult()

    # An explicit raise keeps this check alive under 'python -O', where
    # 'assert False' would be compiled away. The exception type is unchanged.
    if finished.isFailure():
        raise AssertionError("simpleEval: evaluation finished with a failure")
    if finished.isException():
        raise AssertionError("simpleEval: evaluation finished with an exception")

    value = finished.asResult.result
    try:
        return value.pyval
    except Exception:
        # Could not read 'pyval'; return the ImplValContainer unchanged.
        # Catching Exception rather than everything avoids swallowing
        # KeyboardInterrupt and SystemExit.
        return value

Example #13

0

Show file

File: CreateTupleExpression_test.py Project: vishnur/ufora

    def runSimpleEvaluation(self, callable, arguments, validator):
        """Compile 'callable', run it PASS_COUNT times, and check result and refcounts.

        On each pass the compiled target is evaluated with '*arguments' in a
        fresh ExecutionContext. The pass asserts that the context finished,
        that validator(result) is true, and that the global string and
        ImplVal totals after teardown match the totals taken before the pass.
        """
        runtime = Runtime.getMainRuntime()
        compiler = runtime.getTypedForaCompiler()
        target = compiler.compile(callable)

        # poll until the compiler has nothing compiling or pending
        while compiler.anyCompilingOrPending():
            time.sleep(.01)

        gc.collect()

        for _ in range(PASS_COUNT):
            baseline = (ForaNative.totalStringCount(), ForaNative.totalImplvalCount())

            # the VDM is created inline so that only the context refers to it
            context = ExecutionContext.ExecutionContext(
                dataManager=VectorDataManager.constructVDM(self.callbackScheduler)
                )
            context.evaluateFunctionPointer(target, *arguments)
            self.assertTrue(context.isFinished())

            finished = context.getFinishedResult()
            self.assertTrue(
                validator(finished.asResult.result),
                "invalid result in " + str(callable) + " with " + str(arguments) +
                ". got " + str(finished)
                )

            # release the result first, then tear the context down
            finished = None
            context.teardown()

            current = (ForaNative.totalStringCount(), ForaNative.totalImplvalCount())

            self.assertEqual(
                baseline,
                current,
                "refcounts weren't maintained in " + str(callable) + " with " + str(arguments) +
                ". %s != %s" % (baseline, current)
                )

Example #14

0

Show file

    def __init__(self, ownAddress, channelListener, channelFactory,
                 eventHandler, callbackScheduler, diagnosticsDir, config,
                 viewFactory):
        """Wire up a Cumulus service and start its worker's computations.

        The constructor records its arguments and the relevant 'config'
        values, initializes Runtime and ModuleImporter, builds the VDM, the
        persistent cache index, the disk offline cache and the cumulus
        worker, and finally calls startComputations() on the worker.

        Arguments:
            ownAddress: stored on self.ownAddress.
            channelListener: must expose exactly two entries in '.ports'
                (asserted). Its 'start' method becomes the target of
                self.cumulusChannelFactoryThread.
            channelFactory: stored on self._channelFactory.
            eventHandler: passed through to self.constructCumlusWorker.
            callbackScheduler: handed to the VDM, the persistent cache index,
                the offline cache and the worker.
            diagnosticsDir: passed to the worker configuration; a falsy value
                is replaced with "".
            config: object providing the cumulus* settings read below.
            viewFactory: used for CumulusActiveMachines and to create the
                view backing the persistent cache index.
        """
        Stoppable.Stoppable.__init__(self)

        #acquire a machineId randomly, using uuid
        self.machineId = CumulusNative.MachineId(
            Hash.Hash.sha1(str(uuid.uuid4())))

        self.ownAddress = ownAddress
        self.callbackScheduler = callbackScheduler
        self.viewFactory = viewFactory
        self.threadsStarted_ = False
        self.connectedMachines = set()
        self.connectingMachines = set(
        )  # machines we are in the process of connecting to
        self.droppedMachineIds = set()
        self.lock = threading.RLock()
        # The config attributes are named in MB; they are scaled by 1024 * 1024 here.
        self.cumulusMaxRamCacheSizeOverride = config.cumulusMaxRamCacheMB * 1024 * 1024
        self.cumulusVectorRamCacheSizeOverride = config.cumulusVectorRamCacheMB * 1024 * 1024
        self.cumulusThreadCountOverride = config.cumulusServiceThreadCount
        self.cumulusTrackTcMalloc = config.cumulusTrackTcmalloc

        self.reconnectPersistentCacheIndexViewThreads = []

        # When a storage subdirectory is configured, the disk cache lives in
        # that subdirectory and is flagged for deletion on teardown; otherwise
        # the configured storage dir is used as-is and is not flagged.
        if config.cumulusDiskCacheStorageSubdirectory is not None:
            self.cumulusDiskCacheWantsDeletionOnTeardown = True
            self.cumulusDiskCacheStorageDir = os.path.join(
                config.cumulusDiskCacheStorageDir,
                config.cumulusDiskCacheStorageSubdirectory)
        else:
            self.cumulusDiskCacheWantsDeletionOnTeardown = False
            self.cumulusDiskCacheStorageDir = config.cumulusDiskCacheStorageDir

        # Sizes are divided back down by 1024 * 1024 for the log message.
        logging.info(
            "Creating a CumulusService with ram cache of %s / %s MB and %s threads",
            self.cumulusVectorRamCacheSizeOverride / 1024.0 / 1024.0,
            self.cumulusMaxRamCacheSizeOverride / 1024.0 / 1024.0,
            self.cumulusThreadCountOverride)

        self._stopEvent = threading.Event()

        self._channelListener = channelListener
        # The listener is required to expose exactly two ports.
        assert len(self._channelListener.ports) == 2
        self._channelFactory = channelFactory

        Runtime.initialize()
        ModuleImporter.initialize()

        self.cumulusActiveMachines = CumulusActiveMachines.CumulusActiveMachines(
            self.viewFactory)

        # The thread object is only created here; this method does not start it.
        self.cumulusChannelFactoryThread = ManagedThread.ManagedThread(
            target=self._channelListener.start)

        self.vdm = VectorDataManager.constructVDM(
            callbackScheduler, self.cumulusVectorRamCacheSizeOverride,
            self.cumulusMaxRamCacheSizeOverride)

        if self.cumulusTrackTcMalloc:
            logging.info(
                "CumulusService enabling track-tc-malloc memory with a max cache of %s MB",
                self.cumulusMaxRamCacheSizeOverride / 1024 / 1024.0)
            self.vdm.getMemoryManager().enableCountTcMallocMemoryAsEcMemory()

        self.persistentCacheIndex = CumulusNative.PersistentCacheIndex(
            viewFactory.createView(retrySeconds=10.0, numRetries=10),
            callbackScheduler)

        self.vdm.setPersistentCacheIndex(self.persistentCacheIndex)

        # Called before the DiskOfflineCache is constructed over the same directory.
        self.deleteCumulusDiskCacheIfNecessary()

        self.offlineCache = CumulusNative.DiskOfflineCache(
            callbackScheduler, self.cumulusDiskCacheStorageDir,
            config.cumulusDiskCacheStorageMB * 1024 * 1024,
            config.cumulusDiskCacheStorageFileCount)

        # An interval of 0 selects the 'None' checkpoint policy; any other
        # value selects a periodic policy with that interval.
        # NOTE(review): the meaning of the second Periodic argument
        # (1024 * 1024) is not visible here - confirm against CumulusNative.
        # NOTE(review): attribute access spelled '.None' only parses on Python 2.
        checkpointInterval = config.cumulusCheckpointIntervalSeconds
        if checkpointInterval == 0:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.None ()
        else:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.Periodic(
                checkpointInterval, 1024 * 1024)

        # diagnosticsDir falls back to "" when it is falsy.
        self.cumulusWorker = self.constructCumlusWorker(
            callbackScheduler,
            CumulusNative.CumulusWorkerConfiguration(
                self.machineId,
                self.cumulusThreadCountOverride, checkpointPolicy,
                ExecutionContext.createContextConfiguration(), diagnosticsDir
                or ""), self.vdm, self.offlineCache, eventHandler)

        # NOTE(review): the dataset-load service wiring below is disabled. It
        # refers to 'settings', which is not a parameter of this method.
        #externalDatasetChannel = self.cumulusWorker.getExternalDatasetRequestChannel(
        #callbackScheduler
        #)
        #self.datasetLoadService = PythonIoTaskService.PythonIoTaskService(
        #settings.s3InterfaceFactory,
        #settings.objectStore,
        #self.vdm,
        #externalDatasetChannel.makeQueuelike(callbackScheduler)
        #)

        self.cumulusWorker.startComputations()

Example #15

0

Show file

File: MessageProcessor.py Project: vishnur/ufora

    def __init__(self, callbackScheduler, sharedStateViewFactory, computedValueGatewayFactory):
        """Build the message processor's graph, caches, VDM, gateway and handlers.

        Runtime and ModuleImporter are initialized, Fora._builtin is set from
        the builtin module ImplVal, and a VDM is constructed from
        'callbackScheduler'. The computed value gateway (created inside the
        graph's context) and the shared-state synchronizer (attached to a view
        from 'sharedStateViewFactory') are set up via 'simultaneously'.
        Finally the message-type dispatch table and the Subscriptions object
        are created.
        """
        self.lock = threading.Lock()
        self.cacheLoadEvents = {}

        self.resultsById_ = {}
        self.eventsById_ = {}
        logging.info("created a component host")

        self.graph = ComputedGraph.ComputedGraph()
        logging.info("created a ComputedGraph")

        Runtime.initialize()
        logging.info("Runtime initialized")

        ModuleImporter.initialize()
        logging.info("Module importer initialized")

        builtinImplVal = ModuleImporter.builtinModuleImplVal()
        Fora._builtin = ForaValue.FORAValue(builtinImplVal)

        self.incomingObjectCache = IncomingObjectCache()
        self.outgoingObjectCache = OutgoingObjectCache()

        vdm = VectorDataManager.constructVDM(callbackScheduler)
        self.VDM = vdm
        vdm.setDropUnreferencedPagesWhenFull(True)
        logging.info("created a VDM")

        logging.info("got shared state view factory: %s", sharedStateViewFactory)

        def initValueGateway():
            # the gateway is created while the graph's context is active
            with self.graph:
                gateway = computedValueGatewayFactory()
                self.computedValueGateway = gateway
                self.cumulusGatewayRemote = gateway.cumulusGateway

        def initSynchronizer():
            synchronizer = SharedStateSynchronizer.SharedStateSynchronizer()
            self.synchronizer = synchronizer
            logging.info("created a SharedStateSynchronizer")

            synchronizer.attachView(sharedStateViewFactory.createView())
            logging.info("attached shared state view.")

        simultaneously(initSynchronizer, initValueGateway)

        self.synchronousSharedStateScope = SynchronousPropertyAccess.SynchronousPropertyAccess()

        self.outstandingMessagesById = {}
        self.expectedMessageId = 0

        # dispatch table: message type -> bound handler
        self.messageTypeHandlers = {
            "Read": self.handleReadMessage,
            "Assign": self.handleAssignMessage,
            "Subscribe": self.handleSubscribeMessage,
            "Execute": self.handleExecuteMessage,
            "ServerFlushObjectIdsBelow": self.handleFlushObjectIds,
            }

        self.pendingObjectQueue = []

        self.subscriptions = Subscriptions.Subscriptions(
            self.graph,
            self.computedValueGateway,
            self.synchronizer
            )

Example #16

0

Show file

File: MessageProcessor.py Project: vishnur/ufora

    def __init__(self,
                 callbackScheduler,
                 sharedStateViewFactory,
                 computedValueGatewayFactory):
        """Initialize runtime pieces, gateway, synchronizer and message dispatch.

        In order: bookkeeping dicts and the ComputedGraph are created, Runtime
        and ModuleImporter are initialized, Fora._builtin is populated, the
        object caches and the VDM are built, the value gateway and the
        shared-state synchronizer are initialized through 'simultaneously',
        and the per-message-type handlers and Subscriptions are registered.
        """
        self.lock = threading.Lock()
        self.cacheLoadEvents = {}
        self.resultsById_ = {}
        self.eventsById_ = {}

        logging.info("created a component host")

        self.graph = ComputedGraph.ComputedGraph()

        logging.info("created a ComputedGraph")

        Runtime.initialize()

        logging.info("Runtime initialized")

        ModuleImporter.initialize()

        logging.info("Module importer initialized")

        Fora._builtin = ForaValue.FORAValue(
            ModuleImporter.builtinModuleImplVal()
            )

        self.incomingObjectCache = IncomingObjectCache()
        self.outgoingObjectCache = OutgoingObjectCache()

        self.VDM = VectorDataManager.constructVDM(callbackScheduler)
        self.VDM.setDropUnreferencedPagesWhenFull(True)

        logging.info("created a VDM")
        logging.info("got shared state view factory: %s", sharedStateViewFactory)

        def initValueGateway():
            # construct the gateway with the graph's context entered
            with self.graph:
                self.computedValueGateway = computedValueGatewayFactory()
                self.cumulusGatewayRemote = self.computedValueGateway.cumulusGateway

        def initSynchronizer():
            self.synchronizer = SharedStateSynchronizer.SharedStateSynchronizer()
            logging.info("created a SharedStateSynchronizer")

            view = sharedStateViewFactory.createView()
            self.synchronizer.attachView(view)
            logging.info("attached shared state view.")

        simultaneously(initSynchronizer, initValueGateway)

        self.synchronousSharedStateScope = SynchronousPropertyAccess.SynchronousPropertyAccess()

        self.outstandingMessagesById = {}
        self.expectedMessageId = 0

        # register one handler per message type, in the original order
        self.messageTypeHandlers = {}
        for messageType, handler in (
                ("Read", self.handleReadMessage),
                ("Assign", self.handleAssignMessage),
                ("Subscribe", self.handleSubscribeMessage),
                ("Execute", self.handleExecuteMessage),
                ("ServerFlushObjectIdsBelow", self.handleFlushObjectIds),
                ):
            self.messageTypeHandlers[messageType] = handler

        self.pendingObjectQueue = []

        self.subscriptions = Subscriptions.Subscriptions(
            self.graph, self.computedValueGateway, self.synchronizer)

Example #17

0

Show file

File: CumulusService.py Project: ufora/ufora

    def __init__(self,
                 ownAddress,
                 channelListener,
                 channelFactory,
                 eventHandler,
                 callbackScheduler,
                 diagnosticsDir,
                 config,
                 viewFactory,
                 s3InterfaceFactory=None,
                 objectStore=None):
        """Wire up a Cumulus service, start its worker and optional dataset loader.

        The constructor records its arguments and the relevant 'config'
        values, initializes Runtime and ModuleImporter, builds the VDM, the
        persistent cache index, the disk offline cache, the out-of-process
        python task pool and the cumulus worker. It then starts the worker's
        computations and, when an s3InterfaceFactory was supplied, the
        dataset load service.

        Arguments:
            ownAddress: stored on self.ownAddress.
            channelListener: must expose exactly two entries in '.ports'
                (asserted). Its 'start' method becomes the target of
                self.cumulusChannelFactoryThread.
            channelFactory: stored on self._channelFactory.
            eventHandler: stored on self and passed to constructCumlusWorker.
            callbackScheduler: handed to the VDM, the persistent cache index,
                the offline cache, the worker and the dataset channel.
            diagnosticsDir: passed to the worker configuration; a falsy value
                is replaced with "".
            config: object providing the cumulus* settings read below.
            viewFactory: used for CumulusActiveMachines and to create the
                view backing the persistent cache index.
            s3InterfaceFactory: optional. When truthy, a PythonIoTaskService
                is created and started.
            objectStore: optional. Passed to the PythonIoTaskService.
        """
        Stoppable.Stoppable.__init__(self)

        #acquire a machineId randomly, using uuid
        self.machineId = CumulusNative.MachineId(
            Hash.Hash.sha1(str(uuid.uuid4()))
            )

        self.ownAddress = ownAddress
        self.callbackScheduler = callbackScheduler
        self.viewFactory = viewFactory
        self.s3InterfaceFactory = s3InterfaceFactory
        self.objectStore = objectStore
        self.threadsStarted_ = False
        self.connectedMachines = set()
        self.connectingMachines = set()  # machines we are in the process of connecting to
        self.droppedMachineIds = set()
        self.lock = threading.RLock()
        # The config attributes are named in MB; they are scaled by 1024*1024 here.
        self.cumulusMaxRamCacheSizeOverride = config.cumulusMaxRamCacheMB * 1024*1024
        self.cumulusVectorRamCacheSizeOverride = config.cumulusVectorRamCacheMB * 1024*1024
        self.cumulusThreadCountOverride = config.cumulusServiceThreadCount
        self.cumulusTrackTcmalloc = config.cumulusTrackTcmalloc
        self.eventHandler = eventHandler

        self.reconnectPersistentCacheIndexViewThreads = []

        # When a storage subdirectory is configured, the disk cache lives in
        # that subdirectory and is flagged for deletion on teardown; otherwise
        # the configured storage dir is used as-is and is not flagged.
        if config.cumulusDiskCacheStorageSubdirectory is not None:
            self.cumulusDiskCacheWantsDeletionOnTeardown = True
            self.cumulusDiskCacheStorageDir = os.path.join(
                config.cumulusDiskCacheStorageDir,
                config.cumulusDiskCacheStorageSubdirectory
                )
        else:
            self.cumulusDiskCacheWantsDeletionOnTeardown = False
            self.cumulusDiskCacheStorageDir = config.cumulusDiskCacheStorageDir

        self._stopEvent = threading.Event()

        self._channelListener = channelListener
        # The listener is required to expose exactly two ports.
        assert len(self._channelListener.ports) == 2
        self._channelFactory = channelFactory

        Runtime.initialize()
        ModuleImporter.initialize()

        self.cumulusActiveMachines = CumulusActiveMachines.CumulusActiveMachines(
            self.viewFactory
            )

        # The thread object is only created here; this method does not start it.
        self.cumulusChannelFactoryThread = ManagedThread.ManagedThread(
            target=self._channelListener.start
            )

        self.vdm = VectorDataManager.constructVDM(
            callbackScheduler,
            self.cumulusVectorRamCacheSizeOverride,
            self.cumulusMaxRamCacheSizeOverride
            )

        if self.cumulusTrackTcmalloc:
            self.vdm.getMemoryManager().enableCountTcMallocMemoryAsEcMemory()

        self.persistentCacheIndex = CumulusNative.PersistentCacheIndex(
            viewFactory.createView(retrySeconds=10.0, numRetries=10),
            callbackScheduler
            )

        self.vdm.setPersistentCacheIndex(self.persistentCacheIndex)

        # Called before the DiskOfflineCache is constructed over the same directory.
        self.deleteCumulusDiskCacheIfNecessary()

        self.offlineCache = CumulusNative.DiskOfflineCache(
            callbackScheduler,
            self.cumulusDiskCacheStorageDir,
            config.cumulusDiskCacheStorageMB * 1024 * 1024,
            config.cumulusDiskCacheStorageFileCount
            )

        #If the "s3InterfaceFactory" is not in-memory, we use real out of process python.
        #it would be better if this were more explicit
        # Note: with a factory present, 'outOfProcess' is whatever value the
        # factory's isCompatibleWithOutOfProcessDownloadPool attribute holds.
        outOfProcess = self.s3InterfaceFactory is not None and self.s3InterfaceFactory.isCompatibleWithOutOfProcessDownloadPool

        self.outOfProcessPythonTasks = OutOfProcessPythonTasks.OutOfProcessPythonTasks(outOfProcess=outOfProcess)

        self.vdm.initializeOutOfProcessPythonTasks(self.outOfProcessPythonTasks.nativeTasks)

        # An interval of 0 selects the 'None' checkpoint policy; any other
        # value selects a periodic policy with that interval.
        # NOTE(review): the meaning of the second Periodic argument
        # (1024 * 1024) is not visible here - confirm against CumulusNative.
        # NOTE(review): attribute access spelled '.None' only parses on Python 2.
        checkpointInterval = config.cumulusCheckpointIntervalSeconds
        if checkpointInterval == 0:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.None()
        else:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.Periodic(
                checkpointInterval,
                1024 * 1024
                )

        # diagnosticsDir falls back to "" when it is falsy.
        self.cumulusWorker = self.constructCumlusWorker(
            callbackScheduler,
            CumulusNative.CumulusWorkerConfiguration(
                self.machineId,
                self.cumulusThreadCountOverride,
                checkpointPolicy,
                ExecutionContext.createContextConfiguration(),
                diagnosticsDir or ""
                ),
            self.vdm,
            self.offlineCache,
            eventHandler
            )

        # The dataset load service exists only when an s3InterfaceFactory was
        # supplied. It is fed from the worker's external dataset request channel.
        self.datasetLoadService = None
        if self.s3InterfaceFactory:
            externalDatasetChannel = self.cumulusWorker.getExternalDatasetRequestChannel(
                callbackScheduler
                )
            self.datasetLoadService = PythonIoTaskService.PythonIoTaskService(
                self.s3InterfaceFactory,
                self.objectStore,
                self.vdm,
                externalDatasetChannel.makeQueuelike(callbackScheduler)
                )

        # The worker is started first, then the dataset load service if present.
        self.cumulusWorker.startComputations()

        if self.datasetLoadService:
            self.datasetLoadService.startService()

Example #18

0

Show file

File: CumulusService.py Project: nkhuyu/ufora

    def __init__(self,
                 ownAddress,
                 channelListener,
                 channelFactory,
                 eventHandler,
                 callbackScheduler,
                 diagnosticsDir,
                 config,
                 viewFactory):
        """Wire up a Cumulus service and start its worker's computations.

        The constructor records its arguments and the relevant 'config'
        values, initializes Runtime and ModuleImporter, builds the VDM, the
        persistent cache index, the disk offline cache and the cumulus
        worker, and finally calls startComputations() on the worker.

        Arguments:
            ownAddress: stored on self.ownAddress.
            channelListener: must expose exactly two entries in '.ports'
                (asserted). Its 'start' method becomes the target of
                self.cumulusChannelFactoryThread.
            channelFactory: stored on self._channelFactory.
            eventHandler: passed through to self.constructCumlusWorker.
            callbackScheduler: handed to the VDM, the persistent cache index,
                the offline cache and the worker.
            diagnosticsDir: passed to the worker configuration; a falsy value
                is replaced with "".
            config: object providing the cumulus* settings read below.
            viewFactory: used for CumulusActiveMachines and to create the
                view backing the persistent cache index.
        """
        Stoppable.Stoppable.__init__(self)

        #acquire a machineId randomly, using uuid
        self.machineId = CumulusNative.MachineId(
            Hash.Hash.sha1(str(uuid.uuid4()))
            )

        self.ownAddress = ownAddress
        self.callbackScheduler = callbackScheduler
        self.viewFactory = viewFactory
        self.threadsStarted_ = False
        self.connectedMachines = set()
        self.connectingMachines = set()  # machines we are in the process of connecting to
        self.droppedMachineIds = set()
        self.lock = threading.RLock()
        # The config attributes are named in MB; they are scaled by 1024*1024 here.
        self.cumulusMaxRamCacheSizeOverride = config.cumulusMaxRamCacheMB * 1024*1024
        self.cumulusVectorRamCacheSizeOverride = config.cumulusVectorRamCacheMB * 1024*1024
        self.cumulusThreadCountOverride = config.cumulusServiceThreadCount
        self.cumulusTrackTcMalloc = config.cumulusTrackTcmalloc

        self.reconnectPersistentCacheIndexViewThreads = []

        # When a storage subdirectory is configured, the disk cache lives in
        # that subdirectory and is flagged for deletion on teardown; otherwise
        # the configured storage dir is used as-is and is not flagged.
        if config.cumulusDiskCacheStorageSubdirectory is not None:
            self.cumulusDiskCacheWantsDeletionOnTeardown = True
            self.cumulusDiskCacheStorageDir = os.path.join(
                config.cumulusDiskCacheStorageDir,
                config.cumulusDiskCacheStorageSubdirectory
                )
        else:
            self.cumulusDiskCacheWantsDeletionOnTeardown = False
            self.cumulusDiskCacheStorageDir = config.cumulusDiskCacheStorageDir


        # Sizes are divided back down by 1024 * 1024 for the log message.
        logging.info(
            "Creating a CumulusService with ram cache of %s / %s MB and %s threads",
            self.cumulusVectorRamCacheSizeOverride / 1024.0 / 1024.0,
            self.cumulusMaxRamCacheSizeOverride / 1024.0 / 1024.0,
            self.cumulusThreadCountOverride
            )

        self._stopEvent = threading.Event()

        self._channelListener = channelListener
        # The listener is required to expose exactly two ports.
        assert len(self._channelListener.ports) == 2
        self._channelFactory = channelFactory

        Runtime.initialize()
        ModuleImporter.initialize()

        self.cumulusActiveMachines = CumulusActiveMachines.CumulusActiveMachines(
            self.viewFactory
            )

        # The thread object is only created here; this method does not start it.
        self.cumulusChannelFactoryThread = ManagedThread.ManagedThread(
            target=self._channelListener.start
            )

        self.vdm = VectorDataManager.constructVDM(
            callbackScheduler,
            self.cumulusVectorRamCacheSizeOverride,
            self.cumulusMaxRamCacheSizeOverride
            )

        if self.cumulusTrackTcMalloc:
            logging.info(
                "CumulusService enabling track-tc-malloc memory with a max cache of %s MB",
                self.cumulusMaxRamCacheSizeOverride / 1024 / 1024.0
                )
            self.vdm.getMemoryManager().enableCountTcMallocMemoryAsEcMemory()

        self.persistentCacheIndex = CumulusNative.PersistentCacheIndex(
            viewFactory.createView(retrySeconds=10.0, numRetries=10),
            callbackScheduler
            )

        self.vdm.setPersistentCacheIndex(self.persistentCacheIndex)

        # Called before the DiskOfflineCache is constructed over the same directory.
        self.deleteCumulusDiskCacheIfNecessary()

        self.offlineCache = CumulusNative.DiskOfflineCache(
            callbackScheduler,
            self.cumulusDiskCacheStorageDir,
            config.cumulusDiskCacheStorageMB * 1024 * 1024,
            config.cumulusDiskCacheStorageFileCount
            )

        # An interval of 0 selects the 'None' checkpoint policy; any other
        # value selects a periodic policy with that interval.
        # NOTE(review): the meaning of the second Periodic argument
        # (1024 * 1024) is not visible here - confirm against CumulusNative.
        # NOTE(review): attribute access spelled '.None' only parses on Python 2.
        checkpointInterval = config.cumulusCheckpointIntervalSeconds
        if checkpointInterval == 0:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.None()
        else:
            checkpointPolicy = CumulusNative.CumulusCheckpointPolicy.Periodic(
                checkpointInterval,
                1024 * 1024
                )

        # diagnosticsDir falls back to "" when it is falsy.
        self.cumulusWorker = self.constructCumlusWorker(
            callbackScheduler,
            CumulusNative.CumulusWorkerConfiguration(
                self.machineId,
                self.cumulusThreadCountOverride,
                checkpointPolicy,
                ExecutionContext.createContextConfiguration(),
                diagnosticsDir or ""
                ),
            self.vdm,
            self.offlineCache,
            eventHandler
            )

        # NOTE(review): the dataset-load service wiring below is disabled. It
        # refers to 'settings', which is not a parameter of this method.
        #externalDatasetChannel = self.cumulusWorker.getExternalDatasetRequestChannel(
            #callbackScheduler
            #)
        #self.datasetLoadService = PythonIoTaskService.PythonIoTaskService(
            #settings.s3InterfaceFactory,
            #settings.objectStore,
            #self.vdm,
            #externalDatasetChannel.makeQueuelike(callbackScheduler)
            #)

        self.cumulusWorker.startComputations()