Example #1
    def _test_graph(self, pgCreator, socketListeners=1):
        if isinstance(pgCreator, basestring):
            pgCreator = "test.graphsRepository.%s" % (pgCreator)
        task = FinishGraphExecution(pgCreator=pgCreator)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)

        # Start executing the SocketListenerApps so they open their ports
        def startSocketListeners(drop):
            if isinstance(drop, SocketListenerApp):
                threading.Thread(target=lambda drop: drop.execute(), args=(drop,)).start()

        droputils.breadFirstTraverse(task.roots, startSocketListeners)

        # Write to the initial nodes of the graph to trigger the graph execution
        for i in xrange(socketListeners):
            threading.Thread(
                target=utils.writeToRemotePort, name="socketWriter", args=("localhost", 1111 + i, test_data, 2)
            ).start()

        # Run the graph! Luigi will either monitor or execute the DROPs
        w.run()
        w.stop()

        # ... but at the end all the nodes of the graph should be completed
        # and should exist
        droputils.breadFirstTraverse(
            task.roots,
            lambda drop: self.assertTrue(
                drop.isCompleted() and drop.exists(), "%s is not COMPLETED or doesn't exist" % (drop.uid)
            ),
        )
Example #2
    def _test_graph(self, pgCreator, socketListeners=1):
        if isinstance(pgCreator, six.string_types):
            pgCreator = "test.graphsRepository.%s" % (pgCreator)
        task = FinishGraphExecution(pgCreator=pgCreator)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)

        # Start executing the SocketListenerApps so they open their ports
        for drop, _ in droputils.breadFirstTraverse(task.roots):
            if isinstance(drop, SocketListenerApp):
                threading.Thread(target=lambda drop: drop.execute(),
                                 args=(drop, )).start()

        # Write to the initial nodes of the graph to trigger the graph execution
        for i in range(socketListeners):
            threading.Thread(target=utils.write_to,
                             name='socketWriter',
                             args=("localhost", 1111 + i, test_data,
                                   2)).start()

        # Run the graph! Luigi will either monitor or execute the DROPs
        w.run()
        w.stop()

        # ... but at the end all the nodes of the graph should be completed
        # and should exist
        for drop, _ in droputils.breadFirstTraverse(task.roots):
            self.assertTrue(
                drop.isCompleted() and drop.exists(),
                "%s is not COMPLETED or doesn't exist" % (drop.uid))
Example #3
    def testBreadthFirstSearch(self):
        """
        Checks that our BFS method is correct
        """
        a, b, c, d, e, f, g, h, i, j = self._createGraph()
        nodesList = []
        droputils.breadFirstTraverse(a, lambda n: nodesList.append(n))

        self.assertListEqual(nodesList, [a, b, c, d, e, f, g, h, i, j])
Example #4
    def test_BFSWithFiltering(self):
        """
        Checks that the BFS works if the given function does filtering on the
        downstream DROPs.
        """
        a, _, c, _, e, _, _, h, _, j = self._createGraph()

        visitedNodes = []
        def filtering(drop, downStreamDrops):
            downStreamDrops[:] = [x for x in downStreamDrops if x.uid not in ('b','f')]
            visitedNodes.append(drop)
        droputils.breadFirstTraverse(a, filtering)

        self.assertEqual(5, len(visitedNodes))
        self.assertListEqual(visitedNodes, [a, c, e, h, j])
Example #5
    def deploySession(self, sessionId, completedDrops=[]):
        session = self._sessions[sessionId]
        session.deploy(completedDrops=completedDrops)
        roots = session.roots

        # We register the new DROPs with the DLM if there is one
        if self._dlm:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Registering new DROPs with the DataLifecycleManager")
            droputils.breadFirstTraverse(roots, lambda drop: self._dlm.addDrop(drop))

        # Finally, we also collect the Pyro URIs of our DROPs and return them
        uris = {}
        droputils.breadFirstTraverse(roots, lambda drop: uris.__setitem__(drop.uid, drop.uri))
        return uris
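The lambda plus `uris.__setitem__` above is how a dictionary gets filled when only a callback can be passed to the traversal. With the generator-style API used in the other examples, the same URI collection could be written as a dict comprehension; a small sketch, assuming `roots` and the `uid`/`uri` attributes behave as in Example #5:

    # Equivalent URI collection with the generator-style traversal; the
    # downstream list is ignored, so no pruning of the traversal happens here.
    uris = {drop.uid: drop.uri for drop, _ in droputils.breadFirstTraverse(roots)}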
Example #6
 def testBreadthFirstSearch(self):
     """
     Checks that our BFS method is correct
     """
     a, b, c, d, e, f, g, h, i, j = self._createGraph()
     nodesList = [drop for drop, _ in droputils.breadFirstTraverse(a)]
     self.assertListEqual([a, b, c, d, e, f, g, h, i, j], nodesList)
Example #7
 def trigger_drops(self, uids):
     for drop,downStreamDrops in droputils.breadFirstTraverse(self._roots):
         downStreamDrops[:] = [dsDrop for dsDrop in downStreamDrops if isinstance(dsDrop, AbstractDROP)]
         if drop.uid in uids:
             if isinstance(drop, InputFiredAppDROP):
                 drop.async_execute()
             else:
                 drop.setCompleted()
Example #8
    def getGraphStatus(self):
        if self.status not in (SessionStates.RUNNING, SessionStates.FINISHED):
            raise Exception("The session is currently not running, cannot get graph status")
        statusDict = collections.defaultdict(dict)

        # We shouldn't traverse the full graph because there might be nodes
        # attached to our DROPs that are actually part of other DMs (and have been
        # wired together by the DIM after deploying each individual graph on
        # each of the DMs).
        # We recognize such nodes because they are actually not an instance of
        # AbstractDROP (they are Pyro4.Proxy instances).
        #
        # The same trick is used in luigi_int.RunDROPTask.requires
        def addToDict(drop, downStreamDrops):
            downStreamDrops[:] = [dsDrop for dsDrop in downStreamDrops if isinstance(dsDrop, AbstractDROP)]
            if isinstance(drop, AppDROP):
                statusDict[drop.oid]['execStatus'] = drop.execStatus
            statusDict[drop.oid]['status'] = drop.status

        droputils.breadFirstTraverse(self._roots, addToDict)
        return statusDict
Example #9
    def test_BFSWithFiltering(self):
        """
        Checks that the BFS works if the given function does filtering on the
        downstream DROPs.
        """
        a, _, c, _, e, _, _, h, _, j = self._createGraph()

        visitedNodes = []
        for drop, downStreamDrops in droputils.breadFirstTraverse(a):
            downStreamDrops[:] = [
                x for x in downStreamDrops if x.uid not in ('b', 'f')
            ]
            visitedNodes.append(drop)

        self.assertEqual(5, len(visitedNodes))
        self.assertListEqual(visitedNodes, [a, c, e, h, j])
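The filtering examples all rely on the same contract: the traversal yields the current drop together with a mutable list of its downstream drops, and any entries the caller removes in place are never visited. The real implementation lives in droputils; the following is only a rough sketch of that behaviour, with `getDownstreamDrops` a hypothetical helper standing in for however the library discovers a drop's downstream nodes:

    import collections

    def breadth_first_traverse(roots):
        """Sketch of a generator-style BFS that honours caller-side pruning."""
        queue = collections.deque(roots if isinstance(roots, (list, tuple)) else [roots])
        visited = set()
        while queue:
            drop = queue.popleft()
            if drop.uid in visited:
                continue
            visited.add(drop.uid)
            downstream = list(getDownstreamDrops(drop))  # hypothetical helper
            # The caller may mutate `downstream` in place while this generator
            # is suspended; only what remains afterwards gets queued.
            yield drop, downstream
            queue.extend(downstream)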
Example #10
    def getGraphStatus(self):
        if self.status not in (SessionStates.RUNNING, SessionStates.FINISHED):
            raise InvalidSessionState("The session is currently not running, cannot get graph status")

        # We shouldn't traverse the full graph because there might be nodes
        # attached to our DROPs that are actually part of other DMs (and have been
        # wired together by the DIM after deploying each individual graph on
        # each of the DMs).
        # We recognize such nodes because they are actually not an instance of
        # AbstractDROP (they are DropProxy instances).
        #
        # The same trick is used in luigi_int.RunDROPTask.requires
        statusDict = collections.defaultdict(dict)
        for drop, downStreamDrops in droputils.breadFirstTraverse(self._roots):
            downStreamDrops[:] = [dsDrop for dsDrop in downStreamDrops if isinstance(dsDrop, AbstractDROP)]
            if isinstance(drop, AppDROP):
                statusDict[drop.oid]['execStatus'] = drop.execStatus
            statusDict[drop.oid]['status'] = drop.status

        return statusDict
Example #11
    def deploy(self, completedDrops=[], foreach=None):
        """
        Creates the DROPs represented by all the graph specs contained in
        this session, effectively deploying them.

        When this method has finished executing, a Pyro Daemon will also be
        up and running, servicing requests to access all the DROPs
        belonging to this session.
        """

        status = self.status
        if status != SessionStates.BUILDING:
            raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

        self.status = SessionStates.DEPLOYING

        # Create the real DROPs from the graph specs
        logger.info("Creating DROPs for session %s", self._sessionId)

        self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
        logger.info("%d drops successfully created", len(self._graph))

        for drop,_ in droputils.breadFirstTraverse(self._roots):

            # Register them
            self._drops[drop.uid] = drop

            # Register them with the error handler
            if self._error_status_listener:
                drop.subscribe(self._error_status_listener, eventType='status')
        logger.info("Stored all drops, proceeding with further customization")

        # Start the luigi task that will make sure the graph is executed.
        # If we're not using luigi we still need to know when the graph
        # finishes, so we listen for the completion of the leaf drops instead.
        if self._enable_luigi:
            logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
            task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
            sch = scheduler.CentralPlannerScheduler()
            w = worker.Worker(scheduler=sch)
            w.add(task)
            workerT = threading.Thread(None, self._run, args=[w])
            workerT.daemon = True
            workerT.start()
        else:
            leaves = droputils.getLeafNodes(self._roots)
            logger.info("Adding completion listener to leaf drops")
            listener = LeavesCompletionListener(leaves, self)
            for leaf in leaves:
                if isinstance(leaf, AppDROP):
                    leaf.subscribe(listener, 'producerFinished')
                else:
                    leaf.subscribe(listener, 'dropCompleted')
            logger.info("Listener added to leaf drops")

        # We move to COMPLETED the DROPs that we were requested to complete.
        # InputFiredAppDROPs are considered here as having to be executed and
        # are not directly moved to COMPLETED.
        #
        # This is done in a separate iteration at the very end to make sure
        # all event listeners are already in place for all drops.
        self.trigger_drops(completedDrops)

        # Foreach
        if foreach:
            logger.info("Invoking 'foreach' on each drop")
            for drop,_ in droputils.breadFirstTraverse(self._roots):
                foreach(drop)
            logger.info("'foreach' invoked for each drop")

        # Append proxies
        logger.info("Creating %d drop proxies", len(self._proxyinfo))
        for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
            proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
            method = getattr(self._drops[local_uid], relname)
            method(proxy, False)

        self.status = SessionStates.RUNNING
        logger.info("Session %s is now RUNNING", self._sessionId)
Example #12
    def deploy(self, completedDrops=[]):
        """
        Creates the DROPs represented by all the graph specs contained in
        this session, effectively deploying them.

        When this method has finished executing, a Pyro Daemon will also be
        up and running, servicing requests to access all the DROPs
        belonging to this session.
        """

        status = self.status
        if status != SessionStates.BUILDING:
            raise Exception("Can't deploy this session in its current status: %d" % (status))

        self.status = SessionStates.DEPLOYING

        # Create the Pyro daemon that will serve the DROP proxies and start it
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Starting Pyro4 Daemon for session %s" % (self._sessionId))
        self._daemon = Pyro4.Daemon(host=self._host)
        self._daemonT = threading.Thread(target = lambda: self._daemon.requestLoop(), name="Session %s Pyro Daemon" % (self._sessionId))
        self._daemonT.daemon = True
        self._daemonT.start()

        # Create the real DROPs from the graph specs
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Creating DROPs for session %s" % (self._sessionId))

        self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())

        # Register them
        droputils.breadFirstTraverse(self._roots, self._registerDrop)

        # Register them with the error handler
        # TODO: We should probably merge all these breadFirstTraverse calls into
        # a single one to avoid so much iteration through the drops
        if self._error_status_listener:
            def register_error_status_listener(drop):
                drop.subscribe(self._error_status_listener, eventType='status')
            droputils.breadFirstTraverse(self._roots, register_error_status_listener)

        # We move to COMPLETED the DROPs that we were requested to complete.
        # InputFiredAppDROPs are considered here as having to be executed and
        # are not directly moved to COMPLETED.
        # TODO: We should possibly unify this initial triggering into a more
        #       solid concept that encompasses these two and other types of DROPs
        def triggerDrop(drop):
            if drop.uid in completedDrops:
                if isinstance(drop, InputFiredAppDROP):
                    t = threading.Thread(target=lambda:drop.execute())
                    t.daemon = True
                    t.start()
                else:
                    drop.setCompleted()
        droputils.breadFirstTraverse(self._roots, triggerDrop)

        # Start the luigi task that will make sure the graph is executed.
        # If we're not using luigi we still need to know when the graph
        # finishes, so we listen for the completion of the leaf drops instead.
        if self._enable_luigi:
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug("Starting Luigi FinishGraphExecution task for session %s" % (self._sessionId))
            task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
            sch = scheduler.CentralPlannerScheduler()
            w = worker.Worker(scheduler=sch)
            w.add(task)
            workerT = threading.Thread(None, self._run, args=[w])
            workerT.daemon = True
            workerT.start()
        else:
            leaves = droputils.getLeafNodes(self._roots)
            logger.debug("Adding completion listener to leaf drops %r", leaves)
            listener = LeavesCompletionListener(leaves, self)
            for leaf in leaves:
                leaf.subscribe(listener, 'dropCompleted')
                leaf.subscribe(listener, 'producerFinished')

        self.status = SessionStates.RUNNING
        if logger.isEnabledFor(logging.INFO):
            logger.info("Session %s is now RUNNING" % (self._sessionId))