예제 #1
0
    def _test_graph(self, pgCreator, socketListeners=1):
        """
        Run a physical graph through a Luigi worker and verify its outcome.

        `pgCreator` is handed to FinishGraphExecution; when it is a string it
        is first prefixed with ``test.graphsRepository.``. `socketListeners`
        is the number of writer threads started, targeting consecutive ports
        on localhost beginning at 1111.

        After the worker has run, every drop reachable from the task roots
        must be completed and must exist.
        """
        if isinstance(pgCreator, six.string_types):
            pgCreator = "test.graphsRepository.%s" % pgCreator

        graph_task = FinishGraphExecution(pgCreator=pgCreator)
        luigi_worker = worker.Worker(
            scheduler=scheduler.CentralPlannerScheduler())
        luigi_worker.add(graph_task)

        # The SocketListenerApps are executed in background threads first so
        # that they open their ports
        for node, _ in droputils.breadFirstTraverse(graph_task.roots):
            if not isinstance(node, SocketListenerApp):
                continue
            listener_thread = threading.Thread(target=node.execute)
            listener_thread.start()

        # Writing to the initial nodes of the graph triggers its execution
        for offset in range(socketListeners):
            port = 1111 + offset
            writer_thread = threading.Thread(
                target=utils.write_to,
                name='socketWriter',
                args=("localhost", port, test_data, 2))
            writer_thread.start()

        # Run the graph! Luigi will either monitor or execute the DROPs
        luigi_worker.run()
        luigi_worker.stop()

        # By now all the nodes of the graph should be completed and exist
        for node, _ in droputils.breadFirstTraverse(graph_task.roots):
            finished = node.isCompleted() and node.exists()
            self.assertTrue(
                finished,
                "%s is not COMPLETED or doesn't exist" % node.uid)
예제 #2
0
def _create_scheduler():
    """
    Build a CentralPlannerScheduler from the 'scheduler' configuration section.

    Reads the 'retry-delay', 'remove-delay' and 'worker-disconnect-delay'
    options as floats and passes them, in that order, as positional
    arguments to the scheduler constructor.
    """
    config = interface.get_config()

    def _delay(option, fallback):
        # The third getfloat argument is presumably the value used when the
        # option is absent -- confirm against the configuration class.
        return config.getfloat('scheduler', option, fallback)

    return scheduler.CentralPlannerScheduler(
        _delay('retry-delay', 900.0),
        _delay('remove-delay', 600.0),
        _delay('worker-disconnect-delay', 60.0))
예제 #3
0
 def create_local_scheduler(self):
     """Return a new CentralPlannerScheduler with prune_on_get_work enabled."""
     scheduler_options = {'prune_on_get_work': True}
     return scheduler.CentralPlannerScheduler(**scheduler_options)
예제 #4
0
파일: interface.py 프로젝트: trustyou/luigi
 def create_local_scheduler(self):
     """
     Return a new CentralPlannerScheduler with prune_on_get_work enabled
     and record_task_history disabled.
     """
     scheduler_options = {
         'prune_on_get_work': True,
         'record_task_history': False,
     }
     return scheduler.CentralPlannerScheduler(**scheduler_options)
예제 #5
0
 def create_local_scheduler(self):
     """Return a new CentralPlannerScheduler built with its default arguments."""
     local_scheduler = scheduler.CentralPlannerScheduler()
     return local_scheduler
예제 #6
0
    def deploy(self, completedDrops=None, foreach=None):
        """
        Creates the DROPs represented by all the graph specs contained in
        this session, effectively deploying them.

        The session must be in the BUILDING state; it is moved to DEPLOYING
        while the drops are created and wired, and to RUNNING at the end.

        :param completedDrops: passed to `self.trigger_drops` once all
            listeners are in place. Defaults to an empty list.
        :param foreach: optional callable invoked once with each drop of the
            graph, in breadth-first order starting from the roots.
        :raises InvalidSessionState: if the session is not in the BUILDING
            state when this method is called.

        NOTE(review): earlier documentation stated that a Pyro Daemon is up
        and running once this method finishes; nothing in this method starts
        one -- confirm whether that still holds elsewhere.
        """

        # A fresh list per call: a mutable default would be shared by every
        # invocation (and by whatever trigger_drops does with it)
        if completedDrops is None:
            completedDrops = []

        status = self.status
        if status != SessionStates.BUILDING:
            raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

        self.status = SessionStates.DEPLOYING

        # Create the real DROPs from the graph specs
        logger.info("Creating DROPs for session %s", self._sessionId)

        self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
        logger.info("%d drops successfully created", len(self._graph))

        for drop,_ in droputils.breadFirstTraverse(self._roots):

            # Register them
            self._drops[drop.uid] = drop

            # Register them with the error handler
            if self._error_status_listener:
                drop.subscribe(self._error_status_listener, eventType='status')
        logger.info("Stored all drops, proceeding with further customization")

        # Start the luigi task that will make sure the graph is executed.
        # If we're not using luigi we still need to know about the leaf
        # drops, so a completion listener is subscribed to each of them
        if self._enable_luigi:
            logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
            task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
            sch = scheduler.CentralPlannerScheduler()
            w = worker.Worker(scheduler=sch)
            w.add(task)
            # The worker is driven through self._run in a daemon thread
            workerT = threading.Thread(None, self._run, args=[w])
            workerT.daemon = True
            workerT.start()
        else:
            leaves = droputils.getLeafNodes(self._roots)
            logger.info("Adding completion listener to leaf drops")
            listener = LeavesCompletionListener(leaves, self)
            for leaf in leaves:
                # Application drops and other drops are subscribed to
                # under different event types
                if isinstance(leaf, AppDROP):
                    leaf.subscribe(listener, 'producerFinished')
                else:
                    leaf.subscribe(listener, 'dropCompleted')
            logger.info("Listener added to leaf drops")

        # We move to COMPLETED the DROPs that we were requested to
        # InputFiredAppDROP are here considered as having to be executed and
        # not directly moved to COMPLETED.
        #
        # This is done in a separate step at the very end, after all drops
        # have been set up, to make sure all event listeners are ready
        self.trigger_drops(completedDrops)

        # Foreach
        if foreach:
            logger.info("Invoking 'foreach' on each drop")
            for drop,_ in droputils.breadFirstTraverse(self._roots):
                foreach(drop)
            logger.info("'foreach' invoked for each drop")

        # Append proxies: each entry names a relationship method (relname)
        # on a local drop, which is called with a DropProxy for the remote one
        logger.info("Creating %d drop proxies", len(self._proxyinfo))
        for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
            proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
            method = getattr(self._drops[local_uid], relname)
            method(proxy, False)

        self.status = SessionStates.RUNNING
        logger.info("Session %s is now RUNNING", self._sessionId)
예제 #7
0
    def run(self):
        """Run the shell `touch` command on this task's output path."""
        target_path = self.output().path
        os.system('touch "%s"' % target_path)


class DepTask(DependencyTriggeredTask):
    """Task that requires SimpleTask and produces /tmp/b-created."""

    def requires(self):
        """Return the list of required tasks: a single SimpleTask."""
        dependency = SimpleTask()
        return [dependency]

    def output(self):
        """Return the local target this task is expected to create."""
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        """Run the shell `touch` command on this task's output path."""
        target_path = self.output().path
        os.system('touch "%s"' % target_path)


if __name__ == '__main__':
    # Demo driver: schedules SimpleTask and DepTask on a single worker
    # several times, removing marker files between runs.
    remove_both = 'rm -f /tmp/a-created /tmp/b-created'
    remove_first = 'rm -f /tmp/a-created'

    os.system(remove_both)
    interface.setup_interface_logging()
    w = worker.Worker(scheduler=scheduler.CentralPlannerScheduler())

    # Each step is (shell command to run beforehand or None, task class);
    # the steps are executed strictly in this order.
    steps = (
        (None, SimpleTask),
        (None, DepTask),
        (remove_both, DepTask),
        (remove_first, DepTask),
    )
    for cleanup_command, task_class in steps:
        if cleanup_command is not None:
            os.system(cleanup_command)
        w.add(task_class())
        w.run()
예제 #8
0
def main():
    """
    Verify existing update markers and process the remaining export dates.

    First checks that every date in the update markers lists all the
    expected bulk mongo tasks, then runs a forced MongoDeleteTask on a
    BulkWorker for every export file date without an update marker.

    :raises AssertionError: if any date in the update markers is missing
        one of the expected bulk tasks.
    """

    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        We need to initiate and get the family name, not just the class name
        MongoDeleteTask => DeleteTask
        @param tasks: task classes accepting a `date` keyword argument
        @return: list of the task_family of each task, as unicode
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():

        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            bulk_task_copy = list(bulk_tasks)
            bulk_task_copy.remove(MongoDeleteTask)
            bulk_task_names = _get_task_names(bulk_task_copy)
        else:
            bulk_task_names = _get_task_names(bulk_tasks)

        # Check that for every date we have all the bulk tasks. This raises
        # explicitly rather than using `assert` so the check is not
        # stripped when Python runs with -O
        missing_tasks = list(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (
                    date, missing_tasks))

    # Get a list of all export files to process; membership is tested on the
    # mapping itself rather than on a freshly built list of its keys
    export_dates = [
        d for d in get_export_file_dates() if d not in update_markers
    ]

    # Run setup_interface_logging to ensure luigi commands
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()

    w = BulkWorker(scheduler=sch)

    for export_date in export_dates:

        log.info('Processing date %s', export_date)
        # We only need to call the mongo delete task, as all other tasks are a requirement
        # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
        w.add(MongoDeleteTask(date=export_date, force=True))
        w.run()
        w.stop()