Example #1
0
def run_jobs(ctx, all, force, fileset, job):
    """Run jobs for read filesets - slower
    """
    # NOTE(review): the docstring above is deliberately left untouched -
    # this looks like a click group callback, in which case the docstring
    # doubles as the --help text. Confirm before extending it.
    #
    # Parameters, as used below:
    #   ctx     -- click context; ctx.obj is handed to the job helpers as
    #              ``app`` and provides ``instance_path`` for job.cfg
    #   all     -- if neither a subcommand, ``all`` nor ``job`` is given,
    #              the help text is printed and nothing is run
    #   force   -- run jobs even if an equal or newer version already
    #              finished for a fileset
    #   fileset -- iterable of fileset names or md5 prefixes used to
    #              restrict the filesets that are processed
    #   job     -- iterable of job names to run
    #
    # The function always finishes by calling ctx.exit().
    logger = logging.getLogger('bagbunker.run-jobs')
    logger.setLevel(logging.INFO)
    app = ctx.obj
    db.create_all()
    storage = STORAGE
    jobconfig = read_config(os.path.join(ctx.obj.instance_path, 'job.cfg'))

    if not ctx.invoked_subcommand and not all and not job:
        click.echo(ctx.command.get_help(ctx))
        ctx.exit()

    # Job selection precedence: invoked subcommand, then explicitly named
    # jobs, then the list from job.cfg, then every registered job.
    if ctx.invoked_subcommand:
        joblist = [ctx.invoked_subcommand]
    elif job:
        joblist = job   # multiple
    else:
        configured = jobconfig.get('job', {}).get('list', '').split(' ')
        # A real list (not a lazy filter object) so that an empty
        # configuration is falsy and falls back to all registered jobs.
        joblist = [name for name in configured if name] or JOB.keys()
    cmdlist = [(JOB[name].inputs[0].topics, ctx.command.get_command(ctx, name))
               for name in joblist]

    if not cmdlist:
        click.echo("No jobs to run")
        ctx.exit()

    # Maps each fileset that has work to do to a tuple of
    # (topics to read from its bags, [(topics for the job, command), ...]).
    matrix = OrderedDict()
    filesets = storage.active_intact_filesets\
                      .filter(Fileset.read_succeeded.is_(True))
    if fileset:
        # ``reduce`` is a builtin on Python 2 only; functools has it on both.
        from functools import reduce
        # OR together one "name equals / md5 starts with" clause per value.
        fileset_filter = reduce(lambda acc, clause: acc | clause, (
            ((Fileset.name == wanted) |
             (Fileset.md5.like('{}%'.format(wanted))))
            for wanted in fileset   # multiple
        ))
        filesets = filesets.filter(fileset_filter)
    for fset in filesets:
        bag = fset.bag
        if bag is None:
            fileset_topics = set()
        else:
            fileset_topics = {x.topic.name for x in bag.topics}
        topics = set()
        cmds = []
        jobruns = Jobrun.query.filter(Jobrun.fileset == fset)
        # jobruns may be aborted - in this case they neither succeeded nor
        # failed
        # We could create them as failed
        # We could not add them to the DB until they are done
        # job name -> highest version among runs that succeeded or failed
        latest = dict(jobruns.with_entities(Jobrun.name, db.func.max(Jobrun.version))
                      .filter(Jobrun.succeeded.is_(True) | Jobrun.failed.is_(True))
                      .group_by(Jobrun.name))
        for cmdtopics, cmd in cmdlist:
            # Skip jobs for which an equal or newer version already finished.
            if not force and cmd.name in latest and \
               cmd.callback.version <= latest[cmd.name]:
                continue

            # XXX: hack for jobs that don't want messages
            if not cmdtopics:
                cmds.append(((), cmd))
                continue

            # Wildcard: the job receives every topic of the fileset.
            if '*' in cmdtopics:
                topics = topics.union(fileset_topics)
                cmds.append((fileset_topics, cmd))
                continue

            # Only schedule the job if at least one wanted topic exists.
            intersect = fileset_topics.intersection(cmdtopics)
            if intersect:
                topics = topics.union(intersect)
                cmds.append((cmdtopics, cmd))
        if cmds:
            matrix[fset] = topics, cmds

    if not matrix:
        ctx.exit()

    def read_messages(fset, topics):
        """Yield the messages of ``topics`` from all files of ``fset``."""
        if not topics:
            return
        import rosbag
        for bagfile in fset.files:
            rbag = rosbag.Bag(bagfile.path)
            try:
                for msg in rbag.read_messages(topics=topics):
                    yield msg
            finally:
                # Also runs when the generator is closed early, so the bag
                # file handle is never leaked.
                rbag.close()

    # for each fileset, start all registered jobs in parallel - at
    # this point we know that topics a job wants do exist
    for fset, (topics, cmds) in matrix.items():
        logger.info('Starting job run for fileset %s', fset.name)
        async_jobs = [make_async_job(app=app, name=cmd.name,
                                     topics=cmdtopics,
                                     job=partial(ctx.invoke, cmd),
                                     group=cmd.callback.namespace,
                                     version=cmd.callback.version,
                                     fileset_id=fset.id,
                                     config=jobconfig.get(cmd.name, {}))
                      for cmdtopics, cmd in cmds]
        logger.info('Created threads for: %s', [x.name for x in async_jobs])
        milkers = {}
        for async_job in async_jobs:
            name = async_job.thread.name
            thread = Thread(target=async_job_milker, name=name,
                            args=(app, async_job,))
            thread.daemon = True
            thread.start()
            milkers[name] = thread

        stream = read_messages(fset, topics)
        try:
            for msg in stream:
                # Drop jobs whose milker thread has already finished.
                async_jobs = [x for x in async_jobs
                              if milkers[x.thread.name].is_alive()]
                if not async_jobs:
                    break
                # XXX: replace with namedtuple
                topic, _, _ = msg
                for async_job in async_jobs:
                    if topic in async_job.topics:
                        async_job.msg_queue.put(msg)
        finally:
            # Close explicitly so an early break (or an error) releases the
            # currently open bag right away.
            stream.close()

        # Tell the remaining jobs that no more messages will arrive.
        for async_job in async_jobs:
            async_job.msg_queue.put(Done)

        for milker in milkers.values():
            milker.join()

        trigger_update_listing_entries([fset.id])

    # Never call subcommand directly
    ctx.exit()
Example #2
0
 def list_commands(self, ctx):
     """Return the names of all registered jobs (the keys of ``JOB``).

     ``ctx`` is accepted but not used.
     """
     job_names = JOB.keys()
     return job_names