def run_jobs(ctx, all, force, fileset, job):
    """Run jobs for read filesets - slower"""
    logger = logging.getLogger('bagbunker.run-jobs')
    logger.setLevel(logging.INFO)
    app = ctx.obj
    db.create_all()
    storage = STORAGE
    jobconfig = read_config(os.path.join(ctx.obj.instance_path, 'job.cfg'))

    if not ctx.invoked_subcommand and not all and not job:
        click.echo(ctx.command.get_help(ctx))
        ctx.exit()

    if ctx.invoked_subcommand:
        joblist = [ctx.invoked_subcommand]
    elif job:
        joblist = job  # multiple
    else:
        joblist = filter(None, jobconfig.get('job', {}).get('list', '').split(' ')) \
            or JOB.keys()

    cmdlist = [(JOB[name].inputs[0].topics, ctx.command.get_command(ctx, name))
               for name in joblist]
    if not cmdlist:
        click.echo('No jobs to run')
        ctx.exit()

    MATRIX = OrderedDict()
    filesets = storage.active_intact_filesets\
                      .filter(Fileset.read_succeeded.is_(True))
    if fileset:
        fileset_filter = reduce(lambda x, y: x | y, (
            ((Fileset.name == x) | (Fileset.md5.like('{}%'.format(x))))
            for x in fileset  # multiple
        ))
        filesets = filesets.filter(fileset_filter)

    for fileset in filesets:
        bag = fileset.bag
        if bag is None:
            fileset_topics = set()
        else:
            fileset_topics = set([x.topic.name for x in bag.topics])
        topics = set()
        cmds = []
        jobruns = Jobrun.query.filter(Jobrun.fileset == fileset)
        # jobruns may be aborted - in this case they neither succeeded nor failed.
        # We could create them as failed, or not add them to the DB until they
        # are done.
        latest = dict(jobruns.with_entities(Jobrun.name, db.func.max(Jobrun.version))
                      .filter(Jobrun.succeeded.is_(True) | Jobrun.failed.is_(True))
                      .group_by(Jobrun.name))
        for cmdtopics, cmd in cmdlist:
            # Skip jobs whose latest finished run already is at the current
            # job version, unless force is given.
            if not force and cmd.name in latest and \
                    cmd.callback.version <= latest[cmd.name]:
                continue

            # XXX: hack for jobs that don't want messages
            if not cmdtopics:
                cmds.append(((), cmd))
                continue

            if '*' in cmdtopics:
                topics = topics.union(fileset_topics)
                cmds.append((fileset_topics, cmd))
                continue

            intersect = fileset_topics.intersection(cmdtopics)
            if intersect:
                topics = topics.union(intersect)
                cmds.append((cmdtopics, cmd))
        if cmds:
            MATRIX[fileset] = topics, cmds

    if not MATRIX:
        ctx.exit()

    # For each fileset, start all registered jobs in parallel - at this
    # point we know that the topics a job wants do exist.
    for fileset, (topics, cmds) in MATRIX.items():
        logger.info('Starting job run for fileset %s', fileset.name)
        async_jobs = [make_async_job(app=app,
                                     name=cmd.name,
                                     topics=cmdtopics,
                                     job=partial(ctx.invoke, cmd),
                                     group=cmd.callback.namespace,
                                     version=cmd.callback.version,
                                     fileset_id=fileset.id,
                                     config=jobconfig.get(cmd.name, {}))
                      for cmdtopics, cmd in cmds]
        logger.info('Created threads for: %s', [x.name for x in async_jobs])

        # One daemon "milker" thread per job drains that job's message queue.
        milkers = {}
        for async_job in async_jobs:
            name = async_job.thread.name
            thread = Thread(target=async_job_milker, name=name,
                            args=(app, async_job,))
            thread.daemon = True
            thread.start()
            milkers[name] = thread

        def messages():
            if not topics:
                return
            import rosbag
            for file in fileset.files:
                rbag = rosbag.Bag(file.path)
                for msg in rbag.read_messages(topics=topics):
                    yield msg

        # Fan each message out to every job subscribed to its topic, dropping
        # jobs whose milker thread has died.
        for msg in messages():
            async_jobs = [x for x in async_jobs
                          if milkers[x.thread.name].is_alive()]
            if not async_jobs:
                break
            for async_job in async_jobs:
                # XXX: replace with namedtuple
                topic, _, _ = msg
                if topic in async_job.topics:
                    async_job.msg_queue.put(msg)

        # Signal end-of-stream and wait for all milkers to finish.
        for async_job in async_jobs:
            async_job.msg_queue.put(Done)
        for milker in milkers.values():
            milker.join()
        trigger_update_listing_entries([fileset.id])

    # Never call subcommand directly
    ctx.exit()
def list_commands(self, ctx):
    return JOB.keys()