def main_driver(args): """ Driver method for the spider script. """ starttime = time.time() signal.alarm(TIMEOUT_MINS * 60 + 60) # Get all the schedd ads schedd_ads = [] if args.collectors_file: schedd_ads = get_schedds_from_file(args, collectors_file=args.collectors_file) del ( args.collectors_file ) # sending a file through postprocessing will cause problems. else: schedd_ads = get_schedds(args, collectors=args.collectors) logging.warning("&&& There are %d schedds to query.", len(schedd_ads)) pool = multiprocessing.Pool(processes=args.query_pool_size) metadata = collect_metadata() if not args.skip_history: htcondor_es.history.process_histories( schedd_ads=schedd_ads, starttime=starttime, pool=pool, args=args, metadata=metadata, ) # Now that we have the fresh history, process the queues themselves. if args.process_queue: htcondor_es.queues.process_queues( schedd_ads=schedd_ads, starttime=starttime, pool=pool, args=args, metadata=metadata, ) pool.close() pool.join() logging.warning( "@@@ Total processing time: %.2f mins", ((time.time() - starttime) / 60.0) ) return 0
def main(args):
    if not os.path.isfile(args.filename):
        print("...file doesn't exist, querying schedds")
        schedd_ads = get_schedds(args)
        for schedd_ad in schedd_ads[: args.n_schedds_to_query]:
            print("...processing %s" % schedd_ad["Name"])
            get_ads_from_schedd(schedd_ad, args)

    try:
        os.makedirs(args.dump_target)
    except OSError:
        pass  # dir exists

    process_pickle(args.filename, args)
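# A minimal sketch of the cache-then-process flow in main() above: query once,
# dump the classads to a pickle file, and post-process from the cache on later
# runs. dump_ads, the file layout, and the sample ads are illustrative
# assumptions, not the module's real helpers.
import os
import pickle


def dump_ads(ads, filename):
    # exist_ok=True tolerates an existing directory without hiding other OSErrors.
    os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)
    with open(filename, "wb") as fd:
        pickle.dump(ads, fd)


def process_pickle(filename):
    with open(filename, "rb") as fd:
        for ad in pickle.load(fd):
            print("...processing %s" % ad["Name"])


if __name__ == "__main__":
    cache = "dump/schedd_ads.pkl"
    if not os.path.isfile(cache):
        print("...file doesn't exist, querying schedds")
        dump_ads([{"Name": "schedd1"}, {"Name": "schedd2"}], cache)
    process_pickle(cache)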
def main_driver(args):
    os.environ["CMS_HTCONDOR_BROKER"] = "cms-test-mb.cern.ch"
    os.environ["CMS_HTCONDOR_PRODUCER"] = "condor-test"

    schedd_ads = []
    start_time = time.time()
    if args.collectors_file:
        schedd_ads = get_schedds_from_file(args, collectors_file=args.collectors_file)
        del args.collectors_file  # sending a file through postprocessing will cause problems.
    else:
        schedd_ads = get_schedds(args, collectors=args.collectors)

    res = group(
        query_schedd.s(
            sched,
            dry_run=args.dry_run,
            start_time=start_time,
            keep_full_queue_data=args.keep_full_queue_data,
            bunch=args.amq_bunch_size,
        )
        for sched in schedd_ads
    ).apply_async()
    groups = res.get()
    print([g.collect() for g in groups])
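# For reference, the dispatch pattern above in a self-contained form: .s()
# builds a partial task signature, group() applies one signature per schedd in
# parallel, and apply_async() returns a GroupResult. The app name, broker URL,
# and task body are hypothetical stand-ins for the spider's real setup.
import time

from celery import Celery, group

app = Celery("spider_sketch",
             broker="redis://localhost:6379/0",
             backend="redis://localhost:6379/1")


@app.task
def query_schedd(sched, dry_run=False, start_time=None):
    # Stand-in for the real per-schedd query task.
    return sched, 0


def dispatch(schedd_names):
    start_time = time.time()
    res = group(
        query_schedd.s(sched, dry_run=True, start_time=start_time)
        for sched in schedd_names
    ).apply_async()
    # get() blocks until every task in the group has finished.
    return res.get()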
def main_driver(args): """ Driver method for the spider script. """ starttime = time.time() # Get all the schedd ads schedd_ads = get_schedds(args) logging.warning("&&& There are %d schedds to query.", len(schedd_ads)) pool = multiprocessing.Pool(processes=args.query_pool_size) metadata = collect_metadata() if not args.skip_history: htcondor_es.history.process_histories(schedd_ads=schedd_ads, starttime=starttime, pool=pool, args=args, metadata=metadata) # Now that we have the fresh history, process the queues themselves. if args.process_queue: htcondor_es.queues.process_queues(schedd_ads=schedd_ads, starttime=starttime, pool=pool, args=args, metadata=metadata) pool.close() pool.join() logging.warning("@@@ Total processing time: %.2f mins", ((time.time() - starttime) / 60.)) return 0
def main_driver(args): """Gets condor schedds and calls celery `query_schedd` task for each schedd. Important: Tasks are called in order. - ``query_schedd`` task runs `send_data` method. - `send_data` method calls ``process_docs`` task. - ``process_docs`` task runs `convert_to_json`, `amq_post_ads` methods and calls ``post_ads_es`` task. Notes: Submission of tasks to queue entailed with `group`[1] primitive of celery. `group` accepts list of tasks and they are applied in parallel. Details of current task submission: - `query_schedd.si()` creates a signature of `query_schedd` task. This signature will be passed to workers. - `query_schedd` task signature is created for each types(queue, history) of each schedd. -- `htcondor_es.utils.get_schedds_from_file` describes the schedd format. - All task signatures of all types of all schedds are given to `group` primitive to run in parallel. - `query_schedd` task is the initial task, it calls `process_docs` and `post_ads_es` with indirect calls. -- Indirect calls means, for example, it runs `send_data` but `send_data` calls `process_docs` task. - `propagate=False` important. Because, if it is not given, default value is True which raise exception if any schedd query fails. This means that the rest of the schedds in queue will also be terminated. References: [1]: https://docs.celeryproject.org/en/stable/userguide/canvas.html#the-primitives Args: args (argparse.Namespace): Please see main method argument definitions """ start_time = time.time() """int: Used for `start_time` of `query_schedd` task. And used in calculation of task duration. Please see src.htcondor_es.celery.tasks.query_schedd method for usage of `start_time`. """ if args.collectors_file: schedd_ads = get_schedds_from_file(args, collectors_file=args.collectors_file) """list: Htcondor schedds to query. `collectors_file`, which is defined by `COLLECTORS_FILE_LOCATION` in k8s, used to get schedds information. Please see what `htcondor_es.utils.get_schedds_from_file` method returns. """ del args.collectors_file # sending a file through postprocessing will cause problems. else: schedd_ads = get_schedds(args, collectors=args.collectors) """list: Htcondor schedds to query. Not used in current k8s deployment because collectors_file is given.""" #: list: Includes query types: history, queue. _types = [] if not args.skip_history: _types.append(__TYPE_HISTORY) if not args.skip_queues: _types.append(__TYPE_QUEUE) #: celery.result.AsyncResult: Async method to update affiliations. `get()` waits for the results. aff_res = create_affiliation_dir.si().apply_async() aff_res.get() #: celery.result.GroupResult: Async method for group call for query_schedd task res = group( query_schedd.si( sched, dry_run=args.dry_run, start_time=start_time, keep_full_queue_data=args.keep_full_queue_data, chunk_size=args.query_queue_batch_size, bunch=args.amq_bunch_size, query_type=_type, es_index_template=args.es_index_template, feed_es=args.feed_es and _type is __TYPE_HISTORY, ) for _type in _types for sched in schedd_ads ).apply_async() # - Use the get to wait for results. We could also chain it to a chord to process the responses # for logging purposes. # # The propagate false will prevent it to raise an exception if any of the schedds query failed. #: list(tuple): results of `query_schedd` method, i.e [('vocmsXXXX.xxx.xx', 6), ...] 
_query_res = res.get(propagate=False) print("Get schedds query result:", _query_res) if res.failed(): print("At least one of the schedd queries failed") duration = time.time() - start_time print("Duration of whole process: {} seconds".format(round(duration, 2))) if duration > 60*10: # if duration is greater than 10 minutes print("Duration exceeded 10 minutes!") if duration > 60*12: # if duration is greater than 12 minutes print("ATTENTION: Duration exceeded 12 minutes!")
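# The failure-tolerance pattern from the function above in isolation: .si()
# builds an immutable signature (no parent result is ever prepended to its
# arguments), get(propagate=False) collects results without raising on the
# first failure, and res.failed() reports whether any task in the group failed.
# The app, broker URL, and failing task are hypothetical stand-ins.
from celery import Celery, group

app = Celery("spider_sketch",
             broker="redis://localhost:6379/0",
             backend="redis://localhost:6379/1")


@app.task
def query_schedd(sched, query_type="history"):
    if sched == "bad-schedd":
        raise RuntimeError("schedd unreachable")  # simulate one failing schedd
    return sched, 42


def run(schedds, types=("history", "queue")):
    res = group(
        query_schedd.si(sched, query_type=t) for t in types for sched in schedds
    ).apply_async()
    results = res.get(propagate=False)  # exceptions come back as values, not raises
    if res.failed():
        print("At least one of the schedd queries failed")
    return results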