def __init__(self, production, category=None):
    """
    Set up an interface to a LALInference production.

    Parameters
    ----------
    production : Production
        The production this pipeline run belongs to.
    category : str, optional
        The repository category for configuration files; passed
        through to the parent class unchanged.

    Raises
    ------
    PipelineException
        If the production's pipeline is not "lalinference".
    """
    super(LALInference, self).__init__(production, category)
    # Fix: dropped the redundant `logger = ` chained local, which was
    # assigned but never used; only the attribute is needed.
    self.logger = logging.AsimovLogger(event=production.event)
    # Guard: this class must only wrap lalinference productions.
    if production.pipeline.lower() != "lalinference":
        raise PipelineException
def resultslinks(event, update, root):
    """
    Find all available results for a given event and symlink each result
    file into a ``root/<event>/<production>/`` directory tree.

    Parameters
    ----------
    event : str
        The event to process (passed as a subset filter to gitlab).
    update : bool
        Whether to refresh event data from the issue tracker.
    root : str
        Directory under which the symlink tree is created.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=False)
    for event in events:
        click.secho(f"{event.title}")
        # NOTE(review): this logger is never used below, but the
        # constructor may have side effects (log-file setup) — retained.
        logger = logging.AsimovLogger(event=event.event_object)
        for production in event.productions:
            try:
                for result, meta in production.results().items():
                    print(
                        f"{production.event.name}/{production.name}/{result}, {production.results(result)}"
                    )
                    # Use pathlib consistently instead of mixing os.path.join.
                    link_dir = pathlib.Path(root) / production.event.name / production.name
                    link_dir.mkdir(parents=True, exist_ok=True)
                    try:
                        os.symlink(
                            f"{production.results(result)}",
                            f"{root}/{production.event.name}/{production.name}/{result.split('/')[-1]}"
                        )
                    except FileExistsError:
                        # Fix: re-running the command used to crash here;
                        # an existing link means the work is already done.
                        pass
            except AttributeError:
                # Productions without a results() interface are skipped.
                pass
def __init__(self, production, category=None):
    """
    Set up an interface to a RIFT production.

    Parameters
    ----------
    production : Production
        The production this pipeline run belongs to.
    category : str, optional
        The repository category for configuration files; passed
        through to the parent class unchanged.

    Raises
    ------
    PipelineException
        If the production's pipeline is not "rift".
    """
    super(Rift, self).__init__(production, category)
    # Fix: dropped the redundant `logger = ` chained local (unused).
    self.logger = logging.AsimovLogger(event=production.event)
    if production.pipeline.lower() != "rift":
        raise PipelineException
    # Idiom: dict.get replaces the membership-test-then-index pattern.
    self.bootstrap = self.production.meta.get('bootstrap', False)
def __init__(self, production, category=None):
    """
    Base pipeline interface: record the production and resolve the
    repository category for its configuration files.

    Parameters
    ----------
    production : Production
        The production this pipeline run belongs to.
    category : str, optional
        Explicit repository category. When omitted, productions whose
        name contains "Prod" are filed under "C01_offline" and all
        others under "online" — presumably a naming convention for
        offline vs. online analyses; TODO confirm.
    """
    self.production = production
    if not category:
        if "Prod" in production.name:
            self.category = "C01_offline"
        else:
            self.category = "online"
    else:
        self.category = category
    # Fix: dropped the redundant `logger = ` chained local (unused).
    self.logger = logging.AsimovLogger(event=production.event)
def __init__(self, production, category=None):
    """
    Set up an interface to a BayesWave production.

    Parameters
    ----------
    production : Production
        The production this pipeline run belongs to.
    category : str, optional
        The repository category for configuration files; passed
        through to the parent class unchanged.

    Raises
    ------
    PipelineException
        If the production's pipeline is not "bayeswave".
    """
    super(BayesWave, self).__init__(production, category)
    # Fix: dropped the redundant `logger = ` chained local (unused).
    self.logger = logging.AsimovLogger(event=production.event)
    if production.pipeline.lower() != "bayeswave":
        raise PipelineException
    try:
        self.category = config.get("general", "category")
    except Exception:
        # Fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. Fall back to the default
        # calibration category when the option is missing.
        self.category = "C01_offline"
        self.logger.info("Assuming C01_offline calibration.")
def submit(event, update):
    """
    Submit the run configuration files for a given event for jobs which
    are ready to run. If no event is specified then all of the events
    will be processed.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update)
    for event in events:
        logger = logging.AsimovLogger(event=event.event_object)
        ready_productions = event.event_object.get_all_latest()
        for production in ready_productions:
            # Skip productions already in flight or in a terminal state.
            if production.status.lower() in {"running", "stuck", "wait",
                                             "processing", "uploaded",
                                             "finished", "manual",
                                             "cancelled", "stopped"}:
                continue
            if production.status.lower() == "restart":
                # A restart cleans the old workspace before resubmitting.
                if production.pipeline.lower() in known_pipelines:
                    pipe = known_pipelines[production.pipeline.lower()](
                        production, "C01_offline")
                    pipe.clean()
                    pipe.submit_dag()
            else:
                # Fix: removed a dead commented-out try/except block that
                # previously cluttered this branch.
                if production.pipeline.lower() in known_pipelines:
                    pipe = known_pipelines[production.pipeline.lower()](
                        production, "C01_offline")
                    try:
                        pipe.build_dag()
                    except PipelineException:
                        # NOTE(review): a failed build still falls through
                        # to the submit attempt below — preserved as-is,
                        # but worth confirming this is intended.
                        logger.error(
                            "The pipeline failed to build a DAG file.",
                            production=production)
                    try:
                        pipe.submit_dag()
                        production.status = "running"
                    except PipelineException as e:
                        production.status = "stuck"
                        logger.error(
                            f"The pipeline failed to submit the DAG file to the cluster. {e}",
                            production=production)
def build(event):
    """
    Create the run configuration files for a given event for jobs which
    are ready to run. If no event is specified then all of the events
    will be processed.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=False)
    for event in events:
        click.echo(f"Working on {event.title}")
        logger = logging.AsimovLogger(event=event.event_object)
        ready_productions = event.event_object.get_all_latest()
        for production in ready_productions:
            click.echo(f"\tWorking on production {production.name}")
            # Skip productions already in flight or in a terminal state.
            if production.status in {"running", "stuck", "wait",
                                     "finished", "uploaded",
                                     "cancelled", "stopped"}:
                continue
            try:
                # A ValueError here means no configuration exists yet;
                # the return value itself is unused (probe only).
                configuration = production.get_configuration()
            except ValueError:
                try:
                    # NOTE(review): `rundir` is read but never used —
                    # kept because config.get may be acting as a
                    # configuration sanity check; confirm before removing.
                    rundir = config.get("general", "rundir_default")
                    production.make_config(f"{production.name}.ini")
                    click.echo(f"Production config {production.name} created.")
                    logger.info("Run configuration created.",
                                production=production)
                    try:
                        event.event_object.repository.add_file(
                            f"{production.name}.ini",
                            os.path.join(f"{production.category}",
                                         f"{production.name}.ini"))
                        logger.info(
                            "Configuration committed to event repository.",
                            production=production)
                    except Exception as e:
                        logger.error(
                            f"Configuration could not be committed to repository.\n{e}",
                            production=production)
                except DescriptionException as e:
                    # Fix: the captured exception was previously discarded;
                    # include it so the failure is diagnosable from the log.
                    logger.error(f"Run configuration failed.\n{e}",
                                 production=production,
                                 channels=["file", "mattermost"])
def results(event, update):
    """
    Find all available results for a given event and print one line per
    result with its location.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=False)
    for event in events:
        click.secho(f"{event.title}")
        # NOTE(review): this logger is never used below, but the
        # constructor may have side effects (log-file setup) — retained.
        logger = logging.AsimovLogger(event=event.event_object)
        for production in event.productions:
            try:
                for result, meta in production.results().items():
                    print(
                        f"{production.event.name}/{production.name}/{result}, {production.results(result)}"
                    )
            except Exception:
                # Fix: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Best-effort listing:
                # productions without usable results are skipped.
                pass
def __init__(self, production, category=None):
    """
    Set up an interface to a Bilby production.

    Parameters
    ----------
    production : Production
        The production this pipeline run belongs to.
    category : str, optional
        The repository category for configuration files; passed
        through to the parent class unchanged.

    Raises
    ------
    PipelineException
        If the production's pipeline is not "bilby".
    """
    super(Bilby, self).__init__(production, category)
    # Fix: dropped the redundant `logger = ` chained local (unused).
    self.logger = logging.AsimovLogger(event=production.event)
    if production.pipeline.lower() != "bilby":
        raise PipelineException
def monitor(event, update, dry_run):
    """
    Monitor condor jobs' status, and collect logging information.

    Walks every production in an active state, stops jobs flagged
    "stop", queries condor for the rest, and attempts recovery
    (post-processing, completion collection, or resurrection) when a
    job is no longer visible on the cluster. Event state is rolled up
    from the per-production counters at the end.

    NOTE(review): this body was reconstructed from a whitespace-mangled
    original; the statement grouping (especially placement of
    issue_object.update_data()) should be confirmed against history.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=True)
    for event in events:
        stuck = 0
        running = 0
        ready = 0
        finish = 0
        click.secho(f"{event.title}", bold=True)
        on_deck = [production for production in event.productions
                   if production.status.lower() in ACTIVE_STATES]
        for production in on_deck:
            click.secho(f"\t{production.name}", bold=True)
            # NOTE(review): logger is constructed but not referenced in
            # this function; retained in case the constructor has side
            # effects (log-file setup).
            if not dry_run:
                logger = logging.AsimovLogger(event=event.event_object)
            else:
                logger = None
            # Deal with jobs which need to be stopped first
            if production.status.lower() == "stop":
                pipe = known_pipelines[production.pipeline.lower()](
                    production, "C01_offline")
                if not dry_run:
                    pipe.eject_job()
                    production.status = "stopped"
                    click.echo(f"\t\t{production.name} stopped")
                else:
                    # Fix: missing f-prefix printed the literal braces.
                    click.echo(f"\t\t{production.name} --> stopped")
                continue
            # Get the condor jobs
            try:
                if "job id" in production.meta:
                    if not dry_run:
                        job = condor.CondorJob(production.meta['job id'])
                    else:
                        click.echo("\t\tRunning under condor")
                else:
                    raise ValueError  # Pass to the exception handler
                if not dry_run:
                    if job.status.lower() == "running":
                        pass
                    if job.status.lower() == "processing":
                        pass
                    if event.state == "running" and job.status.lower() == "stuck":
                        click.echo("\t\tJob is stuck on condor")
                        event.state = "stuck"
                        production.status = "stuck"
                        stuck += 1
                        production.meta['stage'] = 'production'
                    elif event.state == "processing" and job.status.lower() == "stuck":
                        click.echo("\t\tPost-processing is stuck on condor")
                        production.status = "stuck"
                        stuck += 1
                        production.meta['stage'] = "post"
                    else:
                        running += 1
            except ValueError as e:
                # Job is not (or no longer) known to condor: investigate.
                click.echo(e)
                click.echo(f"\t\t{production.name}\t{production.status.lower()}")
                if production.pipeline.lower() in known_pipelines:
                    click.echo("Investigating...")
                    pipe = known_pipelines[production.pipeline.lower()](
                        production, "C01_offline")
                    if production.status.lower() == "stop":
                        pipe.eject_job()
                        production.status = "stopped"
                    elif production.status.lower() == "finished":
                        click.echo("Finished")
                        pipe.after_completion()
                    elif production.status.lower() == "processing":
                        # Need to check the upload has completed
                        try:
                            pipe.after_processing()
                        except ValueError as e:
                            click.echo(e)
                            production.meta['stage'] = "after processing"
                    elif pipe.detect_completion() and production.status.lower() == "running":
                        # The job has been completed, collect its assets
                        production.meta['job id'] = None
                        finish += 1
                        production.status = "finished"
                        pipe.after_completion()
                    else:
                        # It looks like the job has been evicted from
                        # the cluster; try to resubmit it.
                        click.echo(f"Attempting to rescue {production.name}")
                        try:
                            pipe.resurrect()
                        except Exception:
                            # Fix: was a bare `except:`; anything fatal
                            # (KeyboardInterrupt) now propagates.
                            production.status = "stuck"
                            production.meta['error'] = "resurrection error"
                if production.status == "stuck":
                    event.state = "stuck"
            # Push any state changes back to the issue tracker.
            production.event.issue_object.update_data()
        # Roll per-production counters up into the event state.
        if (running > 0) and (stuck == 0):
            event.state = "running"
        elif (stuck == 0) and (running == 0) and (finish > 0):
            event.state = "finished"