예제 #1
0
    def __init__(self, production, category=None):
        """
        Set up the LALInference pipeline interface for a production.

        Parameters
        ----------
        production
            The production to be handled; its ``pipeline`` attribute must
            read ``lalinference`` (compared case-insensitively).
        category : optional
            Forwarded unchanged to the parent initialiser.

        Raises
        ------
        PipelineException
            If the production is configured for a different pipeline.
        """
        super(LALInference, self).__init__(production, category)

        # One logger per pipeline object, bound to the production's event.
        logger = logging.AsimovLogger(event=production.event)
        self.logger = logger

        pipeline_name = production.pipeline.lower()
        if pipeline_name != "lalinference":
            raise PipelineException
예제 #2
0
def resultslinks(event, update, root):
    """
    Find all available results for a given event and symlink them under ``root``.

    For every result reported by a production a symbolic link is created at
    ``<root>/<event name>/<production name>/<result file name>`` which points
    at the location returned by ``production.results(result)``.

    Parameters
    ----------
    event
        The event to look up; passed to ``gitlab.find_events`` as the only
        entry of ``subset``.
    update
        Forwarded to ``gitlab.find_events`` as its ``update`` argument.
    root
        Directory under which the tree of links is created.

    Notes
    -----
    A link which already exists is reported and left untouched, so the
    command can be re-run over an existing tree without aborting.
    """
    _, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=False)
    for found_event in events:
        click.secho(f"{found_event.title}")
        # NOTE(review): this logger is never used below; it is kept in case
        # constructing it has side effects -- confirm before removing.
        logger = logging.AsimovLogger(event=found_event.event_object)
        for production in found_event.productions:
            try:
                # ``.items()`` is kept on purpose: a non-mapping return value
                # raises the AttributeError which is handled below.
                for result, _meta in production.results().items():
                    # Resolve the target once; it is both printed and linked.
                    target = production.results(result)
                    print(
                        f"{production.event.name}/{production.name}/{result}, {target}"
                    )
                    link_dir = os.path.join(root, production.event.name,
                                            production.name)
                    pathlib.Path(link_dir).mkdir(parents=True, exist_ok=True)
                    link_path = os.path.join(link_dir, result.split('/')[-1])
                    try:
                        os.symlink(f"{target}", link_path)
                    except FileExistsError:
                        # Left over from a previous run (or a name clash);
                        # do not abort the remaining links because of it.
                        click.echo(f"\t{link_path} already exists; skipping.")
            except AttributeError:
                # Presumably a production which has no results to offer yet
                # -- TODO confirm; such productions are skipped silently.
                pass
예제 #3
0
    def __init__(self, production, category=None):
        """
        Set up the RIFT pipeline interface for a production.

        Parameters
        ----------
        production
            The production to be handled; its ``pipeline`` attribute must
            read ``rift`` (compared case-insensitively).
        category : optional
            Forwarded unchanged to the parent initialiser.

        Raises
        ------
        PipelineException
            If the production is configured for a different pipeline.
        """
        super(Rift, self).__init__(production, category)

        logger = logging.AsimovLogger(event=production.event)
        self.logger = logger

        if production.pipeline.lower() != "rift":
            raise PipelineException

        # ``self.production`` is presumably set by the parent initialiser.
        # The bootstrap setting is optional metadata and defaults to False.
        meta = self.production.meta
        self.bootstrap = meta['bootstrap'] if "bootstrap" in meta else False
예제 #4
0
    def __init__(self, production, category=None):
        """
        Store the production and work out which category applies to it.

        Parameters
        ----------
        production
            The production this object manages.
        category : optional
            The category to use. When it is missing (or otherwise falsy) the
            category is chosen from the production's name: ``C01_offline``
            if the name contains ``Prod``, and ``online`` if it does not.
        """
        self.production = production

        if category:
            self.category = category
        elif "Prod" in production.name:
            self.category = "C01_offline"
        else:
            self.category = "online"

        logger = logging.AsimovLogger(event=production.event)
        self.logger = logger
예제 #5
0
    def __init__(self, production, category=None):
        """
        Set up the BayesWave pipeline interface for a production.

        Parameters
        ----------
        production
            The production to be handled; its ``pipeline`` attribute must
            read ``bayeswave`` (compared case-insensitively).
        category : optional
            Forwarded to the parent initialiser. ``self.category`` is
            assigned again afterwards, from the ``general``/``category``
            configuration value or, failing that, ``C01_offline``.

        Raises
        ------
        PipelineException
            If the production is configured for a different pipeline.
        """
        super(BayesWave, self).__init__(production, category)
        self.logger = logging.AsimovLogger(event=production.event)
        if production.pipeline.lower() != "bayeswave":
            raise PipelineException

        try:
            self.category = config.get("general", "category")
        except Exception:
            # The setting could not be read, so fall back to a default.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit are not swallowed here.
            self.category = "C01_offline"
            self.logger.info("Assuming C01_offline calibration.")
예제 #6
0
def submit(event, update):
    """
    Build and submit the DAGs of productions which are ready to run.

    Events are looked up with ``gitlab.find_events`` (``event`` is passed as
    the only entry of ``subset``), and the productions returned by
    ``get_all_latest()`` on each event object are examined in turn:

    * productions in one of the skipped states are left alone;
    * productions whose pipeline is not in ``known_pipelines`` are ignored;
    * a ``restart`` production is cleaned and its DAG resubmitted;
    * anything else has its DAG built and then submitted; the status becomes
      ``running`` on success and ``stuck`` if submission fails.

    Parameters
    ----------
    event
        The event to process.
    update
        Forwarded to ``gitlab.find_events`` as its ``update`` argument.
    """
    # States for which there is nothing to submit.
    skipped_states = {
        "running", "stuck", "wait", "processing", "uploaded",
        "finished", "manual", "cancelled", "stopped"
    }

    _, repository = connect_gitlab()
    milestone = config.get("olivaw", "milestone")
    found = gitlab.find_events(repository,
                               milestone=milestone,
                               subset=[event],
                               update=update)

    for current in found:
        logger = logging.AsimovLogger(event=current.event_object)

        for production in current.event_object.get_all_latest():
            status = production.status.lower()
            if status in skipped_states:
                continue

            pipeline_name = production.pipeline.lower()
            if pipeline_name not in known_pipelines:
                continue
            pipe = known_pipelines[pipeline_name](production, "C01_offline")

            if status == "restart":
                # Start again from a clean slate with the existing DAG.
                pipe.clean()
                pipe.submit_dag()
                continue

            try:
                pipe.build_dag()
            except PipelineException:
                logger.error("The pipeline failed to build a DAG file.",
                             production=production)

            # Submission is attempted even when the build step failed.
            try:
                pipe.submit_dag()
                production.status = "running"
            except PipelineException as e:
                production.status = "stuck"
                logger.error(
                    f"The pipeline failed to submit the DAG file to the cluster. {e}",
                    production=production)
예제 #7
0
def build(event):
    """
    Create the run configuration files for productions which are ready to run.

    Events are looked up with ``gitlab.find_events`` (``event`` is passed as
    the only entry of ``subset``). For each production returned by
    ``get_all_latest()`` which is not in a skipped state and for which
    ``get_configuration()`` raises ``ValueError``, a ``<name>.ini`` file is
    made with ``production.make_config`` and added to the event repository
    under the production's category.

    Parameters
    ----------
    event
        The event to process.

    Notes
    -----
    Failures are logged rather than raised: a ``DescriptionException`` while
    creating the configuration, or any exception while adding the file to the
    repository, is reported through the event logger and the loop carries on
    with the next production.
    """
    # States for which no configuration should be (re)built.
    skipped_states = {
        "running", "stuck", "wait", "finished", "uploaded",
        "cancelled", "stopped"
    }

    _, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=False)
    for found_event in events:
        click.echo(f"Working on {found_event.title}")
        logger = logging.AsimovLogger(event=found_event.event_object)
        ready_productions = found_event.event_object.get_all_latest()
        for production in ready_productions:
            click.echo(f"\tWorking on production {production.name}")
            # Compare case-insensitively, as the submit and monitor commands
            # do, so that e.g. "Running" is skipped as well.
            if production.status.lower() in skipped_states:
                continue

            try:
                production.get_configuration()
            except ValueError:
                # A ValueError here is the cue to create a configuration.
                pass
            else:
                continue

            config_name = f"{production.name}.ini"
            try:
                production.make_config(config_name)
                click.echo(f"Production config {production.name} created.")
                logger.info("Run configuration created.",
                            production=production)

                try:
                    found_event.event_object.repository.add_file(
                        config_name,
                        os.path.join(f"{production.category}", config_name))
                    logger.info(
                        "Configuration committed to event repository.",
                        production=production)
                except Exception as e:
                    logger.error(
                        f"Configuration could not be committed to repository.\n{e}",
                        production=production)

            except DescriptionException:
                logger.error("Run configuration failed",
                             production=production,
                             channels=["file", "mattermost"])
예제 #8
0
def results(event, update):
    """
    Find and print all available results for a given event.

    One line is printed per result, in the form
    ``<event name>/<production name>/<result>, <location>`` where the
    location is whatever ``production.results(result)`` returns.

    Parameters
    ----------
    event
        The event to look up; passed to ``gitlab.find_events`` as the only
        entry of ``subset``.
    update
        Forwarded to ``gitlab.find_events`` as its ``update`` argument.

    Notes
    -----
    A production whose results cannot be listed never stops the command:
    an ``AttributeError`` is skipped silently and any other exception is
    reported on standard error before moving on to the next production.
    """
    _, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=False)
    for found_event in events:
        click.secho(f"{found_event.title}")
        # NOTE(review): this logger is never used below; it is kept in case
        # constructing it has side effects -- confirm before removing.
        logger = logging.AsimovLogger(event=found_event.event_object)
        for production in found_event.productions:
            try:
                for result, _meta in production.results().items():
                    print(
                        f"{production.event.name}/{production.name}/{result}, {production.results(result)}"
                    )
            except AttributeError:
                # Presumably a production which has no results to offer yet
                # -- TODO confirm; such productions are skipped silently.
                pass
            except Exception as e:
                # Keep going, but say so. ``Exception`` rather than a bare
                # ``except`` so that Ctrl-C still interrupts the command.
                label = getattr(production, "name", production)
                click.echo(f"\tCould not list results for {label}: {e}",
                           err=True)
예제 #9
0
 def __init__(self, production, category=None):
     """
     Set up the Bilby pipeline interface for a production.

     Parameters
     ----------
     production
         The production to be handled; its ``pipeline`` attribute must
         read ``bilby`` (compared case-insensitively).
     category : optional
         Forwarded unchanged to the parent initialiser.

     Raises
     ------
     PipelineException
         If the production is configured for a different pipeline.
     """
     super(Bilby, self).__init__(production, category)

     logger = logging.AsimovLogger(event=production.event)
     self.logger = logger

     pipeline_name = production.pipeline.lower()
     if pipeline_name != "bilby":
         raise PipelineException
예제 #10
0
def monitor(event, update, dry_run):
    """
    Monitor condor jobs' status, and collect logging information.

    Events are looked up with ``gitlab.find_events`` and every production
    whose status is in ``ACTIVE_STATES`` is examined:

    * a production marked ``stop`` has its job ejected and becomes
      ``stopped``;
    * a production with a ``job id`` in its metadata is looked up on condor,
      and is marked ``stuck`` if the condor job is stuck while the event is
      ``running`` or ``processing``;
    * a production without a job id (or for which the lookup raises
      ``ValueError``) is handed to its pipeline object, which runs the
      post-completion or post-processing steps, collects a completed job, or
      attempts to resurrect the job.

    The event state is then set to ``running`` or ``finished`` from the
    counts gathered so far.

    Parameters
    ----------
    event
        The event to monitor; passed to ``gitlab.find_events`` as the only
        entry of ``subset``.
    update
        Forwarded to ``gitlab.find_events`` as its ``update`` argument.
    dry_run
        When true, a ``stop`` request is only reported and condor is not
        queried.
    """
    server, repository = connect_gitlab()
    events = gitlab.find_events(repository,
                                milestone=config.get("olivaw", "milestone"),
                                subset=[event],
                                update=update,
                                repo=True)

    for event in events:
        stuck = 0
        running = 0
        finish = 0
        click.secho(f"{event.title}", bold=True)
        on_deck = [
            production for production in event.productions
            if production.status.lower() in ACTIVE_STATES
        ]
        for production in on_deck:

            click.secho(f"\t{production.name}", bold=True)

            # NOTE(review): the logger is not used further down; it is only
            # constructed outside dry-run mode.
            if not dry_run:
                logger = logging.AsimovLogger(event=event.event_object)
            else:
                logger = None

            # Deal with jobs which need to be stopped first
            if production.status.lower() == "stop":
                pipe = known_pipelines[production.pipeline.lower()](
                    production, "C01_offline")
                if not dry_run:
                    pipe.eject_job()
                    production.status = "stopped"
                    click.echo(f"\t\t{production.name} stopped")
                else:
                    click.echo(f"\t\t{production.name} --> stopped")
                continue

            # Get the condor jobs
            try:
                if "job id" in production.meta:
                    if not dry_run:
                        job = condor.CondorJob(production.meta['job id'])
                    else:
                        click.echo("\t\tRunning under condor")
                else:
                    raise ValueError  # Pass to the exception handler

                if not dry_run:
                    # Read the condor status once; a ValueError raised here
                    # is passed to the handler below like any other.
                    job_status = job.status.lower()

                    if event.state == "running" and job_status == "stuck":
                        click.echo("\t\tJob is stuck on condor")
                        event.state = "stuck"
                        production.status = "stuck"
                        stuck += 1
                        production.meta['stage'] = 'production'
                    elif event.state == "processing" and job_status == "stuck":
                        click.echo("\t\tPost-processing is stuck on condor")
                        production.status = "stuck"
                        stuck += 1
                        production.meta['stage'] = "post"
                    else:
                        running += 1

            except ValueError as e:
                # NOTE(review): nothing in this recovery path consults
                # ``dry_run``, so pipeline actions and status changes are
                # also carried out during a dry run -- confirm intended.
                click.echo(e)
                click.echo(
                    f"\t\t{production.name}\t{production.status.lower()}")
                if production.pipeline.lower() in known_pipelines:
                    click.echo("Investigating...")
                    pipe = known_pipelines[production.pipeline.lower()](
                        production, "C01_offline")

                    # Productions marked "stop" never get this far: they are
                    # dealt with at the top of the loop.
                    if production.status.lower() == "finished":
                        click.echo("Finished")
                        pipe.after_completion()

                    elif production.status.lower() == "processing":
                        # Need to check the upload has completed
                        try:
                            pipe.after_processing()
                        except ValueError as error:
                            click.echo(error)
                            production.meta['stage'] = "after processing"

                    elif pipe.detect_completion() and production.status.lower(
                    ) == "running":
                        # The job has been completed, collect its assets
                        production.meta['job id'] = None
                        finish += 1
                        production.status = "finished"
                        pipe.after_completion()

                    else:
                        # It looks like the job has been evicted from the cluster
                        click.echo(f"Attempting to rescue {production.name}")
                        try:
                            pipe.resurrect()
                        except Exception:
                            # Any failure to resurrect marks the job as
                            # stuck; ``Exception`` rather than a bare
                            # ``except`` so that Ctrl-C is not swallowed.
                            production.status = "stuck"
                            production.meta['error'] = "resurrection error"

                if production.status == "stuck":
                    event.state = "stuck"
                production.event.issue_object.update_data()

            # Evaluated after every production, using the running totals.
            if (running > 0) and (stuck == 0):
                event.state = "running"
            elif (stuck == 0) and (running == 0) and (finish > 0):
                event.state = "finished"