Example #1
def test_monitor_testpollfrequency(mock_init, mock_poll, mock_wait):
    """
    Test that the polling frequency is working.
    """

    import time

    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Running"
        }
    }

    mock_init.return_value = 0, 2
    mock_poll.return_value = False
    mock_wait.return_value = False
    mock_poll.side_effect = [None, exceptions.PluginattributeError]

    start = time.time()

    with pytest.raises(exceptions.PluginattributeError):

        monitor(jobs)

    end = time.time()

    assert mock_poll.call_count == 2
    assert int(end - start) > 1
Example #2
def test_monitor_update(mock_init, mock_poll, mock_wait, mock_down, mock_save):
    """
    Test that when all jobs complete the method exits.
    """

    jobs = {
        "lbowconf": {
            "update": True,
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 2,
            "hpc1-queue-max": 8
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Running"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Running"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfour": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfive": {
            "resource": "hpc1",
            "laststatus": "Queued"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = True
    mock_poll.side_effect = jobstatus
    mock_wait.return_value = True

    with pytest.raises(exceptions.UpdateExit):

        monitor(jobs)

    assert jobs["lbowconf"]["update"] is False
    assert jobs["jobone"]["laststatus"] == "Finished"
    assert jobs["jobtwo"]["laststatus"] == "Finished"
    assert jobs["jobthree"]["laststatus"] == "Running"
    assert jobs["jobfour"]["laststatus"] == "Running"
    assert jobs["jobfive"]["laststatus"] == "Running"
    assert mock_poll.call_count == 1
    assert mock_wait.call_count == 1
    assert mock_down.call_count == 0
    assert mock_save.call_count == 1
Example #3
def test_monitor_complete2(mock_init, mock_poll, mock_wait, mock_down,
                           mock_save):
    """
    Test that when all jobs complete the method exits.
    """

    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Finished"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Complete"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Submit Error"
        },
        "jobfour": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfive": {
            "resource": "hpc1",
            "laststatus": "Running"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = False
    mock_poll.side_effect = jobstatus
    mock_wait.return_value = False
    mock_down.return_value = None
    mock_save.return_value = None

    monitor(jobs)

    assert jobs["jobone"]["laststatus"] == "Complete"
    assert jobs["jobtwo"]["laststatus"] == "Complete"
    assert jobs["jobthree"]["laststatus"] == "Submit Error"
    assert jobs["jobtwo"]["laststatus"] == "Complete"
    assert jobs["jobtwo"]["laststatus"] == "Complete"
    assert mock_down.call_count == 3
    assert mock_save.call_count == 1
Example #4
def recovery(jobs, recoveryfile):
    """Recover a Longbow session.

    This method attempts to recover a failed Longbow session or to reconnect
    to an intentionally disconnected session. It uses the recovery file,
    written shortly after submission, to recover the whole session. Once the
    data has been loaded from the recovery file and a new job data structure
    has been populated, this method re-enters the monitoring function to
    continue where it left off. Any jobs that finished in the meantime will
    be marked accordingly and then file staging will continue.

    Required inputs are:
    jobs (dictionary): The Longbow jobs data structure to populate.
    recoveryfile (string): The file name of the recovery file (not its full
                           path); it is looked up under ~/.longbow.

    """

    jobfile = os.path.join(os.path.expanduser('~/.longbow'), recoveryfile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Load the jobs recovery file.
    if os.path.isfile(jobfile):

        LOG.info("Recovery file found.")

        _, _, jobparams = configuration.loadconfigs(jobfile)

        # Copy to jobs so when exceptions are raised the structure is
        # available.
        for param in jobparams:

            jobs[param] = jobparams[param]

    else:

        raise exceptions.RequiredinputError(
            "Recovery file could not be found. Make sure you have not "
            "deleted the recovery file and that you are providing just the "
            "file name, not the full path.")

    # Rejoin at the monitoring stage. This will assume that all jobs that
    # are no longer in the queue have completed.
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
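
A minimal usage sketch for recovery(), assuming exceptions and LOG are importable as in the function above; the file name is the placeholder used throughout these examples:

jobs = {}

try:
    # Only the file name is passed; recovery() looks under ~/.longbow itself.
    recovery(jobs, "recovery-YYMMDD-HHMMSS")

except exceptions.RequiredinputError as err:
    # Raised when the recovery file cannot be found under ~/.longbow.
    LOG.error(err)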
Example #5
def update(jobs, updatefile):
    """Trigger update of a disconnected Longbow session.

    This method will start the update process on an existing but disconnected
    Longbow session. All job statuses will be checked and updated in the
    recovery file and all output files will be synced before disconnecting."""

    jobfile = os.path.join(os.path.expanduser('~/.longbow'), updatefile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Load the jobs recovery file.
    if os.path.isfile(jobfile):

        LOG.info("Recovery file found.")

        _, _, jobparams = configuration.loadconfigs(jobfile)

        # Copy to jobs so when exceptions are raised the structure is
        # available.
        for param in jobparams:

            jobs[param] = jobparams[param]

    else:

        raise exceptions.RequiredinputError(
            "Recovery file could not be found. Make sure you have not "
            "deleted the recovery file and that you are providing just the "
            "file name, not the full path.")

    # Add the updater key
    jobs["lbowconf"]["update"] = True

    # Enter monitoring loop
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
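
A hedged sketch of how a caller might trigger a one-shot update of a disconnected session; catching UpdateExit at the call site is an assumption based on the update-mode behaviour exercised in Example #2:

jobs = {}

try:
    update(jobs, "recovery-YYMMDD-HHMMSS")

except exceptions.UpdateExit:
    # In update mode the monitor exits again once job statuses have been
    # refreshed and the recovery file saved (see test_monitor_update above).
    LOG.info("Disconnected session has been updated.")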
Example #6
def test_monitor_except(mock_init, mock_poll, mock_wait, mock_down, mock_save):
    """
    Check that if an exception is thrown on the save recovery file, that
    it does not bring the whole application down.
    """

    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Finished"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Complete"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Submit Error"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = False
    mock_down.return_value = None
    mock_save.side_effect = IOError
    mock_wait.return_value = False

    monitor(jobs)

    assert jobs["jobone"]["laststatus"] == "Complete"
    assert mock_save.call_count == 1
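
The defensive pattern this test exercises can be sketched roughly as follows; save() is a hypothetical stand-in for the mocked recovery-file write, not the library's real API:

def save(jobs):
    """Hypothetical stand-in for the mocked recovery-file write."""
    raise IOError("disk full")

try:
    save(jobs)

except IOError:
    # A failed recovery-file write is logged rather than fatal, so the
    # monitoring loop keeps running.
    LOG.warning("Could not write the recovery file, monitoring continues.")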
Example #7
def longbow(jobs, parameters):
    """Entry point at the top level of the Longbow library.

    Being the top level method that makes calls on the Longbow library.
    This is a good place to link against Longbow if a developer does not want
    to link against the executable, or if low level linking is not needed or is
    over-kill.

    Required inputs are:
    parameters (dictionary): A dictionary containing the parameters and
                             overrides from the command-line.

    """
    # A failure at this level will result in jobs being killed off before
    # escalating the exception to trigger graceful exit.

    # Load configurations and initialise Longbow data structures.
    jobparams = configuration.processconfigs(parameters)

    # Copy to jobs so when exceptions are raised the structure is available.
    for param in jobparams:

        jobs[param] = jobparams[param]

    # Test all connections specified in the job configurations.
    shellwrappers.checkconnections(jobs)

    # Test that the hosts listed in the job configuration file have their
    # scheduler environments listed; if not, then test and save them.
    scheduling.checkenv(jobs, parameters["hosts"])

    # Test that the applications listed in the job configuration file are
    # available and that the executable is present.
    if parameters["nochecks"] is False:

        applications.checkapp(jobs)

    # Process the jobs' command-line arguments and find files for
    # staging.
    applications.processjobs(jobs)

    # Create the job file and add it to the list of files that need
    # uploading.
    scheduling.prepare(jobs)

    # Stage all of the job files along with the scheduling script.
    staging.stage_upstream(jobs)

    # Submit all jobs.
    scheduling.submit(jobs)

    # Process the disconnect function.
    if parameters["disconnect"] is True:

        raise exceptions.DisconnectException

    # Monitor all jobs.
    scheduling.monitor(jobs)

    # Clean up all jobs
    staging.cleanup(jobs)
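
A minimal calling sketch for linking against the library directly; the parameter keys shown are the ones longbow() reads above, the values are placeholder assumptions, and a real invocation would normally carry the full set of command-line overrides:

jobs = {}
parameters = {
    "hosts": "hosts.conf",   # placeholder path, assumption
    "nochecks": False,
    "disconnect": False
}

try:
    longbow(jobs, parameters)

except exceptions.DisconnectException:
    # Raised deliberately when parameters["disconnect"] is True; the session
    # can be rejoined later with recovery() or update().
    LOG.info("Longbow has disconnected, use recovery or update to rejoin.")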