def test_monitor_testpollfrequency(mock_init, mock_poll, mock_wait):
    """
    Test that the polling frequency is working.
    """
    import time

    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Running"
        }
    }

    mock_init.return_value = 0, 2
    mock_poll.return_value = False
    mock_wait.return_value = False
    mock_poll.side_effect = [None, exceptions.PluginattributeError]

    start = time.time()

    with pytest.raises(exceptions.PluginattributeError):

        monitor(jobs)

    end = time.time()

    assert mock_poll.call_count == 2
    assert int(end - start) > 1
def test_monitor_update(mock_init, mock_poll, mock_wait, mock_down,
                        mock_save):
    """
    Test that update mode refreshes all job statuses once and then exits.
    """
    jobs = {
        "lbowconf": {
            "update": True,
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 2,
            "hpc1-queue-max": 8
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Running"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Running"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfour": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfive": {
            "resource": "hpc1",
            "laststatus": "Queued"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = True
    mock_poll.side_effect = jobstatus
    mock_wait.return_value = True

    with pytest.raises(exceptions.UpdateExit):

        monitor(jobs)

    assert jobs["lbowconf"]["update"] is False
    assert jobs["jobone"]["laststatus"] == "Finished"
    assert jobs["jobtwo"]["laststatus"] == "Finished"
    assert jobs["jobthree"]["laststatus"] == "Running"
    assert jobs["jobfour"]["laststatus"] == "Running"
    assert jobs["jobfive"]["laststatus"] == "Running"
    assert mock_poll.call_count == 1
    assert mock_wait.call_count == 1
    assert mock_down.call_count == 0
    assert mock_save.call_count == 1
def test_monitor_complete2(mock_init, mock_poll, mock_wait, mock_down,
                           mock_save):
    """
    Test that when all jobs complete the method exits.
    """
    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Finished"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Complete"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Submit Error"
        },
        "jobfour": {
            "resource": "hpc1",
            "laststatus": "Queued"
        },
        "jobfive": {
            "resource": "hpc1",
            "laststatus": "Running"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = False
    mock_poll.side_effect = jobstatus
    mock_wait.return_value = False
    mock_down.return_value = None
    mock_save.return_value = None

    monitor(jobs)

    assert jobs["jobone"]["laststatus"] == "Complete"
    assert jobs["jobtwo"]["laststatus"] == "Complete"
    assert jobs["jobthree"]["laststatus"] == "Submit Error"
    assert jobs["jobfour"]["laststatus"] == "Complete"
    assert jobs["jobfive"]["laststatus"] == "Complete"
    assert mock_down.call_count == 3
    assert mock_save.call_count == 1
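# Note: "jobstatus" is the poll side-effect helper referenced above via
# "mock_poll.side_effect = jobstatus" but not shown in this excerpt. Below is
# a minimal sketch consistent with the assertions in the two tests above
# (Queued -> Running, Running -> Finished); the signature, return value and
# exact logic of the real helper in the test module may differ.
def jobstatus(jobs, *_args):
    """
    Advance each fake job one step through the status lifecycle.
    """
    for job in [jobname for jobname in jobs if jobname != "lbowconf"]:

        if jobs[job]["laststatus"] == "Queued":

            jobs[job]["laststatus"] = "Running"

        elif jobs[job]["laststatus"] == "Running":

            jobs[job]["laststatus"] = "Finished"

    # Report that at least one job changed state (assumption; the real helper
    # might instead defer to the mock's configured return_value).
    return True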
def recovery(jobs, recoveryfile):
    """Recover a Longbow session.

    This method attempts to recover a failed Longbow session, or to reconnect
    to an intentionally disconnected one. It loads the recovery file, written
    shortly after job submission, and uses it to recover the whole session.
    Once the data has been loaded and a new job data structure populated, this
    method re-enters the monitoring function to continue where it left off.
    Any jobs that finished in the meantime will be marked accordingly and file
    staging will then continue.

    Required inputs are:

    jobs (dictionary): The Longbow jobs data structure to populate.

    recoveryfile (string): The name of the recovery file (the file name only,
                           not the full path).
    """
    jobfile = os.path.join(os.path.expanduser('~/.longbow'), recoveryfile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Load the jobs recovery file.
    if os.path.isfile(jobfile):

        LOG.info("Recovery file found.")

        _, _, jobparams = configuration.loadconfigs(jobfile)

        # Copy to jobs so when exceptions are raised the structure is
        # available.
        for param in jobparams:

            jobs[param] = jobparams[param]

    else:

        raise exceptions.RequiredinputError(
            "Recovery file could not be found, make sure you haven't deleted "
            "the recovery file and that you are not providing the full path, "
            "just the file name is needed.")

    # Rejoin at the monitoring stage. This will assume that all jobs that
    # are no longer in the queue have completed.
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
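# A minimal usage sketch for recovery() (illustrative only, not part of the
# library): the caller supplies an empty jobs dictionary plus the bare file
# name of a recovery file that lives under ~/.longbow. The helper name
# "_example_recover_session" and the file name are hypothetical.
def _example_recover_session():
    jobs = {}

    try:

        recovery(jobs, "recovery-YYMMDD-HHMMSS")

    except exceptions.RequiredinputError as err:

        # The recovery file was missing or a full path was given by mistake.
        LOG.error(err)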
def update(jobs, updatefile):
    """Trigger an update of a disconnected Longbow session.

    This method starts the update process on an existing but disconnected
    Longbow session. All job statuses are checked and updated in the recovery
    file, and all output files are synced, before disconnecting again.

    Required inputs are:

    jobs (dictionary): The Longbow jobs data structure to populate.

    updatefile (string): The name of the recovery file for the session to be
                         updated (the file name only, not the full path).
    """
    jobfile = os.path.join(os.path.expanduser('~/.longbow'), updatefile)

    LOG.info("Attempting to find the recovery file '{0}'".format(jobfile))

    # Load the jobs recovery file.
    if os.path.isfile(jobfile):

        LOG.info("Recovery file found.")

        _, _, jobparams = configuration.loadconfigs(jobfile)

        # Copy to jobs so when exceptions are raised the structure is
        # available.
        for param in jobparams:

            jobs[param] = jobparams[param]

    else:

        raise exceptions.RequiredinputError(
            "Recovery file could not be found, make sure you haven't deleted "
            "the recovery file and that you are not providing the full path, "
            "just the file name is needed.")

    # Add the updater key.
    jobs["lbowconf"]["update"] = True

    # Enter the monitoring loop.
    scheduling.monitor(jobs)

    # Cleanup the remote working directory.
    staging.cleanup(jobs)
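# A minimal usage sketch for update() (illustrative only): update mode is
# exercised by test_monitor_update above, where the monitor signals the end of
# its single refresh pass by raising exceptions.UpdateExit. Assuming that
# exception propagates out of update(), a caller might handle it as shown; the
# helper name "_example_update_session" and the file name are hypothetical.
def _example_update_session():
    jobs = {}

    try:

        update(jobs, "recovery-YYMMDD-HHMMSS")

    except exceptions.UpdateExit:

        LOG.info("Update of the disconnected session is complete.")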
def test_monitor_except(mock_init, mock_poll, mock_wait, mock_down,
                        mock_save):
    """
    Check that an exception thrown while saving the recovery file does not
    bring the whole application down.
    """
    jobs = {
        "lbowconf": {
            "recoveryfile": "recovery-YYMMDD-HHMMSS",
            "hpc1-queue-slots": 1,
            "hpc1-queue-max": 2
        },
        "jobone": {
            "resource": "hpc1",
            "laststatus": "Finished"
        },
        "jobtwo": {
            "resource": "hpc1",
            "laststatus": "Complete"
        },
        "jobthree": {
            "resource": "hpc1",
            "laststatus": "Submit Error"
        }
    }

    mock_init.return_value = 0, 1
    mock_poll.return_value = False
    mock_down.return_value = None
    mock_save.side_effect = IOError
    mock_wait.return_value = False

    monitor(jobs)

    assert jobs["jobone"]["laststatus"] == "Complete"
    assert mock_save.call_count == 1
def longbow(jobs, parameters):
    """Entry point at the top level of the Longbow library.

    This is the top-level method that makes calls on the Longbow library. It
    is a good place to link against Longbow if a developer does not want to
    link against the executable, or if low-level linking is not needed or is
    overkill.

    Required inputs are:

    jobs (dictionary): A dictionary that will be populated with the Longbow
                       job data structures.

    parameters (dictionary): A dictionary containing the parameters and
                             overrides from the command-line.
    """
    # A failure at this level will result in jobs being killed off before
    # escalating the exception to trigger graceful exit.

    # Load configurations and initialise Longbow data structures.
    jobparams = configuration.processconfigs(parameters)

    # Copy to jobs so when exceptions are raised the structure is available.
    for param in jobparams:

        jobs[param] = jobparams[param]

    # Test all connection/s specified in the job configurations.
    shellwrappers.checkconnections(jobs)

    # Test the hosts listed in the jobs configuration file have their
    # scheduler environments listed, if not then test and save them.
    scheduling.checkenv(jobs, parameters["hosts"])

    # Test that the applications listed in the job configuration file are
    # available and that the executable is present.
    if parameters["nochecks"] is False:

        applications.checkapp(jobs)

    # Process the jobs command-line arguments and find files for staging.
    applications.processjobs(jobs)

    # Create the job file and add it to the list of files that need
    # uploading.
    scheduling.prepare(jobs)

    # Stage all of the job files along with the scheduling script.
    staging.stage_upstream(jobs)

    # Submit all jobs.
    scheduling.submit(jobs)

    # Process the disconnect function.
    if parameters["disconnect"] is True:

        raise exceptions.DisconnectException

    # Monitor all jobs.
    scheduling.monitor(jobs)

    # Clean up all jobs.
    staging.cleanup(jobs)
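# A minimal sketch of linking against the library directly, as the docstring
# of longbow() suggests. Only the "hosts", "nochecks" and "disconnect" keys
# appear in the code above; a real invocation needs whatever further
# command-line parameters configuration.processconfigs() expects, and the
# values and helper name shown here are illustrative only.
def _example_library_call():
    jobs = {}

    parameters = {
        "hosts": "hosts.conf",   # hypothetical hosts configuration file
        "nochecks": False,       # run the application/executable checks
        "disconnect": False      # stay connected and monitor the jobs
    }

    try:

        longbow(jobs, parameters)

    except exceptions.DisconnectException:

        LOG.info("Disconnecting from the session as requested.")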