def testCreateSubmission(self):
    """Test create_submission correctly loads dummy information to submission_queue.json"""
    service = 'service'
    presubmit_ids = ['foo', 'bar']
    # Seed the queue with a couple of pre-existing dummy entries.
    submissions = {service: {pre_id: pre_id for pre_id in presubmit_ids}}
    pre_len = len(submissions[service])
    orch = orchestrator.Orchestrator()
    util.save_json(self.queue_loc, submissions)
    new_id = orch.create_submission(service, 'data', 'wf_type',
                                    'wf_name', 'sample')
    submissions = util.get_json(self.queue_loc)
    # Pre-existing entries must survive untouched.
    for pre_id in presubmit_ids:
        self.assertEqual(submissions[service][pre_id], pre_id)
    # Check that new entry was added appropriately.
    self.assertEqual(pre_len + 1, len(submissions[service]))
    self.assertFalse(new_id in presubmit_ids)
    self.assertTrue(new_id in submissions[service])
    # Check that entry has correct data.
    entry = submissions[service][new_id]
    self.assertEqual(entry['status'], 'RECEIVED')
    self.assertEqual(entry['data'], 'data')
    self.assertEqual(entry['wf_id'], 'wf_name')
    self.assertEqual(entry['type'], 'wf_type')
    self.assertEqual(entry['sample'], 'sample')
def set_queue_from_user_json(filepath):
    """
    Read a user JSON config file and queue its contents as workflows.

    Example config::

        {"local": {"NWD119836": {"wf_name": "wdl_UoM_align",
                                 "jsonyaml": "file:///path/NWD119836.json"},
                   "NWD136397": {"wf_name": "wdl_UoM_align",
                                 "jsonyaml": "file:///path/NWD136397.json"}},
         "aws-toil-server": {"NWD119836": {"wf_name": "wdl_UoM_align",
                                           "jsonyaml": "file:///path/NWD119836.json"},
                             "NWD136397": {"wf_name": "wdl_UoM_align",
                                           "jsonyaml": "file:///path/NWD136397.json"}}}

    This would submit two samples each (NWD119836 & NWD136397) to the
    workflow services "local" and "aws-toil-server" respectively, using
    configuration details previously stored for those services in
    stored_templates.json.

    :param filepath: path to the user JSON config file.
    """
    # TODO verify terms match between configs
    user_config = get_json(filepath)
    for wf_service, samples in user_config.items():
        for sample, details in samples.items():
            wf_name = details['wf_name']
            wf_jsonyaml = details['jsonyaml']
            print('Queueing "{}" on "{}" with data: {}'.format(
                wf_name, wf_service, sample))
            queue(wf_service, wf_name, wf_jsonyaml, sample)
def update_submission_run(eval_id, submission_id, run_data):
    """
    Update information for a workflow run.

    Stores ``run_data`` under the 'run' key of the submission entry and
    persists the whole evals file.
    """
    all_evals = get_json(EVALS_PATH)
    all_evals[eval_id][submission_id]['run'] = run_data
    save_json(EVALS_PATH, all_evals)
def update_submission_status(eval_id, submission_id, status):
    """
    Update the status of a submission.

    Overwrites the 'status' field of the entry and persists the file.
    """
    all_evals = get_json(EVALS_PATH)
    all_evals[eval_id][submission_id]['status'] = status
    save_json(EVALS_PATH, all_evals)
def set_json(self, section, service, var2add):
    """
    Add or replace one service's entry in a section of the config file.

    :param section: top-level section name (e.g. 'workflows').
    :param service: key to set inside that section.
    :param var2add: value stored under the key.
    :raises AttributeError: when the config file path is not usable.
    """
    try:
        cfg = get_json(self.config_path)
        cfg.setdefault(section, {})[service] = var2add
        save_json(self.config_path, cfg)
    except AttributeError:
        raise AttributeError('The config file needs to be set: ' + self.config_path)
def get_submissions(wes_id, status='RECEIVED'):
    """Return all ids with the requested status."""
    queue = get_json(queue_path())
    if wes_id not in queue:
        return []
    matching = []
    for sub_id, bundle in queue[wes_id].items():
        if bundle['status'] == status:
            matching.append(sub_id)
    return matching
def get_submissions(eval_id, status='RECEIVED'):
    """
    Return all submissions to a queue matching the specified status.

    RECEIVED is hard-coded on all job creations atm.
    """
    all_evals = get_json(EVALS_PATH)
    matches = []
    for sub_id, bundle in all_evals[eval_id].items():
        # 'in' (not '==') lets callers pass either a single status string
        # (substring match) or a collection of statuses — kept as-is.
        if bundle['status'] in status:
            matches.append(sub_id)
    return matches
def create_submission(wes_id, submission_data, wf_type, wf_name, sample):
    """
    Queue a new job for a WES endpoint and return its submission id.

    :param wes_id: ID of the target workflow service.
    :param submission_data: payload stored under 'data'.
    :param wf_type: workflow type, stored under 'type'.
    :param wf_name: workflow name, stored under 'wf_id'.
    :param sample: sample name for the run.
    :return: newly generated submission id.
    """
    queue = get_json(queue_path())
    # Timestamp-derived id. NOTE(review): '%d%m%d...' repeats the day
    # field — possibly meant '%Y%m%d...'; left unchanged for id-format
    # compatibility.
    submission_id = dt.datetime.now().strftime('%d%m%d%H%M%S%f')
    entry = {
        'status': 'RECEIVED',
        'data': submission_data,
        'wf_id': wf_name,
        'type': wf_type,
        'sample': sample
    }
    queue.setdefault(wes_id, {})[submission_id] = entry
    save_json(queue_path(), queue)
    logger.info(" Queueing Job for '{}' endpoint:"
                "\n - submission ID: {}".format(wes_id, submission_id))
    return submission_id
def testConfigs(self):
    """
    Make sure that the various config fetching functions reads the right
    data from the config file.

    This test checks that the following functions return as expected:
        config.wf_config()
        config.trs_config()
        config.wes_config()
    """
    c = config.Config(self.config_loc)
    cases = [('workflows', c.wf_config),
             ('toolregistries', c.trs_config),
             ('workflowservices', c.wes_config)]
    for entry, get_func in cases:
        # X_config() returns whatever is stored under its section.
        config_file = util.get_json(c.config_path)
        config_file[entry] = entry
        util.save_json(c.config_path, config_file)
        self.assertEqual(get_func(), entry)
def create_submission(eval_id, submission_data, wes_id, type=None):
    """
    Submit a new job request to an evaluation queue.

    :param eval_id: ID of the evaluation queue.
    :param submission_data: payload stored under 'data'.
    :param wes_id: target workflow service for the submission.
    :param type: optional submission type (name kept for interface
        compatibility even though it shadows the builtin).
    :return: newly generated submission id.
    """
    all_evals = get_json(EVALS_PATH)
    submission_id = dt.datetime.now().strftime('%d%m%d%H%M%S%f')
    entry = {
        'status': 'RECEIVED',
        'data': submission_data,
        'wes_id': wes_id,
        'type': type
    }
    all_evals.setdefault(eval_id, {})[submission_id] = entry
    save_json(EVALS_PATH, all_evals)
    logger.info("Created new job submission:\n - submission ID: {}".format(
        submission_id))
    logger.debug("\n - evaluation queue: {} ({})"
                 "\n - data:\n{}".format(
                     eval_id, config.eval_config[eval_id]['workflow_id'],
                     json.dumps(submission_data, indent=2)))
    return submission_id
def testAddWorkflow(self):
    """Test that add_workflow() adds entries to the config properly."""
    c = config.Config(self.config_loc)
    # Write the empty file.
    c.add_workflow('cactus', 'Toil', 'wf_url', 'workflow_attachments',
                   'submission_type', 'trs_id', 'version_id')
    config_file = util.get_json(self.config_loc)
    self.assertTrue('workflows' in config_file)
    self.assertTrue('cactus' in config_file['workflows'])
    entry = config_file['workflows']['cactus']
    # Each stored field should round-trip the value passed in.
    expected = {'submission_type': 'submission_type',
                'trs_id': 'trs_id',
                'version_id': 'version_id',
                'workflow_url': 'wf_url',
                'workflow_attachments': 'workflow_attachments',
                'workflow_type': 'Toil'}
    for key, value in expected.items():
        self.assertEqual(entry[key], value)
def show(self):
    """
    Show current application configuration.

    Prints a human-readable summary of the configured workflows, tool
    registries (TRS), and workflow services (WES).
    """
    orchestrator_config = get_json(self.config_path)
    # One line per workflow: "<name>\t[<workflow_type>]".
    wfs = '\n'.join('{}\t[{}]'.format(
        k, orchestrator_config['workflows'][k]['workflow_type'])
        for k in orchestrator_config['workflows'])
    # One line per tool registry: "<id>:\t<host>".
    trs = '\n'.join('{}:\t{}'.format(
        k, orchestrator_config['toolregistries'][k]['host'])
        for k in orchestrator_config['toolregistries'])
    # One line per workflow service: "<id>:\t<host>".
    wes = '\n'.join('{}:\t{}'.format(
        k, orchestrator_config['workflowservices'][k]['host'])
        for k in orchestrator_config['workflowservices'])
    # NOTE(review): the template's internal line breaks were lost in a
    # whitespace-mangled copy of this file; the layout below is a
    # reconstruction — confirm against version control.
    display = heredoc(
        '''
        Orchestrator Options:

        Parametrized Workflows
        (Workflow Name [Workflow Type])
        ---------------------------------------------------------------------------
        {wfs}

        Tool Registries
        (TRS ID: Host Address)
        ---------------------------------------------------------------------------
        {trs}

        Workflow Services
        (WES ID: Host Address)
        ---------------------------------------------------------------------------
        {wes}
        ''', {
            'wfs': wfs,
            'trs': trs,
            'wes': wes
        })
    print(display)
def monitor():
    """Monitor progress of workflow jobs."""
    import pandas as pd
    pd.set_option('display.width', 100)
    # Refresh the status table forever (interrupt to stop).
    while True:
        submissions = get_json(queue_path())
        statuses = [monitor_service(wf_service)
                    for wf_service in submissions]
        # Flatten {service: {run: row}} dicts into (service, run) keys
        # so pandas builds a two-level index.
        combined = {(i, j): status[i][j]
                    for status in statuses
                    for i in status.keys()
                    for j in status[i].keys()}
        status_df = pd.DataFrame.from_dict(combined, orient='index')
        clear_output(wait=True)
        os.system('clear')
        display(status_df)
        sys.stdout.flush()
        time.sleep(2)
def wes_config(self):
    """Return the 'workflowservices' section of the config file."""
    cfg = get_json(self.config_path)
    return cfg['workflowservices']
def trs_config(self):
    """Return the 'toolregistries' section of the config file."""
    cfg = get_json(self.config_path)
    return cfg['toolregistries']
def update_submission_run(wes_id, submission_id, param, status):
    """Update one field of a submission's 'run' record and persist it."""
    queue = get_json(queue_path())
    queue[wes_id][submission_id]['run'][param] = status
    save_json(queue_path(), queue)
def get_submission_bundle(eval_id, submission_id):
    """
    Return the stored info for a single submission in an evaluation queue.

    (Previous docstring — "Submit a new job request to an evaluation
    queue." — was copy-pasted from create_submission; this function only
    reads.)
    """
    return get_json(EVALS_PATH)[eval_id][submission_id]
def monitor_service(wf_service):
    """
    Returns a dictionary of all of the jobs under a single wes service
    appropriate for displaying as a pandas dataframe.

    :param wf_service: ID of the WES endpoint whose jobs are summarized.
    :return: {wf_service: {run_id: row-dict of display fields}}
    """
    # NOTE(review): this function was reconstructed from a
    # whitespace-mangled source; the statement nesting below is the most
    # plausible reading — confirm against version control before relying
    # on exact ordering.
    status_dict = {}
    submissions = get_json(queue_path())
    for run_id in submissions[wf_service]:
        sample_name = submissions[wf_service][run_id]['sample']
        if 'run' not in submissions[wf_service][run_id]:
            # Submission queued locally; no run record created yet.
            status_dict.setdefault(wf_service, {})[run_id] = {
                'wf_id': submissions[wf_service][run_id]['wf_id'],
                'run_id': '-',
                'sample_name': sample_name,
                'run_status': 'QUEUED',
                'start_time': '-',
                'elapsed_time': '-'
            }
        else:
            if submissions[wf_service][run_id]['status'] in [
                'COMPLETE', 'SYSTEM_ERROR', 'EXECUTOR_ERROR'
            ]:
                # Terminal status already stored locally: report stored
                # data without contacting the WES endpoint again.
                run = submissions[wf_service][run_id]['run']
                # Some records use 'workflow_id', others 'run_id' —
                # presumably differing WES API versions; TODO confirm.
                try:
                    wf_id = run['workflow_id']
                except KeyError:
                    wf_id = run['run_id']
                status_dict.setdefault(wf_service, {})[run_id] = {
                    'wf_id': submissions[wf_service][run_id]['wf_id'],
                    'run_id': wf_id,
                    'sample_name': sample_name,
                    'run_status': submissions[wf_service][run_id]['status'],
                    'start_time': run['start_time'],
                    'elapsed_time': run['elapsed_time']
                }
            else:
                try:
                    run = submissions[wf_service][run_id]['run']
                    if 'run_id' not in run and 'workflow_id' not in run:
                        # Run record exists but no remote id assigned yet.
                        status_dict.setdefault(wf_service, {})[run_id] = {
                            'wf_id': submissions[wf_service][run_id]['wf_id'],
                            'run_id': '-',
                            'sample_name': sample_name,
                            'run_status': 'INITIALIZING',
                            'start_time': '-',
                            'elapsed_time': '-'
                        }
                    else:
                        client = WESClient(wes_config()[wf_service])
                        try:
                            wf_id = run['workflow_id']
                        except KeyError:
                            wf_id = run['run_id']
                        # Poll the endpoint for fresh state unless the
                        # cached state is already terminal.
                        if 'state' not in run:
                            run['state'] = client.get_run_status(
                                wf_id)['state'].upper()
                        elif run['state'].upper() not in [
                            'COMPLETED', 'OK', 'EXECUTOR_ERROR',
                            'SYSTEM_ERROR'
                        ]:
                            run['state'] = client.get_run_status(
                                wf_id)['state'].upper()
                        if run['state'] in [
                            'QUEUED', 'INITIALIZING', 'RUNNING'
                        ]:
                            # Still in flight: compute elapsed time live.
                            etime = convert_timedelta(
                                dt.datetime.now() -
                                ctime2datetime(run['start_time']))
                        elif 'elapsed_time' not in run:
                            etime = '0h:0m:0s'
                        else:
                            # Terminal state reached: persist it and reuse
                            # the final recorded elapsed time.
                            update_submission(wf_service, run_id, 'status',
                                              run['state'])
                            etime = run['elapsed_time']
                        update_submission_run(wf_service, run_id,
                                              'elapsed_time', etime)
                        status_dict.setdefault(wf_service, {})[run_id] = {
                            'wf_id': submissions[wf_service][run_id]['wf_id'],
                            'run_id': wf_id,
                            'sample_name': sample_name,
                            'run_status': run['state'],
                            'start_time': run['start_time'],
                            'elapsed_time': etime
                        }
                except ConnectionError:
                    # Endpoint unreachable — emit a placeholder row rather
                    # than failing the whole monitor pass.
                    status_dict.setdefault(wf_service, {})[run_id] = {
                        'wf_id': 'ConnectionError',
                        'run_id': '-',
                        'sample_name': sample_name,
                        'run_status': '-',
                        'start_time': '-',
                        'elapsed_time': '-'
                    }
    return status_dict
def get_submission_bundle(self, wes_id, submission_id):
    """Return the submission's info."""
    submissions = get_json(self.queue_path)
    return submissions[wes_id][submission_id]
def update_submission(self, wes_id, submission_id, param, status):
    """Update the status of a submission."""
    queue = get_json(self.queue_path)
    queue[wes_id][submission_id][param] = status
    save_json(self.queue_path, queue)
def trs_config():
    """Return the 'toolregistries' section of the config file."""
    cfg = get_json(config_path())
    return cfg['toolregistries']
def get_submission_bundle(wes_id, submission_id):
    """Return the submission's info."""
    submissions = get_json(queue_path())
    return submissions[wes_id][submission_id]
def wf_config(self):
    """Return the 'workflows' section of the config file."""
    cfg = get_json(self.config_path)
    return cfg['workflows']
def wf_config():
    """Return the 'workflows' section of the config file."""
    cfg = get_json(config_path())
    return cfg['workflows']
def wes_config():
    """Return the 'workflowservices' section of the config file."""
    cfg = get_json(config_path())
    return cfg['workflowservices']