def get_jobs_per_project(projects):
    """Collect jobs created in the last 24h, grouped by project and state.

    Args:
        projects (list): List of DNAnexus project handler objects
            (each must support ``.describe()``).

    Returns:
        tuple: ``(project2jobs, project_no_run)`` where

            * ``project2jobs`` (dict): maps ``(project_name, project_id)``
              -> job state -> list of job names, for jobs created in the
              last 24 hours.
            * ``project_no_run`` (list): names of projects with no jobs
              created in the last 24 hours.
    """
    project2jobs = defaultdict(lambda: defaultdict(list))
    project_no_run = []
    for project in projects:
        # Cache describe() once per project — the original called the
        # API twice (once for "id", once for "name").
        project_desc = project.describe()
        project_id = project_desc["id"]
        project_name = project_desc["name"]
        log.info(f'Get job per {project_name} started')
        jobs = list(dx.find_jobs(project=project_id, created_after="-24h"))
        if jobs:
            for job in jobs:
                # Likewise: one describe() per job instead of two.
                job_desc = dx.DXJob(job["id"]).describe()
                project2jobs[(project_name, project_id)][
                    job_desc["state"]].append(job_desc["name"])
        else:
            project_no_run.append(project_name)
    return project2jobs, project_no_run
def dnanexus_workflows_get(event, context):
    """Handle GET /workflows/{workflow_id}.

    Args:
        event (dict): has a key "workflow_id" that's been taken from the
            URL. This is the id that was generated when the POST request
            was made, and the dnanexus executions we care about should be
            tagged with it. Also carries "headers" with an "Authorization"
            entry used to build the DNAnexus security context.
        context: an AWS context object; only ``aws_request_id`` is read,
            and only when building the 404 error response.

    Returns:
        dict: a 404 error dict (errorType/httpStatus/requestId/message)
        when no matching workflow exists, otherwise a WES-style response
        with "state", "workflow_id", "workflow_log" and "task_logs".
    """
    auth_header = set_dx_authorization(event["headers"]["Authorization"])
    wes_workflow_id = event["workflow_id"]

    # First try to find the dxWDL job that's the parent job of everything.
    try:
        base_job = list(
            dxpy.find_jobs(
                properties={"wes_id": wes_workflow_id},
                name="WES dxWDL Runner",
                return_handler=True))[0]
    except IndexError:
        # No job tagged with this WES id -> 404.
        return {
            "errorType": "NotFound",
            "httpStatus": "404",
            "requestId": context.aws_request_id,
            "message": "Workflow {} was not found".format(wes_workflow_id)
        }

    child_jobs = list(
        dxpy.find_jobs(root_execution=base_job.get_id(), return_handler=True))
    child_job_ids = [j.get_id() for j in child_jobs]

    response = {
        "state": dx_to_wes_state(base_job.describe()["state"]),
        "workflow_id": "",
        "workflow_log": {
            "start_time": "",
            "end_time": "",
            "stdout": "",
            "stderr": "",
            "exit_code": -1
        },
        "task_logs": []
    }

    def get_logs_for_job(dx_job_id):
        """Retrieve the combined stdout/stderr log for a single DXJob."""
        dx_exe_path = os.path.abspath("bin/dx")
        cmd = [
            "dx", "watch", "-q", "--no-timestamps", "--get-streams",
            "--no-follow", dx_job_id
        ]
        # PATH/PYTHONPATH are extended so the bundled "dx" executable and
        # its libraries resolve; stderr is folded into stdout so a single
        # stream comes back.
        proc = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env={
                "DX_SECURITY_CONTEXT": auth_header,
                "PYTHONPATH": ':'.join([
                    os.environ.get("PYTHONPATH", ""),
                    os.path.dirname(os.path.dirname(dx_exe_path))
                ]),
                "PATH": ':'.join(
                    [os.environ["PATH"], os.path.dirname(dx_exe_path)])
            })
        # stderr half of the tuple is always None (redirected above).
        stdout, _ = proc.communicate()
        return stdout

    # Fetch all job logs concurrently. Use the pool as a context manager
    # so its workers are released — the original leaked the pool.
    with Pool(8) as pool:
        jobs_to_logs = dict(
            zip(child_job_ids, pool.map(get_logs_for_job, child_job_ids)))

    time_fmt = "{:%Y-%m-%dT%H:%M:%S}"
    for job_id in child_job_ids:
        job_desc = dxpy.DXJob(job_id).describe()
        task_name = job_desc["executableName"]

        # Timestamps are epoch milliseconds and may be missing or None for
        # jobs that never started/stopped; fall back to "" in those cases.
        # Narrowed from bare except: KeyError (missing key), TypeError
        # (None / 1000), and the documented fromtimestamp() failures.
        try:
            start_time = time_fmt.format(
                datetime.datetime.fromtimestamp(
                    job_desc["startedRunning"] / 1000))
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            start_time = ""
        try:
            end_time = time_fmt.format(
                datetime.datetime.fromtimestamp(
                    job_desc["stoppedRunning"] / 1000))
        except (KeyError, TypeError, ValueError, OverflowError, OSError):
            end_time = ""

        response["task_logs"].append({
            "name": task_name + ":" + job_id,
            "start_time": start_time,
            "end_time": end_time,
            # .get() replaces the bare try/except around the dict lookup.
            "stdout": jobs_to_logs.get(job_id, ""),
            "stderr": "",
            "exit_code": -1
        })
    return response