Example #1
    def _get_failed_task(cls, analysis: dxpy.DXAnalysis) -> dict:
        """
        Find the causal failure within an execution tree and get the logs.
        """
        # Search every failed execution (including subjobs) rooted at this
        # analysis.
        query = {
            "project": dxpy.WORKSPACE_ID,
            "state": "failed",
            "describe": {
                "stateTransitions": True
            },
            "include_subjobs": True,
            "root_execution": analysis.get_id()
        }
        # Keep the execution whose "failed" state transition happened earliest:
        # the first failure in the tree is treated as the root cause.
        cause = None
        cause_ts = None
        for execution_result in dxpy.find_executions(**query):
            if "stateTransitions" in execution_result["describe"]:
                for st in execution_result["describe"]["stateTransitions"]:
                    if st["newState"] == "failed":
                        ts = st["setAt"]
                        if cause is None or ts < cause_ts:
                            cause = execution_result
                            cause_ts = ts

        if cause:
            stdout, stderr = cls._get_logs(cause["id"])
            return {
                "failed_task": cause["id"],
                "failed_task_stdout": stdout,
                "failed_task_stderr": stderr
            }
        else:
            return {
                "msg": f"Analysis {analysis.get_id()} failed but the cause could not "
                       f"be determined"
            }
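
A minimal invocation sketch for the method above (hedged: FailureReporter is a hypothetical host class assumed to define both _get_failed_task and _get_logs as shown, and the analysis ID is a placeholder):

import dxpy

analysis = dxpy.DXAnalysis("analysis-xxxx")  # placeholder ID
if analysis.describe()["state"] == "failed":
    # FailureReporter is a hypothetical class hosting _get_failed_task.
    report = FailureReporter._get_failed_task(analysis)
    print(report)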
Example #2
import csv

import dxpy


def main():
    args = get_args()  # get_args() is defined elsewhere in the original script
    # applets = list(dxpy.find_data_objects(classname='applet', name='dbgap_sra_to_fastq*', name_mode='glob', project=args.project, return_handler=True))
    # assert applets
    srrs = args.SRRs or args.infile
    fieldnames = [
        'SRR', 'sra_size', 'sra_md5', 'fastq_id', 'fastq_alias', 'fastq_size',
        'fastq_name', 'fastq_md5'
    ]
    writer = csv.DictWriter(args.outfile, fieldnames, delimiter='\t')
    writer.writeheader()
    # jobs = []
    # for applet in applets:
    #     jobs.extend(list(dxpy.find_executions(executable=applet, describe=True, first_page_size=1000)))
    for row in srrs:
        if row.startswith('#'):
            continue
        srr = row.strip()
        srr_jobs = [
            j.get('describe')
            for j in dxpy.find_executions(describe=True,
                                          name="%s_sra_to_fastq" % (srr))
        ]
        # srr_jobs = [j['describe'] for j in jobs if j['describe'].get('state') == 'done' and srr in j['describe']['input']['SRR']]
        if not srr_jobs:
            writer.writerow({'SRR': "%s: not downloaded" % (srr)})
        else:
            if not any(job.get('state') == 'done' for job in srr_jobs):
                for job in srr_jobs:
                    writer.writerow({
                        'SRR': "%s: job %s %s" % (srr, job.get('id'), job.get('state'))
                    })
            else:
                for job in [j for j in srr_jobs if j.get('state') == 'done']:
                    outrow = {
                        'SRR': job['input'].get('SRR'),
                        'sra_size': job['output'].get('sra_size'),
                        'sra_md5': job['output'].get('sra_md5')
                    }
                for i, fastq in enumerate(job['output'].get('fastq')):
                    fh = dxpy.DXFile(fastq)
                        try:
                            file_size = fh.describe().get('size')
                            file_name = fh.name
                        except dxpy.exceptions.ResourceNotFound:
                            file_size = 'deleted'
                            file_name = 'deleted'
                        outrow.update({
                            'fastq_id': fh.get_id(),
                            'fastq_alias': ":".join(["dnanexus", fh.get_id()]),
                            'fastq_size': file_size,
                            # 'fastq_name': job['output'].get('fastq_filenames')[i],
                            'fastq_name': file_name,
                            'fastq_md5': job['output'].get('fastq_md5s')[i]
                        })
                        writer.writerow(outrow)
    created_before = "-" + args.created_before
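    # Note: dxpy's created_after/created_before filters accept relative,
    # suffixed time offsets (e.g. "-2d" for "two days ago"), which is why
    # a "-" is prepended to args.created_before above.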

    #if not args.created_after:
    #    args.created_after = 0     # default is everything since the Unix epoch
    #if not args.created_before:
    #    args.created_before = -1   # default is everything up to 1ms ago

    # project_ids, created_after, analysis_ids, and job_ids are defined
    # earlier in the original script (outside this excerpt).
    project_job_ids = []
    if project_ids:
        for project_id in project_ids:
            if args.states:
                for state in args.states:
                    project_job_ids.extend([
                        e["id"] for e in dxpy.find_executions(
                            project=project_id,
                            state=state,
                            created_after=created_after,
                            created_before=created_before)
                    ])
            else:
                project_job_ids.extend([
                    e["id"] for e in dxpy.find_executions(
                        project=project_id,
                        created_after=created_after,
                        created_before=created_before)
                ])

    execution_ids_to_describe = list(
        set(analysis_ids + project_job_ids + job_ids))

    print("Reading {} total executions...".format(