def get_handler_from_desc(desc):
    """Return the dxpy handler matching a describe dict's 'class' field.

    'applet' and 'workflow' handlers are bound to the object's project;
    apps are project-independent.
    """
    obj_class = desc['class']
    if obj_class == 'app':
        return dxpy.DXApp(dxid=desc['id'])
    if obj_class == 'applet':
        return dxpy.DXApplet(desc['id'], project=desc['project'])
    return dxpy.DXWorkflow(desc['id'], project=desc['project'])
def once():
    # Launch one test execution and return its job/analysis handle, or
    # None if anything goes wrong.  Relies on module-level state:
    # test_files, tname, test_defaults, project, test_folder, oid,
    # debug_flag, delay_workspace_destruction, git_revision.
    try:
        desc = test_files[tname]
        # Tests listed in test_defaults (or with no dx_input file) run
        # with empty inputs.
        if tname in test_defaults:
            inputs = {}
        elif desc.dx_input is None:
            inputs = {}
        else:
            inputs = read_json_file(desc.dx_input)
        project.new_folder(test_folder, parents=True)
        if desc.kind == "workflow":
            exec_obj = dxpy.DXWorkflow(project=project.get_id(), dxid=oid)
        elif desc.kind == "applet":
            exec_obj = dxpy.DXApplet(project=project.get_id(), dxid=oid)
        else:
            raise RuntimeError("Unknown kind {}".format(desc.kind))
        run_kwargs = {}
        if debug_flag:
            # Hold failing jobs for SSH debugging on the common error classes.
            run_kwargs = {
                "debug": {"debugOn": ['AppError', 'AppInternalError', 'ExecutionError']},
                "allow_ssh": ["*"]
            }
        if delay_workspace_destruction:
            run_kwargs["delay_workspace_destruction"] = True
        return exec_obj.run(inputs,
                            project=project.get_id(),
                            folder=test_folder,
                            name="{} {}".format(desc.name, git_revision),
                            instance_type="mem1_ssd1_x4",
                            **run_kwargs)
    except Exception as e:
        # Best-effort launcher: report and signal failure with None.
        print("exception message={}".format(e))
        return None
def main():
    """Write names of file-class app inputs not already wired into stage 0
    of a workflow to inputs_stats.txt.

    argv[1]: "<prefix>:<file id>"     -- file whose folder gates the run
    argv[2]: "<prefix>:<workflow id>"
    argv[3]: app or applet id
    """
    print(sys.argv[2])
    workflow = dxpy.DXWorkflow(sys.argv[2].split(":")[-1])
    fh = dxpy.DXFile(sys.argv[1].split(":")[-1])
    # Files already under a /Results folder are skipped entirely.
    if "/Results" in fh.describe()['folder']:
        return
    app_id = sys.argv[3]
    if "applet" in app_id:
        app = dxpy.DXApplet(app_id)
    else:
        app = dxpy.DXApp(app_id)
    existing_inputs = list(workflow.describe()['stages'][0]['input'])
    print(existing_inputs)
    # Use a context manager so the output file is closed even on error;
    # the original leaked the handle (and created an empty file) when it
    # returned early above.  The unused local 'w_id' was also dropped.
    with open("inputs_stats.txt", 'w') as inputs_file:
        for x in app.describe()['inputSpec']:
            print(x)
            if x['class'] == 'file' and x['name'] not in existing_inputs:
                inputs_file.write(x['name'] + "\n")
def test_duplicate_outputs(self):
    """
    If bax files are converted to BAM or FASTA, the output will be of the
    format: [prefix].[part_number].[ext]

    Since these are grouped together in mapping, we want to make sure
    output files don't all have the name [prefix].mapped.bam
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    # these are 40MB files
    job_input["reads"] = [{
        "$dnanexus_link": "file-BXqbQx803GZK2FBqzb40yvQ1"
    }, {
        "$dnanexus_link": "file-BXqbV1j03GZFyYv3zk4BjZxZ"
    }]
    job_input["datatype"] = "PacBio"
    job_input["chunk_size"] = 1
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    try:
        job.wait_on_done()
        output = job.describe()["output"]
        # check that 2 files are output
        mappings_files = output["bam_files"]
        self.assertTrue(len(mappings_files) == 2)
        # check that both files don't have the same name
        filename1 = dxpy.DXFile(mappings_files[0]).describe()['name']
        filename2 = dxpy.DXFile(mappings_files[1]).describe()['name']
        self.assertTrue(filename1 != filename2)
    except Exception:
        # Preserve outputs of failed runs for post-mortem inspection.
        DX_PROJ_OBJ.move_folder(self.tempdirdx, ARTIFACTS_FOLDER)
        raise
def test_datatype_compatbility(self):
    """
    Run the app with PacBio subreads.bam inputs but datatype "ONT" and
    verify the job fails with an AppError.
    """
    # Copy the shared fixture so mutations don't leak into other tests.
    job_input = dict(self.base_input)
    # this is a subreads.bam file
    job_input["reads"] = [
        {"$dnanexus_link": "file-FPY0BY80pbJvg49Z3k4zZp71"},
        {"$dnanexus_link": "file-FPY0BX802J4jybZVJf0gy272"}
    ]
    job_input["reads_indices"] = [
        {"$dnanexus_link": "file-FPY096j0VvqbBk3Y5367XbJp"},
        {"$dnanexus_link": "file-FPY096j0G03Gg49Z3k4zZg1Q"}
    ]
    # this is an incompatible datatype
    job_input["datatype"] = "ONT"
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    try:
        job.wait_on_done()
        print(json.dumps(job.describe()["output"]))
        # if job continued, fail this test
        raise Exception("Job should have failed with DXAppError")
    except dxpy.exceptions.DXJobFailureError:
        # wait_on_done raises DXJobFailureError for failed jobs (the
        # original caught AppError, which never fires here); confirm the
        # underlying failure really is an AppError.
        self.assertEqual(job.describe()["failureReason"], "AppError")
def test_alignment_count(applet_id, project_id, folder, tmpdir):
    """Run BWA on a FASTQ file and verify that the number of alignments
    produced is correct.
    """
    # applet_id is set in the associated conftest.py (command line, or a
    # fresh build).  tmpdir is pytest's py.path.local.LocalPath fixture;
    # its strpath property is just the string form.
    bwa = dxpy.DXApplet(applet_id)
    job = bwa.run(
        {
            "fastq": dxpy.dxlink(SAMPLE_FASTQ),
            "genomeindex_targz": dxpy.dxlink(HS37D5_BWA_INDEX),
        },
        instance_type="mem1_ssd1_x16",
        folder=folder,
        project=project_id)
    job.wait_on_done()

    # Download the produced BAM and count its alignment records.
    bam_handle = dxpy.DXFile(job.describe()["output"]["bam"])
    local_filename = os.path.join(tmpdir.strpath, "test.bam")
    dxpy.download_dxfile(bam_handle.get_id(), local_filename)
    count_alignments_cmd = "samtools view {bam} | wc -l".format(
        bam=local_filename)
    num_alignments = int(
        subprocess.check_output(count_alignments_cmd, shell=True))
    assert num_alignments == 1951476
def test_base_input(self):
    """
    Tests the app with a basic input.
    """
    applet = dxpy.DXApplet(self.applet_id)
    job = applet.run(self.base_input)
    print("Waiting for %s to complete" % (job.get_id(),))
    job.wait_on_done()
    print(json.dumps(job.describe()["output"]))
def test_post(self):
    """
    Tests the app with a basic input, skipping validation
    """
    job = dxpy.DXApplet(self.applet_id).run(self.bed_input)
    print("Waiting for %s to complete" % (job.get_id(),))
    job.wait_on_done()
    outp = job.describe()["output"]
    print(json.dumps(outp))

    # Second run: feed the first job's accession back as 'derived_from'.
    bb_input = self.bb_input
    bb_input['file_meta'].update({'derived_from': [outp['accession']]})
    djob = dxpy.DXApplet(self.applet_id).run(bb_input)
    print("Waiting for %s to complete" % (djob.get_id(),))
    djob.wait_on_done()
    outp = djob.describe()["output"]
    print(json.dumps(outp))
def build_applet():
    """Build the dxWDL applet."""
    # applet_new returns a dict with the new applet's "id"; wrapped in a
    # DXApplet handler at the end.
    dx_applet_id = dxpy.api.applet_new({
        "name": APPLET_NAME,
        "title": "WES dxWDL Runner",
        "dxapi": dxpy.API_VERSION,
        "project": dxpy.PROJECT_CONTEXT_ID,
        "properties": {
            "version": APPLET_VERSION
        },
        # All inputs are plain strings; the workflow descriptor/params are
        # passed inline rather than as platform files.
        "inputSpec": [{
            "name": "workflow_descriptor",
            "class": "string"
        }, {
            "name": "workflow_params",
            "class": "string"
        }, {
            "name": "workflow_dependencies",
            "class": "string",
            "optional": True
        }, {
            "name": "project",
            "class": "string"
        }, {
            "name": "wes_id",
            "class": "string"
        }],
        "outputSpec": [],
        "runSpec": {
            "code": APPLET_CODE,
            "interpreter": "python2.7",
            "systemRequirements": {
                "*": {
                    "instanceType": "mem1_ssd1_x4"
                }
            },
            # openjdk presumably for dxWDL's Java runtime — TODO confirm.
            "execDepends": [{
                "name": "openjdk-8-jre-headless",
            }, {
                "name": "dx-toolkit"
            }]
        },
        # Needs outbound network and CONTRIBUTE rights on the project.
        "access": {
            "network": ["*"],
            "project": "CONTRIBUTE"
        },
        "release": "14.04"
    })
    return dxpy.DXApplet(dx_applet_id["id"])
def test_dx_build_app_locally_using_app_builder(self):
    """Build an applet with dxpy.app_builder and sanity-check the result."""
    appdir = create_app_dir()
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project
    resources = dxpy.app_builder.upload_resources(appdir)
    applet_id, _ignored_applet_spec = dxpy.app_builder.upload_applet(
        appdir, resources, overwrite=True, dx_toolkit_autodep=False)
    handler = dxpy.DXApplet(applet_id)
    # The handler's id should round-trip through describe().
    self.assertEqual(handler.describe()['id'], handler.get_id())
def test_skip(self):
    """
    Tests the app skipping validation.
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # update() leaked 'skipvalidate' into every later test.
    skip_input = dict(self.base_input)
    skip_input.update({'skipvalidate': True})
    job = dxpy.DXApplet(self.applet_id).run(skip_input)
    print("Waiting for %s to complete" % (job.get_id(),))
    job.wait_on_done()
    print(json.dumps(job.describe()["output"]))
def test_applet_completion(self):
    """Shell completion should offer a freshly created applet by name."""
    dxapplet = dxpy.DXApplet()
    dxapplet.new(runSpec={"code": "placeholder", "interpreter": "bash"},
                 dxapi="1.0.0",
                 name="my applet")
    cases = (("dx ls my", "my applet "),
             ("dx ls", "ls "),
             ("dx run my", "my applet "),
             ("dx ls ", "my applet "))
    for command, expected in cases:
        self.assert_completion(command, expected)
def get_project_id(project_id_, applet_id_):
    """Return the project_id to use for the tests.

    If project_id_ is specified, just use it.  Otherwise fall back to the
    project that contains the applet.
    """
    if project_id_:
        return project_id_
    return dxpy.DXApplet(applet_id_).describe()['project']
def lookup_applet(name, project, folder):
    """Find the applet called *name* in project:folder and return a handler.

    Raises RuntimeError when the applet is missing.
    """
    matches = list(dxpy.bindings.search.find_data_objects(
        name=name,
        folder=folder,
        project=project.get_id(),
        limit=1))
    if len(matches) == 0:
        raise RuntimeError("applet {} not found in folder {}".format(name, folder))
    if len(matches) == 1:
        return dxpy.DXApplet(project=project.get_id(), dxid=matches[0]['id'])
    # Unreachable: limit=1 caps the result count.
    raise RuntimeError("sanity")
def build_applet():
    """Build the file localizer applet on dnanexus."""
    # applet_new returns a dict with the new applet's "id"; wrapped in a
    # DXApplet handler at the end.
    dx_applet_id = dxpy.api.applet_new({
        "name": APPLET_NAME,
        "title": "WES URL Localizer",
        "dxapi": dxpy.API_VERSION,
        "project": dxpy.PROJECT_CONTEXT_ID,
        "properties": {
            "version": APPLET_VERSION
        },
        "inputSpec": [{
            "name": "url",
            "class": "string"
        }, {
            "name": "project",
            "class": "string"
        }, {
            "name": "folder",
            "class": "string"
        }],
        "outputSpec": [{
            "name": "localized_file",
            "class": "file"
        }],
        "runSpec": {
            "code": APPLET_CODE,
            "interpreter": "python2.7",
            "systemRequirements": {
                "*": {
                    "instanceType": "mem1_ssd1_x2"
                }
            },
            # google-cloud-storage presumably for fetching gs:// URLs —
            # TODO confirm against APPLET_CODE.
            "execDepends": [{
                "name": "google-cloud-storage",
                "package_manager": "pip"
            }]
        },
        # Needs outbound network plus UPLOAD rights to write the file.
        "access": {
            "network": ["*"],
            "project": "UPLOAD"
        },
        "release": "14.04"
    })
    return dxpy.DXApplet(dx_applet_id["id"])
def test_build_and_use_asset(self):
    # End-to-end: build an asset bundle containing a script, then build
    # and run an applet that pulls the bundle in via runSpec.assetDepends
    # and calls the script.
    asset_spec = {
        "name": "asset library name with space",
        "title": "A human readable name",
        "description": "A detailed description about the asset",
        "version": "0.0.1",
        "distribution": "Ubuntu",
        "release": "14.04"
    }
    asset_dir = self.write_asset_directory("build_and_use_asset", json.dumps(asset_spec), "resources")
    # Ship an executable test.sh inside the asset's resources tree so it
    # lands on the worker's PATH (/usr/local/bin).
    run("mkdir -p " + os.path.join(asset_dir, "resources/usr/local/bin"))
    with open(
            os.path.join(asset_dir, "resources/usr/local/bin", 'test.sh'),
            'wb') as manifest:
        manifest.write("echo 'hi'".encode())
    run("chmod +x " + os.path.join(asset_dir, "resources/usr/local/bin", 'test.sh'))
    asset_bundle_id = json.loads(run('dx build_asset --json ' + asset_dir))['id']
    # The applet's entry point just invokes the script from the asset.
    code_str = """#!/bin/bash
main(){
test.sh
}
"""
    app_spec = {
        "name": "asset_depends",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": code_str,
            "interpreter": "bash",
            "distribution": "Ubuntu",
            "release": "14.04",
            "assetDepends": [{
                "id": asset_bundle_id
            }]
        },
        "inputSpec": [],
        "outputSpec": [],
        "version": "1.0.0"
    }
    app_dir = self.write_app_directory("asset_depends", json.dumps(app_spec))
    asset_applet_id = json.loads(
        run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
    asset_applet = dxpy.DXApplet(asset_applet_id)
    applet_job = asset_applet.run({})
    applet_job.wait_on_done()
    # The job only succeeds if test.sh was found, i.e. the asset was mounted.
    self.assertEqual(applet_job.describe()['state'], 'done')
def main(project, folder, name):
    """Create a workflow whose single stage points at an applet, then
    delete that applet — deliberately leaving the workflow broken.
    Returns the workflow id."""
    # Build the applet
    applet_id, _unused_desc = upload_applet('.', None)
    applet_handler = dxpy.DXApplet(applet_id)

    # Build a workflow that uses that applet
    workflow = dxpy.new_dxworkflow(name=name, project=project, folder=folder)
    workflow.add_stage(applet_id)

    # Delete the applet, to break the workflow
    applet_handler.remove()
    return workflow.get_id()
def test_ont_input(self):
    """
    Tests the app with a basic ONT input.
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    job_input["reads"] = [{"$dnanexus_link": "file-FPXx7v00x99J3b743k9z93x8"}]
    job_input["datatype"] = "ONT"
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    try:
        job.wait_on_done()
        print(json.dumps(job.describe()["output"]))
    except Exception:
        # Preserve outputs of failed runs for post-mortem inspection.
        DX_PROJ_OBJ.move_folder(self.tempdirdx, ARTIFACTS_FOLDER)
        raise
def test_dx_run_app_locally_and_compare_results(self):
    """Build an applet locally, run it remotely, and check its output."""
    appdir = create_app_dir()
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project
    build_info = dx_build_app.build_and_upload_locally(
        appdir,
        mode='applet',
        overwrite=True,
        dx_toolkit_autodep=False,
        return_object_dump=True)
    remote_job = dxpy.DXApplet(build_info['id']).run({"in1": 8})
    print("Waiting for", remote_job, "to complete")
    remote_job.wait_on_done()
    result = remote_job.describe()
    self.assertEqual(result["output"]["out1"], 140)
def test_build_asset_inside_job(self):
    # Verify that 'dx build_asset' works when invoked from inside a job:
    # upload a dxasset.json, then build and run an applet that downloads
    # it and runs the asset build on the worker.
    asset_spec = {
        "name": "asset library name with space",
        "title": "A human readable name",
        "description": " A detailed description about the asset",
        "version": "0.0.1",
        "distribution": "Ubuntu",
        "release": "14.04"
    }
    asset_dir = self.write_asset_directory("test_build_asset_inside_job", json.dumps(asset_spec))
    asset_conf_file_id = run("dx upload " + os.path.join(asset_dir, "dxasset.json") + " --brief --wait").strip()
    # The applet fetches the config and runs dx build_asset in-job.
    code_str = """#!/bin/bash
main(){
dx download "${asset_conf}" -o dxasset.json
dx build_asset
}
"""
    app_spec = {
        "name": "run_build_asset",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": code_str,
            "interpreter": "bash",
            "distribution": "Ubuntu",
            "release": "14.04"
        },
        "inputSpec": [{
            "name": "asset_conf",
            "class": "file"
        }],
        "outputSpec": [],
        "version": "1.0.0"
    }
    app_dir = self.write_app_directory("run_build_asset", json.dumps(app_spec))
    asset_applet_id = json.loads(
        run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
    asset_applet = dxpy.DXApplet(asset_applet_id)
    applet_job = asset_applet.run(
        {"asset_conf": {
            "$dnanexus_link": asset_conf_file_id
        }})
    applet_job.wait_on_done()
    # Success means the in-job asset build completed.
    self.assertEqual(applet_job.describe()['state'], 'done')
def test_pacb_input(self):
    """
    Tests the app with a basic PacBio input.
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    job_input["reads"] = [{"$dnanexus_link": "file-FPY0BY80pbJvg49Z3k4zZp71"}]
    job_input["reads_indices"] = [{"$dnanexus_link": "file-FPY096j0VvqbBk3Y5367XbJp"}]
    job_input["datatype"] = "PacBio"
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    try:
        job.wait_on_done()
        print(json.dumps(job.describe()["output"]))
    except Exception:
        # Preserve outputs of failed runs for post-mortem inspection.
        DX_PROJ_OBJ.move_folder(self.tempdirdx, ARTIFACTS_FOLDER)
        raise
def test_pacb_subjobs(self):
    """
    Make sure PacBio format subjobs work as expected by running the app
    with multiple file inputs
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    # these are 10GB files
    job_input["reads"] = [{
        "$dnanexus_link": "file-FPY0BY80pbJvg49Z3k4zZp71"
    }, {
        "$dnanexus_link": "file-FPY0BX802J4jybZVJf0gy272"
    }]
    job_input["reads_indices"] = [{
        "$dnanexus_link": "file-FPY096j0VvqbBk3Y5367XbJp"
    }, {
        "$dnanexus_link": "file-FPY096j0G03Gg49Z3k4zZg1Q"
    }]
    job_input["datatype"] = "PacBio"
    job_input["chunk_size"] = 9
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    try:
        job.wait_on_done()
        output = job.describe()["output"]
        # check that 2 chunks were run and 2 files are output
        mappings_files = output["bam_files"]
        self.assertTrue(len(mappings_files) > 1)
        # check that two subjobs named "map_reads_pbmm2" were run
        subjobs = dxpy.find_jobs(parent_job=job.id)
        subjob_names = [dxpy.DXJob(subjob['id']).name for subjob in subjobs]
        # Names look like "<parent>:<entry point>"; keep the entry point.
        # (The original re-split names that were already stripped.)
        subjob_names = [s.split(':')[-1] for s in subjob_names]
        mapping_jobs = [s for s in subjob_names if s == 'map_reads_pbmm2']
        self.assertTrue(len(mapping_jobs) == 2)
    except Exception:
        # Preserve outputs of failed runs for post-mortem inspection.
        DX_PROJ_OBJ.move_folder(self.tempdirdx, ARTIFACTS_FOLDER)
        raise
def run_map_sample(project_id, output_folder, fastq_files, genome_fasta_file,
                   genome_index_file, mapper, applet_id, applet_project,
                   fastq_files2=None, mark_duplicates=False, sample_name=None,
                   properties=None):
    """Launch the mapper applet on one sample and return job-based links
    for its bam/bai/tools_used outputs."""
    mapper_applet = dxpy.DXApplet(dxid=applet_id, project=applet_project)
    print('Running map_sample')
    mapper_input = {
        "project_id": project_id,
        "output_folder": output_folder,
        "fastq_files": fastq_files,
        "genome_fasta_file": dxpy.dxlink(genome_fasta_file),
        "genome_index_file": dxpy.dxlink(genome_index_file),
        "mapper": mapper,
        "sample_name": sample_name,
        "mark_duplicates": mark_duplicates,
        "properties": properties,
    }
    # Paired-end runs supply a second set of FASTQs.
    if fastq_files2:
        mapper_input['fastq_files2'] = fastq_files2
    map_sample_job = mapper_applet.run(mapper_input)
    job_id = map_sample_job.get_id()
    # Each output is a job-based object reference resolved when the job ends.
    return {
        field: {"job": job_id, "field": field}
        for field in ("bam", "bai", "tools_used")
    }
def test_post_and_valid(self):
    """
    Tests validation for quantification bams (4 types)
    """
    jobs = []
    for bam in self.qbams_input:
        # start 4 jobs.
        jobs.append(dxpy.DXApplet(self.applet_id).run(bam))
        print("Waiting for %s to complete" % (jobs[-1].get_id(),))
    for job in jobs:
        job.wait_on_done()
        print("Job %s done" % (job.get_id(),))
    # should wait until all 4 are done
    for job in jobs:
        print(json.dumps(job.describe()["output"]))
def test_applet_completion(self):
    """Completion offers visible applets; hidden ones list but don't run."""
    dxapplet = dxpy.DXApplet()
    run_spec = {"code": "placeholder",
                "interpreter": "bash",
                "distribution": "Ubuntu",
                "release": "14.04"}
    dxapplet.new(runSpec=run_spec, dxapi="1.0.0", name="my applet")
    for command, expected in (("dx ls my", "my applet "),
                              ("dx ls", "ls "),
                              ("dx run my", "my applet "),
                              ("dx ls ", "my applet ")):
        self.assert_completion(command, expected)
    # not available to run when hidden
    dxapplet.new(runSpec=run_spec, dxapi="1.0.0", name="hidden", hidden=True)
    self.assert_completion("dx ls hid", "hidden ")
    self.assert_no_completions("dx run hid")
def test_ont_subjobs(self):
    """Run the app on two ONT read files with chunk_size=1 and verify the
    work was split into multiple mapped outputs."""
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    # these are 40MB files
    job_input["reads"] = [
        {"$dnanexus_link": "file-FPXx7v00x99J3b743k9z93x8"},
        {"$dnanexus_link": "file-FPXxJXQ0zYzGg49Z3k4zKFP7"}
    ]
    job_input["datatype"] = "ONT"
    job_input["chunk_size"] = 1
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    try:
        job.wait_on_done()
        output = job.describe()["output"]
        # check that 2 chunks were run and 2 files are output
        mappings_files = output["mapped_reads"]
        self.assertTrue(len(mappings_files) > 1)
    except Exception:
        # Broadened from dxpy.exceptions.AppError so any failure archives
        # the run for inspection, matching the sibling tests; re-raised so
        # the test still fails.
        DX_PROJ_OBJ.move_folder(self.tempdirdx, ARTIFACTS_FOLDER)
        raise
def test_base_input(self):
    """
    Tests the app with a basic input.
    Verify files are in S3
    """
    try:
        job = dxpy.DXApplet(self.applet_id).run(
            self.base_input,
            folder="/aws_transfer/aws_platform_to_s3_file_transfer")
        print "Waiting for %s to complete" % (job.get_id())
        job.wait_on_done()
    except dxpy.exceptions.DXJobFailureError as e:
        print "Job failed. Review message for error type"
        # NOTE(review): e.message is Python-2-only; would need str(e) on 3.
        self.fail(msg=e.message)
    else:
        print json.dumps(job.describe()["output"])
    # Verify files are on S3
    # Expected S3 location: s3://<target bucket>/<test's aws folder>/...
    prefix = "s3://{0}/{1}".format(self.base_input["target_s3"], self.awsfolder)
    full_filenames = _get_full_filenames(self.base_input["f_ids"], prefix)
    aws_ls_cmds = [
        "aws s3 ls \"{0}\"".format(filename) for filename in full_filenames
    ]
    # Collect every missing upload before failing, so one assertion
    # reports the full list.
    verify_fail = []
    for name, cmd in zip(full_filenames, aws_ls_cmds):
        try:
            print 'Verifying: {0}'.format(name)
            _run_cmd(cmd)
        except CalledProcessError:
            verify_fail.append(name)
    if verify_fail:
        self.fail(
            "Following uploads are not present on S3: {0}".format(
                str(verify_fail)))
def build_applets(): applets = [ "fpfilter-tool", "muse-tool", "pindel-tool", "radia-tool", "samtools-pileup-tool", "somaticsniper-tool", "tcga-vcf-filter-tool", "varscan-tool", "mutect-tool", "tcga-vcf-reheader" ] # Build applets for assembly workflow in [args.folder]/applets/ folder project.new_folder(applets_folder, parents=True) for applet in applets: print "building {}...".format(applet), sys.stdout.flush() build_out = subprocess.check_output([ "dx", "build", "--destination", args.project + ":" + applets_folder + "/", applet ]) # take just the last line, ignore other output that makefile generates applet_dxid = json.loads(build_out.rstrip().split('\n')[-1])["id"] print applet_dxid applet = dxpy.DXApplet(applet_dxid, project=project.get_id()) applet.set_properties({"git_revision": git_revision})
def once():
    """Launch a single test execution and return its handle, or None on
    any failure.

    Relies on module-level state: test_files, tname, test_defaults,
    project, test_folder, oid, delay_workspace_destruction, git_revision.
    """
    try:
        desc = test_files[tname]
        if tname in test_defaults:
            inputs = {}
        else:
            inputs = read_json_file_maybe_empty(desc.dx_input)
        project.new_folder(test_folder, parents=True)
        if desc.kind == "workflow":
            exec_obj = dxpy.DXWorkflow(project=project.get_id(), dxid=oid)
        elif desc.kind == "applet":
            exec_obj = dxpy.DXApplet(project=project.get_id(), dxid=oid)
        else:
            raise RuntimeError("Unknown kind {}".format(desc.kind))
        return exec_obj.run(
            inputs,
            project=project.get_id(),
            folder=test_folder,
            name="{} {}".format(desc.name, git_revision),
            delay_workspace_destruction=delay_workspace_destruction,
            instance_type="mem1_ssd1_x4")
    except Exception as e:
        # Fixed Python-2-only 'except Exception, e' syntax to 'as e',
        # matching the other once() variant in this codebase.
        print("exception message={}".format(e))
        return None
def test_datatype_compatbility(self):
    """
    Make sure the app fails when the input reads are in BAM format but
    the datatype is specified as ONT
    """
    # Copy the shared fixture: the original aliased self.base_input and
    # mutated it, leaking keys into every other test on this class.
    job_input = dict(self.base_input)
    # this is a subreads.bam file
    job_input["reads"] = [{
        "$dnanexus_link": "file-FPY0BY80pbJvg49Z3k4zZp71"
    }, {
        "$dnanexus_link": "file-FPY0BX802J4jybZVJf0gy272"
    }]
    job_input["reads_indices"] = [{
        "$dnanexus_link": "file-FPY096j0VvqbBk3Y5367XbJp"
    }, {
        "$dnanexus_link": "file-FPY096j0G03Gg49Z3k4zZg1Q"
    }]
    # this is an incompatible datatype
    job_input["datatype"] = "ONT"
    job = dxpy.DXApplet(self.applet_id).run(job_input,
                                            folder=self.tempdirdx,
                                            name=self.testname,
                                            project=DX_PROJECT_ID)
    print("Waiting for %s to complete" % (job.get_id(),))
    # check that the job failed
    try:
        job.wait_on_done()
        print(json.dumps(job.describe()["output"]))
        # if job continued, fail this test
        raise Exception("Job should have failed with DXAppError")
    except dxpy.exceptions.DXJobFailureError:
        # confirm that error is an AppError and not something else
        self.assertEqual(job.describe()["failureReason"], "AppError")