def test_prefix_patterns(self):
    """Check that the bash prefix variable is generated correctly and
    honors the filename patterns declared in the input spec."""
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as proj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=proj.get_id())

        # Seed the project with one file per pattern under test.
        for fname in ("A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                      "fooxxx.bam", "A.bar.gz", "x13year23.sam"):
            dxpy.upload_string("1234", project=proj.get_id(), name=fname)

        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'prefix_patterns'), proj.get_id())

        # Launch the applet and wait for it to finish.
        run(['dx', 'run', '--yes', '--watch', applet_id,
             '-iseq1=A.bar',
             '-iseq2=A.json.dot.bar',
             '-igene=A.vcf.pam',
             '-imap=A.foo.bar',
             '-imap2=fooxxx.bam',
             '-imap3=A.bar',
             '-imap4=A.bar.gz',
             '-imulti=x13year23.sam'],
            env=env)
def test_prefix_patterns(self):
    """ Tests that the bash prefix variable works correctly,
    and respects patterns.
    """
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())
        filenames = ["A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                     "fooxxx.bam", "A.bar.gz", "x13year23.sam"]
        # One uploaded file per pattern in the input spec.
        for fname in filenames:
            dxpy.upload_string("1234", project=dxproj.get_id(), name=fname)
        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'prefix_patterns'), dxproj.get_id())
        # Run the applet; each -i flag targets one of the uploads above.
        applet_args = ['-i%s' % pair for pair in (
            'seq1=A.bar', 'seq2=A.json.dot.bar', 'gene=A.vcf.pam',
            'map=A.foo.bar', 'map2=fooxxx.bam', 'map3=A.bar',
            'map4=A.bar.gz', 'multi=x13year23.sam')]
        run(['dx', 'run', '--yes', '--watch', applet_id] + applet_args, env=env)
def test_vars(self):
    ''' Quick test for the bash variables '''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as p:
        env = update_environ(DX_PROJECT_CONTEXT_ID=p.get_id())
        # Upload a file for use by the applet.
        dxpy.upload_string("1234\n", project=p.get_id(), name="A.txt")
        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'vars'), p.get_id())
        # One input per supported class; list inputs are repeated flags.
        applet_args = ['-iseq1=A.txt', '-iseq2=A.txt',
                       '-igenes=A.txt', '-igenes=A.txt',
                       '-ii=5', '-ix=4.2', '-ib=true', '-is=hello',
                       '-iil=6', '-iil=7', '-iil=8',
                       '-ixl=3.3', '-ixl=4.4', '-ixl=5.0',
                       '-ibl=true', '-ibl=false', '-ibl=true',
                       '-isl=hello', '-isl=world', '-isl=next',
                       '-imisc={"hello": "world", "foo": true}']
        run(['dx', 'run', '--yes', '--watch', applet_id] + applet_args, env=env)
def create_file_in_project(fname, trg_proj_id, folder=None):
    """Upload a small placeholder file into the target project (optionally
    inside *folder*) and return the new file's ID."""
    kwargs = {'name': fname, 'project': trg_proj_id, 'wait_on_close': True}
    if folder is not None:
        kwargs['folder'] = folder
    return dxpy.upload_string("foo", **kwargs).get_id()
def test_basic(self):
    '''Tests upload/download helpers '''
    # Seed the platform with A.txt only; B.txt is deliberately missing.
    dxpy.upload_string("1234", wait_on_close=True, name="A.txt")

    # B.txt does not exist yet, so this invocation must fail with a CLI error.
    with self.assertRaises(testutil.DXCalledProcessError):
        self.run_test_app_locally('basic', ['-iseq1=A.txt', '-iseq2=B.txt'])

    dxpy.upload_string("ABCD", wait_on_close=True, name="B.txt")

    # With both files present, these invocations should all succeed.
    base_args = ['-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt', '-iref=B.txt',
                 "-ivalue=5"]
    self.run_test_app_locally('basic', base_args + ['-iages=1'])
    self.run_test_app_locally('basic',
                              ['-iseq1=A.txt', '-iseq2=B.txt', '-ibar=A.txt',
                               '-iref=A.txt', '-iref=B.txt', "-ivalue=5",
                               '-iages=1'])
    self.run_test_app_locally('basic',
                              base_args + ['-iages=1', '-iages=11', '-iages=33'])
    # check the except flags
    self.run_test_app_locally('basic_except',
                              base_args + ['-iages=1', '-iages=11', '-iages=33'])
def test_vars(self):
    """Tests bash variable generation """
    # A single small file is enough to exercise the file-typed inputs.
    dxpy.upload_string("1234", name="A.txt", wait_on_close=True)
    # Build the argument list group by group: files, scalars, then lists.
    args = ['-iseq1=A.txt', '-iseq2=A.txt', '-igenes=A.txt', '-igenes=A.txt']
    args += ['-ii=5', '-ix=4.2', '-ib=true', '-is=hello']
    args += ['-iil=6', '-iil=7', '-iil=8']
    args += ['-ixl=3.3', '-ixl=4.4', '-ixl=5.0']
    args += ['-ibl=true', '-ibl=false', '-ibl=true']
    args += ['-isl=hello', '-isl=world', '-isl=next']
    args += ['-imisc={"hello": "world", "foo": true}']
    self.run_test_app_locally('vars', args)
def test_vars(self):
    """Tests bash variable generation """
    # Upload a file so the file/array:file inputs can resolve.
    dxpy.upload_string("1234", name="A.txt", wait_on_close=True)
    self.run_test_app_locally(
        'vars',
        ['-iseq1=A.txt', '-iseq2=A.txt',
         '-igenes=A.txt', '-igenes=A.txt',
         '-ii=5', '-ix=4.2', '-ib=true', '-is=hello',
         '-iil=6', '-iil=7', '-iil=8',
         '-ixl=3.3', '-ixl=4.4', '-ixl=5.0',
         '-ibl=true', '-ibl=false', '-ibl=true',
         '-isl=hello', '-isl=world', '-isl=next',
         '-imisc={"hello": "world", "foo": true}'])
def test_prefix_patterns(self):
    """ Tests that the bash prefix variable works correctly,
    and respects patterns.
    """
    # Upload one sample file per filename pattern in the input spec.
    for fname in ("A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                  "fooxxx.bam", "A.bar.gz", "x13year23.sam"):
        dxpy.upload_string("1234", name=fname, wait_on_close=True)
    self.run_test_app_locally('prefix_patterns', [
        '-iseq1=A.bar', '-iseq2=A.json.dot.bar', '-igene=A.vcf.pam',
        '-imap=A.foo.bar', '-imap2=fooxxx.bam', '-imap3=A.bar',
        '-imap4=A.bar.gz', '-imulti=x13year23.sam'])
def test_prefix_patterns(self):
    """ Tests that the bash prefix variable works correctly,
    and respects patterns.
    """
    contents = "1234"
    filenames = ["A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                 "fooxxx.bam", "A.bar.gz", "x13year23.sam"]
    # Each upload matches one of the applet's input patterns.
    for fname in filenames:
        dxpy.upload_string(contents, name=fname, wait_on_close=True)
    applet_args = ['-iseq1=A.bar',
                   '-iseq2=A.json.dot.bar',
                   '-igene=A.vcf.pam',
                   '-imap=A.foo.bar',
                   '-imap2=fooxxx.bam',
                   '-imap3=A.bar',
                   '-imap4=A.bar.gz',
                   '-imulti=x13year23.sam']
    self.run_test_app_locally('prefix_patterns', applet_args)
def test_file_arrays(self):
    # Builds a pass-through applet ("ident_file_array") and runs it via
    # `dx run --batch-tsv` to verify that array:file batch inputs are
    # resolved into $dnanexus_link objects.
    # Create file with junk content
    dxfile = dxpy.upload_string("xxyyzz", project=self.project,
                                wait_on_close=True, name="bubbles")
    # write python code into code.py file
    # NOTE(review): write_mode and delimiter are module-level globals not
    # visible in this chunk — presumably 'w'/'wb' and ',' or '\t'; confirm.
    tmp_path = tempfile.mkdtemp()
    code_path = os.path.join(tmp_path, 'code.py')
    with open(code_path, write_mode) as f:
        f.write("@dxpy.entry_point('main')\n")
        f.write("def main(**job_inputs):\n")
        f.write("\toutput = {}\n")
        f.write("\toutput['plant'] = job_inputs['plant']\n")
        f.write("\treturn output\n")
        f.write("\n")
        f.write("dxpy.run()\n")
    with open(code_path, 'r') as f:
        code = f.read()
    # write arguments table. These ara arrays with a single element.
    arg_table = os.path.join(tmp_path, 'table.csv')
    with open(arg_table, write_mode) as csvfile:
        writer = csv.writer(csvfile, delimiter=delimiter)
        header = ["batch ID", "plant", "plant ID"]
        writer.writerow(header)
        writer.writerow(
            ["SRR_1", "[bubbles]", "[" + dxfile.get_id() + "]"])
    # Applet echoes its array:file input straight back as output.
    applet = dxpy.api.applet_new({
        "name": "ident_file_array",
        "project": self.project,
        "dxapi": "1.0.0",
        "inputSpec": [{
            "name": "plant",
            "class": "array:file"
        }],
        "outputSpec": [{
            "name": "plant",
            "class": "array:file"
        }],
        "runSpec": {
            "interpreter": "python2.7",
            "code": code,
            "distribution": "Ubuntu",
            "release": "14.04"
        }
    })
    # Launch one batch job; --brief prints just the job ID.
    job_id = run("dx run {} --batch-tsv={} --yes --brief".format(
        applet["id"], arg_table)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["executableName"], 'ident_file_array')
    # The "[bubbles]" cell must have been resolved to a real file link.
    self.assertEqual(job_desc["input"], {"plant": [{
        "$dnanexus_link": dxfile.get_id()
    }]})
def test_basic(self):
    """End-to-end run of the 'basic' applet inside a throwaway project."""
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as proj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=proj.get_id())

        # Upload some files for use by the applet.
        dxpy.upload_string("1234\n", project=proj.get_id(), name="A.txt")
        dxpy.upload_string("ABCD\n", project=proj.get_id(), name="B.txt")

        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'basic'), proj.get_id())

        # Run the applet and wait for completion.
        run(['dx', 'run', '--yes', '--watch', applet_id,
             '-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt', '-iref=B.txt',
             "-ivalue=5", "-iages=4"],
            env=env)
def test_upload_string_dxfile(self):
    """Round-trip a string through upload/download and compare contents."""
    uploaded = dxpy.upload_string(self.foo_str)
    self.dxfile = uploaded
    uploaded.wait_on_close()
    self.assertTrue(uploaded.closed())
    # Download and verify the bytes match the original fixture file.
    dxpy.download_dxfile(uploaded.get_id(), self.new_file.name)
    self.assertTrue(filecmp.cmp(self.foo_file.name, self.new_file.name))
def test_parseq(self):
    """ Tests the parallel/sequential variations """
    with temporary_project("TestDXBashHelpers.test_app1 temporary project") as proj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=proj.get_id())

        # Upload some files for use by the applet.
        for fname, content in (("A.txt", "1234\n"), ("B.txt", "ABCD\n")):
            dxpy.upload_string(content, project=proj.get_id(), name=fname)

        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, "parseq"), proj.get_id())

        # Run the applet.
        run(["dx", "run", "--yes", "--watch", applet_id,
             "-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt", "-iref=B.txt"],
            env=env)
def upload_html(destination, html, name=None):
    """ Uploads the HTML to a file on the server """
    project, path, n = parse_destination(destination)
    try:
        # Hidden so the report file does not clutter project listings.
        handle = dxpy.upload_string(html,
                                    media_type="text/html",
                                    project=project,
                                    folder=path,
                                    hidden=True,
                                    name=name or None)
        return handle.get_id()
    except dxpy.DXAPIError as ex:
        parser.error("Could not upload HTML report to DNAnexus server! ({ex})".format(ex=ex))
def test_xattr_parameters(self):
    ''' Tests dx-upload-all-outputs uploading with filesystem metadata as properties '''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as proj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=proj.get_id())

        # Upload some files for use by the applet.
        for name, content in (("A.txt", "1234\n"), ("B.txt", "ABCD\n")):
            dxpy.upload_string(content, project=proj.get_id(), name=name)

        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'xattr_properties'), proj.get_id())

        # Run the applet.
        cmd = ['dx', 'run', '--yes', '--watch', applet_id]
        cmd += ["-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt", "-iref=B.txt"]
        run(cmd, env=env)
def test_vars(self):
    """ Quick test for the bash variables """
    with temporary_project("TestDXBashHelpers.test_app1 temporary project") as p:
        env = update_environ(DX_PROJECT_CONTEXT_ID=p.get_id())
        # Upload a file for use by the applet.
        dxpy.upload_string("1234\n", project=p.get_id(), name="A.txt")
        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, "vars"), p.get_id())
        # Assemble inputs group by group: files, scalars, lists.
        applet_args = ["-iseq1=A.txt", "-iseq2=A.txt",
                       "-igenes=A.txt", "-igenes=A.txt"]
        applet_args += ["-ii=5", "-ix=4.2", "-ib=true", "-is=hello"]
        applet_args += ["-iil=6", "-iil=7", "-iil=8"]
        applet_args += ["-ixl=3.3", "-ixl=4.4", "-ixl=5.0"]
        applet_args += ["-ibl=true", "-ibl=false", "-ibl=true"]
        applet_args += ["-isl=hello", "-isl=world", "-isl=next"]
        applet_args += ['-imisc={"hello": "world", "foo": true}']
        run(["dx", "run", "--yes", "--watch", applet_id] + applet_args, env=env)
def test_vars(self):
    ''' Quick test for the bash variables '''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as p:
        env = update_environ(DX_PROJECT_CONTEXT_ID=p.get_id())
        # Upload a file for use by the applet.
        dxpy.upload_string("1234\n", project=p.get_id(), name="A.txt")
        # Build the applet, patching in the bash helpers from the
        # local checkout.
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'vars'), p.get_id())
        # Run the applet with one value per input class; list inputs
        # are passed as repeated flags.
        cmd_args = ['dx', 'run', '--yes', '--watch', applet_id]
        cmd_args.extend([
            '-iseq1=A.txt', '-iseq2=A.txt',
            '-igenes=A.txt', '-igenes=A.txt',
            '-ii=5', '-ix=4.2', '-ib=true', '-is=hello',
            '-iil=6', '-iil=7', '-iil=8',
            '-ixl=3.3', '-ixl=4.4', '-ixl=5.0',
            '-ibl=true', '-ibl=false', '-ibl=true',
            '-isl=hello', '-isl=world', '-isl=next',
            '-imisc={"hello": "world", "foo": true}',
        ])
        run(cmd_args, env=env)
def test_file_arrays(self):
    # Builds a pass-through applet ("ident_file_array") and runs it via
    # `dx run --batch-tsv` to verify that array:file batch inputs are
    # resolved into $dnanexus_link objects.
    # Create file with junk content
    dxfile = dxpy.upload_string("xxyyzz", project=self.project,
                                wait_on_close=True, name="bubbles")
    # write python code into code.py file
    # NOTE(review): write_mode and delimiter are module-level globals not
    # visible in this chunk — presumably 'w'/'wb' and ',' or '\t'; confirm.
    tmp_path = tempfile.mkdtemp()
    code_path = os.path.join(tmp_path, 'code.py')
    with open(code_path, write_mode) as f:
        f.write("@dxpy.entry_point('main')\n")
        f.write("def main(**job_inputs):\n")
        f.write("\toutput = {}\n")
        f.write("\toutput['plant'] = job_inputs['plant']\n")
        f.write("\treturn output\n")
        f.write("\n")
        f.write("dxpy.run()\n")
    with open(code_path, 'r') as f:
        code = f.read()
    # write arguments table. These ara arrays with a single element.
    arg_table = os.path.join(tmp_path, 'table.csv')
    with open(arg_table, write_mode) as csvfile:
        writer = csv.writer(csvfile, delimiter=delimiter)
        header = ["batch ID", "plant", "plant ID"]
        writer.writerow(header)
        writer.writerow(["SRR_1",
                         "[bubbles]",
                         "[" + dxfile.get_id() + "]"
                         ])
    # Applet echoes its array:file input straight back as output.
    applet = dxpy.api.applet_new({
        "name": "ident_file_array",
        "project": self.project,
        "dxapi": "1.0.0",
        "inputSpec": [
            {
                "name": "plant",
                "class": "array:file"
            }
        ],
        "outputSpec": [
            {
                "name": "plant",
                "class": "array:file"
            }
        ],
        "runSpec": {
            "interpreter": "python2.7",
            "code": code,
            "distribution": "Ubuntu",
            "release": "14.04"
        }
    })
    # Launch one batch job; --brief prints just the job ID.
    job_id = run("dx run {} --batch-tsv={} --yes --brief"
                 .format(applet["id"], arg_table)).strip()
    job_desc = dxpy.api.job_describe(job_id)
    self.assertEqual(job_desc["executableName"], 'ident_file_array')
    # The "[bubbles]" cell must have been resolved to a real file link.
    self.assertEqual(job_desc["input"], {
        "plant": [{
            "$dnanexus_link": dxfile.get_id()
        }]
    })
def makeGenomeObject():
    """Create a minimal ContigSet record (one chr1 contig, empty sequence
    file) and return the record's ID."""
    # NOTE: for these tests we don't upload a full sequence file (which
    # would be huge, for hg19). Importers and exporters that need to
    # look at the full sequence file can't be run on this test contigset.
    seq_file = dxpy.upload_string("", hidden=True)

    record = dxpy.new_dxrecord()
    record.set_details({
        "flat_sequence_file": {"$dnanexus_link": seq_file.get_id()},
        "contigs": {
            "offsets": [0],
            "names": ["chr1"],
            "sizes": [249250621],
        },
    })
    record.add_types(["ContigSet"])
    record.close()

    seq_file.wait_on_close()
    return record.get_id()
def test_parseq(self):
    ''' Tests the parallel/sequential variations '''
    # Two small input files, uploaded synchronously.
    for fname, content in (("A.txt", "1234"), ("B.txt", "ABCD")):
        dxpy.upload_string(content, wait_on_close=True, name=fname)
    self.run_test_app_locally(
        'parseq',
        ["-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt", "-iref=B.txt"])
def main(folder_name, key_name, assembly, noupload, force, debug):
    # Accessions BAMs contained within the folder named folder_name/bams
    # into the ENCODE portal, then uploads them to AWS.
    # Requires
    # . directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
    # . basename contains one or more ENCFF numbers from which the bam is derived
    # . bam_filename.flagstat.qc exists
    # . raw bam flagstat file exists in folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc
    # if bam file's tags on DNAnexus already contains and ENCFF number, assume it's already accessioned and skip
    # create a fully qualified project:filename for submitted_file_name and calculate the file size
    # if an ENCFF objects exists with the same submitted_file_name, AND it has the same size, skip
    # **INFER the experiment accession number from the bam's containing folder
    # calculate the md5
    # find the raw bam's .flagstat.qc file and parse
    # find the bam's .flagstat.qc file and parse
    # **ASSUME all derived_from ENCFF's appear in the bam's filename
    # POST file object
    # Upload to AWS
    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Normalize the folder name to absolute, trailing-slash form.
    if not folder_name.startswith('/'):
        folder_name = '/' + folder_name
    if not folder_name.endswith('/'):
        folder_name += '/'

    # Resolve the current DNAnexus project name (best-effort).
    try:
        project = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)
        project_name = project.describe().get('name')
    except:
        logger.error("Failed to resolve proejct")
        project_name = ""

    # Enumerate all closed .bam files under folder_name/bams/.
    bam_folder = folder_name + 'bams/'
    bams = dxpy.find_data_objects(classname="file",
                                  state="closed",
                                  name="*.bam",
                                  name_mode="glob",
                                  project=dxpy.PROJECT_CONTEXT_ID,
                                  folder=bam_folder,
                                  recurse=True,
                                  return_handler=True)

    authid, authpw, server = processkey(key_name)

    # Pick whichever md5 utility is available on this system.
    if not subprocess.call('which md5', shell=True):
        md5_command = 'md5 -q'
    elif not subprocess.call('which md5sum', shell=True):
        md5_command = 'md5sum'
    else:
        logger.error("Cannot find md5 or md5sum command")
        md5_command = ''

    file_mapping = []
    for bam in bams:
        # Skip BAMs whose tags already carry an ENCODE/test accession.
        already_accessioned = False
        for tag in bam.tags:
            m = re.search(r'(ENCFF\d{3}\D{3})|(TSTFF\D{6})', tag)
            if m:
                logger.info(
                    '%s appears to contain ENCODE accession number in tag %s ... skipping'
                    % (bam.name, m.group(0)))
                already_accessioned = True
                break
        if already_accessioned:
            continue

        bam_description = bam.describe()
        submitted_file_name = project_name + ':' + '/'.join(
            [bam.folder, bam.name])
        submitted_file_size = bam_description.get('size')

        # Duplicate check: same submitted_file_name + same size => skip.
        url = urlparse.urljoin(
            server,
            'search/?type=file&submitted_file_name=%s&format=json&frame=object'
            % (submitted_file_name))
        r = encoded_get(url, authid, authpw)
        try:
            r.raise_for_status()
            if r.json()['@graph']:
                for duplicate_item in r.json()['@graph']:
                    if duplicate_item.get('status') == 'deleted':
                        logger.info(
                            "A potential duplicate file was found but its status=deleted ... proceeding"
                        )
                        duplicate_found = False
                    else:
                        logger.info("Found potential duplicate: %s"
                                    % (duplicate_item.get('accession')))
                        if submitted_file_size == duplicate_item.get(
                                'file_size'):
                            logger.info(
                                "%s %s: File sizes match, assuming duplicate."
                                % (str(submitted_file_size),
                                   duplicate_item.get('file_size')))
                            duplicate_found = True
                            break
                        else:
                            logger.info(
                                "%s %s: File sizes differ, assuming new file."
                                % (str(submitted_file_size),
                                   duplicate_item.get('file_size')))
                            duplicate_found = False
            else:
                logger.info("No duplicate ... proceeding")
                duplicate_found = False
        except:
            # Any failure in the search is treated as "no duplicate".
            logger.warning('Duplicate accession check failed: %s %s'
                           % (r.status_code, r.reason))
            logger.debug(r.text)
            duplicate_found = False

        if duplicate_found:
            if force:
                logger.info(
                    "Duplicate detected, but force=true, so continuing")
            else:
                logger.info("Duplicate detected, skipping")
                continue

        # Locate the QC files that accompany this BAM; any one missing
        # means this BAM cannot be accessioned yet.
        # NOTE(review): each `<name> = None` after `continue` below is
        # unreachable dead code, kept byte-for-byte as in the original.
        try:
            bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.flagstat.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Flagstat file not found ... skipping")
            continue
            bamqc_fh = None

        # The raw (unfiltered) BAM's flagstat lives under raw_bams/.
        raw_bams_folder = str(bam.folder).replace(
            '%sbams/' % (folder_name), '%sraw_bams/' % (folder_name), 1)
        try:
            raw_bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.flagstat.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=raw_bams_folder,
                return_handler=True)
        except:
            logger.warning("Raw flagstat file not found ... skipping")
            continue
            raw_bamqc_fh = None

        try:
            dup_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.dup.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Picard duplicates QC file not found ... skipping")
            continue
            dup_qc_fh = None

        try:
            xcor_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.cc.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Cross-correlation QC file not found ... skipping")
            continue
            xcor_qc_fh = None

        try:
            pbc_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.pbc.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("PBC QC file not found ... skipping")
            continue
            pbc_qc_fh = None

        # Experiment accession is inferred from the containing folder name.
        experiment_accession = re.match('\S*(ENC\S{8})', bam.folder).group(1)

        # Download the BAM locally and compute its md5.
        logger.info("Downloading %s" % (bam.name))
        dxpy.download_dxfile(bam.get_id(), bam.name)
        md5_output = subprocess.check_output(
            ' '.join([md5_command, bam.name]), shell=True)
        calculated_md5 = md5_output.partition(' ')[0].rstrip()

        # Assemble the ENCODE file object to POST.
        # NOTE(review): FILE_OBJ_TEMPLATE is a module-level dict and is
        # mutated in place here (shared across loop iterations) — confirm
        # this aliasing is intentional.
        encode_object = FILE_OBJ_TEMPLATE
        encode_object.update({'assembly': assembly})
        notes = {
            'filtered_qc': flagstat_parse(bamqc_fh),
            'qc': flagstat_parse(raw_bamqc_fh),
            'dup_qc': dup_parse(dup_qc_fh),
            'xcor_qc': xcor_parse(xcor_qc_fh),
            'pbc_qc': pbc_parse(pbc_qc_fh),
            'dx-id': bam_description.get('id'),
            'dx-createdBy': bam_description.get('createdBy')
        }
        encode_object.update({
            'dataset': experiment_accession,
            'notes': json.dumps(notes),
            'submitted_file_name': submitted_file_name,
            'derived_from': re.findall('(ENCFF\S{6})', bam.name),
            'file_size': submitted_file_size,
            'md5sum': calculated_md5
        })
        logger.info("Experiment accession: %s" % (experiment_accession))
        logger.debug("File metadata: %s" % (encode_object))

        # POST the new file object to the portal.
        url = urlparse.urljoin(server, 'files')
        r = encoded_post(url, authid, authpw, encode_object)
        try:
            r.raise_for_status()
            new_file_object = r.json()['@graph'][0]
            logger.info("New accession: %s"
                        % (new_file_object.get('accession')))
        except:
            logger.warning('POST file object failed: %s %s'
                           % (r.status_code, r.reason))
            logger.debug(r.text)
            new_file_object = {}
            if r.status_code == 409:
                # 409 usually means the md5 already exists on the portal.
                try:
                    # cautiously add a tag with the existing accession number
                    if calculated_md5 in r.json().get('detail'):
                        url = urlparse.urljoin(
                            server,
                            '/search/?type=file&md5sum=%s' % (calculated_md5))
                        r = encoded_get(url, authid, authpw)
                        r.raise_for_status()
                        accessioned_file = r.json()['@graph'][0]
                        existing_accession = accessioned_file['accession']
                        bam.add_tags([existing_accession])
                        logger.info(
                            'Already accessioned. Added %s to dxfile tags'
                            % (existing_accession))
                except:
                    logger.info(
                        'Conflict does not appear to be md5 ... continuing')

        # Upload the BAM payload to AWS using the credentials the portal
        # returned with the new file object.
        if noupload:
            logger.info("--noupload so skipping upload")
            upload_returncode = -1
        else:
            if new_file_object:
                creds = new_file_object['upload_credentials']
                env = os.environ.copy()
                env.update({
                    'AWS_ACCESS_KEY_ID': creds['access_key'],
                    'AWS_SECRET_ACCESS_KEY': creds['secret_key'],
                    'AWS_SECURITY_TOKEN': creds['session_token'],
                })
                logger.info("Uploading file.")
                start = time.time()
                try:
                    subprocess.check_call([
                        'aws', 's3', 'cp', bam.name, creds['upload_url'],
                        '--quiet'
                    ], env=env)
                except subprocess.CalledProcessError as e:
                    # The aws command returns a non-zero exit code on error.
                    logger.error("Upload failed with exit code %d"
                                 % e.returncode)
                    upload_returncode = e.returncode
                else:
                    # NOTE(review): timing/tagging nesting reconstructed
                    # from mangled source — success path only; confirm.
                    upload_returncode = 0
                    end = time.time()
                    duration = end - start
                    logger.info("Uploaded in %.2f seconds" % duration)
                    bam.add_tags([new_file_object.get('accession')])
            else:
                upload_returncode = -1

        # Record one tab-separated summary row per BAM processed.
        out_string = '\t'.join([
            experiment_accession,
            encode_object.get('submitted_file_name'),
            new_file_object.get('accession') or '',
            str(upload_returncode),
            encode_object.get('notes')
        ])
        print out_string
        file_mapping.append(out_string)
        os.remove(bam.name)

    # Save the accession log back to the project and return it.
    output_log_filename = time.strftime('%m%d%y%H%M') + '-accession_log.csv'
    out_fh = dxpy.upload_string('\n'.join(file_mapping),
                                name=output_log_filename,
                                media_type='text/csv')
    out_fh.close()
    output = {"file_mapping": file_mapping, "outfile": dxpy.dxlink(out_fh)}
    return output
def test_sub_jobs(self):
    ''' Tests a bash script that generates sub-jobs '''
    # Two small input files, uploaded synchronously.
    for fname, content in (("A.txt", "1234"), ("B.txt", "ABCD")):
        dxpy.upload_string(content, wait_on_close=True, name=fname)
    self.run_test_app_locally('with-subjobs',
                              ["-ifiles=A.txt", "-ifiles=B.txt"])
def test_sub_jobs(self):
    ''' Tests a bash script that generates sub-jobs '''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

        # Upload some files for use by the applet
        dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
        dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

        # Build the applet, patching in the bash helpers from the
        # local checkout
        applet_id = build_app_with_bash_helpers(
            os.path.join(TEST_APPS, 'with-subjobs'), dxproj.get_id())

        # Run the applet.
        # Since the job creates two sub-jobs, we need to be a bit more
        # sophisticated in order to wait for completion.
        applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
        cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
        cmd_args.extend(applet_args)
        job_id = run(cmd_args, env=env).strip()

        dxpy.DXJob(job_id).wait_on_done()

        # Assertions -- making sure the script worked
        # Assertions to make about the job's output after it is done running:
        # - *first_file* is a file named first_file.txt containing the string:
        #   "contents of first_file"
        # - *final_file* is a file named final_file.txt containing the
        #   concatenation of the two input files in *files*
        print("Test completed successfully, checking file content\n")

        job_handler = dxpy.get_handler(job_id)
        job_output = job_handler.output

        def strip_white_space(_str):
            # Collapse all whitespace so content comparison is layout-agnostic.
            return ''.join(_str.split())

        def silent_file_remove(filename):
            # Best-effort removal of a local scratch file.
            try:
                os.remove(filename)
            except OSError:
                pass

        # The output should include two files, this section verifies that
        # they have the correct data.
        def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
            """
            Download a file, read it from local disk, and verify that it has
            the correct contents.

            Raises an Exception if the output parameter is missing or the
            downloaded content does not match *str_content*.
            """
            if out_param_name not in job_output:
                # BUG FIX: the original raised a plain string, which is a
                # TypeError at runtime; raise a real exception instead.
                raise Exception(
                    "Error: key {} does not appear in the job output".format(out_param_name))
            dxlink = job_output[out_param_name]

            # check that the filename gets preserved
            trg_fname = dxpy.get_handler(dxlink).name
            self.assertEqual(trg_fname, out_filename)

            # download the file and check the contents
            silent_file_remove(tmp_fname)
            dxpy.download_dxfile(dxlink, tmp_fname)
            with open(tmp_fname, "r") as fh:
                data = fh.read()
                print(data)
                if not (strip_white_space(data) == strip_white_space(str_content)):
                    raise Exception(
                        "contents of file {} do not match".format(out_param_name))
            silent_file_remove(tmp_fname)

        check_file_content('first_file', 'first_file.txt', "f1.txt",
                           "contents of first_file")
        check_file_content('final_file', 'final_file.txt', "f2.txt",
                           "1234ABCD")
def test_file_download(self):
    '''
    This test assumes a well-formed input spec and tests that the
    templates created automatically download the files only if they are
    available and does something sensible otherwise.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Make a couple files for testing
    dxfile = dxpy.upload_string("foo", name="afile")
    dxpy.upload_string("foobar", name="otherfile")

    # App skeleton exercising required/optional/default file inputs plus
    # required/optional file arrays; no outputs needed for this test.
    dxapp_json = {
        "name": "files",
        "title": "files",
        "summary": "files",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [
            {
                "name": "required_file",
                "class": "file",
                "optional": False
            },
            {
                "name": "optional_file",
                "class": "file",
                "optional": True
            },
            {
                "name": "default_file",
                "class": "file",
                "optional": True,
                "default": {"$dnanexus_link": dxfile.get_id()}
            },
            {
                "name": "required_file_array",
                "class": "array:file",
                "optional": False
            },
            {
                "name": "optional_file_array",
                "class": "array:file",
                "optional": True
            }
        ],
        "outputSpec": []
    }
    for lang in supported_languages:
        appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

        # Test with bare-minimum of inputs
        output = subprocess.check_output(['dx-run-app-locally', appdir,
                                          '-irequired_file=afile',
                                          '-irequired_file_array=afile'])
        print(output)
        self.assertIn("App finished successfully", output)
        self.assertIn("Local job workspaces can be found in:", output)
        # The workspace path is printed after this marker line.
        local_workdir = output.split("Local job workspaces can be found in:")[1].strip()
        file_list = os.listdir(os.path.join(local_workdir, 'localjob-0'))
        # required_file was given ("foo", 3 bytes) and must be downloaded;
        # optional_file was omitted and must NOT appear; default_file falls
        # back to its declared default (also "foo", 3 bytes).
        self.assertIn("required_file", file_list)
        self.assertEqual(os.path.getsize(os.path.join(local_workdir, 'localjob-0', 'required_file')), 3)
        self.assertNotIn("optional_file", file_list)
        self.assertIn("default_file", file_list)
        self.assertEqual(os.path.getsize(os.path.join(local_workdir, 'localjob-0', 'default_file')), 3)

        # Test with giving an input to everything
        output = subprocess.check_output(['dx-run-app-locally', appdir,
                                          '-irequired_file=afile',
                                          '-ioptional_file=afile',
                                          '-idefault_file=otherfile',
                                          '-irequired_file_array=afile',
                                          '-ioptional_file_array=afile'])
        print(output)
        self.assertIn("App finished successfully", output)
        self.assertIn("Local job workspaces can be found in:", output)
        local_workdir = output.split("Local job workspaces can be found in:")[1].strip()
        file_list = os.listdir(os.path.join(local_workdir, 'localjob-0'))
        # Now every file input is present; default_file was overridden with
        # "otherfile" ("foobar", 6 bytes).
        self.assertIn("required_file", file_list)
        self.assertEqual(os.path.getsize(os.path.join(local_workdir, 'localjob-0', 'required_file')), 3)
        self.assertIn("optional_file", file_list)
        self.assertEqual(os.path.getsize(os.path.join(local_workdir, 'localjob-0', 'optional_file')), 3)
        self.assertIn("default_file", file_list)
        self.assertEqual(os.path.getsize(os.path.join(local_workdir, 'localjob-0', 'default_file')), 6)
        concatenated_file_list = ",".join(file_list)
        # Different languages have different naming conventions
        # right now, so just look for the array variable name
        self.assertIn("required_file_array", concatenated_file_list)
        self.assertIn("optional_file_array", concatenated_file_list)
def test_var_initialization(self):
    '''
    This test assumes a well-formed input spec and mostly just tests that
    everything compiles and the variable initialization code does not
    throw any errors.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Make some data objects for input
    dxapplet = dxpy.api.applet_new({"project": dxpy.WORKSPACE_ID,
                                    "name": "anapplet",
                                    "dxapi": "1.0.0",
                                    "runSpec": {"code": "",
                                                "interpreter": "bash"}})['id']
    dxfile = dxpy.upload_string("foo", name="afile")
    dxgtable = dxpy.new_dxgtable(columns=[{"name": "int_col", "type": "int"}],
                                 name="agtable")
    dxgtable.add_rows([[3], [0]])
    dxgtable.close(block=True)
    dxrecord = dxpy.new_dxrecord(name="arecord")
    dxrecord.close()

    # App skeleton; one required input, one optional input, and one
    # (optional, pass-through) output is generated per class below.
    dxapp_json = {
        "name": "all_vars",
        "title": "all_vars",
        "summary": "all_vars",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [],
        "outputSpec": []
    }
    classes = ['applet', 'record', 'file', 'gtable',
               'boolean', 'int', 'float', 'string', 'hash',
               'array:applet', 'array:record', 'array:file', 'array:gtable',
               'array:boolean', 'array:int', 'array:float', 'array:string']
    for classname in classes:
        dxapp_json['inputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                        "class": classname,
                                        "optional": False})
        # Note: marking outputs as optional so that empty arrays
        # will be acceptable; keeping names the same (as required)
        # in order to allow pass-through from input variables
        dxapp_json['outputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                         "class": classname,
                                         "optional": True})
        dxapp_json['inputSpec'].append({"name": "optional_" + classname.replace(":", "_"),
                                        "class": classname,
                                        "optional": True})

    cmdline_args = ['-irequired_applet=anapplet',
                    '-irequired_array_applet=anapplet',
                    '-irequired_record=arecord',
                    '-irequired_array_record=arecord',
                    '-irequired_file=afile',
                    '-irequired_array_file=afile',
                    '-irequired_gtable=agtable',
                    '-irequired_array_gtable=agtable',
                    '-irequired_boolean=true',
                    '-irequired_array_boolean=true',
                    '-irequired_array_boolean=false',
                    '-irequired_int=32',
                    '-irequired_array_int=42',
                    '-irequired_float=3.4',
                    '-irequired_array_float=.42',
                    '-irequired_string=foo',
                    '-irequired_array_string=bar',
                    '-irequired_hash={"foo":"bar"}']
    for lang in supported_languages:
        appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

        # Test with bare-minimum of inputs
        output = subprocess.check_output(['dx-run-app-locally', appdir] + cmdline_args)
        print(output)
        # Verify array is printed total 3 times once in each input, logs, and final output
        # (assertEquals is a deprecated alias; use assertEqual. Raw string
        # avoids the invalid \[ escape in a normal string literal.)
        self.assertEqual(
            len(re.findall(r"required_array_boolean = \[ true, false ]", output)), 3)
        self.assertIn("App finished successfully", output)

        # See PTFM-13697 for CentOS 5 details
        if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
            # Now actually make it an applet and run it
            applet_name = dxapp_json['name'] + '-' + lang
            subprocess.check_output(['dx', 'build', appdir,
                                     '--destination', applet_name])
            subprocess.check_output(['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
def main(folder_name, key_name, assembly, noupload, force, debug):
    """Accession bams under ``folder_name/bams/`` to an ENCODE server.

    For each closed ``*.bam`` found recursively under the folder: skip bams
    whose tags already look like an accession, skip (unless ``force``) bams
    the server already has with the same submitted_file_name and size, gather
    the associated QC files, compute the md5, POST a file object, and upload
    the bam to AWS with the returned credentials.

    Returns a dict with ``file_mapping`` (one tab-separated summary line per
    bam) and ``outfile`` (a dxlink to the uploaded accession log).
    """
    # accessions bams contained within the folder named folder_name/bams
    # Requires
    # . directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
    # . basename contains one or more ENCFF numbers from which the bam is derived
    # . bam_filename.flagstat.qc exists
    # . raw bam flagstat file exists in folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc
    # if bam file's tags on DNAnexus already contains and ENCFF number, assume it's already accessioned and skip
    # create a fully qualified project:filename for submitted_file_name and calculate the file size
    # if an ENCFF objects exists with the same submitted_file_name, AND it has the same size, skip
    # **INFER the experiment accession number from the bam's containing folder
    # calculate the md5
    # find the raw bam's .flagstat.qc file and parse
    # find the bam's .flagstat.qc file and parse
    # **ASSUME all derived_from ENCFF's appear in the bam's filename
    # POST file object
    # Upload to AWS

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Normalize the folder to the absolute, trailing-slash form used below.
    if not folder_name.startswith("/"):
        folder_name = "/" + folder_name
    if not folder_name.endswith("/"):
        folder_name += "/"

    try:
        project = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)
        project_name = project.describe().get("name")
    except:
        # NOTE(review): bare except; also "proejct" is a typo in the log message.
        logger.error("Failed to resolve proejct")
        project_name = ""

    bam_folder = folder_name + "bams/"
    # Generator of handlers for every closed bam under the folder.
    bams = dxpy.find_data_objects(
        classname="file",
        state="closed",
        name="*.bam",
        name_mode="glob",
        project=dxpy.PROJECT_CONTEXT_ID,
        folder=bam_folder,
        recurse=True,
        return_handler=True,
    )

    # Server credentials resolved from the named key.
    authid, authpw, server = processkey(key_name)

    # Pick whichever md5 tool exists on this host (macOS "md5 -q" vs GNU
    # "md5sum"); subprocess.call returns 0 (falsy) when "which" finds it.
    if not subprocess.call("which md5", shell=True):
        md5_command = "md5 -q"
    elif not subprocess.call("which md5sum", shell=True):
        md5_command = "md5sum"
    else:
        logger.error("Cannot find md5 or md5sum command")
        md5_command = ""

    file_mapping = []
    for bam in bams:
        # Skip bams whose tags already carry an ENCODE (ENCFF...) or test
        # (TSTFF...) accession number.
        already_accessioned = False
        for tag in bam.tags:
            m = re.search(r"(ENCFF\d{3}\D{3})|(TSTFF\D{6})", tag)
            if m:
                logger.info(
                    "%s appears to contain ENCODE accession number in tag %s ... skipping"
                    % (bam.name, m.group(0))
                )
                already_accessioned = True
                break
        if already_accessioned:
            continue

        bam_description = bam.describe()
        # Fully qualified project:/path/name, used as the server-side key.
        submitted_file_name = project_name + ":" + "/".join([bam.folder, bam.name])
        submitted_file_size = bam_description.get("size")

        # Ask the server whether it already has a file with this
        # submitted_file_name; a size match means "duplicate".
        url = urlparse.urljoin(
            server,
            "search/?type=file&submitted_file_name=%s&format=json&frame=object" % (submitted_file_name)
        )
        r = encoded_get(url, authid, authpw)
        try:
            r.raise_for_status()
            if r.json()["@graph"]:
                for duplicate_item in r.json()["@graph"]:
                    if duplicate_item.get("status") == "deleted":
                        logger.info("A potential duplicate file was found but its status=deleted ... proceeding")
                        duplicate_found = False
                    else:
                        logger.info("Found potential duplicate: %s" % (duplicate_item.get("accession")))
                        if submitted_file_size == duplicate_item.get("file_size"):
                            logger.info(
                                "%s %s: File sizes match, assuming duplicate."
                                % (str(submitted_file_size), duplicate_item.get("file_size"))
                            )
                            duplicate_found = True
                            break
                        else:
                            logger.info(
                                "%s %s: File sizes differ, assuming new file."
                                % (str(submitted_file_size), duplicate_item.get("file_size"))
                            )
                            duplicate_found = False
            else:
                logger.info("No duplicate ... proceeding")
                duplicate_found = False
        except:
            # Treat any failure of the duplicate check as "no duplicate" and
            # press on (best-effort check).
            logger.warning("Duplicate accession check failed: %s %s" % (r.status_code, r.reason))
            logger.debug(r.text)
            duplicate_found = False

        if duplicate_found:
            if force:
                logger.info("Duplicate detected, but force=true, so continuing")
            else:
                logger.info("Duplicate detected, skipping")
                continue

        # Locate the five QC files that accompany this bam; a missing QC file
        # means the bam cannot be accessioned, so skip it.
        try:
            bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name="*.flagstat.qc",
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True,
            )
        except:
            logger.warning("Flagstat file not found ... skipping")
            continue
            # NOTE(review): unreachable after continue (same in the four
            # stanzas below) -- looks like a leftover alternative to skipping.
            bamqc_fh = None
        # The raw bam's flagstat lives under the parallel raw_bams/ tree.
        raw_bams_folder = str(bam.folder).replace("%sbams/" % (folder_name), "%sraw_bams/" % (folder_name), 1)
        try:
            raw_bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name="*.flagstat.qc",
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=raw_bams_folder,
                return_handler=True,
            )
        except:
            logger.warning("Raw flagstat file not found ... skipping")
            continue
            raw_bamqc_fh = None  # NOTE(review): unreachable
        try:
            dup_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name="*.dup.qc",
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True,
            )
        except:
            logger.warning("Picard duplicates QC file not found ... skipping")
            continue
            dup_qc_fh = None  # NOTE(review): unreachable
        try:
            xcor_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name="*.cc.qc",
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True,
            )
        except:
            logger.warning("Cross-correlation QC file not found ... skipping")
            continue
            xcor_qc_fh = None  # NOTE(review): unreachable
        try:
            pbc_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name="*.pbc.qc",
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True,
            )
        except:
            logger.warning("PBC QC file not found ... skipping")
            continue
            pbc_qc_fh = None  # NOTE(review): unreachable

        # Infer the experiment (ENCSR...) accession from the containing folder.
        experiment_accession = re.match("\S*(ENC\S{8})", bam.folder).group(1)

        # Download the bam locally so the md5 tool can read it.
        logger.info("Downloading %s" % (bam.name))
        dxpy.download_dxfile(bam.get_id(), bam.name)
        md5_output = subprocess.check_output(" ".join([md5_command, bam.name]), shell=True)
        calculated_md5 = md5_output.partition(" ")[0].rstrip()

        # NOTE(review): this aliases (not copies) the module-level template,
        # so updates accumulate across loop iterations -- confirm intended.
        encode_object = FILE_OBJ_TEMPLATE
        encode_object.update({"assembly": assembly})

        # Parsed QC results are carried in the "notes" field as a JSON blob.
        notes = {
            "filtered_qc": flagstat_parse(bamqc_fh),
            "qc": flagstat_parse(raw_bamqc_fh),
            "dup_qc": dup_parse(dup_qc_fh),
            "xcor_qc": xcor_parse(xcor_qc_fh),
            "pbc_qc": pbc_parse(pbc_qc_fh),
            "dx-id": bam_description.get("id"),
            "dx-createdBy": bam_description.get("createdBy"),
        }
        encode_object.update(
            {
                "dataset": experiment_accession,
                "notes": json.dumps(notes),
                "submitted_file_name": submitted_file_name,
                # Derived-from accessions are assumed to appear in the filename.
                "derived_from": re.findall("(ENCFF\S{6})", bam.name),
                "file_size": submitted_file_size,
                "md5sum": calculated_md5,
            }
        )
        logger.info("Experiment accession: %s" % (experiment_accession))
        logger.debug("File metadata: %s" % (encode_object))

        # POST the new file object; on success the response carries the new
        # accession and one-time AWS upload credentials.
        url = urlparse.urljoin(server, "files")
        r = encoded_post(url, authid, authpw, encode_object)
        try:
            r.raise_for_status()
            new_file_object = r.json()["@graph"][0]
            logger.info("New accession: %s" % (new_file_object.get("accession")))
        except:
            logger.warning("POST file object failed: %s %s" % (r.status_code, r.reason))
            logger.debug(r.text)
            new_file_object = {}
            if r.status_code == 409:
                # 409 conflict: the server may already know this md5.
                try:
                    # cautiously add a tag with the existing accession number
                    if calculated_md5 in r.json().get("detail"):
                        url = urlparse.urljoin(server, "/search/?type=file&md5sum=%s" % (calculated_md5))
                        r = encoded_get(url, authid, authpw)
                        r.raise_for_status()
                        accessioned_file = r.json()["@graph"][0]
                        existing_accession = accessioned_file["accession"]
                        bam.add_tags([existing_accession])
                        logger.info("Already accessioned. Added %s to dxfile tags" % (existing_accession))
                except:
                    logger.info("Conflict does not appear to be md5 ... continuing")

        if noupload:
            logger.info("--noupload so skipping upload")
            upload_returncode = -1
        else:
            if new_file_object:
                # Hand the one-time credentials to the aws CLI via env vars.
                creds = new_file_object["upload_credentials"]
                env = os.environ.copy()
                env.update(
                    {
                        "AWS_ACCESS_KEY_ID": creds["access_key"],
                        "AWS_SECRET_ACCESS_KEY": creds["secret_key"],
                        "AWS_SECURITY_TOKEN": creds["session_token"],
                    }
                )
                logger.info("Uploading file.")
                start = time.time()
                try:
                    subprocess.check_call(["aws", "s3", "cp", bam.name, creds["upload_url"], "--quiet"], env=env)
                except subprocess.CalledProcessError as e:
                    # The aws command returns a non-zero exit code on error.
                    logger.error("Upload failed with exit code %d" % e.returncode)
                    upload_returncode = e.returncode
                else:
                    upload_returncode = 0
                    end = time.time()
                    duration = end - start
                    logger.info("Uploaded in %.2f seconds" % duration)
                    # Tag the dx file with its new accession only on success.
                    bam.add_tags([new_file_object.get("accession")])
            else:
                upload_returncode = -1

        # One tab-separated summary line per bam for the accession log.
        out_string = "\t".join(
            [
                experiment_accession,
                encode_object.get("submitted_file_name"),
                new_file_object.get("accession") or "",
                str(upload_returncode),
                encode_object.get("notes"),
            ]
        )
        print out_string
        file_mapping.append(out_string)
        # Remove the locally downloaded copy.
        os.remove(bam.name)

    # Persist the accession log back to the platform and return it.
    output_log_filename = time.strftime("%m%d%y%H%M") + "-accession_log.csv"
    out_fh = dxpy.upload_string("\n".join(file_mapping), name=output_log_filename, media_type="text/csv")
    out_fh.close()
    output = {"file_mapping": file_mapping, "outfile": dxpy.dxlink(out_fh)}
    return output
def test_sub_jobs(self):
    '''  Tests a bash script that generates sub-jobs '''
    with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
        env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

        # Upload some files for use by the applet
        dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
        dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

        # Build the applet, patching in the bash helpers from the
        # local checkout
        applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'with-subjobs'),
                                                dxproj.get_id())
        # Run the applet.
        # Since the job creates two sub-jobs, we need to be a bit more sophisticated
        # in order to wait for completion.
        applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
        cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
        cmd_args.extend(applet_args)
        job_id = run(cmd_args, env=env).strip()

        dxpy.DXJob(job_id).wait_on_done()

        # Assertions -- making sure the script worked
        # Assertions to make about the job's output after it is done running:
        # - *first_file* is a file named first_file.txt containing the string:
        #     "contents of first_file"
        # - *final_file* is a file named final_file.txt containing the
        #   concatenation of the two input files in *files*
        print("Test completed successfully, checking file content\n")

        job_handler = dxpy.get_handler(job_id)
        job_output = job_handler.output

        def strip_white_space(_str):
            # Remove all whitespace so contents compare layout-insensitively.
            return ''.join(_str.split())

        def silent_file_remove(filename):
            # Best-effort removal; ignore a missing file.
            try:
                os.remove(filename)
            except OSError:
                pass

        # The output should include two files, this section verifies that they have
        # the correct data.
        def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
            """
            Download a file, read it from local disk, and verify that it has the correct contents
            """
            if out_param_name not in job_output:
                # Fix: the original raised a plain string, which is itself a
                # TypeError at runtime -- raise a real exception instead.
                raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
            dxlink = job_output[out_param_name]

            # check that the filename gets preserved
            trg_fname = dxpy.get_handler(dxlink).name
            self.assertEqual(trg_fname, out_filename)

            # download the file and check the contents
            silent_file_remove(tmp_fname)
            dxpy.download_dxfile(dxlink, tmp_fname)
            with open(tmp_fname, "r") as fh:
                data = fh.read()
                print(data)
                if not (strip_white_space(data) == strip_white_space(str_content)):
                    raise Exception("contents of file {} do not match".format(out_param_name))
            silent_file_remove(tmp_fname)

        check_file_content('first_file', 'first_file.txt', "f1.txt", "contents of first_file")
        check_file_content('final_file', 'final_file.txt', "f2.txt", "1234ABCD")
def test_var_initialization(self):
    '''
    This test assumes a well-formed input spec and mostly just
    tests that everything compiles and the variable initialization
    code does not throw any errors.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Create one data object of each non-scalar kind for use as inputs.
    dxpy.api.applet_new({
        "project": dxpy.WORKSPACE_ID,
        "name": "anapplet",
        "dxapi": "1.0.0",
        "runSpec": {
            "code": "",
            "interpreter": "bash",
            "distribution": "Ubuntu",
            "release": "14.04"
        }
    })['id']
    dxpy.upload_string("foo", name="afile")
    record_obj = dxpy.new_dxrecord(name="arecord")
    record_obj.close()

    dxapp_json = {
        "name": "all_vars",
        "title": "all_vars",
        "summary": "all_vars",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [],
        "outputSpec": []
    }

    # For every supported class, declare: a required input, a matching
    # optional output (so empty arrays are acceptable and values can be
    # passed straight through), and an optional input.
    all_classes = [
        'applet', 'record', 'file', 'boolean', 'int', 'float', 'string',
        'hash', 'array:applet', 'array:record', 'array:file',
        'array:boolean', 'array:int', 'array:float', 'array:string'
    ]
    for cls_name in all_classes:
        var_base = cls_name.replace(":", "_")
        dxapp_json['inputSpec'].append({
            "name": "required_" + var_base,
            "class": cls_name,
            "optional": False
        })
        dxapp_json['outputSpec'].append({
            "name": "required_" + var_base,
            "class": cls_name,
            "optional": True
        })
        dxapp_json['inputSpec'].append({
            "name": "optional_" + var_base,
            "class": cls_name,
            "optional": True
        })

    # Concrete values for every required input declared above.
    cmdline_args = [
        '-irequired_applet=anapplet', '-irequired_array_applet=anapplet',
        '-irequired_record=arecord', '-irequired_array_record=arecord',
        '-irequired_file=afile', '-irequired_array_file=afile',
        '-irequired_boolean=true', '-irequired_array_boolean=true',
        '-irequired_array_boolean=false', '-irequired_int=32',
        '-irequired_array_int=42', '-irequired_float=3.4',
        '-irequired_array_float=.42', '-irequired_string=foo',
        '-irequired_array_string=bar', '-irequired_hash={"foo":"bar"}'
    ]
    for language in supported_languages:
        app_dir = create_app_dir_with_dxapp_json(dxapp_json, language)
        # See PTFM-13697 for CentOS 5 details
        if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
            # Build the generated app dir into an applet and run it for real.
            applet_name = dxapp_json['name'] + '-' + language
            subprocess.check_output(
                ['dx', 'build', app_dir, '--destination', applet_name])
            subprocess.check_output(
                ['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
def test_file_download(self):
    '''
    This test assumes a well-formed input spec and tests that the
    templates created automatically download the files only if they
    are available and does something sensible otherwise.
    '''
    print("Setting current project to", self.project)
    dxpy.WORKSPACE_ID = self.project
    dxpy.PROJECT_CONTEXT_ID = self.project

    # Two small files: one to back the default_file input, one to override it.
    dxfile = dxpy.upload_string("foo", name="afile")
    dxpy.upload_string("foobar", name="otherfile")

    dxapp_json = {
        "name": "files",
        "title": "files",
        "summary": "files",
        "dxapi": "1.0.0",
        "version": "0.0.1",
        "categories": [],
        "inputSpec": [
            {"name": "required_file", "class": "file", "optional": False},
            {"name": "optional_file", "class": "file", "optional": True},
            {"name": "default_file", "class": "file", "optional": True,
             "default": {"$dnanexus_link": dxfile.get_id()}},
            {"name": "required_file_array", "class": "array:file",
             "optional": False},
            {"name": "optional_file_array", "class": "array:file",
             "optional": True}
        ],
        "outputSpec": []
    }

    for lang in supported_languages:
        app_dir = create_app_dir_with_dxapp_json(dxapp_json, lang)

        # First run: only the required inputs are supplied.
        run_output = subprocess.check_output([
            'dx-run-app-locally', app_dir,
            '-irequired_file=afile',
            '-irequired_file_array=afile'
        ])
        print(run_output)
        self.assertIn("App finished successfully", run_output)
        self.assertIn("Local job workspaces can be found in:", run_output)
        workdir_root = run_output.split(
            "Local job workspaces can be found in:")[1].strip()
        job_dir = os.path.join(workdir_root, 'localjob-0')
        downloaded = os.listdir(job_dir)
        # Required and default files are downloaded; the omitted optional
        # file is not.
        self.assertIn("required_file", downloaded)
        self.assertEqual(
            os.path.getsize(os.path.join(job_dir, 'required_file')), 3)
        self.assertNotIn("optional_file", downloaded)
        self.assertIn("default_file", downloaded)
        self.assertEqual(
            os.path.getsize(os.path.join(job_dir, 'default_file')), 3)

        # Second run: every input (including arrays and the default
        # override) is supplied.
        run_output = subprocess.check_output([
            'dx-run-app-locally', app_dir,
            '-irequired_file=afile',
            '-ioptional_file=afile',
            '-idefault_file=otherfile',
            '-irequired_file_array=afile',
            '-ioptional_file_array=afile'
        ])
        print(run_output)
        self.assertIn("App finished successfully", run_output)
        self.assertIn("Local job workspaces can be found in:", run_output)
        workdir_root = run_output.split(
            "Local job workspaces can be found in:")[1].strip()
        job_dir = os.path.join(workdir_root, 'localjob-0')
        downloaded = os.listdir(job_dir)
        self.assertIn("required_file", downloaded)
        self.assertEqual(
            os.path.getsize(os.path.join(job_dir, 'required_file')), 3)
        self.assertIn("optional_file", downloaded)
        self.assertEqual(
            os.path.getsize(os.path.join(job_dir, 'optional_file')), 3)
        # default_file was overridden with the 6-byte "foobar" file.
        self.assertIn("default_file", downloaded)
        self.assertEqual(
            os.path.getsize(os.path.join(job_dir, 'default_file')), 6)
        concatenated_file_list = ",".join(downloaded)
        # Different languages have different naming conventions
        # right now, so just look for the array variable name
        self.assertIn("required_file_array", concatenated_file_list)
        self.assertIn("optional_file_array", concatenated_file_list)