コード例 #1
0
    def test_prefix_patterns(self):
        """Verify that the bash prefix variable is generated correctly
        and honors filename patterns.
        """
        with temporary_project(
                'TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Seed the project with one tiny file per pattern under test.
            for name in ("A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                         "fooxxx.bam", "A.bar.gz", "x13year23.sam"):
                dxpy.upload_string("1234", project=dxproj.get_id(), name=name)

            # Build the applet, patching in the bash helpers from the
            # local checkout.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'prefix_patterns'), dxproj.get_id())

            # Launch the applet, mapping each input to an uploaded file,
            # and watch the job until it finishes.
            run(['dx', 'run', '--yes', '--watch', applet_id,
                 '-iseq1=A.bar', '-iseq2=A.json.dot.bar', '-igene=A.vcf.pam',
                 '-imap=A.foo.bar', '-imap2=fooxxx.bam', '-imap3=A.bar',
                 '-imap4=A.bar.gz', '-imulti=x13year23.sam'],
                env=env)
コード例 #2
0
    def test_prefix_patterns(self):
        """Check that the bash prefix variable respects filename patterns."""
        proj_name = 'TestDXBashHelpers.test_app1 temporary project'
        with temporary_project(proj_name) as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Upload one small file per pattern we want to exercise.
            for fname in ["A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                          "fooxxx.bam", "A.bar.gz", "x13year23.sam"]:
                dxpy.upload_string("1234", project=dxproj.get_id(), name=fname)

            # Build the applet with the locally checked-out bash helpers.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'prefix_patterns'), dxproj.get_id())

            # Run it, mapping each applet input to one of the uploaded files.
            cmd = ['dx', 'run', '--yes', '--watch', applet_id]
            cmd += ['-iseq1=A.bar', '-iseq2=A.json.dot.bar',
                    '-igene=A.vcf.pam', '-imap=A.foo.bar',
                    '-imap2=fooxxx.bam', '-imap3=A.bar',
                    '-imap4=A.bar.gz', '-imulti=x13year23.sam']
            run(cmd, env=env)
コード例 #3
0
    def test_vars(self):
        """Smoke-test the bash variables produced for each input class."""
        with temporary_project(
                'TestDXBashHelpers.test_app1 temporary project') as p:
            env = update_environ(DX_PROJECT_CONTEXT_ID=p.get_id())

            # One small file is enough; several inputs reference it.
            dxpy.upload_string("1234\n", project=p.get_id(), name="A.txt")

            # Build the applet, patching in the bash helpers from the
            # local checkout.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'vars'), p.get_id())

            # Inputs covering files, scalars, and arrays of every class.
            args = ['-iseq1=A.txt', '-iseq2=A.txt', '-igenes=A.txt',
                    '-igenes=A.txt', '-ii=5', '-ix=4.2', '-ib=true',
                    '-is=hello', '-iil=6', '-iil=7', '-iil=8', '-ixl=3.3',
                    '-ixl=4.4', '-ixl=5.0', '-ibl=true', '-ibl=false',
                    '-ibl=true', '-isl=hello', '-isl=world', '-isl=next',
                    '-imisc={"hello": "world", "foo": true}']
            run(['dx', 'run', '--yes', '--watch', applet_id] + args, env=env)
コード例 #4
0
def create_file_in_project(fname, trg_proj_id, folder=None):
    """Create a small closed file in the target project.

    Args:
        fname: name to give the new file.
        trg_proj_id: ID of the project in which to create the file.
        folder: optional destination folder; project root when None.

    Returns:
        The ID of the newly created (and fully closed) file.
    """
    # Build the optional kwarg once instead of duplicating the whole call.
    extra = {} if folder is None else {"folder": folder}
    dxfile = dxpy.upload_string("foo", name=fname, project=trg_proj_id,
                                wait_on_close=True, **extra)
    return dxfile.get_id()
コード例 #5
0
    def test_basic(self):
        """Tests upload/download helpers

        """
        # Make the first test file; B.txt deliberately does not exist yet.
        dxpy.upload_string("1234", wait_on_close=True, name="A.txt")

        # Running with a missing input file must fail with a CLI error.
        with self.assertRaises(testutil.DXCalledProcessError):
            self.run_test_app_locally('basic', ['-iseq1=A.txt', '-iseq2=B.txt'])

        dxpy.upload_string("ABCD", wait_on_close=True, name="B.txt")

        # With both files present, these invocations should succeed.
        seqs = ['-iseq1=A.txt', '-iseq2=B.txt']
        refs = ['-iref=A.txt', '-iref=B.txt', "-ivalue=5"]
        self.run_test_app_locally('basic', seqs + refs + ['-iages=1'])
        self.run_test_app_locally('basic',
                                  seqs + ['-ibar=A.txt'] + refs + ['-iages=1'])
        many_ages = ['-iages=1', '-iages=11', '-iages=33']
        self.run_test_app_locally('basic', seqs + refs + many_ages)

        # check the except flags
        self.run_test_app_locally('basic_except', seqs + refs + many_ages)
コード例 #6
0
ファイル: test_dx-docker.py プロジェクト: dnanexus/dx-toolkit
def create_file_in_project(fname, trg_proj_id, folder=None):
    """Create a small closed file in the target project.

    Args:
        fname: name to give the new file.
        trg_proj_id: ID of the project in which to create the file.
        folder: optional destination folder; project root when None.

    Returns:
        The ID of the newly created (and fully closed) file.
    """
    # Build the optional kwarg once instead of duplicating the whole call.
    extra = {} if folder is None else {"folder": folder}
    dxfile = dxpy.upload_string("foo", name=fname, project=trg_proj_id,
                                wait_on_close=True, **extra)
    return dxfile.get_id()
    def test_basic(self):
        """Tests upload/download helpers

        """
        # Make a couple files for testing
        dxpy.upload_string("1234", wait_on_close=True, name="A.txt")

        # B.txt does not exist yet, so this run must fail with a CLI error.
        with self.assertRaises(testutil.DXCalledProcessError):
            self.run_test_app_locally('basic', ['-iseq1=A.txt', '-iseq2=B.txt'])

        dxpy.upload_string("ABCD", wait_on_close=True, name="B.txt")

        # these should succeed
        self.run_test_app_locally(
            'basic',
            ['-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt', '-iref=B.txt',
             "-ivalue=5", '-iages=1'])
        self.run_test_app_locally(
            'basic',
            ['-iseq1=A.txt', '-iseq2=B.txt', '-ibar=A.txt', '-iref=A.txt',
             '-iref=B.txt', "-ivalue=5", '-iages=1'])
        self.run_test_app_locally(
            'basic',
            ['-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt', '-iref=B.txt',
             "-ivalue=5", '-iages=1', '-iages=11', '-iages=33'])

        # check the except flags
        self.run_test_app_locally(
            'basic_except',
            ['-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt', '-iref=B.txt',
             "-ivalue=5", '-iages=1', '-iages=11', '-iages=33'])
 def test_vars(self):
     """Tests bash variable generation."""
     # A single uploaded file backs all the file-typed inputs.
     dxpy.upload_string("1234", name="A.txt", wait_on_close=True)
     args = ['-iseq1=A.txt', '-iseq2=A.txt', '-igenes=A.txt',
             '-igenes=A.txt', '-ii=5', '-ix=4.2', '-ib=true', '-is=hello',
             '-iil=6', '-iil=7', '-iil=8', '-ixl=3.3', '-ixl=4.4',
             '-ixl=5.0', '-ibl=true', '-ibl=false', '-ibl=true',
             '-isl=hello', '-isl=world', '-isl=next',
             '-imisc={"hello": "world", "foo": true}']
     self.run_test_app_locally('vars', args)
コード例 #9
0
 def test_vars(self):
     """Tests bash variable generation."""
     dxpy.upload_string("1234", name="A.txt", wait_on_close=True)
     # Cover file, scalar, and array inputs of every supported class.
     files = ['-iseq1=A.txt', '-iseq2=A.txt', '-igenes=A.txt',
              '-igenes=A.txt']
     scalars = ['-ii=5', '-ix=4.2', '-ib=true', '-is=hello']
     arrays = ['-iil=6', '-iil=7', '-iil=8',
               '-ixl=3.3', '-ixl=4.4', '-ixl=5.0',
               '-ibl=true', '-ibl=false', '-ibl=true',
               '-isl=hello', '-isl=world', '-isl=next']
     misc = ['-imisc={"hello": "world", "foo": true}']
     self.run_test_app_locally('vars', files + scalars + arrays + misc)
 def test_prefix_patterns(self):
     """ Tests that the bash prefix variable works correctly, and
     respects patterns.
     """
     # Upload one tiny file per pattern under test.
     for fname in ("A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
                   "fooxxx.bam", "A.bar.gz", "x13year23.sam"):
         dxpy.upload_string("1234", name=fname, wait_on_close=True)
     self.run_test_app_locally(
         'prefix_patterns',
         ['-iseq1=A.bar', '-iseq2=A.json.dot.bar', '-igene=A.vcf.pam',
          '-imap=A.foo.bar', '-imap2=fooxxx.bam', '-imap3=A.bar',
          '-imap4=A.bar.gz', '-imulti=x13year23.sam'])
コード例 #11
0
 def test_prefix_patterns(self):
     """Check that the bash prefix variable honors filename patterns."""
     contents = "1234"
     names = ["A.bar", "A.json.dot.bar", "A.vcf.pam", "A.foo.bar",
              "fooxxx.bam", "A.bar.gz", "x13year23.sam"]
     # Seed the project with a file matching each pattern.
     for fname in names:
         dxpy.upload_string(contents, name=fname, wait_on_close=True)
     app_args = ['-iseq1=A.bar',
                 '-iseq2=A.json.dot.bar',
                 '-igene=A.vcf.pam',
                 '-imap=A.foo.bar',
                 '-imap2=fooxxx.bam',
                 '-imap3=A.bar',
                 '-imap4=A.bar.gz',
                 '-imulti=x13year23.sam']
     self.run_test_app_locally('prefix_patterns', app_args)
コード例 #12
0
ファイル: test_batch.py プロジェクト: xquek/dx-toolkit
    def test_file_arrays(self):
        """End-to-end check that a batch table can populate an array:file
        applet input with a single-element file array.
        """
        # Create file with junk content
        dxfile = dxpy.upload_string("xxyyzz",
                                    project=self.project,
                                    wait_on_close=True,
                                    name="bubbles")

        # write python code into code.py file
        # The generated applet simply echoes its 'plant' input as output.
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        # NOTE(review): write_mode and delimiter are defined elsewhere in
        # this file (presumably 'w'/'wb' and ',' or '\t') — confirm there.
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['plant'] = job_inputs['plant']\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table. These are arrays with a single element.
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "plant", "plant ID"]
            writer.writerow(header)
            writer.writerow(
                ["SRR_1", "[bubbles]", "[" + dxfile.get_id() + "]"])

        # Build a throwaway applet declaring 'plant' as array:file for
        # both input and output.
        applet = dxpy.api.applet_new({
            "name":
            "ident_file_array",
            "project":
            self.project,
            "dxapi":
            "1.0.0",
            "inputSpec": [{
                "name": "plant",
                "class": "array:file"
            }],
            "outputSpec": [{
                "name": "plant",
                "class": "array:file"
            }],
            "runSpec": {
                "interpreter": "python2.7",
                "code": code,
                "distribution": "Ubuntu",
                "release": "14.04"
            }
        })
        # Launch one batch job from the table, then verify the job received
        # the file array as a list with a single dnanexus link.
        job_id = run("dx run {} --batch-tsv={} --yes --brief".format(
            applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'ident_file_array')
        self.assertEqual(job_desc["input"],
                         {"plant": [{
                             "$dnanexus_link": dxfile.get_id()
                         }]})
コード例 #13
0
    def test_basic(self):
        """Run the 'basic' applet end to end inside a throwaway project."""
        with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Seed the project with two small input files.
            for name, data in (("A.txt", "1234\n"), ("B.txt", "ABCD\n")):
                dxpy.upload_string(data, project=dxproj.get_id(), name=name)

            # Build the applet, patching in the bash helpers from the
            # local checkout.
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'basic'), dxproj.get_id())

            # Launch the applet and watch the job until completion.
            run(['dx', 'run', '--yes', '--watch', applet_id,
                 '-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt',
                 '-iref=B.txt', "-ivalue=5", "-iages=4"], env=env)
コード例 #14
0
    def test_basic(self):
        """Build and run the 'basic' test applet in a temporary project."""
        proj_name = 'TestDXBashHelpers.test_app1 temporary project'
        with temporary_project(proj_name) as dxproj:
            proj_id = dxproj.get_id()
            env = update_environ(DX_PROJECT_CONTEXT_ID=proj_id)

            # Upload some files for use by the applet
            dxpy.upload_string("1234\n", project=proj_id, name="A.txt")
            dxpy.upload_string("ABCD\n", project=proj_id, name="B.txt")

            # Build the applet with the bash helpers from the local checkout.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'basic'), proj_id)

            # Run the applet
            cmd_args = ['dx', 'run', '--yes', '--watch', applet_id]
            cmd_args.extend(['-iseq1=A.txt', '-iseq2=B.txt', '-iref=A.txt',
                             '-iref=B.txt', "-ivalue=5", "-iages=4"])
            run(cmd_args, env=env)
コード例 #15
0
ファイル: test_dxpy.py プロジェクト: jameslz/dx-toolkit
    def test_upload_string_dxfile(self):
        """Round-trip a string through upload_string / download_dxfile."""
        self.dxfile = dxpy.upload_string(self.foo_str)

        # Block until the platform closes the new file, then sanity-check.
        self.dxfile.wait_on_close()
        self.assertTrue(self.dxfile.closed())

        # Download it again and compare byte-for-byte with the fixture file.
        dxpy.download_dxfile(self.dxfile.get_id(), self.new_file.name)
        self.assertTrue(filecmp.cmp(self.foo_file.name, self.new_file.name))
コード例 #16
0
    def test_parseq(self):
        """ Tests the parallel/sequential variations """
        with temporary_project("TestDXBashHelpers.test_app1 temporary project") as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Two small input files for the applet to consume.
            for fname, contents in (("A.txt", "1234\n"), ("B.txt", "ABCD\n")):
                dxpy.upload_string(contents, project=dxproj.get_id(), name=fname)

            # Build the applet with the locally checked-out bash helpers.
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, "parseq"), dxproj.get_id())

            # Run the applet
            run(["dx", "run", "--yes", "--watch", applet_id,
                 "-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt",
                 "-iref=B.txt"], env=env)
コード例 #17
0
def upload_html(destination, html, name=None):
    """Upload the rendered HTML report to a hidden file on the server.

    Args:
        destination: destination string understood by parse_destination
            (project plus folder path plus optional name).
        html: the HTML document body to upload.
        name: optional file name; falsy values let the platform choose.

    Returns:
        The ID of the uploaded file. On API failure, parser.error() is
        invoked with a diagnostic message instead.
    """
    # Only the project and folder are used here; the name component
    # returned by parse_destination is intentionally ignored.
    project, path, _ = parse_destination(destination)
    try:
        dxfile = dxpy.upload_string(html, media_type="text/html",
                                    project=project, folder=path,
                                    hidden=True, name=name or None)
        return dxfile.get_id()
    except dxpy.DXAPIError as ex:
        parser.error("Could not upload HTML report to DNAnexus server! ({ex})".format(ex=ex))
コード例 #18
0
def upload_html(destination, html, name=None):
    """Upload the rendered HTML report to a hidden file on the server.

    Args:
        destination: destination string understood by parse_destination
            (project plus folder path plus optional name).
        html: the HTML document body to upload.
        name: optional file name; falsy values let the platform choose.

    Returns:
        The ID of the uploaded file. On API failure, parser.error() is
        invoked with a diagnostic message instead.
    """
    # Only the project and folder are used here; the name component
    # returned by parse_destination is intentionally ignored.
    project, path, _ = parse_destination(destination)
    try:
        dxfile = dxpy.upload_string(html, media_type="text/html",
                                    project=project, folder=path,
                                    hidden=True, name=name or None)
        return dxfile.get_id()
    except dxpy.DXAPIError as ex:
        parser.error("Could not upload HTML report to DNAnexus server! ({ex})".format(ex=ex))
コード例 #19
0
    def test_xattr_parameters(self):
        """Test dx-upload-all-outputs with filesystem metadata as properties."""
        with temporary_project(
                'TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Two small files the applet will download and re-upload.
            dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
            dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

            # Build the applet with the bash helpers from the local checkout.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'xattr_properties'), dxproj.get_id())

            # Run the applet
            cmd = ['dx', 'run', '--yes', '--watch', applet_id,
                   "-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt",
                   "-iref=B.txt"]
            run(cmd, env=env)
コード例 #20
0
    def test_vars(self):
        """  Quick test for the bash variables """
        with temporary_project("TestDXBashHelpers.test_app1 temporary project") as p:
            env = update_environ(DX_PROJECT_CONTEXT_ID=p.get_id())

            # Upload some files for use by the applet
            dxpy.upload_string("1234\n", project=p.get_id(), name="A.txt")

            # Build the applet with the locally checked-out bash helpers.
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, "vars"), p.get_id())

            # Inputs exercising every class: files, scalars, and arrays.
            applet_args = ["-iseq1=A.txt", "-iseq2=A.txt", "-igenes=A.txt",
                           "-igenes=A.txt", "-ii=5", "-ix=4.2", "-ib=true",
                           "-is=hello", "-iil=6", "-iil=7", "-iil=8",
                           "-ixl=3.3", "-ixl=4.4", "-ixl=5.0", "-ibl=true",
                           "-ibl=false", "-ibl=true", "-isl=hello",
                           "-isl=world", "-isl=next",
                           '-imisc={"hello": "world", "foo": true}']
            run(["dx", "run", "--yes", "--watch", applet_id] + applet_args,
                env=env)
コード例 #21
0
    def test_vars(self):
        """Quick smoke test for bash variable generation."""
        with temporary_project('TestDXBashHelpers.test_app1 temporary project') as p:
            proj_id = p.get_id()
            env = update_environ(DX_PROJECT_CONTEXT_ID=proj_id)

            # A single file backs every file-typed input below.
            dxpy.upload_string("1234\n", project=proj_id, name="A.txt")

            # Build the applet, patching in the bash helpers from the
            # local checkout.
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'vars'), proj_id)

            # Run the applet
            cmd_args = ['dx', 'run', '--yes', '--watch', applet_id]
            cmd_args += ['-iseq1=A.txt', '-iseq2=A.txt', '-igenes=A.txt',
                         '-igenes=A.txt', '-ii=5', '-ix=4.2', '-ib=true',
                         '-is=hello', '-iil=6', '-iil=7', '-iil=8',
                         '-ixl=3.3', '-ixl=4.4', '-ixl=5.0', '-ibl=true',
                         '-ibl=false', '-ibl=true', '-isl=hello',
                         '-isl=world', '-isl=next',
                         '-imisc={"hello": "world", "foo": true}']
            run(cmd_args, env=env)
コード例 #22
0
ファイル: test_batch.py プロジェクト: dnanexus/dx-toolkit
    def test_file_arrays(self):
        """End-to-end check that a batch table can populate an array:file
        applet input with a single-element file array.
        """
        # Create file with junk content
        dxfile = dxpy.upload_string("xxyyzz", project=self.project,
                                    wait_on_close=True, name="bubbles")

        # write python code into code.py file
        # The generated applet simply echoes its 'plant' input as output.
        tmp_path = tempfile.mkdtemp()
        code_path = os.path.join(tmp_path, 'code.py')
        # NOTE(review): write_mode and delimiter are defined elsewhere in
        # this file (presumably 'w'/'wb' and ',' or '\t') — confirm there.
        with open(code_path, write_mode) as f:
            f.write("@dxpy.entry_point('main')\n")
            f.write("def main(**job_inputs):\n")
            f.write("\toutput = {}\n")
            f.write("\toutput['plant'] = job_inputs['plant']\n")
            f.write("\treturn output\n")
            f.write("\n")
            f.write("dxpy.run()\n")
        with open(code_path, 'r') as f:
            code = f.read()

        # write arguments table. These are arrays with a single element.
        arg_table = os.path.join(tmp_path, 'table.csv')
        with open(arg_table, write_mode) as csvfile:
            writer = csv.writer(csvfile, delimiter=delimiter)
            header = ["batch ID", "plant", "plant ID"]
            writer.writerow(header)
            writer.writerow(["SRR_1",
                             "[bubbles]",
                             "[" + dxfile.get_id() + "]"
            ])

        # Build a throwaway applet declaring 'plant' as array:file for
        # both input and output.
        applet = dxpy.api.applet_new({
            "name": "ident_file_array",
            "project": self.project,
            "dxapi": "1.0.0",
            "inputSpec": [ { "name": "plant", "class": "array:file" } ],
            "outputSpec": [ { "name": "plant", "class": "array:file" } ],
            "runSpec": { "interpreter": "python2.7",
                         "code": code,
                         "distribution": "Ubuntu",
                         "release": "14.04" }
        })
        # Launch one batch job from the table, then verify the job received
        # the file array as a list with a single dnanexus link.
        job_id = run("dx run {} --batch-tsv={} --yes --brief"
                     .format(applet["id"], arg_table)).strip()
        job_desc = dxpy.api.job_describe(job_id)
        self.assertEqual(job_desc["executableName"], 'ident_file_array')
        self.assertEqual(job_desc["input"],
                         { "plant":
                           [{ "$dnanexus_link": dxfile.get_id() }]
                         })
コード例 #23
0
ファイル: test_dxclient.py プロジェクト: sakishum/dx-toolkit
def makeGenomeObject():
    """Create a minimal ContigSet record for testing and return its ID.

    NOTE: for these tests we don't upload a full sequence file (which
    would be huge, for hg19). Importers and exporters that need to
    look at the full sequence file can't be run on this test contigset.
    """
    # Hidden, empty placeholder standing in for the flat sequence data.
    sequence_file = dxpy.upload_string("", hidden=True)

    # Single-contig genome: chr1 with its hg19 length.
    genome_record = dxpy.new_dxrecord()
    details = {
        "flat_sequence_file": {"$dnanexus_link": sequence_file.get_id()},
        "contigs": {
            "offsets": [0],
            "names": ["chr1"],
            "sizes": [249250621]
        }
    }
    genome_record.set_details(details)
    genome_record.add_types(["ContigSet"])
    genome_record.close()

    # Ensure the placeholder file has finished closing before returning.
    sequence_file.wait_on_close()

    return genome_record.get_id()
 def test_parseq(self):
     """Tests the parallel/sequential variations."""
     # Two inputs: one each for seq1/seq2, both reused as refs.
     for fname, contents in (("A.txt", "1234"), ("B.txt", "ABCD")):
         dxpy.upload_string(contents, wait_on_close=True, name=fname)
     self.run_test_app_locally(
         'parseq', ["-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt",
                    "-iref=B.txt"])
コード例 #25
0
def main(folder_name, key_name, assembly, noupload, force, debug):

    #accessions bams contained within the folder named folder_name/bams

    #Requires
    #. directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
    #. basename contains one or more ENCFF numbers from which the bam is derived
    #. bam_filename.flagstat.qc exists
    #. raw bam flagstat file exists in folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc

    #if bam file's tags on DNAnexus already contains and ENCFF number, assume it's already accessioned and skip
    #create a fully qualified project:filename for submitted_file_name and calculate the file size
    #if an ENCFF objects exists with the same submitted_file_name, AND it has the same size, skip

    #**INFER the experiment accession number from the bam's containing folder
    #calculate the md5
    #find the raw bam's .flagstat.qc file and parse
    #find the bam's .flagstat.qc file and parse
    #**ASSUME all derived_from ENCFF's appear in the bam's filename
    #POST file object
    #Upload to AWS

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    if not folder_name.startswith('/'):
        folder_name = '/' + folder_name
    if not folder_name.endswith('/'):
        folder_name += '/'

    try:
        project = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)
        project_name = project.describe().get('name')
    except:
        logger.error("Failed to resolve proejct")
        project_name = ""

    bam_folder = folder_name + 'bams/'
    bams = dxpy.find_data_objects(classname="file",
                                  state="closed",
                                  name="*.bam",
                                  name_mode="glob",
                                  project=dxpy.PROJECT_CONTEXT_ID,
                                  folder=bam_folder,
                                  recurse=True,
                                  return_handler=True)

    authid, authpw, server = processkey(key_name)
    if not subprocess.call('which md5', shell=True):
        md5_command = 'md5 -q'
    elif not subprocess.call('which md5sum', shell=True):
        md5_command = 'md5sum'
    else:
        logger.error("Cannot find md5 or md5sum command")
        md5_command = ''

    file_mapping = []
    for bam in bams:
        already_accessioned = False
        for tag in bam.tags:
            m = re.search(r'(ENCFF\d{3}\D{3})|(TSTFF\D{6})', tag)
            if m:
                logger.info(
                    '%s appears to contain ENCODE accession number in tag %s ... skipping'
                    % (bam.name, m.group(0)))
                already_accessioned = True
                break
        if already_accessioned:
            continue
        bam_description = bam.describe()
        submitted_file_name = project_name + ':' + '/'.join(
            [bam.folder, bam.name])
        submitted_file_size = bam_description.get('size')
        url = urlparse.urljoin(
            server,
            'search/?type=file&submitted_file_name=%s&format=json&frame=object'
            % (submitted_file_name))
        r = encoded_get(url, authid, authpw)
        try:
            r.raise_for_status()
            if r.json()['@graph']:
                for duplicate_item in r.json()['@graph']:
                    if duplicate_item.get('status') == 'deleted':
                        logger.info(
                            "A potential duplicate file was found but its status=deleted ... proceeding"
                        )
                        duplicate_found = False
                    else:
                        logger.info("Found potential duplicate: %s" %
                                    (duplicate_item.get('accession')))
                        if submitted_file_size == duplicate_item.get(
                                'file_size'):
                            logger.info(
                                "%s %s: File sizes match, assuming duplicate."
                                % (str(submitted_file_size),
                                   duplicate_item.get('file_size')))
                            duplicate_found = True
                            break
                        else:
                            logger.info(
                                "%s %s: File sizes differ, assuming new file."
                                % (str(submitted_file_size),
                                   duplicate_item.get('file_size')))
                            duplicate_found = False
            else:
                logger.info("No duplicate ... proceeding")
                duplicate_found = False
        except:
            logger.warning('Duplicate accession check failed: %s %s' %
                           (r.status_code, r.reason))
            logger.debug(r.text)
            duplicate_found = False

        if duplicate_found:
            if force:
                logger.info(
                    "Duplicate detected, but force=true, so continuing")
            else:
                logger.info("Duplicate detected, skipping")
                continue

        try:
            bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.flagstat.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Flagstat file not found ... skipping")
            continue
            bamqc_fh = None

        raw_bams_folder = str(bam.folder).replace(
            '%sbams/' % (folder_name), '%sraw_bams/' % (folder_name), 1)
        try:
            raw_bamqc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.flagstat.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=raw_bams_folder,
                return_handler=True)
        except:
            logger.warning("Raw flagstat file not found ... skipping")
            continue
            raw_bamqc_fh = None

        try:
            dup_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.dup.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Picard duplicates QC file not found ... skipping")
            continue
            dup_qc_fh = None

        try:
            xcor_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.cc.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("Cross-correlation QC file not found ... skipping")
            continue
            xcor_qc_fh = None

        try:
            pbc_qc_fh = dxpy.find_one_data_object(
                classname="file",
                name='*.pbc.qc',
                name_mode="glob",
                project=dxpy.PROJECT_CONTEXT_ID,
                folder=bam.folder,
                return_handler=True)
        except:
            logger.warning("PBC QC file not found ... skipping")
            continue
            pbc_qc_fh = None

        experiment_accession = re.match('\S*(ENC\S{8})', bam.folder).group(1)
        logger.info("Downloading %s" % (bam.name))
        dxpy.download_dxfile(bam.get_id(), bam.name)
        md5_output = subprocess.check_output(' '.join([md5_command, bam.name]),
                                             shell=True)
        calculated_md5 = md5_output.partition(' ')[0].rstrip()
        encode_object = FILE_OBJ_TEMPLATE
        encode_object.update({'assembly': assembly})

        notes = {
            'filtered_qc': flagstat_parse(bamqc_fh),
            'qc': flagstat_parse(raw_bamqc_fh),
            'dup_qc': dup_parse(dup_qc_fh),
            'xcor_qc': xcor_parse(xcor_qc_fh),
            'pbc_qc': pbc_parse(pbc_qc_fh),
            'dx-id': bam_description.get('id'),
            'dx-createdBy': bam_description.get('createdBy')
        }
        encode_object.update({
            'dataset':
            experiment_accession,
            'notes':
            json.dumps(notes),
            'submitted_file_name':
            submitted_file_name,
            'derived_from':
            re.findall('(ENCFF\S{6})', bam.name),
            'file_size':
            submitted_file_size,
            'md5sum':
            calculated_md5
        })
        logger.info("Experiment accession: %s" % (experiment_accession))
        logger.debug("File metadata: %s" % (encode_object))

        url = urlparse.urljoin(server, 'files')
        r = encoded_post(url, authid, authpw, encode_object)
        try:
            r.raise_for_status()
            new_file_object = r.json()['@graph'][0]
            logger.info("New accession: %s" %
                        (new_file_object.get('accession')))
        except:
            logger.warning('POST file object failed: %s %s' %
                           (r.status_code, r.reason))
            logger.debug(r.text)
            new_file_object = {}
            if r.status_code == 409:
                try:  #cautiously add a tag with the existing accession number
                    if calculated_md5 in r.json().get('detail'):
                        url = urlparse.urljoin(
                            server,
                            '/search/?type=file&md5sum=%s' % (calculated_md5))
                        r = encoded_get(url, authid, authpw)
                        r.raise_for_status()
                        accessioned_file = r.json()['@graph'][0]
                        existing_accession = accessioned_file['accession']
                        bam.add_tags([existing_accession])
                        logger.info(
                            'Already accessioned.  Added %s to dxfile tags' %
                            (existing_accession))
                except:
                    logger.info(
                        'Conflict does not appear to be md5 ... continuing')
        if noupload:
            logger.info("--noupload so skipping upload")
            upload_returncode = -1
        else:
            if new_file_object:
                creds = new_file_object['upload_credentials']
                env = os.environ.copy()
                env.update({
                    'AWS_ACCESS_KEY_ID': creds['access_key'],
                    'AWS_SECRET_ACCESS_KEY': creds['secret_key'],
                    'AWS_SECURITY_TOKEN': creds['session_token'],
                })

                logger.info("Uploading file.")
                start = time.time()
                try:
                    subprocess.check_call([
                        'aws', 's3', 'cp', bam.name, creds['upload_url'],
                        '--quiet'
                    ],
                                          env=env)
                except subprocess.CalledProcessError as e:
                    # The aws command returns a non-zero exit code on error.
                    logger.error("Upload failed with exit code %d" %
                                 e.returncode)
                    upload_returncode = e.returncode
                else:
                    upload_returncode = 0
                    end = time.time()
                    duration = end - start
                    logger.info("Uploaded in %.2f seconds" % duration)
                    bam.add_tags([new_file_object.get('accession')])
            else:
                upload_returncode = -1

        out_string = '\t'.join([
            experiment_accession,
            encode_object.get('submitted_file_name'),
            new_file_object.get('accession') or '',
            str(upload_returncode),
            encode_object.get('notes')
        ])
        print out_string
        file_mapping.append(out_string)

        os.remove(bam.name)

    output_log_filename = time.strftime('%m%d%y%H%M') + '-accession_log.csv'
    out_fh = dxpy.upload_string('\n'.join(file_mapping),
                                name=output_log_filename,
                                media_type='text/csv')
    out_fh.close()

    output = {"file_mapping": file_mapping, "outfile": dxpy.dxlink(out_fh)}

    return output
 def test_sub_jobs(self):
     """Run the 'with-subjobs' bash applet locally against two small inputs."""
     # Upload the two fixture files the applet operates on.
     for fname, content in (("A.txt", "1234"), ("B.txt", "ABCD")):
         dxpy.upload_string(content, wait_on_close=True, name=fname)
     self.run_test_app_locally('with-subjobs',
                               ["-ifiles=A.txt", "-ifiles=B.txt"])
コード例 #27
0
    def test_sub_jobs(self):
        """Run the 'with-subjobs' applet end-to-end and verify its output files.

        Builds the applet (with the local bash helpers patched in), runs it
        on two small uploaded files, waits for the job and its sub-jobs to
        finish, then checks that both declared outputs keep their filenames
        and contain the expected data.
        """
        with temporary_project('TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Upload some files for use by the applet
            dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
            dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

            # Build the applet, patching in the bash helpers from the
            # local checkout
            applet_id = build_app_with_bash_helpers(os.path.join(TEST_APPS, 'with-subjobs'), dxproj.get_id())

            # Run the applet.
            # Since the job creates two sub-jobs, we need to be a bit more sophisticated
            # in order to wait for completion.
            applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
            cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
            cmd_args.extend(applet_args)
            job_id = run(cmd_args, env=env).strip()

            dxpy.DXJob(job_id).wait_on_done()

            # Assertions to make about the job's output after it is done running:
            # - *first_file* is a file named first_file.txt containing the string:
            #     "contents of first_file"
            # - *final_file* is a file named final_file.txt containing the
            #   concatenation of the two input files in *files*
            print("Test completed successfully, checking file content\n")

            job_handler = dxpy.get_handler(job_id)
            job_output = job_handler.output

            def strip_white_space(_str):
                # Collapse all whitespace so content comparison ignores layout.
                return ''.join(_str.split())

            def silent_file_remove(filename):
                # Remove a local file, ignoring the case where it is absent.
                try:
                    os.remove(filename)
                except OSError:
                    pass

            # The output should include two files; this section verifies that they
            # have the correct data.
            def check_file_content(out_param_name, out_filename, tmp_fname, str_content):
                """
                Download a file, read it from local disk, and verify that it has the correct contents
                """
                if out_param_name not in job_output:
                    # BUG FIX: the original raised a bare string, which is a
                    # TypeError at runtime; raise a real exception instead.
                    raise Exception("Error: key {} does not appear in the job output".format(out_param_name))
                dxlink = job_output[out_param_name]

                # check that the filename gets preserved
                trg_fname = dxpy.get_handler(dxlink).name
                self.assertEqual(trg_fname, out_filename)

                # download the file and check the contents
                silent_file_remove(tmp_fname)
                dxpy.download_dxfile(dxlink, tmp_fname)
                with open(tmp_fname, "r") as fh:
                    data = fh.read()
                    print(data)
                    if strip_white_space(data) != strip_white_space(str_content):
                        raise Exception("contents of file {} do not match".format(out_param_name))
                silent_file_remove(tmp_fname)

            check_file_content('first_file', 'first_file.txt', "f1.txt", "contents of first_file")
            check_file_content('final_file', 'final_file.txt', "f2.txt", "1234ABCD")
コード例 #28
0
 def test_parseq(self):
     """Run the 'parseq' applet locally to exercise the parallel/sequential variations."""
     # Upload the two fixture files used both as seq and ref inputs.
     for fname, content in (("A.txt", "1234"), ("B.txt", "ABCD")):
         dxpy.upload_string(content, wait_on_close=True, name=fname)
     self.run_test_app_locally(
         'parseq',
         ["-iseq1=A.txt", "-iseq2=B.txt", "-iref=A.txt", "-iref=B.txt"])
    def test_file_download(self):
        '''
        This test assumes a well-formed input spec and tests that the
        templates created automatically download the files only if
        they are available and does something sensible otherwise.
        '''
        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project
        # Make a couple files for testing
        dxfile = dxpy.upload_string("foo", name="afile")
        dxpy.upload_string("foobar", name="otherfile")

        dxapp_json = {
            "name": "files",
            "title": "files",
            "summary": "files",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [
                {
                    "name": "required_file",
                    "class": "file",
                    "optional": False
                },
                {
                    "name": "optional_file",
                    "class": "file",
                    "optional": True
                },
                {
                    "name": "default_file",
                    "class": "file",
                    "optional": True,
                    "default": {"$dnanexus_link": dxfile.get_id()}
                },
                {
                    "name": "required_file_array",
                    "class": "array:file",
                    "optional": False
                },
                {
                    "name": "optional_file_array",
                    "class": "array:file",
                    "optional": True
                }
            ],
            "outputSpec": []
        }

        def run_and_list_workdir(cmd_args):
            # Run the app locally, assert it finished cleanly, and return
            # (local_workdir, file_list) for the job's local workspace.
            output = subprocess.check_output(cmd_args)
            print(output)
            self.assertIn("App finished successfully", output)
            self.assertIn("Local job workspaces can be found in:", output)
            local_workdir = output.split("Local job workspaces can be found in:")[1].strip()
            file_list = os.listdir(os.path.join(local_workdir, 'localjob-0'))
            return local_workdir, file_list

        def downloaded_size(local_workdir, filename):
            # Size on disk of a downloaded input in the local job workspace.
            return os.path.getsize(os.path.join(local_workdir, 'localjob-0', filename))

        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

            # Test with bare-minimum of inputs
            local_workdir, file_list = run_and_list_workdir(
                ['dx-run-app-locally', appdir, '-irequired_file=afile',
                 '-irequired_file_array=afile'])
            self.assertIn("required_file", file_list)
            self.assertEqual(downloaded_size(local_workdir, 'required_file'), 3)
            self.assertNotIn("optional_file", file_list)
            self.assertIn("default_file", file_list)
            self.assertEqual(downloaded_size(local_workdir, 'default_file'), 3)

            # Test with giving an input to everything
            local_workdir, file_list = run_and_list_workdir(
                ['dx-run-app-locally', appdir,
                 '-irequired_file=afile',
                 '-ioptional_file=afile',
                 '-idefault_file=otherfile',
                 '-irequired_file_array=afile',
                 '-ioptional_file_array=afile'])
            self.assertIn("required_file", file_list)
            self.assertEqual(downloaded_size(local_workdir, 'required_file'), 3)
            self.assertIn("optional_file", file_list)
            self.assertEqual(downloaded_size(local_workdir, 'optional_file'), 3)
            self.assertIn("default_file", file_list)
            self.assertEqual(downloaded_size(local_workdir, 'default_file'), 6)
            concatenated_file_list = ",".join(file_list)
            # Different languages have different naming conventions
            # right now, so just look for the array variable name
            self.assertIn("required_file_array", concatenated_file_list)
            self.assertIn("optional_file_array", concatenated_file_list)
    def test_var_initialization(self):
        '''
        This test assumes a well-formed input spec and mostly just
        tests that everything compiles and the variable initialization
        code does not throw any errors.
        '''

        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project

        # Make some data objects for input; these are looked up by name
        # ("anapplet", "afile", ...) via the command-line args below, so the
        # handles for the applet and file are not kept.
        dxpy.api.applet_new({"project": dxpy.WORKSPACE_ID,
                             "name": "anapplet",
                             "dxapi": "1.0.0",
                             "runSpec": {"code": "", "interpreter": "bash"}})
        dxpy.upload_string("foo", name="afile")
        dxgtable = dxpy.new_dxgtable(columns=[{"name": "int_col", "type": "int"}], name="agtable")
        dxgtable.add_rows([[3], [0]])
        dxgtable.close(block=True)
        dxrecord = dxpy.new_dxrecord(name="arecord")
        dxrecord.close()

        dxapp_json = {
            "name": "all_vars",
            "title": "all_vars",
            "summary": "all_vars",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [],
            "outputSpec": []
        }

        classes = ['applet', 'record', 'file', 'gtable',
                   'boolean', 'int', 'float', 'string', 'hash',
                   'array:applet', 'array:record', 'array:file', 'array:gtable',
                   'array:boolean', 'array:int', 'array:float', 'array:string']

        for classname in classes:
            dxapp_json['inputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                            "class": classname,
                                            "optional": False})
            # Note: marking outputs as optional so that empty arrays
            # will be acceptable; keeping names the same (as required)
            # in order to allow pass-through from input variables
            dxapp_json['outputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                             "class": classname,
                                             "optional": True})
            dxapp_json['inputSpec'].append({"name": "optional_" + classname.replace(":", "_"),
                                            "class": classname,
                                            "optional": True})

        cmdline_args = ['-irequired_applet=anapplet',
                        '-irequired_array_applet=anapplet',
                        '-irequired_record=arecord',
                        '-irequired_array_record=arecord',
                        '-irequired_file=afile',
                        '-irequired_array_file=afile',
                        '-irequired_gtable=agtable',
                        '-irequired_array_gtable=agtable',
                        '-irequired_boolean=true',
                        '-irequired_array_boolean=true',
                        '-irequired_array_boolean=false',
                        '-irequired_int=32',
                        '-irequired_array_int=42',
                        '-irequired_float=3.4',
                        '-irequired_array_float=.42',
                        '-irequired_string=foo',
                        '-irequired_array_string=bar',
                        '-irequired_hash={"foo":"bar"}']
        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)
            # Test with bare-minimum of inputs
            output = subprocess.check_output(['dx-run-app-locally', appdir] + cmdline_args)
            print(output)
            # Verify array is printed total 3 times: once in each of input,
            # logs, and final output.  (Raw string avoids the invalid \[ escape.)
            self.assertEqual(len(re.findall(r"required_array_boolean = \[ true, false ]", output)), 3)
            self.assertIn("App finished successfully", output)

            # See PTFM-13697 for CentOS 5 details
            if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
                # Now actually make it an applet and run it
                applet_name = dxapp_json['name'] + '-' + lang
                subprocess.check_output(['dx', 'build', appdir, '--destination', applet_name])
                subprocess.check_output(['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
コード例 #31
0
def _find_qc_file(glob_pattern, folder, description):
    """Find exactly one QC file matching *glob_pattern* in *folder*.

    Returns the DXFile handler, or None (after logging a warning naming
    *description*) when the lookup fails.
    """
    try:
        return dxpy.find_one_data_object(
            classname="file",
            name=glob_pattern,
            name_mode="glob",
            project=dxpy.PROJECT_CONTEXT_ID,
            folder=folder,
            return_handler=True,
        )
    except Exception:
        logger.warning("%s not found ... skipping" % (description,))
        return None


def main(folder_name, key_name, assembly, noupload, force, debug):
    """Accession BAM files contained within folder_name/bams.

    Requires:
    . directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
    . basename contains one or more ENCFF numbers from which the bam is derived
    . bam_filename.flagstat.qc exists
    . raw bam flagstat file exists in
      folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc

    For each bam:
    - if its DNAnexus tags already contain an ENCFF number, assume it is
      already accessioned and skip
    - build a fully qualified project:filename for submitted_file_name and
      compute the file size; if an ENCFF object exists with the same
      submitted_file_name AND the same size, skip (unless force)
    - **INFER the experiment accession number from the bam's containing folder
    - calculate the md5 and parse the bam's and raw bam's QC files
    - **ASSUME all derived_from ENCFF's appear in the bam's filename
    - POST the file object and upload to AWS (unless noupload)

    Returns a dict with the accession-log lines and a dxlink to the
    uploaded log file.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # Normalize the folder to the canonical "/.../" form.
    if not folder_name.startswith("/"):
        folder_name = "/" + folder_name
    if not folder_name.endswith("/"):
        folder_name += "/"

    try:
        project = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)
        project_name = project.describe().get("name")
    except Exception:
        logger.error("Failed to resolve project")
        project_name = ""

    bam_folder = folder_name + "bams/"
    bams = dxpy.find_data_objects(
        classname="file",
        state="closed",
        name="*.bam",
        name_mode="glob",
        project=dxpy.PROJECT_CONTEXT_ID,
        folder=bam_folder,
        recurse=True,
        return_handler=True,
    )

    authid, authpw, server = processkey(key_name)

    # Pick whichever md5 tool is on PATH ("which" exits 0 when found).
    if not subprocess.call("which md5", shell=True):
        md5_command = "md5 -q"
    elif not subprocess.call("which md5sum", shell=True):
        md5_command = "md5sum"
    else:
        logger.error("Cannot find md5 or md5sum command")
        md5_command = ""

    file_mapping = []
    for bam in bams:
        # Skip files already tagged with an ENCODE (or test) accession.
        already_accessioned = False
        for tag in bam.tags:
            m = re.search(r"(ENCFF\d{3}\D{3})|(TSTFF\D{6})", tag)
            if m:
                logger.info(
                    "%s appears to contain ENCODE accession number in tag %s ... skipping"
                    % (bam.name, m.group(0))
                )
                already_accessioned = True
                break
        if already_accessioned:
            continue

        bam_description = bam.describe()
        submitted_file_name = project_name + ":" + "/".join([bam.folder, bam.name])
        submitted_file_size = bam_description.get("size")

        # Duplicate check: an existing (non-deleted) portal file with the
        # same submitted_file_name and size is assumed to be this file.
        url = urlparse.urljoin(
            server,
            "search/?type=file&submitted_file_name=%s&format=json&frame=object"
            % (submitted_file_name,),
        )
        r = encoded_get(url, authid, authpw)
        try:
            r.raise_for_status()
            if r.json()["@graph"]:
                for duplicate_item in r.json()["@graph"]:
                    if duplicate_item.get("status") == "deleted":
                        logger.info("A potential duplicate file was found but its status=deleted ... proceeding")
                        duplicate_found = False
                    else:
                        logger.info("Found potential duplicate: %s" % (duplicate_item.get("accession")))
                        if submitted_file_size == duplicate_item.get("file_size"):
                            logger.info(
                                "%s %s: File sizes match, assuming duplicate."
                                % (str(submitted_file_size), duplicate_item.get("file_size"))
                            )
                            duplicate_found = True
                            break
                        else:
                            logger.info(
                                "%s %s: File sizes differ, assuming new file."
                                % (str(submitted_file_size), duplicate_item.get("file_size"))
                            )
                            duplicate_found = False
            else:
                logger.info("No duplicate ... proceeding")
                duplicate_found = False
        except Exception:
            logger.warning("Duplicate accession check failed: %s %s" % (r.status_code, r.reason))
            logger.debug(r.text)
            duplicate_found = False

        if duplicate_found:
            if force:
                logger.info("Duplicate detected, but force=true, so continuing")
            else:
                logger.info("Duplicate detected, skipping")
                continue

        # Collect the QC files that accompany the bam; each is required,
        # so a missing one skips this bam.  (The original code had
        # unreachable "fh = None" lines after each continue; removed.)
        bamqc_fh = _find_qc_file("*.flagstat.qc", bam.folder, "Flagstat file")
        if bamqc_fh is None:
            continue

        raw_bams_folder = str(bam.folder).replace("%sbams/" % (folder_name), "%sraw_bams/" % (folder_name), 1)
        raw_bamqc_fh = _find_qc_file("*.flagstat.qc", raw_bams_folder, "Raw flagstat file")
        if raw_bamqc_fh is None:
            continue

        dup_qc_fh = _find_qc_file("*.dup.qc", bam.folder, "Picard duplicates QC file")
        if dup_qc_fh is None:
            continue

        xcor_qc_fh = _find_qc_file("*.cc.qc", bam.folder, "Cross-correlation QC file")
        if xcor_qc_fh is None:
            continue

        pbc_qc_fh = _find_qc_file("*.pbc.qc", bam.folder, "PBC QC file")
        if pbc_qc_fh is None:
            continue

        # Experiment accession is inferred from the containing folder name.
        experiment_accession = re.match(r"\S*(ENC\S{8})", bam.folder).group(1)
        logger.info("Downloading %s" % (bam.name))
        dxpy.download_dxfile(bam.get_id(), bam.name)
        # Invoke the md5 tool without a shell so unusual filenames are safe.
        md5_output = subprocess.check_output(md5_command.split() + [bam.name])
        calculated_md5 = md5_output.partition(" ")[0].rstrip()
        # Copy the template so the module-level FILE_OBJ_TEMPLATE is not
        # mutated across iterations (the original aliased it directly).
        encode_object = dict(FILE_OBJ_TEMPLATE)
        encode_object.update({"assembly": assembly})

        notes = {
            "filtered_qc": flagstat_parse(bamqc_fh),
            "qc": flagstat_parse(raw_bamqc_fh),
            "dup_qc": dup_parse(dup_qc_fh),
            "xcor_qc": xcor_parse(xcor_qc_fh),
            "pbc_qc": pbc_parse(pbc_qc_fh),
            "dx-id": bam_description.get("id"),
            "dx-createdBy": bam_description.get("createdBy"),
        }
        encode_object.update(
            {
                "dataset": experiment_accession,
                "notes": json.dumps(notes),
                "submitted_file_name": submitted_file_name,
                "derived_from": re.findall(r"(ENCFF\S{6})", bam.name),
                "file_size": submitted_file_size,
                "md5sum": calculated_md5,
            }
        )
        logger.info("Experiment accession: %s" % (experiment_accession))
        logger.debug("File metadata: %s" % (encode_object))

        # POST the new file object to the portal.
        url = urlparse.urljoin(server, "files")
        r = encoded_post(url, authid, authpw, encode_object)
        try:
            r.raise_for_status()
            new_file_object = r.json()["@graph"][0]
            logger.info("New accession: %s" % (new_file_object.get("accession")))
        except Exception:
            logger.warning("POST file object failed: %s %s" % (r.status_code, r.reason))
            logger.debug(r.text)
            new_file_object = {}
            if r.status_code == 409:
                # A 409 whose detail contains our md5 means the file is
                # already accessioned; cautiously add a tag with the
                # existing accession number.
                try:
                    if calculated_md5 in r.json().get("detail"):
                        url = urlparse.urljoin(server, "/search/?type=file&md5sum=%s" % (calculated_md5))
                        r = encoded_get(url, authid, authpw)
                        r.raise_for_status()
                        accessioned_file = r.json()["@graph"][0]
                        existing_accession = accessioned_file["accession"]
                        bam.add_tags([existing_accession])
                        logger.info("Already accessioned.  Added %s to dxfile tags" % (existing_accession))
                except Exception:
                    logger.info("Conflict does not appear to be md5 ... continuing")

        if noupload:
            logger.info("--noupload so skipping upload")
            upload_returncode = -1
        elif new_file_object:
            creds = new_file_object["upload_credentials"]
            env = os.environ.copy()
            env.update(
                {
                    "AWS_ACCESS_KEY_ID": creds["access_key"],
                    "AWS_SECRET_ACCESS_KEY": creds["secret_key"],
                    "AWS_SECURITY_TOKEN": creds["session_token"],
                }
            )

            logger.info("Uploading file.")
            start = time.time()
            try:
                subprocess.check_call(["aws", "s3", "cp", bam.name, creds["upload_url"], "--quiet"], env=env)
            except subprocess.CalledProcessError as e:
                # The aws command returns a non-zero exit code on error.
                logger.error("Upload failed with exit code %d" % e.returncode)
                upload_returncode = e.returncode
            else:
                upload_returncode = 0
                logger.info("Uploaded in %.2f seconds" % (time.time() - start))
                bam.add_tags([new_file_object.get("accession")])
        else:
            upload_returncode = -1

        out_string = "\t".join(
            [
                experiment_accession,
                encode_object.get("submitted_file_name"),
                new_file_object.get("accession") or "",
                str(upload_returncode),
                encode_object.get("notes"),
            ]
        )
        print(out_string)
        file_mapping.append(out_string)

        os.remove(bam.name)

    # Upload the accumulated accession log back to the project.
    output_log_filename = time.strftime("%m%d%y%H%M") + "-accession_log.csv"
    out_fh = dxpy.upload_string("\n".join(file_mapping), name=output_log_filename, media_type="text/csv")
    out_fh.close()

    return {"file_mapping": file_mapping, "outfile": dxpy.dxlink(out_fh)}
コード例 #32
0
    def test_sub_jobs(self):
        """Run the 'with-subjobs' applet end-to-end and verify its output files.

        Builds the applet (with the local bash helpers patched in), runs it
        on two small uploaded files, waits for the job and its sub-jobs to
        finish, then checks that both declared outputs keep their filenames
        and contain the expected data.
        """
        with temporary_project(
                'TestDXBashHelpers.test_app1 temporary project') as dxproj:
            env = update_environ(DX_PROJECT_CONTEXT_ID=dxproj.get_id())

            # Upload some files for use by the applet
            dxpy.upload_string("1234\n", project=dxproj.get_id(), name="A.txt")
            dxpy.upload_string("ABCD\n", project=dxproj.get_id(), name="B.txt")

            # Build the applet, patching in the bash helpers from the
            # local checkout
            applet_id = build_app_with_bash_helpers(
                os.path.join(TEST_APPS, 'with-subjobs'), dxproj.get_id())
            # Run the applet.
            # Since the job creates two sub-jobs, we need to be a bit more sophisticated
            # in order to wait for completion.
            applet_args = ["-ifiles=A.txt", "-ifiles=B.txt"]
            cmd_args = ['dx', 'run', '--yes', '--brief', applet_id]
            cmd_args.extend(applet_args)
            job_id = run(cmd_args, env=env).strip()

            dxpy.DXJob(job_id).wait_on_done()

            # Assertions to make about the job's output after it is done running:
            # - *first_file* is a file named first_file.txt containing the string:
            #     "contents of first_file"
            # - *final_file* is a file named final_file.txt containing the
            #   concatenation of the two input files in *files*
            print("Test completed successfully, checking file content\n")

            job_handler = dxpy.get_handler(job_id)
            job_output = job_handler.output

            def strip_white_space(_str):
                # Collapse all whitespace so content comparison ignores layout.
                return ''.join(_str.split())

            def silent_file_remove(filename):
                # Remove a local file, ignoring the case where it is absent.
                try:
                    os.remove(filename)
                except OSError:
                    pass

            # The output should include two files; this section verifies that
            # they have the correct data.
            def check_file_content(out_param_name, out_filename, tmp_fname,
                                   str_content):
                """
                Download a file, read it from local disk, and verify that it has the correct contents
                """
                if out_param_name not in job_output:
                    # BUG FIX: the original raised a bare string, which is a
                    # TypeError at runtime; raise a real exception instead.
                    raise Exception(
                        "Error: key {} does not appear in the job output".format(
                            out_param_name))
                dxlink = job_output[out_param_name]

                # check that the filename gets preserved
                trg_fname = dxpy.get_handler(dxlink).name
                self.assertEqual(trg_fname, out_filename)

                # download the file and check the contents
                silent_file_remove(tmp_fname)
                dxpy.download_dxfile(dxlink, tmp_fname)
                with open(tmp_fname, "r") as fh:
                    data = fh.read()
                    print(data)
                    if (strip_white_space(data)
                            != strip_white_space(str_content)):
                        raise Exception(
                            "contents of file {} do not match".format(
                                out_param_name))
                silent_file_remove(tmp_fname)

            check_file_content('first_file', 'first_file.txt', "f1.txt",
                               "contents of first_file")
            check_file_content('final_file', 'final_file.txt', "f2.txt",
                               "1234ABCD")
Code example #33
0
    def test_var_initialization(self):
        """Build an applet whose input/output spec covers every supported
        class, then run it with one value per required input.

        This assumes a well-formed input spec and mostly checks that the
        generated variable-initialization code compiles and runs without
        errors.
        """
        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project

        # Make one data object of each non-scalar class to use as input
        dxpy.api.applet_new({
            "project": dxpy.WORKSPACE_ID,
            "name": "anapplet",
            "dxapi": "1.0.0",
            "runSpec": {
                "code": "",
                "interpreter": "bash",
                "distribution": "Ubuntu",
                "release": "14.04"
            }
        })['id']
        dxpy.upload_string("foo", name="afile")
        dxrecord = dxpy.new_dxrecord(name="arecord")
        dxrecord.close()

        dxapp_json = {
            "name": "all_vars",
            "title": "all_vars",
            "summary": "all_vars",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [],
            "outputSpec": []
        }

        # Every class that can appear in a spec: scalars (plus hash, which
        # has no array form) followed by the array variants.
        base_classes = ['applet', 'record', 'file', 'boolean', 'int',
                        'float', 'string']
        all_classes = (base_classes + ['hash']
                       + ['array:' + c for c in base_classes])

        for classname in all_classes:
            var_suffix = classname.replace(":", "_")
            dxapp_json['inputSpec'].append({
                "name": "required_" + var_suffix,
                "class": classname,
                "optional": False
            })
            # Note: marking outputs as optional so that empty arrays
            # will be acceptable; keeping names the same (as required)
            # in order to allow pass-through from input variables
            dxapp_json['outputSpec'].append({
                "name": "required_" + var_suffix,
                "class": classname,
                "optional": True
            })
            dxapp_json['inputSpec'].append({
                "name": "optional_" + var_suffix,
                "class": classname,
                "optional": True
            })

        cmdline_args = [
            '-irequired_applet=anapplet', '-irequired_array_applet=anapplet',
            '-irequired_record=arecord', '-irequired_array_record=arecord',
            '-irequired_file=afile', '-irequired_array_file=afile',
            '-irequired_boolean=true', '-irequired_array_boolean=true',
            '-irequired_array_boolean=false', '-irequired_int=32',
            '-irequired_array_int=42', '-irequired_float=3.4',
            '-irequired_array_float=.42', '-irequired_string=foo',
            '-irequired_array_string=bar', '-irequired_hash={"foo":"bar"}'
        ]
        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

            # See PTFM-13697 for CentOS 5 details
            if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
                # Now actually make it an applet and run it
                applet_name = dxapp_json['name'] + '-' + lang
                subprocess.check_output(
                    ['dx', 'build', appdir, '--destination', applet_name])
                subprocess.check_output(
                    ['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
Code example #34
0
    def test_file_download(self):
        """Check the auto-generated file-download templates.

        This assumes a well-formed input spec and tests that the templates
        created automatically download the files only if they are available
        and do something sensible otherwise.
        """
        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project
        # Two fixtures: "afile" holds 3 bytes, "otherfile" holds 6 bytes
        dxfile = dxpy.upload_string("foo", name="afile")
        dxpy.upload_string("foobar", name="otherfile")

        dxapp_json = {
            "name": "files",
            "title": "files",
            "summary": "files",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [
                {"name": "required_file", "class": "file",
                 "optional": False},
                {"name": "optional_file", "class": "file",
                 "optional": True},
                {"name": "default_file", "class": "file", "optional": True,
                 "default": {"$dnanexus_link": dxfile.get_id()}},
                {"name": "required_file_array", "class": "array:file",
                 "optional": False},
                {"name": "optional_file_array", "class": "array:file",
                 "optional": True},
            ],
            "outputSpec": []
        }

        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

            def run_local(input_args):
                # Run the app locally with the given -i arguments, assert
                # success, and return the job-0 workspace directory.
                out = subprocess.check_output(
                    ['dx-run-app-locally', appdir] + input_args)
                print(out)
                self.assertIn("App finished successfully", out)
                self.assertIn("Local job workspaces can be found in:", out)
                workdir = out.split(
                    "Local job workspaces can be found in:")[1].strip()
                return os.path.join(workdir, 'localjob-0')

            # Test with bare-minimum of inputs
            job_dir = run_local(['-irequired_file=afile',
                                 '-irequired_file_array=afile'])
            file_list = os.listdir(job_dir)
            self.assertIn("required_file", file_list)
            self.assertEqual(
                os.path.getsize(os.path.join(job_dir, 'required_file')), 3)
            # No value given and no default: must not be downloaded
            self.assertNotIn("optional_file", file_list)
            self.assertIn("default_file", file_list)
            self.assertEqual(
                os.path.getsize(os.path.join(job_dir, 'default_file')), 3)

            # Test with giving an input to everything
            job_dir = run_local(['-irequired_file=afile',
                                 '-ioptional_file=afile',
                                 '-idefault_file=otherfile',
                                 '-irequired_file_array=afile',
                                 '-ioptional_file_array=afile'])
            file_list = os.listdir(job_dir)
            for fname, size in (("required_file", 3),
                                ("optional_file", 3),
                                ("default_file", 6)):
                self.assertIn(fname, file_list)
                self.assertEqual(
                    os.path.getsize(os.path.join(job_dir, fname)), size)
            # Different languages have different naming conventions
            # right now, so just look for the array variable name
            concatenated_file_list = ",".join(file_list)
            self.assertIn("required_file_array", concatenated_file_list)
            self.assertIn("optional_file_array", concatenated_file_list)
Code example #35
0
 def test_sub_jobs(self):
     """Run the 'with-subjobs' bash applet, which spawns sub-jobs, on
     two small uploaded input files."""
     for fname, contents in (("A.txt", "1234"), ("B.txt", "ABCD")):
         dxpy.upload_string(contents, wait_on_close=True, name=fname)
     self.run_test_app_locally('with-subjobs',
                               ["-ifiles=A.txt", "-ifiles=B.txt"])