Example #1
0
    def setUp(self):
        """Create temp projects, an identity applet, a two-stage workflow record, and a closed record."""
        setUpTempProjects(self)
        # Applet that simply echoes its input record back as its output.
        self.dxapplet = dxpy.DXApplet()
        self.dxapplet.new(name="identity-record",
                          dxapi="1.04",
                          inputSpec=[{"name": "record", "class": "record"}
                                     ],
                          outputSpec=[{"name": "record", "class": "record"}],
                          runSpec={"code": '''
@dxpy.entry_point('main')
def main(record):
    return {'record': record}''',
                                   "interpreter": "python2.7"})
        # Legacy record-based workflow: stage1's input is wired to stage0's output.
        dxrecord = dxpy.new_dxrecord(name='workflowname',
                                     details={"stages": [{"job": None,
                                                          "inputs": {},
                                                          "app": dxpy.dxlink(self.dxapplet),
                                                          "id": "stage0-id"
                                                          },
                                                         {"job": None,
                                                          "inputs": {"record": {"connectedTo": {"output": "record",
                                                                                                "stage": "stage0-id"}
                                                                                }
                                                                     },
                                                          "app": dxpy.dxlink(self.dxapplet),
                                                          "id": "stage1-id"
                                                          }],
                                              "version": 5},
                                     types=['pipeline'])
        self.workflow = dxpy.DXWorkflow(dxrecord.get_id())
        # A plain closed record, for tests that need a non-workflow object.
        self.closedrecord = dxpy.new_dxrecord(name='a record')
        self.closedrecord.close()
Example #2
0
 def test_init_from(self):
     """init_from copies details, name, and tags, while explicit kwargs override."""
     template = dxpy.new_dxrecord(details={"foo": "bar"}, types=["footype"],
                                  tags=["footag"])
     copy = dxpy.new_dxrecord(init_from=template, types=["bartype"])
     template_desc = template.describe(incl_details=True)
     copy_desc = copy.describe(incl_details=True)
     for field in ("details", "name", "tags"):
         self.assertEqual(template_desc[field], copy_desc[field])
     # types were explicitly overridden in the second call, so they must differ
     self.assertFalse(template_desc["types"] == copy_desc["types"])
Example #3
0
    def test_describe_dxrecord(self):
        """Check describe() defaults for a bare record, then all fields for a fully-specified one."""
        dxrecord = dxpy.new_dxrecord()
        desc = dxrecord.describe()
        self.assertEqual(desc["project"], proj_id)
        self.assertEqual(desc["id"], dxrecord.get_id())
        self.assertEqual(desc["class"], "record")
        self.assertEqual(desc["types"], [])
        self.assertTrue("created" in desc)
        self.assertEqual(desc["state"], "open")
        self.assertEqual(desc["hidden"], False)
        self.assertEqual(desc["links"], [])
        # With no name given, the object ID is used as the name.
        self.assertEqual(desc["name"], dxrecord.get_id())
        self.assertEqual(desc["folder"], "/")
        self.assertEqual(desc["tags"], [])
        self.assertTrue("modified" in desc)
        # properties and details are only returned when explicitly requested.
        self.assertFalse("properties" in desc)
        self.assertFalse("details" in desc)

        desc = dxrecord.describe(incl_properties=True)
        self.assertEqual(desc["properties"], {})

        desc = dxrecord.describe(incl_details=True)
        self.assertEqual(desc["details"], {})

        # Now create a record with every metadata field specified up front.
        types = ["mapping", "foo"]
        tags = ["bar", "baz"]
        properties = {"project": "cancer"}
        hidden = True
        details = {"$dnanexus_link": dxrecord.get_id()}
        folder = "/a"
        name = "Name"

        second_dxrecord = dxpy.new_dxrecord(types=types,
                                            properties=properties,
                                            hidden=hidden,
                                            details=details,
                                            tags=tags,
                                            folder=folder,
                                            parents=True,
                                            name=name)
        # describe(True, True) == incl_properties=True, incl_details=True
        desc = second_dxrecord.describe(True, True)
        self.assertEqual(desc["project"], proj_id)
        self.assertEqual(desc["id"], second_dxrecord.get_id())
        self.assertEqual(desc["class"], "record")
        self.assertEqual(desc["types"], types)
        self.assertTrue("created" in desc)
        self.assertEqual(desc["state"], "open")
        self.assertEqual(desc["hidden"], hidden)
        # The $dnanexus_link in details is surfaced as an outgoing link.
        self.assertEqual(desc["links"], [dxrecord.get_id()])
        self.assertEqual(desc["name"], name)
        self.assertEqual(desc["folder"], "/a")
        self.assertEqual(desc["tags"], tags)
        self.assertTrue("modified" in desc)
        self.assertEqual(desc["properties"], properties)
        self.assertEqual(desc["details"], details)
Example #4
0
def make_indexed_reference(job_inputs):
    """Build a BWA index for the contigset in job_inputs['reference'].

    Returns the new (open) "BwaLetterContigSetV3" record wrapping the
    uploaded index archive; the record is closed before returning.
    """
    logging.info("Indexing reference genome")

    # NOTE(review): the record ID is interpolated into a shell command line;
    # acceptable for platform-generated IDs, but not safe for untrusted input.
    run_shell("dx-contigset-to-fasta %s reference.fasta" % job_inputs['reference']['$dnanexus_link'])
    ref_details = dxpy.DXRecord(job_inputs['reference']['$dnanexus_link']).get_details()
    ref_name = dxpy.DXRecord(job_inputs['reference']['$dnanexus_link']).describe()['name']

    # TODO: test if the genomes near the boundary work OK
    # bwa's "is" algorithm only handles small genomes; switch to bwtsw at ~2 GiB.
    if sum(ref_details['contigs']['sizes']) < 2*1024*1024*1024:
        subprocess.check_call("bwa index -a is reference.fasta", shell=True)
    else:
        subprocess.check_call("bwa index -a bwtsw reference.fasta", shell=True)

    # Bundle every bwa output file (reference.fasta*) into one xz archive.
    subprocess.check_call("XZ_OPT=-0 tar -cJf reference.tar.xz reference.fasta*", shell=True)
    indexed_ref_dxfile = dxpy.upload_local_file("reference.tar.xz", hidden=True, wait_on_close=True)

    # Record links the archive back to the original contigset for provenance.
    indexed_ref_record = dxpy.new_dxrecord(name=ref_name + " (indexed for BWA)",
                                           types=["BwaLetterContigSetV3"],
                                           details={'index_archive': dxpy.dxlink(indexed_ref_dxfile.get_id()),
                                                    'original_contigset': job_inputs['reference']})
    indexed_ref_record.close()

    # TODO: dxpy project workspace convenience functions
# FIXME
#    if "projectWorkspace" in job:
#        indexed_ref_record.clone(job["projectWorkspace"])

    return indexed_ref_record
Example #5
0
    def test_visibility_of_dxrecord(self):
        """hide()/unhide() must toggle the 'hidden' flag reported by describe()."""
        rec = dxpy.new_dxrecord()
        rec.hide()
        self.assertEqual(rec.describe()["hidden"], True)

        rec.unhide()
        self.assertEqual(rec.describe()["hidden"], False)
Example #6
0
    def test_rename_dxrecord(self):
        """rename() should immediately update the name reported by describe()."""
        rec = dxpy.new_dxrecord()
        # Rename twice to confirm repeated renames take effect.
        for new_name in ("newname", "secondname"):
            rec.rename(new_name)
            self.assertEqual(rec.describe()["name"], new_name)
Example #7
0
 def test_run_dxapplet(self):
     """Build a trivial applet, run it on a closed record, and sanity-check the job description."""
     applet = dxpy.DXApplet()
     applet.new(name="test_applet",
                dxapi="1.04",
                inputSpec=[{"name": "chromosomes", "class": "record"},
                           {"name": "rowFetchChunk", "class": "int"}],
                outputSpec=[{"name": "mappings", "class": "record"}],
                runSpec={"code": "def main(): pass",
                         "interpreter": "python2.7",
                         "execDepends": [{"name": "python-numpy"}]})
     record = dxpy.new_dxrecord()
     record.close()
     applet_input = {"chromosomes": {"$dnanexus_link": record.get_id()},
                     "rowFetchChunk": 100}
     job = applet.run(applet_input=applet_input)
     desc = job.describe()
     self.assertEqual(desc["class"], "job")
     self.assertEqual(desc["function"], "main")
     self.assertEqual(desc["originalInput"], applet_input)
     # A directly-launched job is its own origin and has no parent.
     self.assertEqual(desc["originJob"], desc["id"])
     self.assertEqual(desc["parentJob"], None)
     self.assertEqual(desc["applet"], applet.get_id())
     self.assertEqual(desc["project"], applet.get_proj_id())
     # Server-assigned fields: only check presence.
     for key in ("state", "created", "modified", "launchedBy", "output"):
         self.assertTrue(key in desc)
     job.terminate()
Example #8
0
 def test_escaping(self):
     """Tab-completion must backslash-escape shell-special characters in object names."""
     # TODO: test backslash-escaping behavior for use with dx ls
     # (aside from special characters, escape the string so that
     # "*" and "?" aren't used as part of the glob pattern, escape
     # "/")
     r = dxpy.new_dxrecord(name='my <<awesome.>> record !@#$%^&*(){}[]|;:?`')
     # The expected string below is the exact escaped form the completer
     # should produce (including the trailing space).
     self.assert_completion('dx ls my', 'my \\<\\<awesome.\\>\\> record \\!\\@#$%^\\&*\\(\\){}[]\\|\\;\\\\:?\\` ')
Example #9
0
 def find_jobs(self):
     """Launch a job and confirm dxpy.find_jobs locates it with every filter applied at once."""
     applet = dxpy.DXApplet()
     applet.new(name="test_applet",
                inputSpec=[{"name": "chromosomes", "class": "record"},
                           {"name": "rowFetchChunk", "class": "int"}],
                outputSpec=[{"name": "mappings", "class": "record"}],
                runSpec={"code": "def main(): pass",
                         "interpreter": "python2.7",
                         "execDepends": [{"name": "python-numpy"}]})
     record = dxpy.new_dxrecord()
     record.close()
     job_input = {"chromosomes": {"$dnanexus_link": record.get_id()},
                  "rowFetchChunk": 100}
     job = applet.run(applet_input=job_input)
     # Apply every supported filter simultaneously; exactly one job matches.
     matches = list(dxpy.find_jobs(launched_by='user-000000000000000000000000',
                                   applet=applet,
                                   project=applet.get_proj_id(),
                                   origin_job=job.get_id(),
                                   parent_job=None,
                                   modified_after=0,
                                   describe=True))
     self.assertEqual(len(matches), 1)
     match = matches[0]
     self.assertEqual(match["id"], job.get_id())
     self.assertTrue("describe" in match)
     details = match["describe"]
     self.assertEqual(details["id"], job.get_id())
     self.assertEqual(details["class"], "job")
     self.assertEqual(details["applet"], applet.get_id())
     self.assertEqual(details["project"], applet.get_proj_id())
     self.assertEqual(details["originJob"], job.get_id())
     self.assertEqual(details["parentJob"], None)
def make_indexed_reference( ref_ID ):
    """Build a Bowtie2 index for contigset ref_ID; return the new record's ID.

    The index archive is uploaded as a hidden file and wrapped in a closed
    "BowtieLetterContigSetV2" record linking back to the original contigset.
    """

    # NOTE(review): ref_ID is interpolated into a shell command line; fine for
    # platform-generated IDs, but not safe for untrusted input.
    run_shell("dx-contigset-to-fasta %s reference.fasta" % ref_ID)
    ref_details = dxpy.DXRecord(ref_ID).get_details()
    ref_name = dxpy.DXRecord(ref_ID).describe()['name']

    # call bowtie2-build
    run_shell("bowtie2-build reference.fasta indexed_ref")
    # package it into an archive for uploading
    run_shell("XZ_OPT=-0 tar -cJf reference.tar.xz indexed_ref*")

    indexed_ref_dxfile = dxpy.upload_local_file("reference.tar.xz", hidden=True, wait_on_close=True)

    indexed_ref_record = dxpy.new_dxrecord(name=ref_name + " (indexed for Bowtie2)",
                                           types=["BowtieLetterContigSetV2"],
                                           details={'index_archive': dxpy.dxlink(indexed_ref_dxfile.get_id()),
                                                    'original_contigset': dxpy.dxlink(ref_ID)})
    indexed_ref_record.close()

    '''
    # TODO: dxpy project workspace convenience functions
    if "projectWorkspace" in job:
        indexed_ref_record.clone(job["projectWorkspace"])
    '''

    return indexed_ref_record.get_id()
Example #11
0
    def test_clone(self):
        """Cloning open objects must fail; after closing, objects and folders clone correctly."""
        dxproject = dxpy.DXProject()
        dxproject.new_folder("/a/b/c/d", parents=True)
        dxrecords = []
        for i in range(4):
            dxrecords.append(dxpy.new_dxrecord(name=("record-%d" % i)))

        # Open objects cannot be cloned.
        with self.assertRaises(DXAPIError):
            dxproject.clone(second_proj_id,
                            destination="/",
                            objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                            folders=["/a/b/c/d"])

        dxrecords[0].close()
        dxrecords[1].close()
        dxproject.clone(second_proj_id,
                        destination="/",
                        objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                        folders=["/a/b/c/d"])

        second_proj = dxpy.DXProject(second_proj_id)
        listf = second_proj.list_folder()
        # BUG FIX: list.sort() returns None, so the old assertion compared
        # None == None and always passed; compare sorted copies instead.
        self.assertEqual(sorted(get_objects_from_listf(listf)),
                         sorted([dxrecords[0].get_id(), dxrecords[1].get_id()]))
        self.assertEqual(listf["folders"], ["/d"])
def make_indexed_reference(ref_ID):
    """Build a Bowtie2 index for contigset ref_ID; return the new record's ID.

    The index archive is uploaded as a hidden file and wrapped in a closed
    "BowtieLetterContigSetV2" record linking back to the original contigset.
    """

    # NOTE(review): ref_ID is interpolated into a shell command line; fine for
    # platform-generated IDs, but not safe for untrusted input.
    run_shell("dx-contigset-to-fasta %s reference.fasta" % ref_ID)
    ref_details = dxpy.DXRecord(ref_ID).get_details()
    ref_name = dxpy.DXRecord(ref_ID).describe()['name']

    # call bowtie2-build
    run_shell("bowtie2-build reference.fasta indexed_ref")
    # package it into an archive for uploading
    run_shell("XZ_OPT=-0 tar -cJf reference.tar.xz indexed_ref*")

    indexed_ref_dxfile = dxpy.upload_local_file("reference.tar.xz",
                                                hidden=True,
                                                wait_on_close=True)

    indexed_ref_record = dxpy.new_dxrecord(
        name=ref_name + " (indexed for Bowtie2)",
        types=["BowtieLetterContigSetV2"],
        details={
            'index_archive': dxpy.dxlink(indexed_ref_dxfile.get_id()),
            'original_contigset': dxpy.dxlink(ref_ID)
        })
    indexed_ref_record.close()
    '''
    # TODO: dxpy project workspace convenience functions
    if "projectWorkspace" in job:
        indexed_ref_record.clone(job["projectWorkspace"])
    '''

    return indexed_ref_record.get_id()
Example #13
0
def copy_across_regions(local_path, record, dest_region, dest_proj,
                        dest_folder):
    """Copy an asset archive into another region's project, reusing any existing copy.

    Returns an AssetDesc describing the asset in the destination region.
    """
    print("copy_across_regions {} {} {} {}:{}".format(local_path,
                                                      record.get_id(),
                                                      dest_region,
                                                      dest_proj.get_id(),
                                                      dest_folder))
    # Skip the upload entirely if this asset was already copied over.
    existing = find_asset(dest_proj, dest_folder)
    if existing is not None:
        print("Already copied to region {}".format(dest_region))
        return AssetDesc(dest_region, existing.get_id(), dest_proj)

    # Upload the archive, then wrap it in a closed AssetBundle record that
    # carries over the original record's name and properties.
    dest_proj.new_folder(dest_folder, parents=True)
    uploaded = upload_local_file(local_path, dest_proj, dest_folder, hidden=True)
    asset_record = dxpy.new_dxrecord(name=record.name,
                                     types=['AssetBundle'],
                                     details={'archiveFileId': dxpy.dxlink(uploaded.get_id())},
                                     properties=record.get_properties(),
                                     project=dest_proj.get_id(),
                                     folder=dest_folder,
                                     close=True)
    return AssetDesc(dest_region, asset_record.get_id(), dest_proj)
Example #14
0
    def test_new_list_remove_folders(self):
        """Create a nested folder tree, list it level by level, then remove it."""
        dxproject = dxpy.DXProject()
        listf = dxproject.list_folder()
        self.assertEqual(listf["folders"], [])
        self.assertEqual(listf["objects"], [])

        dxrecord = dxpy.new_dxrecord()
        # parents=True creates the full /a/b/c/d chain in one call.
        dxproject.new_folder("/a/b/c/d", parents=True)
        listf = dxproject.list_folder()
        self.assertEqual(listf["folders"], ["/a"])
        self.assertEqual(listf["objects"], [{"id": dxrecord.get_id()}])
        listf = dxproject.list_folder("/a")
        self.assertEqual(listf["folders"], ["/a/b"])
        self.assertEqual(listf["objects"], [])
        listf = dxproject.list_folder("/a/b")
        self.assertEqual(listf["folders"], ["/a/b/c"])
        listf = dxproject.list_folder("/a/b/c")
        self.assertEqual(listf["folders"], ["/a/b/c/d"])
        listf = dxproject.list_folder("/a/b/c/d")
        self.assertEqual(listf["folders"], [])

        # Removing a non-empty folder must fail.
        with self.assertRaises(DXAPIError):
            dxproject.remove_folder("/a")
        dxproject.remove_folder("/a/b/c/d")
        # Redundant slashes should be normalized by the server.
        dxproject.remove_folder("/a//b////c/")
        dxproject.remove_folder("/a/b")
        dxproject.remove_folder("/a")
        dxrecord.remove()
        listf = dxproject.list_folder()
        self.assertEqual(listf["objects"], [])
Example #15
0
 def test_set_id(self):
     """set_ids() should make a fresh handler point at an existing record."""
     original = dxpy.new_dxrecord()
     alias = dxpy.DXRecord()
     alias.set_ids(original.get_id(), original.get_proj_id())
     self.assertEqual(alias.get_id(), original.get_id())
     self.assertEqual(alias.get_proj_id(), proj_id)
     original.remove()
Example #16
0
    def test_get_appet_with_asset(self):
        """'dx get --omit-resources' on an applet with assetDepends must emit the asset by name/version.

        (NOTE(review): method name has a typo — "appet" — kept to avoid changing
        the test's discovered name.)
        """
        bundle_name = "test-bundle-depends.tar.gz"
        # Build a tiny tarball to serve as both the bundled dependency and the asset.
        bundle_tmp_dir = tempfile.mkdtemp()
        os.mkdir(os.path.join(bundle_tmp_dir, "a"))
        with open(os.path.join(bundle_tmp_dir, 'a', 'foo.txt'), 'w') as file_in_bundle:
            file_in_bundle.write('foo\n')
        subprocess.check_call(['tar', '-czf', os.path.join(bundle_tmp_dir, bundle_name),
                               '-C', os.path.join(bundle_tmp_dir, 'a'), '.'])
        bundle_file = dxpy.upload_local_file(filename=os.path.join(bundle_tmp_dir, bundle_name),
                                             project=self.project,
                                             wait_on_close=True)

        asset_file = dxpy.upload_local_file(filename=os.path.join(bundle_tmp_dir, bundle_name),
                                            project=self.project,
                                            wait_on_close=True)

        # An asset is an AssetBundle record pointing at the uploaded archive...
        dxrecord_details = {"archiveFileId": {"$dnanexus_link": asset_file.get_id()}}
        dxrecord = dxpy.new_dxrecord(project=self.project, types=["AssetBundle"], details=dxrecord_details,
                                     name='asset-lib-test', properties={"version": "0.0.1"})
        dxrecord.close()
        asset_bundle_id = dxrecord.get_id()

        # ...and the archive links back to the record via a property.
        asset_file.set_properties({"AssetBundle": asset_bundle_id})

        code_str = """#!/bin/bash
                    main(){
                        echo 'Hello World'
                    }
                    """
        app_spec = {
            "name": "asset_depends",
            "dxapi": "1.0.0",
            "runSpec": {
                "code": code_str,
                "interpreter": "bash",
                "assetDepends":  [{"id": asset_bundle_id}],
                "bundledDepends": [{"name": bundle_name, "id": {"$dnanexus_link": bundle_file.get_id()}}]
            },
            "inputSpec": [],
            "outputSpec": [],
            "version": "1.0.0"
        }
        app_dir = self.write_app_directory("asset_depends", json.dumps(app_spec))
        asset_applet_id = json.loads(run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
        # Fetch the applet back without resources and verify the dxapp.json:
        # the asset dependency must be round-tripped as name/project/folder/version.
        with chdir(tempfile.mkdtemp()):
            run("dx get --omit-resources " + asset_applet_id)
            self.assertTrue(os.path.exists("asset_depends"))
            self.assertFalse(os.path.exists(os.path.join("asset_depends", "resources")))
            self.assertTrue(os.path.exists(os.path.join("asset_depends", "dxapp.json")))

            applet_spec = json.load(open(os.path.join("asset_depends", "dxapp.json")))
            self.assertEqual([{"name": "asset-lib-test",
                               "project": self.project,
                               "folder": "/",
                               "version": "0.0.1"}
                              ],
                             applet_spec["runSpec"]["assetDepends"])
            self.assertEqual([{"name": bundle_name, "id": {"$dnanexus_link": bundle_file.get_id()}}],
                             applet_spec["runSpec"]["bundledDepends"])
Example #17
0
    def test_job_from_app(self):
        """Run an applet that spawns a subjob; verify the subjob finishes and reports success."""
        # BUG FIX: new_dxrecord's first positional parameter is `details`, so the
        # old calls created details of {"details": {...}} (double-wrapped).
        # Pass the intended details explicitly by keyword.
        test_json = dxpy.new_dxrecord(details={"jobsuccess": False})
        job_id_json = dxpy.new_dxrecord(details={"jobid": None})
        dxapplet = dxpy.new_dxapplet(codefile='test_dxjob.py')
        dxappletjob = dxapplet.run({"json_dxid": test_json.get_id(),
                                    "job_id_json": job_id_json.get_id()})
        dxappletjob.wait_on_done()

        # The applet records the ID of the subjob it launched.
        dxjob_id = job_id_json.get_details()["jobid"]
        self.assertIsNotNone(dxjob_id)
        dxjob = dxpy.DXJob(dxjob_id)
        dxjob.wait_on_done()

        self.assertEqual(test_json.get_details(), {"jobsuccess": True})

        # Clean up everything we created (the old version leaked job_id_json).
        test_json.remove()
        job_id_json.remove()
        dxapplet.remove()
Example #18
0
    def test_types_of_dxrecord(self):
        """add_types/remove_types should be reflected immediately in describe()."""
        rec = dxpy.new_dxrecord()
        rec.add_types(["foo", "othertype"])
        self.assertEqual(rec.describe()["types"], ["foo", "othertype"])

        # Removing one type leaves the other in place.
        rec.remove_types(["foo"])
        self.assertEqual(rec.describe()["types"], ["othertype"])
Example #19
0
    def test_tags_of_dxrecord(self):
        """add_tags/remove_tags should be reflected immediately in describe()."""
        rec = dxpy.new_dxrecord()
        rec.add_tags(["foo", "othertag"])
        self.assertEqual(rec.describe()["tags"], ["foo", "othertag"])

        # Removing one tag leaves the other in place.
        rec.remove_tags(["foo"])
        self.assertEqual(rec.describe()["tags"], ["othertag"])
Example #20
0
    def test_set_properties_of_dxrecord(self):
        """set_properties stores key/value pairs; a None value deletes the key."""
        rec = dxpy.new_dxrecord()
        props = {"project": "cancer project", "foo": "bar"}
        rec.set_properties(props)
        self.assertEqual(rec.describe(True)["properties"], props)

        # Setting a key to None removes it rather than storing a null.
        rec.set_properties({"project": None})
        self.assertEqual(rec.describe(True)["properties"], {"foo": "bar"})
Example #21
0
    def setUpClass(cls):
        """Create a scratch project with folders, a file, and a record, then FUSE-mount it."""
        # Entire fixture is skipped unless the FUSE test suite is enabled.
        if 'DXTEST_FUSE' not in os.environ:
            return
        # Non-ASCII name exercises unicode handling in project names.
        proj_name = u"dxclient_test_pröject"
        cls.project_id = subprocess.check_output(u"dx new project '{p}' --brief".format(p=proj_name), shell=True).strip()
        dxpy.config["DX_PROJECT_CONTEXT_ID"] = cls.project_id
        dxpy.config["DX_CLI_WD"] = '/'
        cls.project = dxpy.DXProject(cls.project_id)
        # Re-init config so the new project context takes effect.
        dxpy.config.__init__(suppress_warning=True)

        subprocess.check_call(['dx', 'mkdir', 'foo'])
        subprocess.check_call(['dx', 'mkdir', 'bar'])
        dxpy.upload_local_file(__file__, wait_on_close=True)
        # Name contains '/' to exercise path-character handling in the mount.
        dxpy.new_dxrecord(name="A/B testing")

        cls.mountpoint = tempfile.mkdtemp()
        # TODO: redirect logs to someplace in case we need to debug
        # problems in these tests
        subprocess.check_call(['dx-mount', cls.mountpoint])
Example #22
0
 def test_remove_objects(self):
     """remove_objects should delete the object and leave the folder empty."""
     project = dxpy.DXProject()
     rec = dxpy.new_dxrecord()
     self.assertEqual(get_objects_from_listf(project.list_folder()),
                      [rec.get_id()])
     project.remove_objects([rec.get_id()])
     self.assertEqual(project.list_folder()["objects"], [])
     # The handler now points at a deleted object, so describe must fail.
     with self.assertRaises(DXAPIError):
         rec.describe()
Example #23
0
 def test_move(self):
     """Moving a data object relocates it from the root into the target folder."""
     project = dxpy.DXProject()
     project.new_folder("/a/b/c/d", parents=True)
     rec = dxpy.new_dxrecord()
     rec.move("/a/b/c")
     # Gone from the root, present in the destination.
     self.assertEqual(project.list_folder()["objects"], [])
     self.assertEqual(get_objects_from_listf(project.list_folder("/a/b/c")),
                      [rec.get_id()])
     self.assertEqual(rec.describe()["folder"], "/a/b/c")
Example #24
0
    def setUpClass(cls):
        """Create a scratch project with folders, a file, and a record, then FUSE-mount it."""
        # Entire fixture is skipped unless the FUSE test suite is enabled.
        if 'DXTEST_FUSE' not in os.environ:
            return
        # Non-ASCII name exercises unicode handling in project names.
        proj_name = u"dxclient_test_pröject"
        cls.project_id = subprocess.check_output(
            u"dx new project '{p}' --brief".format(p=proj_name),
            shell=True).strip()
        dxpy.config["DX_PROJECT_CONTEXT_ID"] = cls.project_id
        dxpy.config["DX_CLI_WD"] = '/'
        cls.project = dxpy.DXProject(cls.project_id)
        # Re-init config so the new project context takes effect.
        dxpy.config.__init__(suppress_warning=True)

        subprocess.check_call(['dx', 'mkdir', 'foo'])
        subprocess.check_call(['dx', 'mkdir', 'bar'])
        dxpy.upload_local_file(__file__, wait_on_close=True)
        # Name contains '/' to exercise path-character handling in the mount.
        dxpy.new_dxrecord(name="A/B testing")

        cls.mountpoint = tempfile.mkdtemp()
        # TODO: redirect logs to someplace in case we need to debug
        # problems in these tests
        subprocess.check_call(['dx-mount', cls.mountpoint])
Example #25
0
 def find_data_objs(self):
     """find_data_objects must filter on object state (open vs. closed)."""
     rec = dxpy.new_dxrecord()
     expected = {"project": proj_id, "id": rec.get_id()}

     # While open: one "open" hit, no "closed" hits.
     open_hits = list(dxpy.search.find_data_objects(state="open"))
     self.assertEqual(len(open_hits), 1)
     self.assertEqual(open_hits[0], expected)
     closed_hits = list(dxpy.search.find_data_objects(state="closed"))
     self.assertEqual(len(closed_hits), 0)

     # After closing, the record shows up under "closed".
     rec.close()
     closed_hits = list(dxpy.search.find_data_objects(state="closed"))
     self.assertEqual(len(closed_hits), 1)
     self.assertEqual(closed_hits[0], expected)
Example #26
0
    def test_get_set_details(self):
        """Details round-trip through get/set, and $dnanexus_link values become links."""
        plain_details = {"foo": "bar"}

        rec = dxpy.new_dxrecord()
        rec.set_details(plain_details)
        self.assertEqual(rec.get_details(), plain_details)
        # No links embedded in the details, so none are reported.
        self.assertEqual(rec.describe()["links"], [])

        linked_details = [{"$dnanexus_link": rec.get_id()},
                          {"$dnanexus_link": rec.get_id()}]

        rec.set_details(linked_details)
        self.assertEqual(rec.get_details(), linked_details)
        # Duplicate links collapse to a single entry in describe().
        self.assertEqual(rec.describe()["links"], [rec.get_id()])
Example #27
0
    def test_close_dxrecord(self):
        """Closed records reject hide() and set_details() but can still be renamed."""
        rec = dxpy.new_dxrecord()
        rec.close()
        with self.assertRaises(DXAPIError):
            rec.hide()
        with self.assertRaises(DXAPIError):
            rec.set_details(["foo"])

        # Details were never set before closing, so they remain empty.
        self.assertEqual(rec.get_details(), {})
        # Renaming is still allowed after close.
        for name in ("newname", "secondname"):
            rec.rename(name)
            self.assertEqual(rec.describe()["name"], name)
Example #28
0
def create_record(destination, file_ids, width=None, height=None):
    """
    Creates a master record for the HTML report; this doesn't contain the actual HTML, but reports
    are required to be records rather than files and we can link more than one HTML file to a report

    destination: "project:/path/name"-style string locating the new record
    file_ids: IDs of the HTML file(s) to link into the report
    width, height: optional viewer dimensions stored in the record details

    Returns the new record's ID, or exits via parser.error on API failure.
    """
    [project, path, name] = parse_destination(destination)
    files = [dxpy.dxlink(file_id) for file_id in file_ids]
    details = {"files": files}
    # Only store dimensions that were actually supplied.
    if width:
        details["width"] = width
    if height:
        details["height"] = height
    try:
        dxrecord = dxpy.new_dxrecord(project=project, folder=path, types=["Report", "HTMLReport"], details=details, name=name)
        dxrecord.close()
        return dxrecord.get_id()
    except dxpy.DXAPIError as ex:
        # parser.error prints the message and exits; no value is returned here.
        parser.error("Could not create an HTML report record on DNAnexus servers! ({ex})".format(ex=ex))
def create_record(destination, file_ids, width=None, height=None):
    """
    Creates a master record for the HTML report; this doesn't contain the actual HTML, but reports
    are required to be records rather than files and we can link more than one HTML file to a report

    destination: "project:/path/name"-style string locating the new record
    file_ids: IDs of the HTML file(s) to link into the report
    width, height: optional viewer dimensions stored in the record details

    Returns the new record's ID, or exits via parser.error on API failure.
    """
    [project, path, name] = parse_destination(destination)
    files = [dxpy.dxlink(file_id) for file_id in file_ids]
    details = {"files": files}
    # Only store dimensions that were actually supplied.
    if width:
        details["width"] = width
    if height:
        details["height"] = height
    try:
        dxrecord = dxpy.new_dxrecord(project=project, folder=path, types=["Report", "HTMLReport"], details=details, name=name)
        dxrecord.close()
        return dxrecord.get_id()
    except dxpy.DXAPIError as ex:
        # parser.error prints the message and exits; no value is returned here.
        parser.error("Could not create an HTML report record on DNAnexus servers! ({ex})".format(ex=ex))
Example #30
0
    def test_get_handler(self):
        """get_handler resolves simple DXLinks, extended DXLinks, and project IDs."""
        dxpy.set_workspace_id(self.second_proj_id)

        rec = dxpy.new_dxrecord(project=self.proj_id)
        # A simple link carries no project, so the handler falls back to the
        # current workspace — deliberately a different project here.
        simple_link = {'$dnanexus_link': rec.get_id()}
        handler = dxpy.get_handler(simple_link)
        self.assertEqual(handler.get_id(), rec.get_id())
        self.assertNotEqual(handler.get_proj_id(), self.proj_id)

        # An extended link pins both the object ID and its project.
        extended_link = {'$dnanexus_link': {'id': rec.get_id(),
                                            'project': self.proj_id}}
        handler = dxpy.get_handler(extended_link)
        self.assertEqual(handler.get_id(), rec.get_id())
        self.assertEqual(handler.get_proj_id(), self.proj_id)

        # Plain project IDs resolve to project handlers too.
        dxproject = dxpy.get_handler(self.proj_id)
Example #31
0
    def test_move(self):
        """Moving objects and a folder into /a updates listings and object metadata."""
        dxproject = dxpy.DXProject()
        dxproject.new_folder("/a/b/c/d", parents=True)
        dxrecords = []
        for i in range(4):
            dxrecords.append(dxpy.new_dxrecord(name=("record-%d" % i)))
        dxproject.move(destination="/a",
                       objects=[dxrecords[0].get_id(), dxrecords[1].get_id()],
                       folders=["/a/b/c/d"])
        listf = dxproject.list_folder()
        # BUG FIX: list.sort() returns None, so the old assertions compared
        # None == None and always passed; compare sorted copies instead.
        self.assertEqual(sorted(get_objects_from_listf(listf)),
                         sorted([dxrecords[2].get_id(), dxrecords[3].get_id()]))
        self.assertEqual(listf["folders"], ["/a"])

        listf = dxproject.list_folder("/a")
        self.assertEqual(sorted(get_objects_from_listf(listf)),
                         sorted([dxrecords[0].get_id(), dxrecords[1].get_id()]))
        self.assertEqual(listf["folders"], ["/a/b", "/a/d"])

        desc = dxrecords[0].describe()
        self.assertEqual(desc["folder"], "/a")
Example #32
0
def makeGenomeObject():
    """Create a minimal closed ContigSet record (hg19 chr1 only) and return its ID.

    No real sequence data is uploaded (a full hg19 sequence would be huge), so
    importers/exporters that read the flat sequence file cannot run on this
    test contigset.
    """
    # Empty, hidden placeholder standing in for the flat sequence file.
    sequence_file = dxpy.upload_string("", hidden=True)

    record = dxpy.new_dxrecord()
    record.set_details({
        "flat_sequence_file": {"$dnanexus_link": sequence_file.get_id()},
        "contigs": {
            "offsets": [0],
            "names": ["chr1"],
            "sizes": [249250621]
        }
    })
    record.add_types(["ContigSet"])
    record.close()

    sequence_file.wait_on_close()

    return record.get_id()
Example #33
0
    def test_clone(self):
        """clone() requires a closed object and preserves id/tags/created across projects."""
        dxrecord = dxpy.new_dxrecord(name="firstname", tags=["tag"])

        # Open objects cannot be cloned.
        with self.assertRaises(DXAPIError):
            second_dxrecord = dxrecord.clone(second_proj_id)
        dxrecord.close()

        second_dxrecord = dxrecord.clone(second_proj_id)
        second_dxrecord.rename("newname")

        first_desc = dxrecord.describe()
        second_desc = second_dxrecord.describe()

        # A clone keeps the same object ID; only the containing project differs.
        self.assertEqual(first_desc["id"], dxrecord.get_id())
        self.assertEqual(second_desc["id"], dxrecord.get_id())
        self.assertEqual(first_desc["project"], proj_id)
        self.assertEqual(second_desc["project"], second_proj_id)
        # Names are per-project, so renaming the clone leaves the original alone.
        self.assertEqual(first_desc["name"], "firstname")
        self.assertEqual(second_desc["name"], "newname")
        self.assertEqual(first_desc["tags"], ["tag"])
        self.assertEqual(second_desc["tags"], ["tag"])
        self.assertEqual(first_desc["created"], second_desc["created"])
        self.assertEqual(first_desc["state"], "closed")
        self.assertEqual(second_desc["state"], "closed")
Example #34
0
def main():
    """Incrementally stream a sequencing run directory to DNAnexus.

    Flow: parse and validate arguments; create (or reuse) one
    UploadSentinel record per lane; upload RunInfo.xml (and
    SampleSheet.csv unless --samplesheet-delay) up front; poll the run
    directory, syncing each not-yet-uploaded lane every --sync-interval
    seconds until the termination file appears or the maximum wait
    (derived from --run-duration) is exceeded; finalize each lane
    (last sync, log upload, record details, close the sentinel); then
    optionally launch a downstream applet OR workflow per lane, and/or
    run a local script on the run directory.
    """

    args = parse_args()
    check_input(args)
    run_id = get_run_id(args.run_dir)

    # Set all naming conventions
    REMOTE_RUN_FOLDER = "/" + run_id + "/runs"
    REMOTE_READS_FOLDER = "/" + run_id + "/reads"
    REMOTE_ANALYSIS_FOLDER = "/" + run_id + "/analyses"

    FILE_PREFIX = "run." + run_id + ".lane."

    # Prep log & record names
    lane_info = []

    # If no lanes are specified, set lane to all, otherwise, set to array of lanes
    if not args.num_lanes:
        lanes_to_upload = ["all"]
    else:
        lanes_to_upload = [str(i) for i in range(1, args.num_lanes + 1)]

    # Per-lane bookkeeping: lane id, file prefix, local log path,
    # sentinel record name, remote destination folder, completion flag.
    for lane in lanes_to_upload:
        lane_prefix = FILE_PREFIX + lane

        lane_info.append({
            "lane":
            lane,
            "prefix":
            lane_prefix,
            "log_path":
            os.path.join(args.log_dir, lane_prefix + ".log"),
            "record_name":
            lane_prefix + ".upload_sentinel",
            "remote_folder":
            get_target_folder(REMOTE_RUN_FOLDER, lane),
            "uploaded":
            False
        })

    # Create upload sentinel for upload, if record already exists, use that
    done_count = 0
    for lane in lane_info:
        lane_num = lane["lane"]
        try:
            old_record = dxpy.find_one_data_object(
                zero_ok=True,
                typename="UploadSentinel",
                name=lane["record_name"],
                project=args.project,
                folder=lane["remote_folder"])
        except dxpy.exceptions.DXSearchError as e:
            raise_error(
                "Encountered an error looking for %s at %s:%s. %s" %
                (lane["record_name"], lane["remote_folder"], args.project, e))

        if old_record:
            # Reuse the existing sentinel; a closed sentinel means this
            # lane was already fully uploaded by a previous invocation.
            lane["dxrecord"] = dxpy.get_handler(old_record["id"],
                                                project=old_record["project"])
            if lane["dxrecord"].describe()["state"] == "closed":
                print_stderr("Run %s, lane %s has already been uploaded" %
                             (run_id, lane_num))
                lane["uploaded"] = True
                done_count += 1
        else:
            properties = {"run_id": run_id, "lanes": lane_num}
            lane["dxrecord"] = dxpy.new_dxrecord(types=["UploadSentinel"],
                                                 project=args.project,
                                                 folder=lane["remote_folder"],
                                                 parents=True,
                                                 name=lane["record_name"],
                                                 properties=properties)

        # upload RunInfo here, before uploading any data, unless it is already uploaded.
        record = lane["dxrecord"]
        properties = record.get_properties()

        runInfo = dxpy.find_one_data_object(zero_ok=True,
                                            name="RunInfo.xml",
                                            project=args.project,
                                            folder=lane["remote_folder"])
        if not runInfo:
            lane["runinfo_file_id"] = upload_single_file(
                args.run_dir + "/RunInfo.xml", args.project,
                lane["remote_folder"], properties)
        else:
            lane["runinfo_file_id"] = runInfo["id"]

        # Upload samplesheet unless samplesheet-delay is specified or it is already uploaded.
        if not args.samplesheet_delay:
            sampleSheet = dxpy.find_one_data_object(
                zero_ok=True,
                name="SampleSheet.csv",
                project=args.project,
                folder=lane["remote_folder"])
            if not sampleSheet:
                lane["samplesheet_file_id"] = upload_single_file(
                    args.run_dir + "/SampleSheet.csv", args.project,
                    lane["remote_folder"], properties)
            else:
                lane["samplesheet_file_id"] = sampleSheet["id"]

    if done_count == len(lane_info):
        print_stderr("EXITING: All lanes already uploaded")
        # NOTE(review): exits nonzero even though everything is already
        # uploaded — confirm callers treat this as "nothing to do", not failure.
        sys.exit(1)

    # normalize_timedelta yields milliseconds; convert to seconds and allow
    # args.intervals_to_wait run-durations before giving up.
    seconds_to_wait = (dxpy.utils.normalize_timedelta(args.run_duration) /
                       1000 * args.intervals_to_wait)
    print_stderr("Maximum allowable time for run to complete: %d seconds." %
                 seconds_to_wait)

    initial_start_time = time.time()
    # While loop waiting for RTAComplete.txt or RTAComplete.xml
    while not termination_file_exists(args.novaseq, args.run_dir):
        start_time = time.time()
        run_time = start_time - initial_start_time
        # Fail if run time exceeds total time to wait
        if run_time > seconds_to_wait:
            print_stderr(
                "EXITING: Upload failed. Run did not complete after %d seconds (max wait = %ds)"
                % (run_time, seconds_to_wait))
            sys.exit(1)

        # Loop through all lanes in run directory
        for lane in lane_info:
            lane_num = lane["lane"]
            if lane["uploaded"]:
                continue
            run_sync_dir(lane, args)

        # Wait at least the minimum time interval before running the loop again
        cur_time = time.time()
        diff = cur_time - start_time
        if diff < args.sync_interval:
            print_stderr("Sleeping for %d seconds" %
                         (int(args.sync_interval - diff)))
            time.sleep(int(args.sync_interval - diff))

    # Final synchronization, upload data, set details
    for lane in lane_info:
        if lane["uploaded"]:
            continue
        file_ids = run_sync_dir(lane, args, finish=True)
        record = lane["dxrecord"]
        properties = record.get_properties()
        lane["log_file_id"] = upload_single_file(lane["log_path"],
                                                 args.project,
                                                 lane["remote_folder"],
                                                 properties)

        # Stamp every uploaded tar file with the sentinel's properties so
        # they can be traced back to this run/lane.
        for file_id in file_ids:
            dxpy.get_handler(file_id,
                             project=args.project).set_properties(properties)
        details = {
            'run_id': run_id,
            'lanes': lane["lane"],
            'upload_thumbnails': str(args.upload_thumbnails).lower(),
            'dnanexus_path': args.project + ":" + lane["remote_folder"],
            'tar_file_ids': file_ids
        }

        # Upload sample sheet here, if samplesheet-delay specified
        if args.samplesheet_delay:
            lane["samplesheet_file_id"] = upload_single_file(
                args.run_dir + "/SampleSheet.csv", args.project,
                lane["remote_folder"], properties)

        # ID to singly uploaded file (when uploaded successfully)
        if lane.get("log_file_id"):
            details.update({'log_file_id': lane["log_file_id"]})
        if lane.get("runinfo_file_id"):
            details.update({'runinfo_file_id': lane["runinfo_file_id"]})
        if lane.get("samplesheet_file_id"):
            details.update(
                {'samplesheet_file_id': lane["samplesheet_file_id"]})

        record.set_details(details)

        # Closing the sentinel marks the lane as fully uploaded (see the
        # "state == closed" reuse check above).
        record.close()

    print_stderr("Run %s successfully streamed!" % (run_id))

    # Optional downstream analysis input: JSON object of string keys to
    # string-or-dict values, merged into the executable's input.
    downstream_input = {}
    if args.downstream_input:
        try:
            input_dict = json.loads(args.downstream_input)
        except ValueError as e:
            raise_error(
                "Failed to read downstream input as JSON string. %s. %s" %
                (args.downstream_input, e))

        if not isinstance(input_dict, dict):
            raise_error("Expected a dict for downstream input. Got %s." %
                        input_dict)

        for k, v in list(input_dict.items()):
            if not (isinstance(k, str) and
                    (isinstance(v, str) or isinstance(v, dict))):
                raise_error(
                    "Expected (string) key and (string or dict) value pairs for downstream input. Got (%s)%s (%s)%s"
                    % (type(k), k, type(v), v))

            downstream_input[k] = v

    if args.applet:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)

        print_stderr("Initiating downstream analysis: given app(let) id %s" %
                     args.applet)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # applet verified in check_input, assume no change
            applet = dxpy.get_handler(args.applet)

            # Prepare output folder, if downstream analysis specified
            reads_target_folder = get_target_folder(REMOTE_READS_FOLDER, lane)
            print_stderr("Creating output folder %s" % (reads_target_folder))

            try:
                project.new_folder(reads_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (reads_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = applet.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            downstream_input["upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = applet.run(downstream_input,
                             folder=reads_target_folder,
                             project=args.project,
                             name=job_name)

            print_stderr("Initiated job %s from applet %s for lane %s" %
                         (job, args.applet, lane))
    # Close if args.applet

    # args.workflow and args.applet are mutually exclusive
    elif args.workflow:
        # project verified in check_input, assuming no change
        project = dxpy.get_handler(args.project)

        print_stderr("Initiating downstream analysis: given workflow id %s" %
                     args.workflow)

        for info in lane_info:
            lane = info["lane"]
            record = info["dxrecord"]

            # workflow verified in check_input, assume no change
            workflow = dxpy.get_handler(args.workflow)

            # Prepare output folder, if downstream analysis specified
            analyses_target_folder = get_target_folder(REMOTE_ANALYSIS_FOLDER,
                                                       lane)
            print_stderr("Creating output folder %s" %
                         (analyses_target_folder))

            try:
                project.new_folder(analyses_target_folder, parents=True)
            except dxpy.DXError as e:
                raise_error("Failed to create new folder %s. %s" %
                            (analyses_target_folder, e))

            # Decide on job name (<executable>-<run_id>)
            job_name = workflow.title + "-" + run_id

            # Overwite upload_sentinel_record input of applet to the record of inc upload
            # ("0." prefix targets the workflow's first stage — presumably the
            # sentinel-consuming stage; confirm against the workflow spec)
            downstream_input["0.upload_sentinel_record"] = dxpy.dxlink(record)

            # Run specified applet
            job = workflow.run(downstream_input,
                               folder=analyses_target_folder,
                               project=args.project,
                               name=job_name)

            print_stderr("Initiated analyses %s from workflow %s for lane %s" %
                         (job, args.workflow, lane))

    # Close if args.workflow

    if args.script:
        # script has been validated to be executable earlier, assume no change
        try:
            sub.check_call([args.script, args.run_dir])
        except sub.CalledProcessError as e:
            raise_error("Executable (%s) failed with error %d: %s" %
                        (args.script, e.returncode, e.output))
    def test_var_initialization(self):
        '''
        This test assumes a well-formed input spec and mostly just
        tests that everything compiles and the variable initialization
        code does not throw any errors.

        Fixes: use assertEqual (assertEquals is a deprecated alias),
        raw string for the regex pattern (the "\[" escape is invalid in
        a plain string literal), and drop unused local bindings for the
        applet and file handles (their creation side effects remain).
        '''

        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project

        # Make some data objects for input; only the gtable and record
        # handles are needed afterwards (to populate and close them).
        dxpy.api.applet_new({"project": dxpy.WORKSPACE_ID,
                             "name": "anapplet",
                             "dxapi": "1.0.0",
                             "runSpec": {"code": "", "interpreter": "bash"}})
        dxpy.upload_string("foo", name="afile")
        dxgtable = dxpy.new_dxgtable(columns=[{"name": "int_col", "type": "int"}], name="agtable")
        dxgtable.add_rows([[3], [0]])
        dxgtable.close(block=True)
        dxrecord = dxpy.new_dxrecord(name="arecord")
        dxrecord.close()

        dxapp_json = {
            "name": "all_vars",
            "title": "all_vars",
            "summary": "all_vars",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [],
            "outputSpec": []
        }

        classes = ['applet', 'record', 'file', 'gtable',
                   'boolean', 'int', 'float', 'string', 'hash',
                   'array:applet', 'array:record', 'array:file', 'array:gtable',
                   'array:boolean', 'array:int', 'array:float', 'array:string']

        for classname in classes:
            dxapp_json['inputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                            "class": classname,
                                            "optional": False})
            # Note: marking outputs as optional so that empty arrays
            # will be acceptable; keeping names the same (as required)
            # in order to allow pass-through from input variables
            dxapp_json['outputSpec'].append({"name": "required_" + classname.replace(":", "_"),
                                             "class": classname,
                                             "optional": True})
            dxapp_json['inputSpec'].append({"name": "optional_" + classname.replace(":", "_"),
                                            "class": classname,
                                            "optional": True})

        cmdline_args = ['-irequired_applet=anapplet',
                        '-irequired_array_applet=anapplet',
                        '-irequired_record=arecord',
                        '-irequired_array_record=arecord',
                        '-irequired_file=afile',
                        '-irequired_array_file=afile',
                        '-irequired_gtable=agtable',
                        '-irequired_array_gtable=agtable',
                        '-irequired_boolean=true',
                        '-irequired_array_boolean=true',
                        '-irequired_array_boolean=false',
                        '-irequired_int=32',
                        '-irequired_array_int=42',
                        '-irequired_float=3.4',
                        '-irequired_array_float=.42',
                        '-irequired_string=foo',
                        '-irequired_array_string=bar',
                        '-irequired_hash={"foo":"bar"}']
        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)
            # Test with bare-minimum of inputs
            output = subprocess.check_output(['dx-run-app-locally', appdir] + cmdline_args)
            print(output)
            # Verify array is printed total 3 times once in each input, logs, and final output
            self.assertEqual(len(re.findall(r"required_array_boolean = \[ true, false ]", output)), 3)
            self.assertIn("App finished successfully", output)

            # See PTFM-13697 for CentOS 5 details
            if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
                # Now actually make it an applet and run it
                applet_name = dxapp_json['name'] + '-' + lang
                subprocess.check_output(['dx', 'build', appdir, '--destination', applet_name])
                subprocess.check_output(['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
Example #36
0
    def test_get_appet_with_asset(self):
        """Build an applet with both an assetDepends and a bundledDepends,
        then `dx get --omit-resources` it and check that: no resources
        directory is written; the asset dependency is rewritten to a
        name/project/folder/version reference; and the bundled dependency
        keeps its original file link.
        """
        bundle_name = "test-bundle-depends.tar.gz"
        # Build a tiny tarball (one file under a/) to serve as both the
        # bundled dependency and the asset archive.
        bundle_tmp_dir = tempfile.mkdtemp()
        os.mkdir(os.path.join(bundle_tmp_dir, "a"))
        with open(os.path.join(bundle_tmp_dir, 'a', 'foo.txt'),
                  'w') as file_in_bundle:
            file_in_bundle.write('foo\n')
        subprocess.check_call([
            'tar', '-czf',
            os.path.join(bundle_tmp_dir, bundle_name), '-C',
            os.path.join(bundle_tmp_dir, 'a'), '.'
        ])
        bundle_file = dxpy.upload_local_file(filename=os.path.join(
            bundle_tmp_dir, bundle_name),
                                             project=self.project,
                                             wait_on_close=True)

        # Separate upload of the same tarball to act as the asset archive.
        asset_file = dxpy.upload_local_file(filename=os.path.join(
            bundle_tmp_dir, bundle_name),
                                            project=self.project,
                                            wait_on_close=True)

        # An AssetBundle record points to its archive file via details.
        dxrecord_details = {
            "archiveFileId": {
                "$dnanexus_link": asset_file.get_id()
            }
        }
        dxrecord = dxpy.new_dxrecord(project=self.project,
                                     types=["AssetBundle"],
                                     details=dxrecord_details,
                                     name='asset-lib-test',
                                     properties={"version": "0.0.1"})
        dxrecord.close()
        asset_bundle_id = dxrecord.get_id()

        # Back-link the archive file to its AssetBundle record.
        asset_file.set_properties({"AssetBundle": asset_bundle_id})

        code_str = """#!/bin/bash
                    main(){
                        echo 'Hello World'
                    }
                    """
        app_spec = {
            "name": "asset_depends",
            "dxapi": "1.0.0",
            "runSpec": {
                "code":
                code_str,
                "interpreter":
                "bash",
                "distribution":
                "Ubuntu",
                "release":
                "14.04",
                "assetDepends": [{
                    "id": asset_bundle_id
                }],
                "bundledDepends": [{
                    "name": bundle_name,
                    "id": {
                        "$dnanexus_link": bundle_file.get_id()
                    }
                }]
            },
            "inputSpec": [],
            "outputSpec": [],
            "version": "1.0.0"
        }
        app_dir = self.write_app_directory("asset_depends",
                                           json.dumps(app_spec))
        asset_applet_id = json.loads(
            run("dx build --json {app_dir}".format(app_dir=app_dir)))["id"]
        # Fetch the applet back and inspect the regenerated dxapp.json.
        with chdir(tempfile.mkdtemp()):
            run("dx get --omit-resources " + asset_applet_id)
            self.assertTrue(os.path.exists("asset_depends"))
            self.assertFalse(
                os.path.exists(os.path.join("asset_depends", "resources")))
            self.assertTrue(
                os.path.exists(os.path.join("asset_depends", "dxapp.json")))
            with open(os.path.join("asset_depends", "dxapp.json")) as fh:
                applet_spec = json.load(fh)
            # Asset dependency is rewritten from a record ID to a symbolic
            # name/project/folder/version reference.
            self.assertEqual([{
                "name": "asset-lib-test",
                "project": self.project,
                "folder": "/",
                "version": "0.0.1"
            }], applet_spec["runSpec"]["assetDepends"])
            # Bundled dependency retains its direct file link.
            self.assertEqual([{
                "name": bundle_name,
                "id": {
                    "$dnanexus_link": bundle_file.get_id()
                }
            }], applet_spec["runSpec"]["bundledDepends"])
Example #37
0
def generate_report(geneBody, inner_dist, junc_ann, read_dist, read_dup,
                    mappings, contam, names):
    # Assemble RSeQC outputs into a Report record and return a link to it.
    # (Python 2 code: uses print statements.)
    #
    #   geneBody   -- per-percentile gene-body coverage values (list)
    #   inner_dist -- inner-distance table file ID, or None to skip section
    #   junc_ann   -- junction-annotation R script file ID (RSeQC output)
    #   read_dist  -- read-distribution text file ID, or None to skip
    #   read_dup   -- read-duplication table file ID
    #   mappings   -- link to the original mappings GTable (used for naming)
    #   contam     -- contamination percentages, or None to skip section
    #   names      -- contaminant names parallel to `contam`

    report_details = {}

    # Gene Body Dist
    loc_in_gene = [n for n in range(100)]

    report_details['Gene Body Coverage'] = {
        "Normalized Location in Gene": loc_in_gene,
        "% of Reads Covering": geneBody
    }

    #########################
    # Inner Distance

    if inner_dist != None:

        dxpy.download_dxfile(inner_dist, "inner_dist.txt")

        inner_bucket = []
        inner_num_reads = []
        inner_total_reads = 0
        # if a bucket has less than 0.1% of reads in it then don't include it
        cutoff = 0.001

        # First pass: total read count across all buckets (each line is
        # "start end num_reads").
        with open("inner_dist.txt", "r") as fh:
            line = fh.readline().rstrip("\n")
            while line != "":
                inner_total_reads += int(line.split()[2])
                line = fh.readline().rstrip("\n")

        bucket_cutoff = cutoff * inner_total_reads
        print "Applying cutoff of: " + str(
            cutoff) + " for inner distance calculation"

        # Second pass: keep only buckets above the cutoff, recording each
        # bucket's center position and read count.
        with open("inner_dist.txt", "r") as fh:
            line = fh.readline().rstrip("\n")
            while line != "":
                start, end, num_reads = [int(x) for x in line.split()]
                if num_reads > bucket_cutoff:
                    # store center position of this bucket
                    inner_bucket.append(int(end - ((end - start) / 2)))
                    inner_num_reads.append(num_reads)

                line = fh.readline().rstrip("\n")

        # find total to normalize
        inner_total_reads = sum(inner_num_reads)
        print "Total reads for inner distance calculation: " + str(
            inner_total_reads)
        inner_median = None
        running_total = 0
        inner_length_sum = 0
        for i in range(len(inner_bucket)):
            # multiply read length by number of observations for the mean
            inner_length_sum += inner_bucket[i] * inner_num_reads[i]

            # calculate median
            running_total += inner_num_reads[i]
            if running_total >= inner_total_reads / 2 and inner_median == None:
                inner_median = inner_bucket[i]

        # NOTE(review): both operands are ints, so under Python 2 this is
        # integer (floor) division — confirm a truncated mean is intended.
        inner_mean = inner_length_sum / inner_total_reads
        print "inner distance metrics: " + " ".join(
            [str(inner_length_sum),
             str(inner_total_reads)])

        # calc standard deviation
        std_sum = 0
        for i in range(len(inner_bucket)):
            std_sum += ((inner_bucket[i] - inner_mean)**2) * inner_num_reads[i]

        std_sum /= inner_total_reads
        inner_std = int(math.sqrt(std_sum) + 0.5)

        report_details['Paired Read Inner Distance'] = {
            "Inner Distance (bp)": inner_bucket,
            "Count": inner_num_reads,
            "Mean": inner_mean,
            "Median": inner_median,
            "Standard Deviation": inner_std
        }

    ############################
    # Junction Annotation

    dxpy.download_dxfile(junc_ann, "junc_ann.r")

    # initialize splicing values in case there was no splicing
    sj_k = 0
    sj_pn = 0
    sj_cn = 0

    se_k = 0
    se_pn = 0
    se_cn = 0

    if os.path.getsize("junc_ann.r") == 0:
        print "No splicing events found so setting all junction stats to 0"
    else:
        # Parse the percentages out of the R vector literals, e.g.
        # "events=c(p, c, k)" / "junction=c(p, c, k)"; convert % to fractions.
        with open("junc_ann.r", "r") as fh:

            line = fh.readline()
            while line != "":
                line = line.rstrip("\n")
                if line.startswith("events"):
                    # parse out the % and assign them
                    se_pn, se_cn, se_k = [
                        float(n) / 100 for n in line[9:-1].split(",")
                    ]

                if line.startswith("junction"):
                    sj_pn, sj_cn, sj_k = [
                        float(n) / 100 for n in line[11:-1].split(",")
                    ]

                line = fh.readline()

    report_details['Junction Annotation'] = {
        "Splicing Junctions": {
            "known": sj_k,
            "partial novel": sj_pn,
            "complete novel": sj_cn
        },
        "Splicing Events": {
            "known": se_k,
            "partial novel": se_pn,
            "complete novel": se_cn
        }
    }

    ############################
    # read duplication

    dxpy.download_dxfile(read_dup, "read_dup.txt")

    pos_copy = []
    pos_num_reads = []
    pos_total_reads = 0
    seq_copy = []
    seq_num_reads = []
    seq_total_reads = 0

    # File layout: header, position-based "occurrence count" rows, a second
    # header starting with "Occurrence", then sequence-based rows.
    with open("read_dup.txt", "r") as fh:
        # pull of first header
        line = fh.readline()
        line = fh.readline()
        # read until we hit the stats for sequence based duplication
        while not line.startswith("Occurrence"):
            c, r = [int(n) for n in line.split()]
            pos_copy.append(c)
            pos_num_reads.append(float(r))
            pos_total_reads += r
            line = fh.readline()

        #get next line to start with the data
        line = fh.readline()
        while line != "":
            c, r = [int(n) for n in line.split()]
            seq_copy.append(c)
            seq_num_reads.append(float(r))
            seq_total_reads += r
            line = fh.readline()

    # Normalize counts to fractions of their respective totals.
    pos_total_reads = float(pos_total_reads)
    seq_total_reads = float(seq_total_reads)

    for i in range(len(pos_num_reads)):
        pos_num_reads[i] /= pos_total_reads

    for i in range(len(seq_num_reads)):
        seq_num_reads[i] /= seq_total_reads

    report_details['Read Duplication'] = {
        "Position Based": {
            "Read Occurrences": pos_copy,
            "% Reads": pos_num_reads
        },
        "Sequence Based": {
            "Read Occurrences": seq_copy,
            "% Reads": seq_num_reads
        }
    }

    ############################
    # read distribution report
    if read_dist != None:
        dxpy.download_dxfile(read_dist, "read_dist.txt")

        report_details['Read Distribution'] = {}

        # First three lines carry totals; their last token is the value.
        with open("read_dist.txt", "r") as rd_file:
            report_details['Read Distribution']['Total Reads'] = int(
                rd_file.readline().split()[-1])
            report_details['Read Distribution']['Total Tags'] = int(
                rd_file.readline().split()[-1])
            report_details['Read Distribution']['Total Assigned Tags'] = int(
                rd_file.readline().split()[-1])

            # pull out line of "="s
            rd_file.readline()
            # pull header line
            rd_file.readline()
            line = rd_file.readline()
            # Table rows ("Group TotalBases TagCount TagsPerKb") until the
            # closing "=" ruler line.
            while not line.startswith("="):
                fields = line.split()
                report_details['Read Distribution'][fields[0]] = [
                    int(fields[1]),
                    int(fields[2]),
                    float(fields[3])
                ]
                line = rd_file.readline()

    #############################
    # add report of contaminations if calculated

    if contam != None:
        contam_report = []
        for i in range(len(contam)):
            contam_report.append({
                "Contaminant Name": names[i],
                "% Reads Mapping": contam[i]
            })

        report_details['Contamination'] = contam_report

    #############################
    # add link to mappings
    report_details['original_mappings'] = mappings

    report_name = dxpy.DXGTable(mappings).describe()['name'] + " RSeQC report"

    # create report
    report = dxpy.new_dxrecord(name=report_name,
                               details=report_details,
                               types=["Report", "RSeQC"])
    report.close()

    return {"Report": dxpy.dxlink(report.get_id())}
Example #38
0
    def test_var_initialization(self):
        '''
        This test assumes a well-formed input spec and mostly just
        tests that everything compiles and the variable initialization
        code does not throw any errors.
        '''

        print("Setting current project to", self.project)
        dxpy.WORKSPACE_ID = self.project
        dxpy.PROJECT_CONTEXT_ID = self.project

        # Make some data objects for input
        dxpy.api.applet_new({
            "project": dxpy.WORKSPACE_ID,
            "name": "anapplet",
            "dxapi": "1.0.0",
            "runSpec": {
                "code": "",
                "interpreter": "bash",
                "distribution": "Ubuntu",
                "release": "14.04"
            }
        })['id']
        dxpy.upload_string("foo", name="afile")
        record_handle = dxpy.new_dxrecord(name="arecord")
        record_handle.close()

        dxapp_json = {
            "name": "all_vars",
            "title": "all_vars",
            "summary": "all_vars",
            "dxapi": "1.0.0",
            "version": "0.0.1",
            "categories": [],
            "inputSpec": [],
            "outputSpec": []
        }

        classes = [
            'applet', 'record', 'file', 'boolean', 'int', 'float', 'string',
            'hash', 'array:applet', 'array:record', 'array:file',
            'array:boolean', 'array:int', 'array:float', 'array:string'
        ]

        # For every class, declare a required input, a matching output, and
        # an optional input.
        for classname in classes:
            base_name = classname.replace(":", "_")
            dxapp_json['inputSpec'].append({"name": "required_" + base_name,
                                            "class": classname,
                                            "optional": False})
            # Note: marking outputs as optional so that empty arrays
            # will be acceptable; keeping names the same (as required)
            # in order to allow pass-through from input variables
            dxapp_json['outputSpec'].append({"name": "required_" + base_name,
                                             "class": classname,
                                             "optional": True})
            dxapp_json['inputSpec'].append({"name": "optional_" + base_name,
                                            "class": classname,
                                            "optional": True})

        # Command-line values for every required input.
        cmdline_args = [
            '-irequired_applet=anapplet', '-irequired_array_applet=anapplet',
            '-irequired_record=arecord', '-irequired_array_record=arecord',
            '-irequired_file=afile', '-irequired_array_file=afile',
            '-irequired_boolean=true', '-irequired_array_boolean=true',
            '-irequired_array_boolean=false', '-irequired_int=32',
            '-irequired_array_int=42', '-irequired_float=3.4',
            '-irequired_array_float=.42', '-irequired_string=foo',
            '-irequired_array_string=bar', '-irequired_hash={"foo":"bar"}'
        ]
        for lang in supported_languages:
            appdir = create_app_dir_with_dxapp_json(dxapp_json, lang)

            # See PTFM-13697 for CentOS 5 details
            if testutil.TEST_RUN_JOBS and not testutil.host_is_centos_5():
                # Now actually make it an applet and run it
                applet_name = dxapp_json['name'] + '-' + lang
                subprocess.check_output(
                    ['dx', 'build', appdir, '--destination', applet_name])
                subprocess.check_output(
                    ['dx', 'run', applet_name, '-y', '--wait'] + cmdline_args)
Example #39
0
def _clone_asset(record, folder, regions, project_dict):
    """Clone the asset bundle referenced by *record* into each of *regions*.

    Parameters:
        record: dxpy record object for the source asset (must have details
            with an 'archiveFileId' dnanexus link).
        folder: destination folder path to create/use in each target project.
        regions: iterable of region names to clone into.
        project_dict: mapping of region name -> destination project name/id.

    Returns a dict mapping each target region to the record id of the newly
    created AssetBundle record, or None if there was nothing to do (all
    requested regions match the source region).

    Exits the process (status 1) if any requested region is not supported by
    COPY_FILE_APP; raises RuntimeError if the cloned file cannot be found
    (or is ambiguous) in a destination project.
    """
    # The underlying archive file backing this asset record.
    fid = record.get_details()['archiveFileId']['$dnanexus_link']
    curr_region = dxpy.describe(record.project)['region']

    # Only clone into regions other than the one the asset already lives in.
    regions = set(regions) - {curr_region}
    if len(regions) == 0:
        # there is nothing to do
        return

    app_supported_regions = set(
        COPY_FILE_APP.describe()['regionalOptions'].keys())
    if len(regions - app_supported_regions) > 0:
        print('Currently no support for the following region(s): [{regions}]'.
              format(regions=', '.join(regions - app_supported_regions)),
              file=sys.stderr)
        sys.exit(1)

    # Gather information about the asset; tag the clones with their origin.
    asset_properties = record.get_properties()
    asset_properties['cloned_from'] = record.get_id()
    asset_file_name = dxpy.describe(fid)['name']
    url = dxpy.DXFile(fid).get_download_url(
        preauthenticated=True,
        project=dxpy.DXFile.NO_PROJECT_HINT,
        duration=URL_DURATION)[0]

    # Set up the target folder in each destination project.
    region2projid = {}
    for region in regions:
        dest_proj = util.get_project(project_dict[region])
        dest_proj.new_folder(folder, parents=True)
        region2projid[region] = dest_proj.get_id()
    print(region2projid)

    # Fire off a clone process for each region and wait for completion;
    # retry up to three times on failure.
    for _ in range(3):
        jobs = _clone_to_all_regions(region2projid, regions, asset_file_name,
                                     folder, url)
        if _wait_for_completion(jobs):
            break

    # Make an AssetBundle record pointing at the cloned file in each region.
    region2recordid = {}
    for region in regions:
        dest_proj_id = region2projid[region]
        results = list(
            dxpy.find_data_objects(classname="file",
                                   visibility="hidden",
                                   name=asset_file_name,
                                   project=dest_proj_id,
                                   folder=folder))
        file_ids = [p["id"] for p in results]
        if len(file_ids) == 0:
            raise RuntimeError("Found no files {}:{}/{}".format(
                dest_proj_id, folder, asset_file_name))
        if len(file_ids) > 1:
            # Bug fix: this previously referenced an undefined name
            # ('dxfiles'), which would raise NameError instead of the
            # intended diagnostic.
            raise RuntimeError(
                "Found {} files {}:{}/{}, instead of just one".format(
                    len(file_ids), dest_proj_id, folder, asset_file_name))
        dest_asset = dxpy.new_dxrecord(
            name=record.name,
            types=['AssetBundle'],
            details={'archiveFileId': dxpy.dxlink(file_ids[0])},
            # Bug fix: use the properties that include 'cloned_from';
            # previously record.get_properties() was re-fetched here and the
            # provenance property was silently dropped.
            properties=asset_properties,
            project=dest_proj_id,
            folder=folder,
            close=True)
        region2recordid[region] = dest_asset.get_id()
    return region2recordid
Example #40
0
    def test_dx_jobutil_new_job(self):
        """dx-jobutil-new-job --test should translate -i arguments into the
        expected "input" hash (or fail for invalid class overrides)."""
        rec_first = dxpy.new_dxrecord(name="first_record")
        rec_second = dxpy.new_dxrecord(name="second_record")
        dxpy.new_dxrecord(name="duplicate_name_record")
        dxpy.new_dxrecord(name="duplicate_name_record")
        # A record in a different project, to exercise project-qualified lookups
        rec_third = dxpy.new_dxrecord(name="third_record", project=self.aux_project.get_id())

        # Hoisted expected values reused across cases below
        link_first = dxpy.dxlink(rec_first.get_id(), self.project)
        link_second = dxpy.dxlink(rec_second.get_id(), self.project)
        link_third = dxpy.dxlink(rec_third.get_id(), self.aux_project.get_id())
        jbor = {"$dnanexus_link": {"job": "job-012301230123012301230123", "field": "outputfield"}}

        # (command-line snippet, expected "input" hash); None marks an
        # expected subprocess failure (exit code 1)
        test_cases = [
            # string
            ("-ifoo=input_string", {"foo": "input_string"}),
            # string that looks like a {job,analysis} ID
            ("-ifoo=job-012301230123012301230123", {"foo": "job-012301230123012301230123"}),
            ("-ifoo=analysis-012301230123012301230123", {"foo": "analysis-012301230123012301230123"}),
            # int
            ("-ifoo=24", {"foo": 24}),
            # float
            ("-ifoo=24.5", {"foo": 24.5}),
            # json
            ('-ifoo=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
            ('-ifoo=\'["a", "b"]\'', {"foo": ["a", "b"]}),
            # objectName
            ("-ifoo=first_record", {"foo": link_first}),
            # objectId
            ("-ifoo=" + rec_first.get_id(), {"foo": dxpy.dxlink(rec_first.get_id())}),
            # project:objectName
            ("-ifoo=" + self.aux_project.get_id() + ":third_record", {"foo": link_third}),
            # project:objectId
            ("-ifoo=" + self.aux_project.get_id() + ":" + rec_third.get_id(), {"foo": link_third}),
            # same, but wrong project is specified
            ("-ifoo=" + self.project + ":" + rec_third.get_id(), {"foo": link_third}),
            # glob
            ("-ifoo=first*", {"foo": link_first}),
            # JBOR
            ("-ifoo=job-012301230123012301230123:outputfield", {"foo": jbor}),
            # order of inputs is preserved from command line to API call
            ("-ifoo=first* -ifoo=second_record -ifoo=job-012301230123012301230123:outputfield",
             {"foo": [link_first, link_second, jbor]}),
            ("-ifoo=job-012301230123012301230123:outputfield -ifoo=first_record -ifoo=second_*",
             {"foo": [jbor, link_first, link_second]}),
            # if there is any ambiguity, the name is left unresolved
            ("-ifoo=duplicate_name_record", {"foo": "duplicate_name_record"}),
            ("-ifoo=*record", {"foo": "*record"}),
            # Override class
            ("-ifoo:int=24", {"foo": 24}),
            ("-ifoo:string=24", {"foo": "24"}),
            ("-ifoo:string=first_record", {"foo": "first_record"}),
            ('-ifoo:hash=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
            ('-ifoo:hash=\'["a", "b"]\'', {"foo": ["a", "b"]}),
            ("-ifoo:file=first_record", None),  # Error
            ("-ifoo:int=foo", None),  # Error
            ("-ifoo:int=24.5", None),  # Error

            # Array inputs

            # implicit array notation
            ("-ifoo=24 -ifoo=25", {"foo": [24, 25]}),
            ("-ifoo=25 -ibar=1 -ifoo=24", {"foo": [25, 24], "bar": 1}),
            ("-ifoo=first_record -ifoo=second_record", {"foo": [link_first, link_second]}),
            # different types (unusual, but potentially meaningful if
            # foo is a json input)
            ("-ifoo=24 -ifoo=bar", {"foo": [24, "bar"]}),

            # explicit array notation is NOT respected (in contexts with
            # no inputSpec such as this one)
            ("-ifoo:array:int=24", {"foo": 24}),
            ("-ifoo:array:record=first_record", {"foo": link_first}),
        ]
        env = override_environment(DX_JOB_ID="job-000000000000000000000001",
                                   DX_WORKSPACE_ID=self.project)
        for snippet, expected in test_cases:
            command = "dx-jobutil-new-job " + snippet + " entrypointname --test"
            if expected is None:
                with self.assertSubprocessFailure(exit_code=1):
                    run(command, env=env)
            else:
                result = run(command, env=env)
                self.assertEqual(json.loads(result),
                                 {"input": expected, "function": "entrypointname"})
    def test_input(self):
        """The same -i translation cases, verified via assertNewJobInputHash
        instead of invoking the dx-jobutil-new-job subprocess."""
        rec_first = dxpy.new_dxrecord(name="first_record")
        rec_second = dxpy.new_dxrecord(name="second_record")
        dxpy.new_dxrecord(name="duplicate_name_record")
        dxpy.new_dxrecord(name="duplicate_name_record")
        # A record in a different project, to exercise project-qualified lookups
        rec_third = dxpy.new_dxrecord(name="third_record",
                                      project=self.aux_project.get_id())

        # Hoisted expected values reused across cases below
        link_first = dxpy.dxlink(rec_first.get_id(), self.project)
        link_second = dxpy.dxlink(rec_second.get_id(), self.project)
        link_third = dxpy.dxlink(rec_third.get_id(), self.aux_project.get_id())
        jbor = {"$dnanexus_link": {"job": "job-012301230123012301230123",
                                   "field": "outputfield"}}

        # (command-line snippet, expected "input" hash)
        test_cases = [
            # string
            ("-ifoo=input_string", {"foo": "input_string"}),
            # string that looks like a {job,analysis} ID
            ("-ifoo=job-012301230123012301230123", {"foo": "job-012301230123012301230123"}),
            ("-ifoo=analysis-012301230123012301230123", {"foo": "analysis-012301230123012301230123"}),
            # int
            ("-ifoo=24", {"foo": 24}),
            # float
            ("-ifoo=24.5", {"foo": 24.5}),
            # json
            ('-ifoo=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
            ('-ifoo=\'["a", "b"]\'', {"foo": ["a", "b"]}),
            # objectName
            ("-ifoo=first_record", {"foo": link_first}),
            # objectId
            ("-ifoo=" + rec_first.get_id(), {"foo": dxpy.dxlink(rec_first.get_id())}),
            # project:objectName
            ("-ifoo=" + self.aux_project.get_id() + ":third_record", {"foo": link_third}),
            # project:objectId
            ("-ifoo=" + self.aux_project.get_id() + ":" + rec_third.get_id(),
             {"foo": link_third}),
            # same, but wrong project is specified
            ("-ifoo=" + self.project + ":" + rec_third.get_id(), {"foo": link_third}),
            # glob
            ("-ifoo=first*", {"foo": link_first}),
            # JBOR
            ("-ifoo=job-012301230123012301230123:outputfield", {"foo": jbor}),
            # order of inputs is preserved from command line to API call
            ("-ifoo=first* -ifoo=second_record -ifoo=job-012301230123012301230123:outputfield",
             {"foo": [link_first, link_second, jbor]}),
            ("-ifoo=job-012301230123012301230123:outputfield -ifoo=first_record -ifoo=second_*",
             {"foo": [jbor, link_first, link_second]}),
            # if there is any ambiguity, the name is left unresolved
            ("-ifoo=duplicate_name_record", {"foo": "duplicate_name_record"}),
            ("-ifoo=*record", {"foo": "*record"}),
            # Override class
            ("-ifoo:int=24", {"foo": 24}),
            ("-ifoo:string=24", {"foo": "24"}),
            ("-ifoo:string=first_record", {"foo": "first_record"}),
            ('-ifoo:hash=\'{"a": "b"}\'', {"foo": {"a": "b"}}),
            ('-ifoo:hash=\'["a", "b"]\'', {"foo": ["a", "b"]}),

            # Array inputs

            # implicit array notation
            ("-ifoo=24 -ifoo=25", {"foo": [24, 25]}),
            ("-ifoo=25 -ibar=1 -ifoo=24", {"foo": [25, 24], "bar": 1}),
            ("-ifoo=first_record -ifoo=second_record",
             {"foo": [link_first, link_second]}),
            # different types (unusual, but potentially meaningful if
            # foo is a json input)
            ("-ifoo=24 -ifoo=bar", {"foo": [24, "bar"]}),

            # explicit array notation is NOT respected (in contexts with
            # no inputSpec such as this one)
            ("-ifoo:array:int=24", {"foo": 24}),
            ("-ifoo:array:record=first_record", {"foo": link_first}),
        ]

        for snippet, expected in test_cases:
            self.assertNewJobInputHash(snippet, {"input": expected})
Example #42
0
 def test_pipeline_completion(self):
     """Tab-completion for `dx run` should offer a pipeline-typed record by
     name prefix."""
     # The record name contains a space; completion for the prefix "my" is
     # expected to yield the full name followed by a trailing space.
     dxpipeline = dxpy.new_dxrecord(name="my workflow", types=["pipeline"])
     self.assert_completion("dx run my", "my workflow ")