Example #1
def setUpTempProjects(thing):
    thing.old_workspace_id = dxpy.WORKSPACE_ID
    thing.proj_id = dxpy.api.project_new({
        'name': 'azure-test-project',
        'region': testutil.TEST_AZURE
    })['id']
    dxpy.set_workspace_id(thing.proj_id)
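Note: the save/create/switch pattern above pairs with a teardown (see Examples #14 and #19). A minimal sketch of the same pattern wrapped in a unittest.TestCase; the class and project names are illustrative, not from the original source:

import unittest
import dxpy

class TestWithTempProject(unittest.TestCase):
    def setUp(self):
        # Save the current workspace so tearDown can restore it.
        self.old_workspace_id = dxpy.WORKSPACE_ID
        self.proj_id = dxpy.api.project_new({'name': 'scratch-project'})['id']
        dxpy.set_workspace_id(self.proj_id)

    def tearDown(self):
        # Destroy the scratch project and restore the saved workspace.
        dxpy.api.project_destroy(self.proj_id, {'terminateJobs': True})
        dxpy.set_workspace_id(self.old_workspace_id)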
Example #2
 def setUp(self):
     os.environ['IFS'] = IFS
     os.environ['_ARGCOMPLETE'] = '1'
     os.environ['_DX_ARC_DEBUG'] = '1'
     os.environ['COMP_WORDBREAKS'] = '"\'@><=;|&(:'
     os.environ['DX_PROJECT_CONTEXT_ID'] = self.project_id
     dxpy.set_workspace_id(self.project_id)
Example #3
 def setUp(self):
     os.environ["IFS"] = IFS
     os.environ["_ARGCOMPLETE"] = "1"
     os.environ["_DX_ARC_DEBUG"] = "1"
     os.environ["COMP_WORDBREAKS"] = "\"'@><=;|&(:"
     os.environ["DX_PROJECT_CONTEXT_ID"] = self.project_id
     dxpy.set_workspace_id(self.project_id)
Example #5
def run_bwa_mem(sample, fastq_dict, mapper_app_dxid, ref_genome_index,
                project_id):
    '''
    Description: Maps sample fastq files to a reference genome.
    Args:
        sample - sample identifier, used in log/error messages
        fastq_dict (dict) - fastq files keyed by read number ('1' and/or '2')
        mapper_app_dxid (dxid) - BWA-MEM mapper app
        ref_genome_index (dxid) - reference genome index tarball (genomeindex_targz)
        project_id (dxid) - project in which to run the mapper
    '''

    ## Stock DNAnexus BWA-MEM app
    #mapper_app_name = 'bwa_mem_fastq_read_mapper'
    #mapper_app_version = '1.5.0'
    #mapper_app = MapperApp(name=mapper_app_name, version=mapper_app_version)   # DXApp object

    dxpy.set_workspace_id(project_id)
    # Create dict to store mapper app inputs
    mapper_app = dxpy.DXApp(mapper_app_dxid)
    mapper_input = {
        'genomeindex_targz': dxpy.dxlink(ref_genome_index)
    }  # hg19 : file-B6qq53v2J35Qyg04XxG0000V

    # Add fastq files to mapper app input dict
    if len(fastq_dict) == 0:
        print('Error: No fastq files listed for sample %s' % sample)
        sys.exit()
    elif len(fastq_dict) == 1:
        mapper_input['reads_fastqgz'] = dxpy.dxlink(fastq_dict['1'])
    elif len(fastq_dict) == 2:
        mapper_input['reads_fastqgz'] = dxpy.dxlink(fastq_dict['1'])
        mapper_input['reads2_fastqgz'] = dxpy.dxlink(fastq_dict['2'])
    else:
        print('Error: More than 2 fastq files passed for mapping sample %s' % sample)
        sys.exit()
    print(mapper_input)

    mapper_job = mapper_app.run(mapper_input)
    mapper_output = {
        "BAM": {
            "job": mapper_job.get_id(),
            "field": "sorted_bam"
        },
        "BAI": {
            "job": mapper_job.get_id(),
            "field": "sorted_bai"
        }
    }
    return mapper_output
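Note: the {'job': ..., 'field': ...} dicts returned above are job-based object references (JBORs); wrapped in a '$dnanexus_link' they can feed a downstream run before the mapping job finishes. A hedged sketch (the downstream input name is illustrative):

# Wrap a JBOR so it can be passed as a downstream input.
bam_ref = {'$dnanexus_link': mapper_output['BAM']}
# Equivalent, using the job handle directly:
# bam_ref = mapper_job.get_output_ref('sorted_bam')
downstream_input = {'input_bam': bam_ref}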
Example #6
    def _sync_dxpy_state(self):
        dxpy.set_api_server_info(host=environ.get("DX_APISERVER_HOST", None),
                                 port=environ.get("DX_APISERVER_PORT", None),
                                 protocol=environ.get("DX_APISERVER_PROTOCOL", None))

        if "DX_SECURITY_CONTEXT" in environ:
            dxpy.set_security_context(json.loads(environ["DX_SECURITY_CONTEXT"]))

        if "DX_JOB_ID" in environ:
            dxpy.set_job_id(environ["DX_JOB_ID"])
            dxpy.set_workspace_id(environ.get("DX_WORKSPACE_ID"))
        else:
            dxpy.set_job_id(None)
            dxpy.set_workspace_id(environ.get("DX_PROJECT_CONTEXT_ID"))

        dxpy.set_project_context(environ.get("DX_PROJECT_CONTEXT_ID"))
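Note: _sync_dxpy_state rebuilds dxpy's module-level state from environment variables (the snippet assumes `from os import environ`). A sketch of the variables it consumes, with purely illustrative values:

# Illustrative values only; real deployments set these via dx login or the job environment.
environ['DX_APISERVER_HOST'] = 'api.dnanexus.com'
environ['DX_APISERVER_PORT'] = '443'
environ['DX_APISERVER_PROTOCOL'] = 'https'
environ['DX_SECURITY_CONTEXT'] = '{"auth_token_type": "Bearer", "auth_token": "<token>"}'
environ['DX_PROJECT_CONTEXT_ID'] = 'project-xxxx'  # used when DX_JOB_ID is unset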
Example #7
def test_mapping():
    dxpy.set_workspace_id('project-BpBjyqQ0Jk0Xv2B11Q8P6X59')
    applet = dxpy.find_one_data_object(
        name='bwa_mem_fastq_read_mapper',
        classname='applet',
        return_handler=True,
        zero_ok=False,
        project='project-B406G0x2fz2B3GVk65200003')
    applet.run({
        'genomeindex_targz':
        dxpy.dxlink('file-B6qq53v2J35Qyg04XxG0000V'),
        'reads_fastqgz':
        dxpy.dxlink('file-BpBjzFQ0Jk0Xk73YqQgJKg9Z'),
        'reads2_fastqgz':
        dxpy.dxlink('file-BpBk0400Jk0Xk73YqQgJKg9f')
    })
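Note: test_mapping launches the applet but never waits for it. A hedged follow-up sketch, assuming the run() call's return value and input dict are captured in variables (applet_input and job are illustrative names):

job = applet.run(applet_input)   # applet_input: the dict passed above
job.wait_on_done(interval=2)     # raises DXJobFailureError if the job fails
print(job.describe()['output'])  # output hash, e.g. sorted_bam / sorted_bai links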
Example #9
def main(token):
    # Configure dxpy authentication
    dxpy.set_security_context({'auth_token_type': 'Bearer', 'auth_token': token})

    # Resolve FACTORY_PROJECT by ID
    proj = dxpy.DXProject(FACTORY_PROJECT)
    print('Resolved project:', proj.describe()['name'], proj.get_id())

    # Set FACTORY_PROJECT as the workspace for subsequent operations
    # (sort of like the current working directory)
    dxpy.set_workspace_id(FACTORY_PROJECT)

    # Resolve the workflow by name. (Could also store ID like the project)
    wf = list(dxpy.search.find_data_objects(classname="workflow", name="RNA-seq pipeline",
                                            return_handler=True))[0]
    print('Resolved workflow:', wf.describe()['name'], wf.get_id())

    # TODO: Stage the inputs. Here we find them in the IN folder
    left_reads = list(dxpy.search.find_data_objects(classname="file", name="ENCFF001JPX.1k.fastq.gz",
                                                    folder="/IN", return_handler=True))[0]
    print('Resolved left reads:', left_reads.describe()['name'], left_reads.get_id())
    right_reads = list(dxpy.search.find_data_objects(classname="file", name="ENCFF001JQB.1k.fastq.gz",
                                                     folder="/IN", return_handler=True))[0]
    print('Resolved right reads:', right_reads.describe()['name'], right_reads.get_id())

    # Launch the workflow
    analysis = wf.run({'0.fastqs': [dxpy.dxlink(left_reads.get_id())],
                       '0.fastq_pairs': [dxpy.dxlink(right_reads.get_id())]})
    print('Launched analysis:', analysis.get_id())
    print('Analysis state:', analysis.describe()['state'])

    # TODO: Poll for (or come back when) analysis state 'done' or 'failed'.
    # Handle any failures.

    # Cooking-show-style substitution with completed analysis
    analysis = dxpy.DXAnalysis(COMPLETED_ANALYSIS)
    print('Analysis state:', analysis.describe()['state'])

    # Enumerate outputs
    print('Analysis outputs:')
    for one_output_name, one_output_link in analysis.describe()['output'].items():
        one_output = dxpy.get_handler(one_output_link)  # one_output : dxpy.DXFile
        one_file_name = one_output.describe()['name']
        one_file_url, _ = one_output.get_download_url(preauthenticated=True, filename=one_file_name)
        print(one_file_name, one_file_url)
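Note: a hedged sketch for the polling TODO above, using only the analysis state the example already queries:

import time

# Poll until the analysis reaches a terminal state, then raise on failure.
terminal_states = ('done', 'failed', 'terminated')
while True:
    state = analysis.describe()['state']
    if state in terminal_states:
        break
    time.sleep(10)
if state != 'done':
    raise RuntimeError('Analysis %s ended in state: %s' % (analysis.get_id(), state))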
Example #10
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="Module to grab QC metrics from ENCODE pipelines",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--encode-url', help="ENCODE assay search URL")
    group.add_argument('--dx-file', help="Text file with DNAnexus IDs")

    args = parser.parse_args()

    # 'data' is assumed to hold the parsed auth.json configuration,
    # loaded elsewhere in the original script.
    try:
        dxpy.set_workspace_id(data['dx_project'])
    except DXError:
        print("Please enter a valid project ID in auth.json")
    else:
        if args.encode_url:
            if 'assay_term_name' not in args.encode_url:
                print("Please select exactly one assay type.")
                sys.exit(1)
            for exp in get_assay_JSON(args.encode_url):
                exp_details = {
                    'assay_term_name': exp['assay_term_name'],
                    'assay_term_id': exp['assay_term_id'],
                    'accession': exp['accession']
                }
                print("Started processing the experiment - %s" %
                      exp['accession'])
                for f in exp['original_files']:
                    f_json = get_encode_object(f)
                    if 'notes' in f_json:
                        print(" Processing file - %s" % f_json.get('accession'))
                        status = load_metadata(f_json, exp_details)
                        if not status:
                            print("   Update failed - %s" % f_json['accession'])
                        else:
                            print(" ...Done")
                print("Finished processing the experiment - %s" %
                      exp['accession'])
        elif args.dx_file:
            # Have to include the code to make the script work with
            # DNAnexus analysis IDs
            pass
Example #11
    def run_analysis(self):
        #pdb.set_trace()
        self.record = dxpy.DXRecord(dxid=self.record_id, project=self.dashboard_project_id)
        properties = self.record.get_properties()
        if 'analysis_started' not in properties:
            print('Warning: Could not determine whether or not analysis had been started')
            dxpy.set_workspace_id(dxid=self.project_id)
            self.workflow_object = dxpy.DXWorkflow(dxid=self.workflow_id,
                                                   project=self.workflow_project_id)
            print('Launching workflow %s with input: %s' % (
                self.workflow_object.describe()['id'],
                self.analysis_input))
            self.workflow_object.run(workflow_input=self.analysis_input,
                                     project=self.project_id,
                                     folder='/')
            self.record.set_properties({'analysis_started': 'true'})
        elif properties['analysis_started'] == 'true':
            print('Info: Analysis has already been started; skipping.')
        elif properties['analysis_started'] == 'false':
            dxpy.set_workspace_id(dxid=self.project_id)
            self.workflow_object = dxpy.DXWorkflow(dxid=self.workflow_id,
                                                   project=self.workflow_project_id)
            print('Launching workflow %s with input: %s' % (
                self.workflow_object.describe()['id'],
                self.analysis_input))
            self.workflow_object.run(workflow_input=self.analysis_input,
                                     project=self.project_id,
                                     folder='/')
            self.record.set_properties({'analysis_started': 'true'})

            # Create new pipeline run in LIMS
            if not self.develop:
                if self.lane_index == 1:
                    param_dict = {'started': True}
                    lims_json = self.connection.createpipelinerun(self.run_name, param_dict)
                    self.record.set_properties({'pipeline_id': str(lims_json['id'])})
                    print('Info: Created new LIMS pipeline run %s' % str(lims_json['id']))
Example #12
    def test_get_handler(self):
        dxpy.set_workspace_id(self.second_proj_id)

        dxrecord = dxpy.new_dxrecord(project=self.proj_id)
        # Simple DXLink
        dxlink = {'$dnanexus_link': dxrecord.get_id()}
        handler = dxpy.get_handler(dxlink)
        self.assertEqual(handler.get_id(), dxrecord.get_id())
        # Default project is not going to be the correct one
        self.assertNotEqual(handler.get_proj_id(), self.proj_id)

        # Extended DXLink
        dxlink = {'$dnanexus_link': {'id': dxrecord.get_id(),
                                     'project': self.proj_id}}
        handler = dxpy.get_handler(dxlink)
        self.assertEqual(handler.get_id(), dxrecord.get_id())
        self.assertEqual(handler.get_proj_id(), self.proj_id)

        # Handle project IDs
        dxproject = dxpy.get_handler(self.proj_id)
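Note: the two link shapes the test constructs by hand can also be produced with dxpy.dxlink. A short sketch, reusing the test's own handles:

simple = dxpy.dxlink(dxrecord.get_id())                  # {'$dnanexus_link': 'record-...'}
extended = dxpy.dxlink(dxrecord.get_id(), self.proj_id)  # {'$dnanexus_link': {'project': ..., 'id': ...}}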
Example #13
 def test_completion_with_bad_current_project(self):
     os.environ['DX_PROJECT_CONTEXT_ID'] = ''
     dxpy.set_workspace_id('')
     self.assert_completion("dx select ", "tab-completion project\\:")
     self.assert_completion("dx cd ta", "tab-completion project\\:")
Example #14
def tearDownTempProjects(thing):
    dxpy.api.project_destroy(thing.proj_id, {'terminateJobs': True})
    dxpy.api.project_destroy(thing.second_proj_id, {'terminateJobs': True})
    dxpy.set_workspace_id(thing.old_workspace_id)
Example #16
def _build_app_remote(
    mode,
    src_dir,
    publish=False,
    destination_override=None,
    version_override=None,
    bill_to_override=None,
    dx_toolkit_autodep="stable",
    do_version_autonumbering=True,
    do_try_update=True,
    do_parallel_build=True,
    do_check_syntax=True,
    region=None,
):
    if mode == "app":
        builder_app = "app-tarball_app_builder"
    else:
        builder_app = "app-tarball_applet_builder"

    temp_dir = tempfile.mkdtemp()

    # TODO: this is vestigial, the "auto" setting should be removed.
    if dx_toolkit_autodep == "auto":
        dx_toolkit_autodep = "stable"

    build_options = {"dx_toolkit_autodep": dx_toolkit_autodep}

    if version_override:
        build_options["version_override"] = version_override
    elif do_version_autonumbering:
        # If autonumbering is DISABLED, the interior run of dx-build-app
        # will detect the correct version to use without our help. If it
        # is ENABLED, the version suffix might depend on the state of
        # the git repository. Since we'll remove the .git directory
        # before uploading, we need to determine the correct version to
        # use here and pass it in to the interior run of dx-build-app.
        app_spec = _parse_app_spec(src_dir)
        original_version = app_spec["version"]
        app_describe = None
        try:
            app_describe = dxpy.api.app_describe(
                "app-" + app_spec["name"], alias=original_version, always_retry=False
            )
        except dxpy.exceptions.DXAPIError as e:
            if e.name == "ResourceNotFound" or (mode == "applet" and e.name == "PermissionDenied"):
                pass
            else:
                raise e
        if app_describe is not None:
            if "published" in app_describe or not do_try_update:
                # The version we wanted was taken; fall back to the
                # autogenerated version number.
                build_options["version_override"] = original_version + _get_version_suffix(
                    src_dir, original_version
                )

    # The following flags are basically passed through verbatim.
    if bill_to_override:
        build_options["bill_to_override"] = bill_to_override
    if not do_version_autonumbering:
        build_options["do_version_autonumbering"] = False
    if not do_try_update:
        build_options["do_try_update"] = False
    if not do_parallel_build:
        build_options["do_parallel_build"] = False
    if not do_check_syntax:
        build_options["do_check_syntax"] = False

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
        build_project_id = dxpy.WORKSPACE_ID
        if destination_override:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination_override)
        if build_project_id is None:
            parser.error(
                "Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project"
            )
        if dest_applet_name:
            build_options["destination_override"] = "/" + dest_applet_name

    elif mode == "app":
        using_temp_project_for_remote_build = True
        try:
            if region:
                build_project_id = dxpy.api.project_new(
                    {"name": "dx-build-app --remote temporary project", "region": region}
                )["id"]
            else:
                build_project_id = dxpy.api.project_new({"name": "dx-build-app --remote temporary project"})["id"]
        except:
            err_exit()

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        tar_subprocess = subprocess.Popen(
            ["tar", "-czf", "-", "--exclude", "./.git", "."], cwd=src_dir, stdout=subprocess.PIPE
        )
        with open(app_tarball_file, "wb") as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write(
                            "Compressing target directory {dir}... ({kb_so_far:,} kb)".format(
                                dir=src_dir, kb_so_far=total_num_bytes // 1024
                            )
                        )
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True

        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(
            app_tarball_file, media_type="application/gzip", wait_on_close=True, show_progress=True
        )

        try:
            input_hash = {"input_file": dxpy.dxlink(remote_file), "build_options": build_options}
            if mode == "app":
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
            }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]
            print("Started builder job %s" % (job_id,))
            try:
                subprocess.check_call(["dx", "watch", job_id])
            except subprocess.CalledProcessError as e:
                if e.returncode == 3:
                    # Some kind of failure to build the app. The reason
                    # for the failure is probably self-evident from the
                    # job log (and if it's not, the CalledProcessError
                    # is not informative anyway), so just propagate the
                    # return code without additional remarks.
                    sys.exit(3)
                else:
                    raise e

            dxpy.DXJob(job_id).wait_on_done(interval=1)

            if mode == "applet":
                applet_id, _ = dxpy.get_dxlink_ids(dxpy.api.job_describe(job_id)["output"]["output_applet"])
                return applet_id
            else:
                # TODO: determine and return the app ID, to allow
                # running the app if args.run is specified
                return None
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id, {"terminateJobs": True})
        shutil.rmtree(temp_dir)
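Note: the comments in _build_app_remote describe how a PROJ:FOLDER/NAME destination string is split. The real parse_destination helper in dx-toolkit also resolves names through the API; a purely hypothetical, simplified sketch of just the split:

import posixpath

def parse_destination_sketch(dest):
    # 'project-xxxx:/folder/name' -> (project, folder, name); illustrative only.
    project, _, path = dest.partition(':')
    folder, name = posixpath.split(path)
    return (project or None, folder or None, name or None)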
Example #17
def _build_app_remote(mode, src_dir, publish=False, destination_override=None,
                      version_override=None, bill_to_override=None, dx_toolkit_autodep="stable",
                      do_version_autonumbering=True, do_try_update=True, do_parallel_build=True,
                      do_check_syntax=True, region=None, watch=True):
    if mode == 'app':
        builder_app = 'app-tarball_app_builder'
    else:
        builder_app = 'app-tarball_applet_builder'

    app_spec = _parse_app_spec(src_dir)
    if app_spec['runSpec'].get('release') == '14.04':
        builder_app += "_trusty"

    temp_dir = tempfile.mkdtemp()

    build_options = {'dx_toolkit_autodep': dx_toolkit_autodep}

    if version_override:
        build_options['version_override'] = version_override
    elif do_version_autonumbering:
        # If autonumbering is DISABLED, the interior run of dx-build-app
        # will detect the correct version to use without our help. If it
        # is ENABLED, the version suffix might depend on the state of
        # the git repository. Since we'll remove the .git directory
        # before uploading, we need to determine the correct version to
        # use here and pass it in to the interior run of dx-build-app.
        original_version = app_spec['version']
        app_describe = None
        try:
            app_describe = dxpy.api.app_describe("app-" + app_spec["name"], alias=original_version, always_retry=False)
        except dxpy.exceptions.DXAPIError as e:
            if e.name == 'ResourceNotFound' or (mode == 'applet' and e.name == 'PermissionDenied'):
                pass
            else:
                raise e
        if app_describe is not None:
            if 'published' in app_describe or not do_try_update:
                # The version we wanted was taken; fall back to the
                # autogenerated version number.
                build_options['version_override'] = original_version + _get_version_suffix(src_dir, original_version)

    # The following flags are basically passed through verbatim.
    if bill_to_override:
        build_options['bill_to_override'] = bill_to_override
    if not do_version_autonumbering:
        build_options['do_version_autonumbering'] = False
    if not do_try_update:
        build_options['do_try_update'] = False
    if not do_parallel_build:
        build_options['do_parallel_build'] = False
    if not do_check_syntax:
        build_options['do_check_syntax'] = False

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
        build_project_id = dxpy.WORKSPACE_ID
        if destination_override:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination_override)
        if build_project_id is None:
            parser.error("Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project")
        if dest_applet_name:
            build_options['destination_override'] = '/' + dest_applet_name

    elif mode == "app":
        using_temp_project_for_remote_build = True
        try:
            project_input = {}
            project_input["name"] = "dx-build-app --remote temporary project"
            if bill_to_override:
                project_input["billTo"] = bill_to_override
            if region:
                project_input["region"] = region
            build_project_id = dxpy.api.project_new(project_input)["id"]
        except:
            err_exit()

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        tar_subprocess = subprocess.Popen(["tar", "-czf", "-", "--exclude", "./.git", "."], cwd=src_dir, stdout=subprocess.PIPE)
        with open(app_tarball_file, 'wb') as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write("Compressing target directory {dir}... ({kb_so_far:,} kb)".format(dir=src_dir, kb_so_far=total_num_bytes // 1024))
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True

        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(app_tarball_file, media_type="application/gzip",
                                             wait_on_close=True, show_progress=True)

        try:
            input_hash = {
                "input_file": dxpy.dxlink(remote_file),
                "build_options": build_options
                }
            if mode == 'app':
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
                }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]
            print("Started builder job %s" % (job_id,))
            if watch:
                try:
                    subprocess.check_call(["dx", "watch", job_id])
                except subprocess.CalledProcessError as e:
                    if e.returncode == 3:
                        # Some kind of failure to build the app. The reason
                        # for the failure is probably self-evident from the
                        # job log (and if it's not, the CalledProcessError
                        # is not informative anyway), so just propagate the
                        # return code without additional remarks.
                        sys.exit(3)
                    else:
                        raise e

            dxpy.DXJob(job_id).wait_on_done(interval=1)

            if mode == 'applet':
                applet_id, _ = dxpy.get_dxlink_ids(dxpy.api.job_describe(job_id)['output']['output_applet'])
                return applet_id
            else:
                # TODO: determine and return the app ID, to allow
                # running the app if args.run is specified
                return None
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id, {"terminateJobs": True})
        shutil.rmtree(temp_dir)
Example #18
def setUpTempProject(thing):
    thing.old_workspace_id = dxpy.WORKSPACE_ID
    thing.proj_id = dxpy.api.project_new({'name':
                                          'symlink test project'})['id']
    dxpy.set_workspace_id(thing.proj_id)
Example #19
def tearDownTempProject(thing):
    dxpy.api.project_destroy(thing.proj_id, {'terminateJobs': True})
    dxpy.set_workspace_id(thing.old_workspace_id)
Example #20
 @classmethod
 def setUpClass(cls):
     cls.project_id = dxpy.api.project_new({"name": "tab-completion project"})['id']
     os.environ['DX_PROJECT_CONTEXT_ID'] = cls.project_id
     dxpy.set_workspace_id(cls.project_id)
Example #21
def setUpTempProjects(thing):
    thing.old_workspace_id = dxpy.WORKSPACE_ID
    thing.proj_id = dxpy.api.project_new({'name': 'test project 1'})['id']
    thing.second_proj_id = dxpy.api.project_new({'name': 'test project 2'})['id']
    dxpy.set_workspace_id(thing.proj_id)
Example #22
def main(token):
    # Configure dxpy authentication
    dxpy.set_security_context({
        'auth_token_type': 'Bearer',
        'auth_token': token
    })

    # Resolve FACTORY_PROJECT by ID
    proj = dxpy.DXProject(FACTORY_PROJECT)
    print('Resolved project:', proj.describe()['name'], proj.get_id())

    # Set FACTORY_PROJECT as the workspace for subsequent operations
    # (sort of like the current working directory)
    dxpy.set_workspace_id(FACTORY_PROJECT)

    # Resolve the workflow by name. (Could also store ID like the project)
    wf = list(
        dxpy.search.find_data_objects(classname="workflow",
                                      name="RNA-seq pipeline",
                                      return_handler=True))[0]
    print('Resolved workflow:', wf.describe()['name'], wf.get_id())

    # TODO: Stage the inputs. Here we find them in the IN folder
    left_reads = list(
        dxpy.search.find_data_objects(classname="file",
                                      name="ENCFF001JPX.1k.fastq.gz",
                                      folder="/IN",
                                      return_handler=True))[0]
    print('Resolved left reads:', left_reads.describe()['name'],
          left_reads.get_id())
    right_reads = list(
        dxpy.search.find_data_objects(classname="file",
                                      name="ENCFF001JQB.1k.fastq.gz",
                                      folder="/IN",
                                      return_handler=True))[0]
    print('Resolved right reads:', right_reads.describe()['name'],
          right_reads.get_id())

    # Launch the workflow
    analysis = wf.run({
        '0.fastqs': [dxpy.dxlink(left_reads.get_id())],
        '0.fastq_pairs': [dxpy.dxlink(right_reads.get_id())]
    })
    print('Launched analysis:', analysis.get_id())
    print('Analysis state:', analysis.describe()['state'])

    # TODO: Poll for (or come back when) analysis state 'done' or 'failed'.
    # Handle any failures.

    # Cooking-show-style substitution with completed analysis
    analysis = dxpy.DXAnalysis(COMPLETED_ANALYSIS)
    print('Analysis state:', analysis.describe()['state'])

    # Enumerate outputs
    print('Analysis outputs:')
    for one_output_name, one_output_link in analysis.describe()['output'].items():
        one_output = dxpy.get_handler(one_output_link)  # one_output : dxpy.DXFile
        one_file_name = one_output.describe()['name']
        one_file_url, _ = one_output.get_download_url(preauthenticated=True,
                                                      filename=one_file_name)
        print(one_file_name, one_file_url)
Example #23
def setUpTempProjects(thing):
    thing.old_workspace_id = dxpy.WORKSPACE_ID
    thing.proj_id = dxpy.api.project_new({'name': 'azure-test-project', 'region': testutil.TEST_AZURE})['id']
    dxpy.set_workspace_id(thing.proj_id)
Example #24
def _build_app_remote(mode, src_dir, destination=None, publish=False,
                      dx_toolkit_autodep="auto", version_override=None,
                      bill_to=None, version_autonumbering=True, update=True,
                      parallel_build=True):
    if mode == 'app':
        builder_app = 'app-tarball_app_builder'
    else:
        builder_app = 'app-tarball_applet_builder'

    temp_dir = tempfile.mkdtemp()

    # We have to resolve the correct dx-toolkit dependency type here and
    # explicitly pass it into the interior call of dx-build-app, because
    # within the execution environment of tarball_app(let)_builder,
    # APISERVER_HOST is set to the address of the proxy (a 10.x.x.x
    # address) and doesn't give us any information about whether we are
    # talking to preprod.
    if dx_toolkit_autodep == "auto":
        # "auto" (the default) means dx-toolkit (stable) on preprod and prod, and
        # dx-toolkit-beta on all other systems.
        if dxpy.APISERVER_HOST == "preprodapi.dnanexus.com" or dxpy.APISERVER_HOST == "api.dnanexus.com":
            dx_toolkit_autodep_flag = "--dx-toolkit-stable-autodep"
        else:
            dx_toolkit_autodep_flag = "--dx-toolkit-beta-autodep"
    elif dx_toolkit_autodep == "git":
        dx_toolkit_autodep_flag = "--dx-toolkit-legacy-git-autodep"
    elif dx_toolkit_autodep == "stable":
        dx_toolkit_autodep_flag = "--dx-toolkit-stable-autodep"
    elif dx_toolkit_autodep == "beta":
        dx_toolkit_autodep_flag = "--dx-toolkit-beta-autodep"
    elif dx_toolkit_autodep == "unstable":
        dx_toolkit_autodep_flag = "--dx-toolkit-unstable-autodep"
    elif dx_toolkit_autodep is False:
        dx_toolkit_autodep_flag = "--no-dx-toolkit-autodep"

    extra_flags = [dx_toolkit_autodep_flag]

    # These flags are basically passed through verbatim.
    if version_override:
        extra_flags.extend(['--version', version_override])
    if bill_to:
        extra_flags.extend(['--bill-to', bill_to])
    if not version_autonumbering:
        extra_flags.append('--no-version-autonumbering')
    if not update:
        extra_flags.append('--no-update')
    if not parallel_build:
        extra_flags.append('--no-parallel-build')

    using_temp_project_for_remote_build = False

    # If building an applet, run the builder app in the destination
    # project. If building an app, run the builder app in a temporary
    # project.
    dest_folder = None
    dest_applet_name = None
    if mode == "applet":
        # Translate the --destination flag as follows. If --destination
        # is PROJ:FOLDER/NAME,
        #
        # 1. Run the builder app in PROJ
        # 2. Make the output folder FOLDER
        # 3. Supply --destination=NAME to the interior call of dx-build-applet.
        build_project_id = dxpy.WORKSPACE_ID
        if destination:
            build_project_id, dest_folder, dest_applet_name = parse_destination(destination)
        if build_project_id is None:
            parser.error("Can't create an applet without specifying a destination project; please use the -d/--destination flag to explicitly specify a project")
        if dest_applet_name:
            # TODO: escape this correctly, or find a way of passing the
            # extra_flags that doesn't get screwed up by names that have
            # spaces in them.
            extra_flags.extend(['--destination', '/' + dest_applet_name])

    elif mode == "app":
        using_temp_project_for_remote_build = True
        build_project_id = dxpy.api.project_new({"name": "dx-build-app --remote temporary project"})["id"]

    try:
        # Resolve relative paths and symlinks here so we have something
        # reasonable to write in the job name below.
        src_dir = os.path.realpath(src_dir)

        # Show the user some progress as the tarball is being generated.
        # Hopefully this will help them to understand when their tarball
        # is huge (e.g. the target directory already has a whole bunch
        # of binaries in it) and interrupt before uploading begins.
        app_tarball_file = os.path.join(temp_dir, "app_tarball.tar.gz")
        # TODO: figure out if we can use --exclude-vcs here (conditional
        # on presence of GNU tar). This might require propagating the
        # --version directly to the interior dx-build-app since in
        # general that can depend on the git metadata.
        tar_subprocess = subprocess.Popen(["tar", "-czf", "-", "."], cwd=src_dir, stdout=subprocess.PIPE)
        with open(app_tarball_file, 'wb') as tar_output_file:
            total_num_bytes = 0
            last_console_update = 0
            start_time = time.time()
            printed_static_message = False
            # Pipe the output of tar into the output file
            while True:
                tar_exitcode = tar_subprocess.poll()
                data = tar_subprocess.stdout.read(4 * 1024 * 1024)
                if tar_exitcode is not None and len(data) == 0:
                    break
                tar_output_file.write(data)
                total_num_bytes += len(data)
                current_time = time.time()
                # Don't show status messages at all for very short tar
                # operations (< 1.0 sec)
                if current_time - last_console_update > 0.25 and current_time - start_time > 1.0:
                    if sys.stderr.isatty():
                        if last_console_update > 0:
                            sys.stderr.write("\r")
                        sys.stderr.write("Compressing target directory %s... (%s kb)" % (src_dir, locale.format_string("%d", total_num_bytes // 1024, grouping=True)))
                        sys.stderr.flush()
                        last_console_update = current_time
                    elif not printed_static_message:
                        # Print a message (once only) when stderr is not
                        # going to a live console
                        sys.stderr.write("Compressing target directory %s..." % (src_dir,))
                        printed_static_message = True

        if last_console_update > 0:
            sys.stderr.write("\n")
        if tar_exitcode != 0:
            raise Exception("tar exited with non-zero exit code " + str(tar_exitcode))

        dxpy.set_workspace_id(build_project_id)

        remote_file = dxpy.upload_local_file(app_tarball_file, media_type="application/gzip",
                                             wait_on_close=True, show_progress=True)

        try:
            extra_flags_str = " ".join(shlex.quote(s) for s in extra_flags)
            input_hash = {
                "input_file": dxpy.dxlink(remote_file),
                "extra_flags": extra_flags_str
                }
            if mode == 'app':
                input_hash["publish"] = publish
            api_options = {
                "name": "Remote build of %s" % (os.path.basename(src_dir),),
                "input": input_hash,
                "project": build_project_id,
                }
            if dest_folder:
                api_options["folder"] = dest_folder
            app_run_result = dxpy.api.app_run(builder_app, input_params=api_options)
            job_id = app_run_result["id"]
            print("Started builder job %s" % (job_id,))
            subprocess.check_call(["dx", "watch", job_id])
        finally:
            if not using_temp_project_for_remote_build:
                dxpy.DXProject(build_project_id).remove_objects([remote_file.get_id()])
    finally:
        if using_temp_project_for_remote_build:
            dxpy.api.project_destroy(build_project_id)
        shutil.rmtree(temp_dir)
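Note: distilled from the examples above, the common dxpy.set_workspace_id pattern is save, switch, work, restore. A minimal hedged sketch (the project ID and record name are illustrative):

import dxpy

previous = dxpy.WORKSPACE_ID
dxpy.set_workspace_id('project-xxxx')
try:
    # Objects created without an explicit project now land in project-xxxx.
    record = dxpy.new_dxrecord(name='example-record')
finally:
    # Restore whatever workspace was active before.
    dxpy.set_workspace_id(previous)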