Beispiel #1
0
def main(reorg_conf___=None, reorg_status___=None):
    """Test reorg applet: move analysis outputs into fixed test folders.

    Returns a dict whose "outputs" entry lists the links of the two
    relocated outputs.
    """
    # Identify the analysis this job belongs to and list its stages.
    this_job = dxpy.describe(dxpy.JOB_ID)
    stage_list = dxpy.describe(this_job["analysis"])["stages"]

    # key is the name of the output and the value is the link of the file.
    candidates = [
        stage['execution']['output']
        for stage in stage_list
        if stage['id'] == 'stage-outputs'
    ]
    output_map = candidates[0]
    print(output_map)

    files_to_move = list(output_map.get('output_file').values())
    configs_to_move = list(output_map.get('output_config_file').values())

    first_folder = '/tests/test_reorg/out_1'
    second_folder = '/tests/test_reorg/out_2'

    container = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)
    container.move(destination=first_folder, objects=files_to_move)
    container.move(destination=second_folder, objects=configs_to_move)

    return {
        "outputs": [
            output_map.get('output_file'),
            output_map.get('output_config_file'),
        ]
    }
Beispiel #2
0
def job_2_app(job_id):
    """Resolve the app (or applet) id that a job executed.

    Adds the 'job_not_found' sentinel guard used by the sibling
    implementation, so a failed job lookup is propagated instead of
    being sent to the API.
    """
    if job_id == 'job_not_found':
        return 'job_not_found'
    try:
        app_id = dxpy.describe(job_id)['app']
    except KeyError:
        # Jobs run from applets carry 'applet' instead of 'app'.
        app_id = dxpy.describe(job_id)['applet']
    return app_id.strip()
Beispiel #3
0
def copy_files(fids, projectId, folder, overwrite=False):
    '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.

    fids      -- iterable of keys into the module-level FILES dict (dxLinks).
    projectId -- destination project id.
    folder    -- destination folder path inside projectId.
    overwrite -- clone even when a file of the same name already exists.
    '''
    newFids = []
    for fid in fids:
        fileDict = dxpy.describe(FILES[fid]) # FILES contain dxLinks
        if fileDict['project'] == projectId:
            # cannot copy into the same project!!!
            # so just leave in place and pretend that we did!
            #proj = dxpy.DXProject(projectId)
            #proj.move(folder,[fid])
            newFids.append( fid )
            continue

        # Check to see if file already exists.
        alreadyThere = find_file(folder+'/'+fileDict['name'],projectId)
        if alreadyThere is None or overwrite:
            # remove what is alreadyThere?
            #if alreadyThere is not None:
            #    proj = dxpy.DXProject(projectId)
            #    proj.remove_objects([alreadyThere])
            dxFile = dxpy.get_handler(FILES[fid])
            newLink = dxpy.dxlink(dxFile.clone(projectId, folder))
        else:
            # BUG FIX: FILES is a dict, so it must be indexed; calling it
            # as FILES(alreadyThere) raised TypeError.
            newLink = FILES[alreadyThere]
        if newLink is None:
            print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \
                    "' to '" + projectId + ":" + folder + "'."
            sys.exit(1)
        newDict = dxpy.describe(newLink)
        FILES[newDict['id']] = newLink
        newFids.append( newDict['id'] )

    return newFids
Beispiel #4
0
    def connect(self):
        """Open the websocket and keep reconnecting across server restarts.

        Loops until the socket closes for a reason other than a server
        restart; each iteration resets the per-connection status fields.
        """
        while True:
            self.error = False
            self.exception = None
            self.closed_code = None
            self.closed_reason = None

            try:
                self._app = WebSocketApp(
                    self.url,
                    on_open=self.opened,
                    on_close=self.closed,
                    on_error=self.errored,
                    on_message=self.received_message
                )
                self._app.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
            except:
                # Bare except is deliberate: anything not explained by a
                # server restart is propagated to the caller.
                if not self.server_restarted():
                    raise
            finally:
                self._app = None

            if self.server_restarted():
                # Instead of trying to reconnect in a retry loop with backoff, run an
                # API call that will do the same and block while it retries.
                # Logger.warn is a deprecated alias; use warning().
                logger.warning("Server restart, reconnecting...")
                time.sleep(1)
                dxpy.describe(self.job_id)
            else:
                break
Beispiel #5
0
def copy_files(fids, projectId, folder, overwrite=False):
    '''Copies array of dx file dicts to project:/folder, returning new array of dx file dicts.

    fids      -- iterable of keys into the module-level FILES dict (dxLinks).
    projectId -- destination project id.
    folder    -- destination folder path inside projectId.
    overwrite -- clone even when a file of the same name already exists.
    '''
    newFids = []
    for fid in fids:
        fileDict = dxpy.describe(FILES[fid])  # FILES contain dxLinks
        if fileDict['project'] == projectId:
            # cannot copy into the same project!!!
            # so just leave in place and pretend that we did!
            #proj = dxpy.DXProject(projectId)
            #proj.move(folder,[fid])
            newFids.append(fid)
            continue

        # Check to see if file already exists.
        alreadyThere = find_file(folder + '/' + fileDict['name'], projectId)
        if alreadyThere is None or overwrite:
            # remove what is alreadyThere?
            #if alreadyThere is not None:
            #    proj = dxpy.DXProject(projectId)
            #    proj.remove_objects([alreadyThere])
            dxFile = dxpy.get_handler(FILES[fid])
            newLink = dxpy.dxlink(dxFile.clone(projectId, folder))
        else:
            # BUG FIX: FILES is a dict, so it must be indexed; calling it
            # as FILES(alreadyThere) raised TypeError.
            newLink = FILES[alreadyThere]
        if newLink is None:
            print "ERROR: Failed in copy of '" + fileDict['project'] + ":" + fileDict['name'] + \
                    "' to '" + projectId + ":" + folder + "'."
            sys.exit(1)
        newDict = dxpy.describe(newLink)
        FILES[newDict['id']] = newLink
        newFids.append(newDict['id'])

    return newFids
Beispiel #6
0
    def connect(self):
        """Open the websocket and keep reconnecting across server restarts.

        Loops until the socket closes for a reason other than a server
        restart; each iteration resets the per-connection status fields.
        """
        while True:
            self.error = False
            self.exception = None
            self.closed_code = None
            self.closed_reason = None

            try:
                self._app = WebSocketApp(
                    self.url,
                    on_open=self.opened,
                    on_close=self.closed,
                    on_error=self.errored,
                    on_message=self.received_message
                )
                self._app.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
            except:
                # Bare except is deliberate: anything not explained by a
                # server restart is propagated to the caller.
                if not self.server_restarted():
                    raise
            finally:
                self._app = None

            if self.server_restarted():
                # Instead of trying to reconnect in a retry loop with backoff, run an
                # API call that will do the same and block while it retries.
                # Logger.warn is a deprecated alias; use warning().
                logger.warning("Server restart, reconnecting...")
                time.sleep(1)
                dxpy.describe(self.job_id)
            else:
                break
Beispiel #7
0
    def closed(self, code, reason):
        """Websocket on_close handler: report errors or the final job state.

        code/reason come from the websocket close frame; 1000 signals a
        normal close.  Raises DXJobLogStreamingException on abnormal
        closure, and exits with code 3 when the job ended badly.
        """
        self.closed_code, self.closed_reason = code, reason

        if not (self.closed_code == 1000
                or getattr(self.stream.closing, 'code', None) == 1000):
            try:
                # The server may encode the failure reason as a JSON payload
                # carrying 'type' and 'message' keys.
                error = json.loads(self.closed_reason)
                raise DXJobLogStreamingException(
                    "Error while streaming job logs: {type}: {message}\n".
                    format(**error))
            except (KeyError, ValueError):
                # Not JSON (ValueError) or missing keys (KeyError raised by
                # format(**error)): fall back to the raw code/reason text.
                error = "Error while streaming job logs: {code} {reason}\n".format(
                    code=self.closed_code, reason=self.closed_reason)
                raise DXJobLogStreamingException(error)
        elif self.print_job_info:
            if self.job_id not in self.seen_jobs:
                self.seen_jobs[self.job_id] = {}
            # Refresh and print the status of every job seen on this stream.
            for job_id in self.seen_jobs.keys():
                self.seen_jobs[job_id] = dxpy.describe(job_id)
                print(
                    get_find_executions_string(self.seen_jobs[job_id],
                                               has_children=False,
                                               show_outputs=True))
        else:
            self.seen_jobs[self.job_id] = dxpy.describe(self.job_id)

        if self.seen_jobs[self.job_id].get('state') in [
                'failed', 'terminated'
        ]:
            err_exit(code=3)
Beispiel #8
0
 def reconnect(self):
     """Re-establish the websocket connection after a brief pause."""
     # Rather than hand-rolling a retry loop with backoff, let this API
     # call block and retry on our behalf until the server is reachable.
     time.sleep(1)
     dxpy.describe(self.job_id)
     # Re-initialize the underlying client state, then connect again.
     WebSocketBaseClient.__init__(
         self,
         self.url,
         protocols=None,
         extensions=None,
     )
     self.connect()
Beispiel #9
0
def app_2_version(app_id):
    """Return an app's version string, or a timestamp for applets.

    Applets have no 'version' field, so fall back to formatting their
    'created' time (milliseconds since the epoch).
    """
    # Sentinel propagated by callers when the job lookup failed.
    if app_id == 'job_not_found':
        return 'job_not_found'
    try:
        return dxpy.describe(app_id)['version']
    except KeyError:
        created_ms = dxpy.describe(app_id)['created']
        return time.strftime('%Y-%m-%d %H:%M:%S',
                             time.localtime(created_ms / 1000.0))
Beispiel #10
0
def app_2_version(app_id):
    """Return an app's version string, or a creation timestamp for applets.

    Applets have no 'version' field; fall back to their 'created' time.
    """
    try:
        version = dxpy.describe(app_id)['version']
    except KeyError:
        # BUG FIX: 'created' is in milliseconds since the epoch; convert to
        # seconds before formatting (matches the sibling implementation,
        # which divides by 1000.0).
        version = time.strftime(
            '%Y-%m-%d %H:%M:%S',
            time.localtime(dxpy.describe(app_id)['created'] / 1000.0))
    return version
Beispiel #11
0
def process(reads_file, reference_tar, bwa_aln_params, bwa_version):
    # reads_file, reference_tar should be links to file objects.
    # reference_tar should be a tar of files generated by bwa index and
    # the tar should be uncompressed to avoid repeating the decompression.

    print "In process"

    if bwa_version == "0.7.7":
        bwa = "bwa0.7.7"
    elif bwa_version == "0.7.10":
        bwa = "bwa0.7.10"
    else:
        bwa = "bwa0.7.7"
    print "Using bwa version %s" %(bwa_version)

    # Generate filename strings and download the files to the local filesystem
    reads_filename = dxpy.describe(reads_file)['name']
    reads_basename = reads_filename
    # the order of this list is important.  It strips from the right inward, so
    # the expected right-most extensions should appear first (like .gz)
    for extension in ['.gz', '.fq', '.fastq', '.fa', '.fasta']:
        reads_basename = reads_basename.rstrip(extension)
    reads_file = dxpy.download_dxfile(reads_file,reads_filename)

    reference_tar_filename = dxpy.describe(reference_tar)['name']
    reference_tar_file = dxpy.download_dxfile(reference_tar,reference_tar_filename)
    # extract the reference files from the tar
    if reference_tar_filename.endswith('.gz') or reference_tar_filename.endswith('.tgz'):
        tar_command = 'tar -xzv --no-same-owner --no-same-permissions -f %s' %(reference_tar_filename)
    else:
        tar_command = 'tar -xv --no-same-owner --no-same-permissions -f %s' %(reference_tar_filename)
    print "Unpacking %s" %(reference_tar_filename)
    print tar_command
    print subprocess.check_output(shlex.split(tar_command))
    reference_filename = resolve_reference()
    print "Using reference file: %s" %(reference_filename)

    print subprocess.check_output('ls -l', shell=True)

    #generate the suffix array index file
    sai_filename = '%s.sai' %(reads_basename)
    with open(sai_filename,'w') as sai_file:
        # Build the bwa command and call bwa
        bwa_command = "%s aln %s -t %d %s %s" \
            %(bwa, bwa_aln_params, cpu_count(), reference_filename, reads_filename)
        print bwa_command
        subprocess.check_call(shlex.split(bwa_command), stdout=sai_file) 

    print subprocess.check_output('ls -l', shell=True)

    # Upload the output to the DNAnexus project
    print "Uploading %s" %(sai_filename)
    sai_dxfile = dxpy.upload_local_file(sai_filename)
    process_output = { "output": dxpy.dxlink(sai_dxfile) }
    print "Returning from process:"
    print process_output
    return process_output
Beispiel #12
0
def job_2_app(job_id):
    """Return the app (or applet) id that the given job executed."""
    # Sentinel propagated by callers when the job lookup failed.
    if job_id == 'job_not_found':
        return 'job_not_found'
    try:
        result = dxpy.describe(job_id)['app']
    except KeyError:
        # Applet-based jobs carry 'applet' instead of 'app'.
        result = dxpy.describe(job_id)['applet']
    return result.strip()
Beispiel #13
0
def _check_suggestions(app_json, publish=False):
    """
    Examines the specified dxapp.json file and warns about any
    violations of suggestions guidelines.

    :param app_json: parsed dxapp.json contents
    :param publish: warn about non-public suggested projects when True
    :raises: AppBuilderException for data objects that could not be found
    """
    for input_field in app_json.get('inputSpec', []):
        for suggestion in input_field.get('suggestions', []):
            # Suggested project: must exist and, when publishing, be public.
            if 'project' in suggestion:
                try:
                    project = dxpy.api.project_describe(suggestion['project'], {"permissions": True})
                    if 'PUBLIC' not in project['permissions'] and publish:
                        logger.warn('Project {name} NOT PUBLIC!'.format(name=project['name']))
                except dxpy.exceptions.DXAPIError as e:
                    if e.code == 404:
                        logger.warn('Suggested project {name} does not exist, or not accessible by user'.format(
                                     name=suggestion['project']))
                # Optional path within the suggested project.
                if 'path' in suggestion:
                    try:
                        check_folder_exists(suggestion['project'], suggestion['path'], '')
                    except ResolutionError as e:
                        logger.warn('Folder {path} could not be found in project {project}'.format(
                                     path=suggestion['path'], project=suggestion['project']))
            # Direct object link: missing objects are a hard error.
            if '$dnanexus_link' in suggestion:
                if suggestion['$dnanexus_link'].startswith(('file-', 'record-', 'gtable-')):
                    try:
                        dnanexus_link = dxpy.describe(suggestion['$dnanexus_link'])
                    except dxpy.exceptions.DXAPIError as e:
                        if e.code == 404:
                            raise dxpy.app_builder.AppBuilderException(
                                'Suggested object {name} could not be found'.format(
                                    name=suggestion['$dnanexus_link']))
                    except Exception as e:
                        raise dxpy.app_builder.AppBuilderException(str(e))
            # Link nested inside a 'value': may be a dict or a plain id string.
            if 'value' in suggestion:
                if '$dnanexus_link' in suggestion['value']:
                    # Check if we have JSON or string
                    if isinstance(suggestion['value']['$dnanexus_link'], dict):
                        if 'project' in suggestion['value']['$dnanexus_link']:
                            try:
                                dxpy.api.project_describe(suggestion['value']['$dnanexus_link']['project'])
                            except dxpy.exceptions.DXAPIError as e:
                                if e.code == 404:
                                    logger.warn('Suggested project {name} does not exist, or not accessible by user'.format(
                                                 name=suggestion['value']['$dnanexus_link']['project']))
                    # NOTE: basestring means this module targets Python 2.
                    elif isinstance(suggestion['value']['$dnanexus_link'], basestring):
                        if suggestion['value']['$dnanexus_link'].startswith(('file-', 'record-', 'gtable-')):
                            try:
                                dnanexus_link = dxpy.describe(suggestion['value']['$dnanexus_link'])
                            except dxpy.exceptions.DXAPIError as e:
                                if e.code == 404:
                                    raise dxpy.app_builder.AppBuilderException(
                                        'Suggested object {name} could not be found'.format(
                                            name=suggestion['value']['$dnanexus_link']))
                            except Exception as e:
                                raise dxpy.app_builder.AppBuilderException(str(e))
Beispiel #14
0
 def reconnect(self):
     """Pause briefly, then rebuild and reopen the websocket connection."""
     # Instead of trying to reconnect in a retry loop with backoff, run an API call that will do the same
     # and block while it retries.
     time.sleep(1)
     dxpy.describe(self.job_id)
     # Reset the base client's state before connecting again.
     WebSocketBaseClient.__init__(self,
                                  self.url,
                                  protocols=None,
                                  extensions=None)
     self.connect()
Beispiel #15
0
def get_mapping_analysis(bam):
    """Return the describe() of the DNAnexus analysis that produced a bam.

    The job id is extracted from the bam's step_run dx_applet_details and
    its parent analysis is then described.
    """
    try:
        job_alias = next(detail['dx_job_id']
                         for detail in bam['step_run']['dx_applet_details'])
    except:
        # Bare except kept deliberately: log context, then re-raise.
        logging.error('Failed to find step_run.dx_applet_details in bam %s' %
                      (bam.get('accession')))
        raise
    # Raw string avoids the invalid-escape warning for \w in the pattern.
    job_id = re.findall(r'job-\w*', job_alias)[0]
    analysis_id = dxpy.describe(job_id)['parentAnalysis']
    return dxpy.describe(analysis_id)
def get_mapping_analysis(bam):
    """Return the describe() of the DNAnexus analysis that produced a bam.

    The job id is extracted from the bam's step_run dx_applet_details and
    its parent analysis is then described.
    """
    try:
        job_alias = next(detail['dx_job_id'] for detail
                         in bam['step_run']['dx_applet_details'])
    except:
        # Bare except kept deliberately: log context, then re-raise.
        logging.error(
            'Failed to find step_run.dx_applet_details in bam %s'
            % (bam.get('accession')))
        raise
    # Raw string avoids the invalid-escape warning for \w in the pattern.
    job_id = re.findall(r'job-\w*', job_alias)[0]
    analysis_id = dxpy.describe(job_id)['parentAnalysis']
    return dxpy.describe(analysis_id)
Beispiel #17
0
def BuildPindelCommand(kwargs, chrom, input_fn, is_pindel_input_type=False):
    # Download Reference FASTA
    reference_fasta_id = kwargs["reference_fasta"]
    ref_fn = DownloadRefFasta(reference_fasta_id)
    
    create_index = True
    if "fasta_index" in kwargs:
        fasta_idx_id = kwargs["fasta_index"]
        if dxpy.describe(fasta_idx_id)["name"].rstrip(".fa.fai") ==  dxpy.describe(reference_fasta_id)["name"].rstrip(".fa"):
            dxpy.download_dxfile(fasta_idx_id, ref_fn+".fai")
            create_index = False
    if create_index: 
        print "No FASTA index was provided as input. Making one now."
        samtools_command = "samtools faidx {fasta}".format(fasta=ref_fn)
        subprocess.check_call(samtools_command, shell=True)
    
    print "\nBuilding pindel command from app inputs"
    command_args = ["pindel"]    
    output_path = "output/" + kwargs["output_prefix"]
    command_args.append("-o {output_path}".format(output_path=output_path))
    command_args.append("-f {fa}".format(fa=ref_fn))
    
    if is_pindel_input_type: 
        command_args.append("-P {pindel_config}".format(pindel_config=input_fn))
    else:
        command_args.append("-i {bam_config}".format(bam_config=input_fn))
        
    command_args.append("-T {option}".format(option=kwargs["num_threads_per_instance"]))
    command_args.append("-c {chrom}".format(chrom=chrom))
    
    if kwargs["report_only_close_mapped_reads"]:
        command_args.append("-S {option}".format(option=kwargs["report_only_close_mapped_reads"]))
    else:
        command_args.append("-r {option}".format(option=kwargs["report_inversions"]))
        command_args.append("-t {option}".format(option=kwargs["report_duplications"]))
        command_args.append("-l {option}".format(option=kwargs["report_long_insertions"]))
        command_args.append("-k {option}".format(option=kwargs["report_breakpoints"]))
        command_args.append("-s {option}".format(option=kwargs["report_close_mapped_reads"]))     

    if "breakdancer_calls_file" in kwargs:
        breakdancer_fn = DownloadFilesFromArray([kwargs["breakdancer_calls_file"]["$dnanexus_link"]])[0]
        print breakdancer_fn
        command_args.append("-b {option}".format(option=breakdancer_fn))
            
    if "pindel_command_line" in kwargs:
        advanced_command = kwargs["pindel_command_line"]
        if advanced_command.startswith("pindel"):
            advanced_command = advanced_command.replace("pindel", "")
        command_args.append(advanced_command)
 
    command = " ".join(command_args)
    print command
    return command, output_path
Beispiel #18
0
def process(reads_file, reference_tar, bwa_aln_params, bwa_version):
    # reads_file, reference_tar should be links to file objects.
    # reference_tar should be a tar of files generated by bwa index and
    # the tar should be uncompressed to avoid repeating the decompression.

    print "In process"

    if bwa_version == "0.7.7":
        bwa = "bwa0.7.7"
    elif bwa_version == "0.7.10":
        bwa = "bwa0.7.10"
    else:
        bwa = "bwa0.7.7"
    print "Using bwa version %s" %(bwa_version)

    # Generate filename strings and download the files to the local filesystem
    reads_filename = dxpy.describe(reads_file)['name']
    reads_basename = reads_filename.rstrip('.gz').rstrip('.fq').rstrip('.fastq')
    reads_file = dxpy.download_dxfile(reads_file,reads_filename)

    reference_tar_filename = dxpy.describe(reference_tar)['name']
    reference_tar_file = dxpy.download_dxfile(reference_tar,reference_tar_filename)
    # extract the reference files from the tar
    if reference_tar_filename.endswith('.gz'):
        tar_command = 'tar -xzvf %s' %(reference_tar_filename)
    else:
        tar_command = 'tar -xvf %s' %(reference_tar_filename)
    print "Unpacking %s" %(reference_tar_filename)
    print subprocess.check_output(shlex.split(tar_command))
    # assume the reference file is the only .fa file
    reference_filename = subprocess.check_output('ls *.fna', shell=True).rstrip()

    print subprocess.check_output('ls -l', shell=True)

    #generate the suffix array index file
    sai_filename = '%s.sai' %(reads_basename)
    with open(sai_filename,'w') as sai_file:
        # Build the bwa command and call bwa
        bwa_command = "%s aln %s -t %d %s %s" \
            %(bwa, bwa_aln_params, cpu_count(), reference_filename, reads_filename)
        print bwa_command
        subprocess.check_call(shlex.split(bwa_command), stdout=sai_file) 

    print subprocess.check_output('ls -l', shell=True)

    # Upload the output to the DNAnexus project
    print "Uploading %s" %(sai_filename)
    sai_dxfile = dxpy.upload_local_file(sai_filename)
    process_output = { "output": dxpy.dxlink(sai_dxfile) }
    print "Returning from process:"
    print process_output
    return process_output
Beispiel #19
0
def main(reorg_conf___=None, reorg_status___=None):  # pylint: disable=unused-argument
    """Reorg applet entry point: relocate analysis outputs.

    Each output file is moved (or cloned, when it lives in another
    project) into the analysis folder, optionally extended by the
    'default_location' recorded in the producing job's run input.
    """
    # find the output stage of the current analysis
    analysis_id = dxpy.describe(dxpy.JOB_ID)["analysis"]
    stages = dxpy.describe(analysis_id)["stages"]

    # retrieve the dictionary containing outputs
    output_map = [
        x["execution"]["output"] for x in stages if x["id"] == "stage-outputs"
    ][0]
    folder_location = dxpy.describe(analysis_id)["folder"]

    project_container = dxpy.DXProject(dxpy.PROJECT_CONTEXT_ID)

    # move required outputfiles to their preferred permanent folders
    for file_identifiers in output_map.values():
        # An output may be an array of links or a single link dict.
        if isinstance(file_identifiers, (list, tuple)):
            for indvfile in file_identifiers:
                try:
                    # Look up the producing job's requested subfolder.
                    default_location = dxpy.describe(
                        dxpy.describe(indvfile["$dnanexus_link"])["createdBy"]
                        ["job"])["runInput"]["default_location"]
                    folder = folder_location + "/" + default_location
                except:
                    # Bare except: any lookup failure falls back to the
                    # analysis folder itself.
                    folder = folder_location
                project_container.new_folder(folder, parents=True)

                file_container = dxpy.describe(
                    indvfile["$dnanexus_link"])["project"]
                file_object = dxpy.bindings.DXFile(indvfile["$dnanexus_link"],
                                                   project=file_container)
                # Move within this project; clone from foreign projects.
                if file_container == dxpy.PROJECT_CONTEXT_ID:
                    file_object.move(folder)
                else:
                    cloned_file = file_object.clone(  # pylint: disable=unused-variable
                        dxpy.PROJECT_CONTEXT_ID,
                        folder=folder)
        elif isinstance(file_identifiers, dict):
            if '$dnanexus_link' in file_identifiers:
                try:
                    # Look up the producing job's requested subfolder.
                    default_location = dxpy.describe(
                        dxpy.describe(file_identifiers["$dnanexus_link"])
                        ["createdBy"]["job"])["runInput"]["default_location"]
                    folder = folder_location + "/" + default_location
                except:
                    # Bare except: fall back to the analysis folder.
                    folder = folder_location
                project_container.new_folder(folder, parents=True)

                file_container = dxpy.describe(
                    file_identifiers["$dnanexus_link"])["project"]
                file_object = dxpy.bindings.DXFile(
                    file_identifiers["$dnanexus_link"], project=file_container)
                # Move within this project; clone from foreign projects.
                if file_container == dxpy.PROJECT_CONTEXT_ID:
                    file_object.move(folder)
                else:
                    cloned_file = file_object.clone(dxpy.PROJECT_CONTEXT_ID,
                                                    folder=folder)
Beispiel #20
0
 def test_dx_project_tagging(self):
     """Round-trip project tags through `dx tag` / `dx untag`.

     The tags include shell-special and non-ASCII characters to exercise
     command-line quoting and encoding.
     """
     the_tags = [u"$my.tag", u"secoиdtag", u"тhird тagggg"]
     # tag
     run(u"dx tag : \\" + the_tags[0] + u" " + the_tags[1] + u" '" + the_tags[2] + u"'")
     mytags = dxpy.describe(self.project)['tags']
     for tag in the_tags:
         self.assertIn(tag, mytags)
     # untag
     run(u"dx untag : \\" + the_tags[0] + u" '" + the_tags[2] + u"'")
     mytags = dxpy.describe(self.project)['tags']
     # Only the untouched middle tag should survive.
     self.assertIn(the_tags[1], mytags)
     for tag in [the_tags[0], the_tags[2]]:
         self.assertNotIn(tag, mytags)
def scatter(orig_reads, split_size):
    """Split gzipped read files into chunks of split_size million reads.

    Returns {"array_of_scattered_input": [links to the uploaded splits]}.
    """
    if DEBUG:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    splitsize = split_size * 1000000 * 4
    # each FQ read is 4 lines
    os.mkdir('splits')

    # Robustness fix: keep split_out bound even when orig_reads is empty,
    # so the logging call after the loop cannot raise NameError.
    split_out = ''
    for f in orig_reads:
        reads_filename = dxpy.describe(f)['name']
        reads_basename = strip_extensions(reads_filename, STRIP_EXTENSIONS)
        dxpy.download_dxfile(dxpy.DXFile(f).get_id(), reads_filename)

        reads_root_name = simplify_name() or reads_basename

        logger.info('* RUNNING /bin/zcat %s | /usr/bin/split -l %d -d - %s ' % (reads_filename, splitsize, 'splits/' + reads_root_name))
        split_out = subprocess.check_output('/bin/zcat %s | /usr/bin/split -l %d -d - %s ' % (reads_filename, splitsize, 'splits/' + reads_root_name), shell=True)
        # can't shlex because of |

    logger.info(split_out)
    splits = os.listdir('splits')
    logger.info("* Return from scatter: %s *" % splits)

    # Should we gzip here?
    return {
        "array_of_scattered_input": [
            dxpy.dxlink(dxpy.upload_local_file('splits/' + split_file)) for split_file in splits]
        }
Beispiel #22
0
def ExportVCF(kwargs, output_path, ref_fn):
    ref_name_version = dxpy.describe(kwargs["reference_fasta"])["name"]
    ref_name_version = ref_name_version.rstrip(".fa")
    vcf_out_fn = kwargs["output_prefix"] + '.pindel.vcf'

    command_args = ["pindel2vcf"]
    command_args.append("-r {input}".format(input=ref_fn))
    command_args.append("-P {input}".format(input=output_path))
    command_args.append("-v {input}".format(input=vcf_out_fn))
    if kwargs["vcf_gatk_compatible"]:
        command_args.append("-G")

    if "export_vcf_advanced_options" in kwargs:
        command_args.append(kwargs["export_vcf_advanced_options"])
    else:
        ref_date = str(datetime.date.today())
        command_args.append("-R {input}".format(input=ref_name_version))
        command_args.append("-d ''")

    try:
        vcf_command = " ".join(command_args)
        print "Executing: " + vcf_command
        print subprocess.check_output(vcf_command,
                                      stderr=subprocess.STDOUT,
                                      shell=True)
    except subprocess.CalledProcessError, e:
        print e
        print e.output
        raise dxpy.AppError(
            "APP ERROR: App was not able to convert pindel to vcf. Please check pindel2vcf inputs"
        )
def start_time(job_id):
    """Return a job's startedRunning time as 'YYYY-MM-DD HH:MM:SS'."""
    # Sentinel propagated by callers when the job lookup failed.
    if job_id == 'job_not_found':
        return 'job_not_found'
    # startedRunning is reported in milliseconds since the epoch.
    millis = dxpy.describe(job_id)['startedRunning']
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(millis / 1000.0))
def merge_map_reports(map_report_set, target_root):
    '''Merges techrep map_reports.'''

    # Working on map_reports now
    all_reports=""
    biorep_map_report = target_root + '_map_report.txt'
    append_line("### Combined Bismark map report for several technical replicates ###\n",biorep_map_report)
    for techrep_map_report_dlink in map_report_set:
        file_desc = dxpy.describe(techrep_map_report_dlink)
        file_root = file_desc['name']
        file_root = file_root.replace('_techrep_bismark_map_report.txt','') 
        file_root = file_root.replace('_bismark_map_report.txt','') 
        file_root = file_root.replace('_map_report.txt','')
        techrep_map_report = file_root + '_techrep_map_report.txt' 
        append_line("###################################",biorep_map_report)
        append_line("### Map report for ${file_root} ###",biorep_map_report)
        print "* Downloading %s_techrep_bismark_map_report.txt file..." % file_root
        dxpy.download_dxfile(techrep_map_report_dlink, techrep_map_report)
        run_cmd('cat ' + techrep_map_report, out=biorep_map_report,append=True)
        if len(all_reports) == 0:
            all_reports = techrep_map_report
        else:
            all_reports += ',' + techrep_map_report
        
    if all_reports == techrep_map_report: # only one
        run_cmd('mv %s %s' % (techrep_map_report,biorep_map_report) )
        all_reports = biorep_map_report
        
    return (biorep_map_report,all_reports)
def merge_map_reports(map_report_set, target_root):
    """Merges techrep map_reports."""

    # Working on map_reports now
    all_reports = ""
    biorep_map_report = target_root + "_map_report.txt"
    append_line("### Combined Bismark map report for several technical replicates ###\n", biorep_map_report)
    for techrep_map_report_dlink in map_report_set:
        file_desc = dxpy.describe(techrep_map_report_dlink)
        file_root = file_desc["name"]
        file_root = file_root.replace("_techrep_bismark_map_report.txt", "")
        file_root = file_root.replace("_bismark_map_report.txt", "")
        file_root = file_root.replace("_map_report.txt", "")
        techrep_map_report = file_root + "_techrep_map_report.txt"
        append_line("###################################", biorep_map_report)
        append_line("### Map report for ${file_root} ###", biorep_map_report)
        print "* Downloading %s_techrep_bismark_map_report.txt file..." % file_root
        dxpy.download_dxfile(techrep_map_report_dlink, techrep_map_report)
        run_cmd("cat " + techrep_map_report, out=biorep_map_report, append=True)
        if len(all_reports) == 0:
            all_reports = techrep_map_report
        else:
            all_reports += "," + techrep_map_report

    if all_reports == techrep_map_report:  # only one
        run_cmd("mv %s %s" % (techrep_map_report, biorep_map_report))
        all_reports = biorep_map_report

    return (biorep_map_report, all_reports)
 def test_set_assetbundle_tarball_property(self):
     """Verify `dx build_asset` cross-links the record and its tarball.

     The built asset record's details must reference the archive file,
     and the archive file's properties must point back at the record.
     """
     asset_spec = {
         "name": "tarball_property_assetbundle",
         "title": "A human readable name",
         "description": "A detailed description about the asset",
         "version": "0.0.1",
         "distribution": "Ubuntu",
         "release": "12.04"
     }
     asset_dir = self.write_asset_directory("set_tarball_property", json.dumps(asset_spec))
     asset_bundle_id = json.loads(run('dx build_asset --json ' + asset_dir))['id']
     self.assertIn('record', asset_bundle_id)
     # Record details -> archive file link.
     tarball_file_id = dxpy.describe(asset_bundle_id,
                                     fields={"details"})["details"]["archiveFileId"]["$dnanexus_link"]
     # Archive file properties -> back-reference to the record.
     self.assertEqual(dxpy.describe(tarball_file_id,
                                    fields={"properties"})["properties"]["AssetBundle"], asset_bundle_id)
Beispiel #27
0
def main():
    '''Collects all per-file FastQC summary files under /runs in the project
    and tallies their results via parse_summary().'''
    cmnd = get_args()  # NOTE(review): parsed args are unused below -- presumably get_args() validates/side-effects; confirm

    ## resolve projects
    project = dxencode.resolve_project(PROJECT_NAME)
    print 'Project: ' + project.describe()['name']
    pid =  project.get_id()

    counts = {}
    n = 0
    # Generator over every '*_summary.txt' file under /runs (glob match).
    summaries = dxpy.find_data_objects(classname='file', folder='/runs', name='*_summary.txt', recurse=True, name_mode='glob', project=pid, return_handler=False)
    # The generator object is always truthy, so the loop actually terminates
    # via the StopIteration break below (Python 2 .next() protocol).
    while summaries:
        try:
            flink = dxpy.dxlink(summaries.next())
            n = n+1
        except StopIteration:
            break
        fd = dxpy.describe(flink)
        fn = "fastqc/%s" % fd['name']
        # Download only when not already cached locally
        # (assumes ./fastqc/ exists -- TODO confirm).
        if not os.path.isfile(fn):
            print 'Downloading: %s from %s' % (fn, fd['folder'])
            try:
                dxpy.download_dxfile(flink, fn)
            except Exception, e:
                # Best-effort: report the failure and still try to parse below.
                print "Error %s" % e

        parse_summary(fn, counts)
Beispiel #28
0
    def received_message(self, message):
        """Handle one incoming websocket log message (a JSON-encoded dict)."""
        msg = json.loads(message)

        # First time we see a job id: describe it and print its header line.
        if self.print_job_info and 'job' in msg and msg['job'] not in self.seen_jobs:
            job_desc = dxpy.describe(msg['job'])
            self.seen_jobs[msg['job']] = job_desc
            print(get_find_executions_string(job_desc,
                                             has_children=False,
                                             show_outputs=False))

        end_of_log = (msg.get('source') == 'SYSTEM'
                      and msg.get('msg') == 'END_LOG')
        if end_of_log:
            # Sentinel from the server: stop the event loop.
            self._app.keep_running = False
        elif self.msg_callback:
            self.msg_callback(msg)
        else:
            print(self.msg_output_format.format(**msg))
def process(fastq):
    # Change the following to process whatever input this stage
    # receives.  You may also want to copy and paste the logic to download
    # and upload files here as well if this stage receives file input
    # and/or makes file output.

    print fastq
    reads_filename = dxpy.describe(fastq)["name"]
    reads_basename = reads_filename.rstrip(".gz").rstrip(".fq").rstrip(".fastq")
    reads_file = dxpy.download_dxfile(fastq, "fastq.gz")

    subprocess.check_call(["mkdir", "output"])
    print "Run QC"
    fqc_command = "/usr/bin/FastQC/fastqc fastq.gz -o output"
    print fqc_command
    stdio = subprocess.check_output(shlex.split(fqc_command))
    print stdio
    print subprocess.check_output(["ls", "-l", "output"])
    subprocess.check_call(["unzip", "output/fastq_fastqc.zip"])
    print "Upload results"
    subprocess.check_call(["mv", "fastq_fastqc/fastqc_data.txt", "%s_data.txt" % reads_basename])
    subprocess.check_call(["mv", "fastq_fastqc/summary.txt", "%s_summary.txt" % reads_basename])
    subprocess.check_call(["mv", "output/fastq_fastqc.zip", "%s_fastqc.zip" % reads_basename])
    report_dxfile = dxpy.upload_local_file("%s_data.txt" % reads_basename)
    summary_dxfile = dxpy.upload_local_file("%s_summary.txt" % reads_basename)
    zip_dxfile = dxpy.upload_local_file("%s_fastqc.zip" % reads_basename)
    print report_dxfile
    return {"report": report_dxfile, "summary": summary_dxfile, "zip": zip_dxfile}
Beispiel #30
0
def main():
    '''Collects all per-file FastQC summary files under /runs in the project
    and tallies their results via parse_summary().'''
    cmnd = get_args()  # NOTE(review): parsed args are unused below -- presumably get_args() validates/side-effects; confirm

    ## resolve projects
    project = dxencode.resolve_project(PROJECT_NAME)
    print 'Project: ' + project.describe()['name']
    pid = project.get_id()

    counts = {}
    n = 0
    # Generator over every '*_summary.txt' file under /runs (glob match).
    summaries = dxpy.find_data_objects(classname='file',
                                       folder='/runs',
                                       name='*_summary.txt',
                                       recurse=True,
                                       name_mode='glob',
                                       project=pid,
                                       return_handler=False)
    # The generator object is always truthy; the loop terminates via the
    # StopIteration break below (Python 2 .next() protocol).
    while summaries:
        try:
            flink = dxpy.dxlink(summaries.next())
            n = n + 1
        except StopIteration:
            break
        fd = dxpy.describe(flink)
        fn = "fastqc/%s" % fd['name']
        # Download only when not already cached locally
        # (assumes ./fastqc/ exists -- TODO confirm).
        if not os.path.isfile(fn):
            print 'Downloading: %s from %s' % (fn, fd['folder'])
            try:
                dxpy.download_dxfile(flink, fn)
            except Exception, e:
                # Best-effort: report the failure and still try to parse below.
                print "Error %s" % e

        parse_summary(fn, counts)
def flagstat_parse(dxlink):
	'''Parse a samtools-flagstat report stored as a dx file.

	Returns a dict mapping metric name -> [QC-passed, QC-failed] counts,
	or None if the file handle is falsy.  Raises StopIteration if an
	expected metric line is missing from the report.
	'''
	desc = dxpy.describe(dxlink)
	with dxpy.DXFile(desc['id'], mode='r') as flagstat_file:
		if not flagstat_file:
			return None
		# Read while the handle is still open: the original called .read()
		# after the with-block had already closed the file.
		flagstat_lines = flagstat_file.read().splitlines()

	qc_dict = { #values are regular expressions, will be replaced with scores [hiq, lowq]
		'in_total': 'in total',
		'duplicates': 'duplicates',
		'mapped': 'mapped',
		'paired_in_sequencing': 'paired in sequencing',
		'read1': 'read1',
		'read2': 'read2',
		'properly_paired': 'properly paired',
		'with_self_mate_mapped': 'with itself and mate mapped',
		'singletons': 'singletons',
		'mate_mapped_different_chr': 'with mate mapped to a different chr$', #i.e. at the end of the line
		'mate_mapped_different_chr_hiQ': 'with mate mapped to a different chr \(mapQ>=5\)' #RE so must escape
	}
	# Replace each pattern with its parsed [hiq, lowq] counts; reassigning
	# existing keys while iterating items() is safe (dict size is unchanged).
	for (qc_key, qc_pattern) in qc_dict.items():
		qc_metrics = next(re.split(qc_pattern, line) for line in flagstat_lines if re.search(qc_pattern, line))
		(hiq, lowq) = qc_metrics[0].split(' + ')
		qc_dict[qc_key] = [int(hiq.rstrip()), int(lowq.rstrip())]

	return qc_dict
Beispiel #32
0
def main():
    args = get_args()
    first_analysis = True

    for (i, analysis_id) in enumerate(args.infile):
        analysis_id = analysis_id.strip()
        try:
            analysis = dxpy.describe(analysis_id)
        except:
            print "Invalid analysis ID %s. Skipping." % (analysis_id)
            continue

        experiment_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks',
                                analysis['name'])
        experiment_accession = experiment_m.group(1)
        if not experiment_m:
            print "No accession in %s, skipping." % (analysis['name'])
            continue

        if args.pipeline:
            pipeline = args.pipeline
        elif analysis['executableName'] == 'histone_chip_seq':
            pipeline = 'histone'
        elif analysis['executableName'] == 'tf_chip_seq':
            pipeline = 'tf'

        if pipeline == 'histone':
            histone(args, analysis, experiment_accession, first_analysis)
        elif pipeline == 'tf':
            tf(args, analysis, experiment_accession, first_analysis)
        else:
            print "Unrecognized pipeline: %s, skipping." % (pipeline)
            continue

        first_analysis = False
def main():
	args = get_args()
	first_analysis = True

	for (i, analysis_id) in enumerate(args.infile):
		analysis_id = analysis_id.strip()
		try:
			analysis = dxpy.describe(analysis_id)
		except:
			print "Invalid analysis ID %s. Skipping." %(analysis_id)
			continue

		if args.pipeline:
			if args.pipeline == 'histone':
				histone_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks',analysis['executableName'])
				tf_m = None
			elif args.pipeline == 'tf':
				tf_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks',analysis['name'])
				histone_m = None
		else:
			histone_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks',analysis['executableName'])
			tf_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks',analysis['name'])
		if histone_m:
			experiment_accession = histone_m.group(1)
			histone(args, analysis, experiment_accession, first_analysis)
		elif tf_m:
			experiment_accession = tf_m.group(1)
			tf(args, analysis, experiment_accession, first_analysis)
		else:
			print "No accession in %s, skipping." %(analysis['executableName'])
			continue

		first_analysis = False
Beispiel #34
0
    def received_message(self, message):
        """Handle one websocket log message, dropping lines already seen for
        a given (job, level) pair (e.g. replayed after a reconnect)."""
        message = json.loads(message.__unicode__())

        if "job" in message and "level" in message and "line" in message:
            # Track the highest line number seen per job/level; presumably
            # self.last_seen_log_lines is a defaultdict(dict) -- TODO confirm.
            last_line = self.last_seen_log_lines[message["job"]].get(
                message["level"], 0)
            if last_line < message["line"]:
                self.last_seen_log_lines[message["job"]][
                    message["level"]] = message["line"]
                # Summarize any run of skipped duplicates once we advance.
                if self.skipped_messages > 0:
                    logger.warn("Skipped {} seen messages".format(
                        self.skipped_messages))
                    self.skipped_messages = 0
            else:
                # Already-seen line: count it and drop it.
                self.skipped_messages += 1
                return

        # First time we see a job id: describe it and print its header line.
        if self.print_job_info and 'job' in message and message[
                'job'] not in self.seen_jobs:
            self.seen_jobs[message['job']] = dxpy.describe(message['job'])
            print(
                get_find_executions_string(self.seen_jobs[message['job']],
                                           has_children=False,
                                           show_outputs=False))

        # END_LOG sentinel closes the connection; otherwise dispatch to the
        # callback or print with the configured format.
        if message.get('source') == 'SYSTEM' and message.get(
                'msg') == 'END_LOG':
            self.close()
        elif self.msg_callback:
            self.msg_callback(message)
        else:
            print(self.msg_output_format.format(**message))
Beispiel #35
0
 def test_build_asset_with_valid_destination(self):
     """`dx build_asset --destination` places the record in the requested
     project and folder."""
     spec = {
         "name": "foo",
         "title": "A human readable name",
         "description": "A detailed description about the asset",
         "version": "0.0.1",
         "distribution": "Ubuntu",
         "release": "14.04",
         "execDepends": [{
             "name": "python-numpy"
         }]
     }
     src_dir = self.write_asset_directory("asset_with_valid_destination",
                                          json.dumps(spec))
     with testutil.temporary_project() as dest_project:
         dest_folder = 'asset_dir'
         run('dx mkdir -p {project}:{dirname}'.format(
             project=dest_project.get_id(), dirname=dest_folder))
         # Build into <other project>:/asset_dir/ explicitly.
         cmd_output = run('dx build_asset --json --destination ' +
                          dest_project.get_id() + ':/' + dest_folder + '/ ' +
                          src_dir)
         bundle_id = json.loads(cmd_output)['id']
         self.assertIn('record', bundle_id)
         desc = dxpy.describe(bundle_id)
         self.assertEqual(desc['project'], dest_project.get_id())
         self.assertEqual(desc['folder'], '/asset_dir')
def main():
    args = get_args()
    first_analysis = True

    for (i, analysis_id) in enumerate(args.infile):
        analysis_id = analysis_id.strip()
        try:
            analysis = dxpy.describe(analysis_id)
        except:
            print "Invalid analysis ID %s. Skipping." % (analysis_id)
            continue

        experiment_m = re.match('^(ENCSR[0-9]{3}[A-Z]{3}) Peaks', analysis['name'])
        experiment_accession = experiment_m.group(1)
        if not experiment_m:
            print "No accession in %s, skipping." % (analysis['name'])
            continue

        if args.pipeline:
            pipeline = args.pipeline
        elif analysis['executableName'] == 'histone_chip_seq':
            pipeline = 'histone'
        elif analysis['executableName'] == 'tf_chip_seq':
            pipeline = 'tf'

        if pipeline == 'histone':
            histone(args, analysis, experiment_accession, first_analysis)
        elif pipeline == 'tf':
            tf(args, analysis, experiment_accession, first_analysis)
        else:
            print "Unrecognized pipeline: %s, skipping." % (pipeline)
            continue

        first_analysis = False
def get_notebook_app_versions():
    """
    Get the valid version numbers of the notebook app.
    """
    published = dxpy.find_apps(name=NOTEBOOK_APP, all_versions=True)
    # One describe per published app; coerce the version field to str.
    return [str(dxpy.describe(entry['id'])['version']) for entry in published]
Beispiel #38
0
def build(project, folder, version_id, top_dir, path_dict):
    """Build (once) the runtime asset in project:folder and return its AssetDesc.

    If the asset already exists it is reused; otherwise the resources
    (dxfuse, dxda), the version-specific config file and the assembled jar
    are produced first.  Side effects: writes files under top_dir and
    creates platform objects via make_prerequisits().
    """
    asset = find_asset(project, folder)
    if asset is None:
        # get a copy of the dxfuse executable
        _add_dxfuse_to_resources(top_dir)

        # Create a configuration file
        _gen_config_file(version_id, top_dir, path_dict)
        jar_path = _sbt_assembly(top_dir, version_id)

        # get a copy of the download agent (dxda)
        _download_dxda_into_resources(top_dir)

        make_prerequisits(project, folder, version_id, top_dir)
        asset = find_asset(project, folder)

        # Move the file to the top level directory
        all_in_one_jar = os.path.join(top_dir,
                                      "dxWDL-{}.jar".format(version_id))
        shutil.move(os.path.join(top_dir, jar_path), all_in_one_jar)

    region = dxpy.describe(project.get_id())['region']
    ad = AssetDesc(region, asset.get_id(), project)

    # Hygiene, remove the new configuration file, we
    # don't want it to leak into the next build cycle.
    # os.remove(crnt_conf_path)
    return ad
Beispiel #39
0
def merge_map_reports(map_report_set, target_root):
    '''Merges techrep map_reports.'''

    # Working on map_reports now
    all_reports = ""
    biorep_map_report = target_root + '_map_report.txt'
    append_line(
        "### Combined Bismark map report for several technical replicates ###\n",
        biorep_map_report)
    for techrep_map_report_dlink in map_report_set:
        file_desc = dxpy.describe(techrep_map_report_dlink)
        file_root = file_desc['name']
        file_root = file_root.replace('_techrep_bismark_map_report.txt', '')
        file_root = file_root.replace('_bismark_map_report.txt', '')
        file_root = file_root.replace('_map_report.txt', '')
        techrep_map_report = file_root + '_techrep_map_report.txt'
        append_line("###################################", biorep_map_report)
        append_line("### Map report for ${file_root} ###", biorep_map_report)
        print "* Downloading %s_techrep_bismark_map_report.txt file..." % file_root
        dxpy.download_dxfile(techrep_map_report_dlink, techrep_map_report)
        run_cmd('cat ' + techrep_map_report,
                out=biorep_map_report,
                append=True)
        if len(all_reports) == 0:
            all_reports = techrep_map_report
        else:
            all_reports += ',' + techrep_map_report

    if all_reports == techrep_map_report:  # only one
        run_cmd('mv %s %s' % (techrep_map_report, biorep_map_report))
        all_reports = biorep_map_report

    return (biorep_map_report, all_reports)
Beispiel #40
0
def ExportVCF(kwargs, output_path, ref_fn):
    ref_name_version = dxpy.describe(kwargs["reference_fasta"])["name"]
    ref_name_version = ref_name_version.rstrip(".fa")
    vcf_out_fn = kwargs["output_prefix"] + '.pindel.vcf'
    
    command_args = ["pindel2vcf"]
    command_args.append("-r {input}".format(input=ref_fn))
    command_args.append("-P {input}".format(input=output_path))
    command_args.append("-v {input}".format(input=vcf_out_fn))
    if kwargs["vcf_gatk_compatible"]:
        command_args.append("-G")  

    if "export_vcf_advanced_options" in kwargs: 
        command_args.append(kwargs["export_vcf_advanced_options"])
    else: 
        ref_date = str(datetime.date.today())
        command_args.append("-R {input}".format(input=ref_name_version))
        command_args.append("-d ''")

    try:
        vcf_command = " ".join(command_args)
        print "Executing: " + vcf_command
        print subprocess.check_output(vcf_command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError, e: 
        print e
        print e.output
        raise dxpy.AppError("APP ERROR: App was not able to convert pindel to vcf. Please check pindel2vcf inputs")
Beispiel #41
0
    def received_message(self, message):
        """Dispatch one JSON-encoded log message from the websocket."""
        parsed = json.loads(message)

        seen_before = 'job' not in parsed or parsed['job'] in self.seen_jobs
        if self.print_job_info and not seen_before:
            # New job id: cache its description and print a header line.
            description = dxpy.describe(parsed['job'])
            self.seen_jobs[parsed['job']] = description
            print(
                get_find_executions_string(
                    description,
                    has_children=False,
                    show_outputs=False
                )
            )

        if parsed.get('source') == 'SYSTEM' and parsed.get('msg') == 'END_LOG':
            # Server sentinel: stop the event loop (no callback/print).
            self._app.keep_running = False
            return
        if self.msg_callback:
            self.msg_callback(parsed)
        else:
            print(self.msg_output_format.format(**parsed))
Beispiel #42
0
    def closed(self, code=None, reason=None):
        """Websocket close handler: record the close code/reason, surface
        streaming errors as DXJobLogStreamingException, and optionally exit
        non-zero when the watched job failed."""
        # Normalize the close code/reason (1000 = clean close, 1006 = abnormal
        # per the websocket convention used here).
        if code:
            self.closed_code = code
            self.closed_reason = reason
        elif not self.error:
            self.closed_code = 1000
            self.closed_reason = "Normal"
        elif self.exception and type(self.exception) in {KeyboardInterrupt, SystemExit}:
            self.closed_code = 1000
            self.closed_reason = "Connection terminated by client"
        else:
            self.closed_code = 1006
            self.closed_reason = str(self.exception) if self.exception else "Abnormal"

        if self.closed_code != 1000:
            # The server may encode a structured {type, message} error as JSON
            # in the reason; fall back to raw code/reason if it doesn't parse
            # (ValueError) or lacks the expected keys (KeyError).
            try:
                error = json.loads(self.closed_reason)
                raise DXJobLogStreamingException(
                    "Error while streaming job logs: {type}: {message}\n".format(
                        **error
                    )
                )
            except (KeyError, ValueError):
                raise DXJobLogStreamingException(
                    "Error while streaming job logs: {code}: {reason}\n".format(
                        code=self.closed_code, reason=self.closed_reason
                    )
                )
        elif self.print_job_info:
            # Refresh and print the final state of every job we reported on.
            if self.job_id not in self.seen_jobs:
                self.seen_jobs[self.job_id] = {}
            for job_id in self.seen_jobs.keys():
                self.seen_jobs[job_id] = dxpy.describe(job_id)
                print(
                    get_find_executions_string(
                        self.seen_jobs[job_id],
                        has_children=False,
                        show_outputs=True
                    )
                )
        else:
            self.seen_jobs[self.job_id] = dxpy.describe(self.job_id)

        # Propagate job failure as exit status 3 when requested.
        if (self.exit_on_failed
                and self.seen_jobs[self.job_id].get('state') in {'failed', 'terminated'}):
            err_exit(code=3)
 def test_build_asset_with_valid_dxasset(self):
     """An asset spec with an instanceType builds in the current project,
     and the build job runs on the requested instance type."""
     spec = {
         "name": "asset_library_name",
         "title": "A human readable name",
         "description": "A detailed description about the asset",
         "version": "0.0.1",
         "distribution": "Ubuntu",
         "release": "12.04",
         "instanceType": "mem1_ssd1_x2",
         "execDepends": [{"name": "python-numpy"}]
     }
     src_dir = self.write_asset_directory("asset_with_valid_json", json.dumps(spec))
     bundle_id = json.loads(run('dx build_asset --json ' + src_dir))['id']
     self.assertIn('record', bundle_id)
     self.assertEqual(dxpy.describe(bundle_id)['project'], self.project)
     # The record's creator job must have run on the requested instance type.
     build_job = dxpy.describe(bundle_id)['createdBy']['job']
     self.assertEqual(dxpy.describe(build_job)['instanceType'], "mem1_ssd1_x2")
Beispiel #44
0
def build(project, folder, version_id, top_dir):
    """Assemble the jar, ensure the runtime asset exists in project:folder,
    and return an AssetDesc for it."""
    sbt_assembly(top_dir)
    asset = find_asset(project, folder)
    if asset is None:
        # First build in this project/folder: create prerequisites, then retry.
        make_prerequisits(project, folder, version_id, top_dir)
        asset = find_asset(project, folder)
    project_region = dxpy.describe(project.get_id())['region']
    return AssetDesc(project_region, asset.get_id(), project)
Beispiel #45
0
def process(reads_file, reference_tar, bwa_aln_params, bwa_version, debug):
    """Run `bwa aln` on one reads file against an indexed reference and
    upload the resulting suffix-array (.sai) file.

    reads_file and reference_tar are dx links; the tar holds `bwa index`
    output and should be uncompressed to avoid repeating the decompression.
    Returns {"suffix_array_index": <dxlink>}.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    bwa = BWA_PATH.get(bwa_version)
    assert bwa, "BWA version %s is not supported" % (bwa_version)
    logger.info("In process with bwa %s" % (bwa))

    # Pull both inputs down to the local filesystem.
    reads_filename = dxpy.describe(reads_file)['name']
    dxpy.download_dxfile(reads_file, reads_filename)
    reads_basename = strip_extensions(reads_filename, STRIP_EXTENSIONS)

    reference_tar_filename = dxpy.describe(reference_tar)['name']
    dxpy.download_dxfile(reference_tar, reference_tar_filename)
    reference_filename = \
        resolve_reference(reference_tar_filename, 'reference_files')
    logger.info("Using reference file: %s" % (reference_filename))

    print(subprocess.check_output('ls -l', shell=True))

    # Build the suffix-array index, streaming bwa's stdout into the .sai file.
    sai_filename = '%s.sai' % (reads_basename)
    bwa_command = "%s aln %s -t %d %s %s" \
        % (bwa, bwa_aln_params, cpu_count(),
           reference_filename, reads_filename)
    logger.info("Running bwa with %s" % (bwa_command))
    with open(sai_filename, 'w') as sai_file:
        subprocess.check_call(shlex.split(bwa_command), stdout=sai_file)

    print(subprocess.check_output('ls -l', shell=True))

    # Upload the output to the DNAnexus project.
    logger.info("Uploading suffix array %s" % (sai_filename))
    sai_dxfile = dxpy.upload_local_file(sai_filename)
    output = {"suffix_array_index": dxpy.dxlink(sai_dxfile)}
    logger.info("Returning from process with %s" % (output))
    return output
def process(reads_file, reference_tar, bwa_aln_params, bwa_version, debug):
    """Run `bwa aln` for one reads file and return a link to the .sai output.

    Both reads_file and reference_tar are dx links; reference_tar is an
    (uncompressed) tar of `bwa index` output.
    """
    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    bwa = BWA_PATH.get(bwa_version)
    assert bwa, "BWA version %s is not supported" % (bwa_version)
    logger.info("In process with bwa %s" % (bwa))

    # Download the reads and derive the basename used for outputs.
    fastq_name = dxpy.describe(reads_file)['name']
    dxpy.download_dxfile(reads_file, fastq_name)
    out_root = strip_extensions(fastq_name, STRIP_EXTENSIONS)

    # Download and unpack the reference index tarball.
    tar_name = dxpy.describe(reference_tar)['name']
    dxpy.download_dxfile(reference_tar, tar_name)
    ref_name = resolve_reference(tar_name, 'reference_files')
    logger.info("Using reference file: %s" % (ref_name))

    print(subprocess.check_output('ls -l', shell=True))

    # Generate the suffix-array index file.
    sai_name = '%s.sai' % (out_root)
    with open(sai_name, 'w') as sai_out:
        cmd = "%s aln %s -t %d %s %s" % (
            bwa, bwa_aln_params, cpu_count(), ref_name, fastq_name)
        logger.info("Running bwa with %s" % (cmd))
        subprocess.check_call(shlex.split(cmd), stdout=sai_out)

    print(subprocess.check_output('ls -l', shell=True))

    # Upload the output to the DNAnexus project.
    logger.info("Uploading suffix array %s" % (sai_name))
    sai_dx = dxpy.upload_local_file(sai_name)
    output = {"suffix_array_index": dxpy.dxlink(sai_dx)}
    logger.info("Returning from process with %s" % (output))
    return output
Beispiel #47
0
    def closed(self, code=None, reason=None):
        """Websocket close handler: record the close code/reason, surface
        streaming errors as DXJobLogStreamingException, and exit non-zero
        when the watched job failed or was terminated."""
        # Normalize the close code/reason (1000 = clean close, 1006 = abnormal
        # per the websocket convention used here).
        if code:
            self.closed_code = code
            self.closed_reason = reason
        elif not self.error:
            self.closed_code = 1000
            self.closed_reason = "Normal"
        elif self.exception and type(self.exception) in {KeyboardInterrupt, SystemExit}:
            self.closed_code = 1000
            self.closed_reason = "Connection terminated by client"
        else:
            self.closed_code = 1006
            self.closed_reason = str(self.exception) if self.exception else "Abnormal"

        if self.closed_code != 1000:
            # The server may encode a structured {type, message} error as JSON
            # in the reason; fall back to raw code/reason if it doesn't parse
            # (ValueError) or lacks the expected keys (KeyError).
            try:
                error = json.loads(self.closed_reason)
                raise DXJobLogStreamingException(
                    "Error while streaming job logs: {type}: {message}\n".format(
                        **error
                    )
                )
            except (KeyError, ValueError):
                raise DXJobLogStreamingException(
                    "Error while streaming job logs: {code}: {reason}\n".format(
                        code=self.closed_code, reason=self.closed_reason
                    )
                )
        elif self.print_job_info:
            # Refresh and print the final state of every job we reported on.
            if self.job_id not in self.seen_jobs:
                self.seen_jobs[self.job_id] = {}
            for job_id in self.seen_jobs.keys():
                self.seen_jobs[job_id] = dxpy.describe(job_id)
                print(
                    get_find_executions_string(
                        self.seen_jobs[job_id],
                        has_children=False,
                        show_outputs=True
                    )
                )
        else:
            self.seen_jobs[self.job_id] = dxpy.describe(self.job_id)

        # Always propagate job failure/termination as exit status 3.
        if self.seen_jobs[self.job_id].get('state') in {'failed', 'terminated'}:
            err_exit(code=3)
Beispiel #48
0
def app_2_upversion(app_id):
    """Return the stripped upstreamVersion for an app id, falling back to the
    hard-coded table and finally to 'NA' when the detail is absent."""
    try:
        upversion = dxpy.describe(app_id)['details']['upstreamVersion']
    except KeyError:
        # Missing details/upstreamVersion: consult the fallback table.
        if app_id in app_id_hardcode_up_version:
            return app_id_hardcode_up_version[app_id]
        return 'NA'
    return upversion.strip()
Beispiel #49
0
def description_from_fid(fid,properties=False):
    '''Returns file description object from fid.

    fid may be a key into the module-level FILES cache (of dxlinks) or a
    bare file id / dxlink itself.  Pass properties=True to include the
    file's properties in the description.
    '''
    try:
        dxlink = FILES[fid]
    except KeyError:
        # Not in the cache: treat fid as a file id / dxlink directly.
        # (The original bare `except:` also hid unrelated errors.)
        #print >> sys.stderr, "File %s not cached, trying id" % fid)
        dxlink = fid

    return dxpy.describe(dxlink,incl_properties=properties)
def get_notebook_app_versions():
    """
    Get the valid version numbers of the notebook app.
    """
    versions = []
    # One describe per published version; coerce the version field to str.
    for app in dxpy.find_apps(name=NOTEBOOK_APP, all_versions=True):
        versions.append(str(dxpy.describe(app['id'])['version']))
    return versions
Beispiel #51
0
def _get_sequence_stream(dxf):
    """From the given dxfile, create a command to stream the contents
    to stdout, piping through gunzip when the file is gzipped."""
    file_name = dxpy.describe(dxf)['name']
    stream_cmd = 'dx cat {0} '.format(dxf['$dnanexus_link'])
    extension = os.path.splitext(file_name)[-1]
    if extension == '.gz':
        stream_cmd += '| gunzip '
    return stream_cmd
Beispiel #52
0
def description_from_fid(fid, properties=False):
    '''Returns file description object from fid.

    fid may be a key into the module-level FILES cache (of dxlinks) or a
    bare file id / dxlink itself.  Pass properties=True to include the
    file's properties in the description.
    '''
    try:
        dxlink = FILES[fid]
    except KeyError:
        # Not in the cache: treat fid as a file id / dxlink directly.
        # (The original bare `except:` also hid unrelated errors.)
        #print >> sys.stderr, "File %s not cached, trying id" % fid)
        dxlink = fid

    return dxpy.describe(dxlink, incl_properties=properties)
def get_control_mapping_stages(peaks_analysis, experiment, keypair, server, reps=None):
	'''Finds the control-input mapping stages for a peaks analysis.

	Returns a list of mapping-stage dicts (one per rep), or None if the
	stages for any rep cannot be found.
	'''
	# Default replicates; avoids the shared mutable default [1,2] the
	# original used (list defaults are evaluated once per process).
	if reps is None:
		reps = [1, 2]
	#Find the control inputs
	logger.debug('in get_control_mapping_stages with peaks_analysis %s; experiment %s; reps %s' %(peaks_analysis['id'], experiment['accession'], reps))
	peaks_stages = peaks_analysis.get('stages')
	peaks_stage = next(stage for stage in peaks_stages if stage['execution']['name'] == "ENCODE Peaks")
	# Walk back from each control tagAlign input to the analysis that produced it.
	tas = [dxpy.describe(peaks_stage['execution']['input']['ctl%s_ta' %(n)]) for n in reps]
	mapping_jobs = [dxpy.describe(ta['createdBy']['job']) for ta in tas]
	mapping_analyses = [dxpy.describe(mapping_job['analysis']) for mapping_job in mapping_jobs]

	mapping_stages = []
	for (i,repn) in enumerate(reps):
		mapping_stage = get_mapping_stages(mapping_analyses[i], keypair, server, repn)
		if not mapping_stage:
			logger.error('%s: failed to find mapping stages for rep%d' %(peaks_analysis['id'], repn))
			return None
		else:
			mapping_stages.append(mapping_stage)

	return mapping_stages
Beispiel #54
0
def file_path_from_fid(fid,projectToo=False):
    '''Returns full dx path to file from a file id, optionally prefixed
    with the project name ("project:/folder/name").'''
    desc = description_from_fid(fid)
    folder = desc['folder']
    if folder == '/':
        path = '/' + desc['name']
    else:
        path = folder + '/' + desc['name']
    if projectToo:
        # Prefix with the containing project's human-readable name.
        path = dxpy.describe(desc['project'])['name'] + ':' + path
    return path
def poll_for_server_running(job_id):
    """
    Poll for the job to start running and post the SERVER_READY_TAG.
    """
    sys.stdout.write('Waiting for server in {0} to initialize ...'.format(job_id))
    sys.stdout.flush()
    # Re-describe the job until it posts the ready tag or fails.
    while True:
        desc = dxpy.describe(job_id)
        if SERVER_READY_TAG in desc['tags'] or desc['state'] == 'failed':
            break
        time.sleep(SLEEP_PERIOD)
        sys.stdout.write('.')
        sys.stdout.flush()

    # If the server job failed, provide friendly advice.
    if desc['state'] == 'failed':
        msg = RED('Error:') + ' Server failed to run.\n'
        msg += 'You may want to check the job logs by running:'
        msg += BOLD('dx watch {0}'.format(job_id))
        err_exit(msg)
def get_input_spec_patterns():
    ''' Extract the inputSpec patterns, if they exist -- modifed from dx-upload-all-outputs

    Returns a dict of all patterns, with keys equal to the respective
    input parameter names.
    '''
    input_spec = None
    if 'DX_JOB_ID' in environ:
        # Running in the cloud: ask the platform for the executable's spec.
        job_desc = dxpy.describe(dxpy.JOB_ID)
        if job_desc["function"] == "main":
            # The input spec does not apply for subjobs
            exec_desc = dxpy.describe(job_desc.get("app", job_desc.get("applet")))
            input_spec = exec_desc.get("inputSpec")
    elif 'DX_TEST_DXAPP_JSON' in environ:
        # Local testing: read the spec straight from the dxapp.json on disk.
        with open(environ['DX_TEST_DXAPP_JSON']) as fd:
            input_spec = json.load(fd).get('inputSpec')

    if input_spec is None:
        return {}

    # For each parameter that declares patterns, keep only the legal ones
    # (a pattern must contain a wildcard to be considered legal).
    return {
        spec['name']: [p for p in spec['patterns'] if '*' in p]
        for spec in input_spec
        if 'patterns' in spec
    }
Beispiel #57
0
    def test_dx_object_tagging(self):
        """Exercise `dx tag` / `dx untag`, including the --all (-a) flag."""
        tags = [u"Σ1=n", u"helloo0", u"ωω"]

        # Tag a fresh record and confirm every tag landed.
        record_id = run(u"dx new record Ψ --brief").strip()
        run(u"dx tag Ψ " + u" ".join(tags))
        current = dxpy.describe(record_id)['tags']
        for tag in tags:
            self.assertIn(tag, current)

        # Untag the first two; only the third should remain.
        run(u"dx untag Ψ " + u" ".join(tags[:2]))
        current = dxpy.describe(record_id)['tags']
        for tag in tags[:2]:
            self.assertNotIn(tag, current)
        self.assertIn(tags[2], current)

        # -a flag: tagging by (ambiguous) name with --all hits both records.
        second_record_id = run(u"dx new record Ψ --brief").strip()
        self.assertNotEqual(record_id, second_record_id)
        run(u"dx tag -a Ψ " + u" ".join(tags))
        current = dxpy.describe(record_id)['tags']
        for tag in tags:
            self.assertIn(tag, current)
        second_current = dxpy.describe(second_record_id)['tags']
        for tag in tags:
            self.assertIn(tag, second_current)

        # And untag --all clears both records.
        run(u"dx untag -a Ψ " + u" ".join(tags))
        current = dxpy.describe(record_id)['tags']
        self.assertEqual(len(current), 0)
        second_current = dxpy.describe(second_record_id)['tags']
        self.assertEqual(len(second_current), 0)
Beispiel #58
0
def DownloadFilesFromArray(input_ids):
    print "\nDownloading {n} files".format(n=len(input_ids))
    if len(input_ids) < 1:
        raise dxpy.AppInternalError("No files were given as input")
    filenames = []
    
    start_time = time.time()
    for id in input_ids:
        fn = dxpy.describe(id)["name"]
        filenames.append(fn)
        dxpy.download_dxfile(dxid=id, filename=fn)
    print "Downloaded {files} in {min} minutes".format(files=sorted(filenames), min=float((time.time()-start_time)/60))
    return sorted(filenames)