def download_project(self, download_dir):
     """
     Downloads the sequencing results of the DNAnexus project into a local folder.

     A folder named after the project (`self.dx_project_name`) is created inside
     `download_dir`, which itself is created when it doesn't exist yet. The FASTQC
     reports go into a "FASTQC" subfolder, the FASTQ files into a "FASTQ" subfolder,
     and the remaining files (in-house QC report, SampleSheet, run_details.json,
     barcodes.json, metadata tarball) directly into the project folder. An empty
     file named COPY_COMPLETE.txt is written to the project folder as the last step.

     Args:
         download_dir: `str` - The local directory in which to create the project folder.
     """
     log_success_and_debug(
         "Downloading DNAnexus project {proj_name} ({proj_id}).".format(
             proj_name=self.dx_project_name, proj_id=self.dx_project_id))

     # Make sure the parent directory exists, then the per-project directory below it.
     if not os.path.isdir(download_dir):
         os.makedirs(download_dir)
     project_dir = os.path.join(download_dir, self.dx_project_name)
     if not os.path.isdir(project_dir):
         os.mkdir(project_dir)

     # The FASTQC reports get their own subfolder.
     fastqc_dir = os.path.join(project_dir, "FASTQC")
     self.download_fastqc_reports(download_dir=fastqc_dir)

     # Everything else lands directly in the project folder, fetched in this order.
     for downloader_name in (
             "download_qc_report",         # the in-house QC report
             "download_samplesheet",       # the SampleSheet used in demultiplexing
             "download_run_details_json",  # run_details.json
             "download_barcodes_json",     # barcodes.json
             "download_metadata_tar",      # ${run_name}.metadata.tar
     ):
         getattr(self, downloader_name)(download_dir=project_dir)

     # The FASTQ files go into a FASTQ subfolder.
     log_success_and_debug("Downloading the FASTQ files:")
     fastq_dir = os.path.join(project_dir, "FASTQ")
     dxpy.download_folder(project=self.dx_project_id,
                          destdir=fastq_dir,
                          folder=self.DX_FASTQ_FOLDER,
                          overwrite=False)

     # Drop an empty marker file once every download above has returned.
     with open(os.path.join(project_dir, "COPY_COMPLETE.txt"), "w"):
         pass
Beispiel #2
0
def main(**kwargs):
    """Run ``checkfiles.py`` with portal credentials looked up in the keyfile.

    The credentials folder is downloaded into the current directory, the
    keypair for the calling user is looked up in ``KEYFILE``, and the remaining
    keyword arguments are translated into ``--option value`` command-line
    arguments for ``python3 checkfiles.py``.

    The command is assembled as an argument list rather than a single string
    that is split again, so values containing whitespace or quote characters
    (file names, passwords, ...) reach ``checkfiles.py`` as one argument each.

    Args:
        **kwargs: Options to forward. Handled specially:
            key: optional suffix appended (with ``-``) to the caller's user id
                to form the keyfile lookup key; not forwarded.
            url: optional server that overrides the one from the keyfile;
                not forwarded.
            dx_file: id of a DNAnexus file that is downloaded locally and
                passed as ``--local-file <name>`` (also forwarded as
                ``--dx-file``).
            out, err: local file names to upload after the run (also
                forwarded as ``--out`` / ``--err``).
            Every other ``bool`` becomes a bare flag when true; every other
            string or integer becomes ``--name value``. Underscores in names
            become dashes. Values of any other type are ignored.

    Returns:
        dict: ``{'out': <dxlink>, 'err': <dxlink>}`` with an entry for each of
        ``out`` / ``err`` that was supplied with a non-empty value.

    Raises:
        PortalCredentialsError: if the key is not found in the keyfile.
        subprocess.CalledProcessError: if ``checkfiles.py`` exits non-zero.
    """
    dxpy.download_folder(DCC_CREDENTIALS_PROJECT,
                         '.',
                         folder=DCC_CREDENTIALS_FOLDER)

    key = dxpy.api.system_whoami()['id']
    if 'key' in kwargs:
        key = '-'.join([key, kwargs.pop('key')])

    key_tuple = common.processkey(key, KEYFILE)
    if not key_tuple:
        logger.error("Key %s is not found in the keyfile %s", key, KEYFILE)
        raise PortalCredentialsError("Supply a valid keypair ID")
    authid, authpw, server = key_tuple
    if 'url' in kwargs:
        server = kwargs.pop('url')

    # ``unicode`` only exists on Python 2; fall back to ``str`` alone on 3.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    argv = ['python3', 'checkfiles.py']
    for name, value in kwargs.items():
        flag = '--' + name.replace('_', '-')
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            if value:
                argv.append(flag)
        elif isinstance(value, text_types) or isinstance(value, int):
            argv.extend([flag, str(value)])

    if 'dx_file' in kwargs:
        dxfile = dxpy.DXFile(kwargs.get('dx_file'))
        dxpy.download_dxfile(dxfile, dxfile.name)
        argv.extend(['--local-file', dxfile.name])

    # Log a copy of the command with the credentials masked out.
    masked_argv = argv + ['--username', '.' * len(authid),
                          '--password', '.' * len(authpw),
                          server]
    logger.info(' '.join(masked_argv))

    # NOTE(review): the password is handed over on the command line, as
    # before; it may be visible to other processes on the same host.
    # The server needs to be the last argument.
    argv.extend(['--username', authid, '--password', authpw, server])
    subprocess.check_call(argv)

    output = {}
    for field in ('out', 'err'):
        local_name = kwargs.get(field)
        if local_name:
            output[field] = dxpy.dxlink(dxpy.upload_local_file(local_name))

    return output
Beispiel #3
0
def _download_folders(folders, destdir, args):
    for project in folders:
        for folder, strip_prefix in folders[project]:
            if not args.recursive:
                err_exit('Error: "' + folder + '" is a folder but the -r/--recursive option was not given')
            assert(folder.startswith(strip_prefix))
            folder_destdir = os.path.join(destdir, folder[len(strip_prefix):].lstrip('/'))
            try:
                dxpy.download_folder(project, folder_destdir, folder=folder)
            except:
                err_exit()
Beispiel #4
0
    def download(self, dest, **kwargs):
        """Download this directory's contents from its project into ``dest``.

        The remote folder is ``'/'`` followed by ``self.resource``; when
        ``self.resource`` is empty or ``None`` the project root (``'/'``) is
        downloaded.

        Args:
            dest (Path): The output directory
            **kwargs: Accepted but not used by this method.

        Raises:
            NotFoundError: Documented for the case where the source or dest
                path is not a directory. It is not raised directly in this
                method; presumably it comes from the underlying calls (to be
                confirmed).
        """
        project_id = self.canonical_project
        remote_folder = '/' + (self.resource or '')
        dxpy.download_folder(project=project_id,
                             destdir=dest,
                             folder=remote_folder)
 def download_fastqc_reports(self, download_dir):
     """
     Downloads the FASTQC reports folder from the DNAnexus sequencing results project.

     Args:
         download_dir: `str` - The local directory path to download the FASTQC reports to.
             It is created (including parents) when it doesn't exist yet.

     Returns:
         `str`. The filepath to the downloaded FASTQC reports folder (same as `download_dir`).
     """
     if not os.path.isdir(download_dir):
         os.makedirs(download_dir)

     # Shared tail of the "Downloading ..." / "Downloaded ..." log messages.
     what_and_where = "the FASTQC reports to {download_dir}.".format(
         download_dir=download_dir)

     debug_logger.debug("Downloading " + what_and_where)
     dxpy.download_folder(
         project=self.dx_project_id,
         destdir=download_dir,
         folder=self.DX_FASTQC_FOLDER,
         overwrite=True)
     success_logger.info("Downloaded " + what_and_where)

     return download_dir
def main(reads1, reads2, crop_length, reference_tar,
         bwa_aln_params, bwa_version, samtools_version,
         keyfile, debug, key=None):
    """Resolve the read and reference inputs and bundle them with the mapping parameters.

    ``reads1`` and ``reads2`` are expected to be arrays of file identifiers.
    Identifiers can be DNAnexus files or ENCODE file accession numbers.
    For single-end data ``reads2`` is empty; for paired-end data
    ``len(reads1) == len(reads2)``. Multiple files for one end are combined
    with ``pooled`` before mapping. Mixed SE/PE is an error, although it could
    be implemented as a "" entry at that position in the ``reads2`` array.

    TODO: Add option to down-sample mixed PE/SE to SE

    Returns:
        dict: ``reads1``, ``reference_tar`` (resolved files), the pass-through
        parameters ``crop_length``, ``bwa_aln_params``, ``bwa_version``,
        ``samtools_version`` and ``debug``, plus ``reads2`` when a read-2 file
        was resolved.

    Raises:
        AssertionError: if the key is not found in the keyfile, or if paired-end
            input has unequal numbers of read1 and read2 identifiers.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # fetch the credentials from the DCC Credentials project
    dxpy.download_folder(
        DCC_CREDENTIALS_PROJECT, '.', folder=DCC_CREDENTIALS_FOLDER)

    # Map the requested key name onto the caller's own keyfile entry.
    if key == 'test':
        key = dxpy.api.system_whoami()['id'] + "-test"
    elif not key or key in ('www', 'submit', 'production'):
        key = dxpy.api.system_whoami()['id']

    credentials = common.processkey(key, keyfile)
    assert credentials, "ERROR: Key %s is not found in the keyfile %s" % (key, keyfile)
    authid, authpw, server = credentials
    keypair = (authid, authpw)

    logger.info("reads1: %s" % (reads1))
    logger.info("reads2: %s" % (reads2))

    paired_end = bool(reads2)
    if paired_end:
        assert len(reads1) == len(reads2), "Paired-end and unequal numbers of read1 and read2 identifiers: %s %s" % (reads1, reads2)

    reads1_files = [resolve_file(identifier, server, keypair)
                    for identifier in reads1]
    reads2_files = (
        [resolve_file(identifier, server, keypair) for identifier in reads2]
        if paired_end else [])

    # pooling multiple fastqs
    reads1_file = pooled(reads1_files) if len(reads1_files) > 1 else reads1_files[0]

    if not reads2_files:
        reads2_file = None
    elif len(reads2_files) == 1:
        reads2_file = reads2_files[0]
    else:
        reads2_file = pooled(reads2_files)

    reference_tar_file = resolve_file(reference_tar, server, keypair)

    logger.info('Resolved reads1 to %s', reads1_file)
    if reads2_file:
        logger.info('Resolved reads2 to %s', reads2_file)
    logger.info('Resolved reference_tar to %s', reference_tar_file)

    output = {
        "reads1": reads1_file,
        "reference_tar": reference_tar_file,
        "crop_length": crop_length,
        "bwa_aln_params": bwa_aln_params,
        "bwa_version": bwa_version,
        "samtools_version": samtools_version,
        "debug": debug
    }
    if reads2_file:
        output["reads2"] = reads2_file

    logger.info('Exiting with output: %s' % (output))

    return output
def main(reads1,
         reads2,
         crop_length,
         reference_tar,
         bwa_aln_params,
         bwa_version,
         samtools_version,
         keyfile,
         debug,
         key=None):
    """Resolve the input files and return them together with the mapping settings.

    Input conventions:
      * ``reads1`` and ``reads2`` are expected to be arrays of file
        identifiers; identifiers can be DNAnexus files or ENCODE file
        accession numbers.
      * For SE, ``reads2`` is empty.
      * For PE, ``len(reads1) == len(reads2)``.
      * Multiple PE pairs or SE files are combined with ``pooled`` before
        mapping.
      * Mixed SE/PE is an error - although this could be implemented as just
        a "" entry at that position in the ``reads2`` array.

    TODO: Add option to down-sample mixed PE/SE to SE

    Returns:
        dict: the resolved ``reads1`` and ``reference_tar`` files, the
        unchanged ``crop_length``, ``bwa_aln_params``, ``bwa_version``,
        ``samtools_version`` and ``debug`` values, and ``reads2`` when a
        read-2 file was resolved.

    Raises:
        AssertionError: when the key is missing from the keyfile or the
            paired-end identifier lists differ in length.
    """
    level = logging.INFO
    if debug:
        level = logging.DEBUG
    logger.setLevel(level)

    # fetch the credentials from the DCC Credentials project
    dxpy.download_folder(DCC_CREDENTIALS_PROJECT,
                         '.',
                         folder=DCC_CREDENTIALS_FOLDER)

    # Named portal keys and a missing key both map to the caller's own id;
    # 'test' maps to the caller's "-test" entry.
    uses_own_id = not key or key in ['www', 'submit', 'production']
    if uses_own_id:
        key = dxpy.api.system_whoami()['id']
    elif key == 'test':
        key = dxpy.api.system_whoami()['id'] + "-test"

    key_tuple = common.processkey(key, keyfile)
    assert key_tuple, "ERROR: Key %s is not found in the keyfile %s" % (
        key, keyfile)
    authid, authpw, server = key_tuple
    keypair = (authid, authpw)

    def _resolve_all(identifiers):
        # Resolve each identifier in order against the selected server.
        return [resolve_file(item, server, keypair) for item in identifiers]

    logger.info("reads1: %s" % (reads1))
    logger.info("reads2: %s" % (reads2))

    paired_end = True if reads2 else False
    if paired_end:
        assert len(reads1) == len(reads2), (
            "Paired-end and unequal numbers of read1 and read2 identifiers: %s %s"
            % (reads1, reads2))

    reads1_files = _resolve_all(reads1)
    reads2_files = _resolve_all(reads2) if paired_end else []

    # pooling multiple fastqs
    if len(reads1_files) > 1:
        reads1_file = pooled(reads1_files)
    else:
        reads1_file = reads1_files[0]

    reads2_count = len(reads2_files)
    if reads2_count > 1:
        reads2_file = pooled(reads2_files)
    else:
        reads2_file = reads2_files[0] if reads2_count == 1 else None

    reference_tar_file = resolve_file(reference_tar, server, keypair)

    logger.info('Resolved reads1 to %s', reads1_file)
    if reads2_file:
        logger.info('Resolved reads2 to %s', reads2_file)
    logger.info('Resolved reference_tar to %s', reference_tar_file)

    output = {
        "reads1": reads1_file,
        "reference_tar": reference_tar_file,
        "crop_length": crop_length,
        "bwa_aln_params": bwa_aln_params,
        "bwa_version": bwa_version,
        "samtools_version": samtools_version,
        "debug": debug
    }
    if reads2_file:
        output["reads2"] = reads2_file

    logger.info('Exiting with output: %s' % (output))

    return output