def download_project(self, download_dir):
    """
    Downloads the contents of the DNAnexus project into a local folder named after the project.

    The project folder is `download_dir`/`self.dx_project_name`; both levels are created
    when they do not exist yet. The individual download helpers are called in a fixed order
    and an empty COPY_COMPLETE.txt file is written last, once every download call has returned.

    Args:
        download_dir: `str` - The local parent directory in which the project folder is created.
    """
    announcement = "Downloading DNAnexus project {proj_name} ({proj_id}).".format(
        proj_name=self.dx_project_name,
        proj_id=self.dx_project_id)
    log_success_and_debug(announcement)

    # Make sure the parent exists, then the per-project folder beneath it.
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)
    project_dir = os.path.join(download_dir, self.dx_project_name)
    if not os.path.isdir(project_dir):
        os.mkdir(project_dir)

    # FASTQC reports get their own sub-folder.
    self.download_fastqc_reports(
        download_dir=os.path.join(project_dir, "FASTQC"))
    # In-house QC report.
    self.download_qc_report(download_dir=project_dir)
    # SampleSheet used in demultiplexing.
    self.download_samplesheet(download_dir=project_dir)
    # run_details.json
    self.download_run_details_json(download_dir=project_dir)
    # barcodes.json
    self.download_barcodes_json(download_dir=project_dir)
    # ${run_name}.metadata.tar
    self.download_metadata_tar(download_dir=project_dir)

    # FASTQ files are written to ${project_dir}/FASTQ; existing files are not overwritten.
    log_success_and_debug("Downloading the FASTQ files:")
    dxpy.download_folder(
        project=self.dx_project_id,
        destdir=os.path.join(project_dir, "FASTQ"),
        folder=self.DX_FASTQ_FOLDER,
        overwrite=False)

    # Empty marker file written only after all of the downloads above have returned.
    with open(os.path.join(project_dir, "COPY_COMPLETE.txt"), "w"):
        pass
def main(**kwargs):
    """Run ``checkfiles.py`` with portal credentials and upload its output files.

    The DCC credentials folder is downloaded into the working directory, the
    keypair for the calling user is looked up in ``KEYFILE``, the remaining
    keyword arguments are translated into ``checkfiles.py`` command-line
    options, and the script is run with ``python3``.

    Args:
        **kwargs: Options for the run.

            * ``key`` (optional): suffix joined to the caller's user ID with
              ``-`` to form the keyfile lookup key. Removed before the
              options are built.
            * ``url`` (optional): server that replaces the one from the
              keyfile. Removed before the options are built.
            * ``dx_file`` (optional): DNAnexus file ID; the file is
              downloaded under its own name and passed as ``--local-file``
              (in addition to the ordinary ``--dx-file`` option).
            * ``out`` / ``err`` (optional): local file names that are
              uploaded after the run (also forwarded as options).
            * anything else: a true ``bool`` becomes a bare ``--flag``, a
              false ``bool`` is dropped, a string or int becomes
              ``--name value`` (underscores in the name become dashes);
              values of any other type are skipped.

    Returns:
        dict: ``out`` and/or ``err`` mapped to DNAnexus links of the uploaded
        files; empty when neither name was supplied.

    Raises:
        PortalCredentialsError: the key is not present in ``KEYFILE``.
        subprocess.CalledProcessError: ``checkfiles.py`` exited non-zero.
    """
    dxpy.download_folder(DCC_CREDENTIALS_PROJECT, '.', folder=DCC_CREDENTIALS_FOLDER)

    if 'key' in kwargs:
        key = '-'.join([dxpy.api.system_whoami()['id'], kwargs.pop('key')])
    else:
        key = dxpy.api.system_whoami()['id']
    key_tuple = common.processkey(key, KEYFILE)
    if not key_tuple:
        logger.error("Key %s is not found in the keyfile %s" % (key, KEYFILE))
        raise PortalCredentialsError("Supply a valid keypair ID")
    authid, authpw, server = key_tuple
    if 'url' in kwargs:
        server = kwargs.pop('url')

    # ``unicode`` only exists on Python 2; fall back to ``str`` alone elsewhere.
    try:
        value_types = (str, unicode, int)
    except NameError:
        value_types = (str, int)

    # Build the command as an argument list. Joining everything into one
    # string and re-splitting it with shlex breaks any value, file name or
    # credential that contains whitespace, quotes or backslashes.
    argv = ['python3', 'checkfiles.py']
    for name, value in kwargs.items():
        option = "--" + name.replace('_', '-')
        if isinstance(value, bool):
            # bool is tested first because it is also an int
            if value:
                argv.append(option)
        elif isinstance(value, value_types):
            argv.extend([option, str(value)])

    if 'dx_file' in kwargs:
        dxfile = dxpy.DXFile(kwargs.get('dx_file'))
        dxpy.download_dxfile(dxfile, dxfile.name)
        argv.extend(['--local-file', dxfile.name])

    # Log a printable version of the command with the credentials masked.
    masked_tail = ['--username', "." * len(authid),
                   '--password', "." * len(authpw),
                   server]
    logger.info(' '.join(argv + masked_tail))

    # The server needs to be the last argument.
    subprocess.check_call(
        argv + ['--username', authid, '--password', authpw, server])

    output = {}
    outfilename = kwargs.get('out')
    errfilename = kwargs.get('err')
    if outfilename:
        out = dxpy.upload_local_file(outfilename)
        output.update({'out': dxpy.dxlink(out)})
    if errfilename:
        err = dxpy.upload_local_file(errfilename)
        output.update({'err': dxpy.dxlink(err)})
    return output
def _download_folders(folders, destdir, args):
    """Download every requested remote folder underneath ``destdir``.

    Args:
        folders: mapping of project -> iterable of ``(folder, strip_prefix)``
            pairs. ``strip_prefix`` is removed from the front of ``folder``
            to obtain the path created below ``destdir``.
        destdir: local directory that receives the downloaded folders.
        args: object with a ``recursive`` attribute; ``err_exit`` is called
            with an error message for a folder when it is false.
    """
    for project in folders:
        entries = folders[project]
        for folder, strip_prefix in entries:
            if not args.recursive:
                err_exit('Error: "' + folder + '" is a folder but the -r/--recursive option was not given')
            assert folder.startswith(strip_prefix)

            # Path of the folder relative to the stripped prefix, without a
            # leading slash so os.path.join keeps it below destdir.
            relative_path = folder[len(strip_prefix):].lstrip('/')
            local_target = os.path.join(destdir, relative_path)
            try:
                dxpy.download_folder(project, local_target, folder=folder)
            except:
                # Catch-all kept as-is: every failure is handed to err_exit()
                # (presumably it reports the in-flight exception and exits —
                # confirm against its definition).
                err_exit()
def download(self, dest, **kwargs):
    """Download a directory.

    The remote folder is ``'/'`` followed by ``self.resource``; when
    ``self.resource`` is empty or ``None`` the project root ``'/'`` is used.

    Args:
        dest (Path): The output directory
        **kwargs: Accepted but not used by this method.

    Raises:
        NotFoundError: When source or dest path is not a directory
            (NOTE(review): not raised directly in this method; presumably
            surfaces from the project lookup or the dxpy call — confirm).
    """
    project_id = self.canonical_project
    remote_folder = '/' + (self.resource or '')
    dxpy.download_folder(project=project_id,
                         destdir=dest,
                         folder=remote_folder)
def download_fastqc_reports(self, download_dir):
    """
    Downloads the FASTQC reports folder from the DNAnexus sequencing results project.

    The folder `self.DX_FASTQC_FOLDER` of project `self.dx_project_id` is downloaded
    with overwriting enabled. `download_dir` is created first if it does not exist.

    Args:
        download_dir: `str` - The local directory path to download the FASTQC reports to.

    Returns:
        `str`. The `download_dir` that was passed in, i.e. the folder holding the
        downloaded FASTQC reports.
    """
    if not os.path.isdir(download_dir):
        os.makedirs(download_dir)

    # Shared tail of the "Downloading ..." / "Downloaded ..." log messages.
    what_and_where = "the FASTQC reports to {download_dir}.".format(
        download_dir=download_dir)

    debug_logger.debug("Downloading " + what_and_where)
    dxpy.download_folder(
        project=self.dx_project_id,
        destdir=download_dir,
        folder=self.DX_FASTQC_FOLDER,
        overwrite=True)
    success_logger.info("Downloaded " + what_and_where)

    return download_dir
def main(reads1, reads2, crop_length, reference_tar, bwa_aln_params,
         bwa_version, samtools_version, keyfile, debug, key=None):
    """Resolve the read and reference inputs and return them with the mapping parameters.

    ``reads1`` and ``reads2`` are expected to be arrays of file identifiers;
    identifiers can be DNAnexus files or ENCODE file accession numbers.
    For single-end (SE) data ``reads2`` is empty; for paired-end (PE) data
    ``len(reads1) == len(reads2)``. Multiple PE pairs or SE files are pooled
    into one file per read end before mapping. Mixed SE/PE input is an error.

    TODO: Add option to down-sample mixed PE/SE to SE

    Args:
        reads1: identifiers of the read-1 (or SE) fastq files.
        reads2: identifiers of the read-2 fastq files; empty for SE.
        crop_length: passed through to the output unchanged.
        reference_tar: identifier of the reference tar file.
        bwa_aln_params: passed through to the output unchanged.
        bwa_version: passed through to the output unchanged.
        samtools_version: passed through to the output unchanged.
        keyfile: keyfile in which the credentials key is looked up.
        debug: when true, log at DEBUG level instead of INFO; also passed
            through to the output.
        key: optional credentials key. Empty, ``'www'``, ``'submit'`` and
            ``'production'`` select the calling user's ID; ``'test'``
            selects the user's ID with ``-test`` appended; any other value
            is used as given.

    Returns:
        dict: ``reads1``, ``reference_tar``, ``crop_length``,
        ``bwa_aln_params``, ``bwa_version``, ``samtools_version`` and
        ``debug``, plus ``reads2`` for paired-end input.

    Raises:
        AssertionError: the key is not in the keyfile, or the paired-end
            read lists differ in length.
        IndexError: ``reads1`` is empty.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # fetch the credentials from the DCC Credentials project
    dxpy.download_folder(
        DCC_CREDENTIALS_PROJECT, '.', folder=DCC_CREDENTIALS_FOLDER)
    if not key or key in ['www', 'submit', 'production']:
        key = dxpy.api.system_whoami()['id']
    elif key == 'test':
        key = dxpy.api.system_whoami()['id'] + "-test"
    key_tuple = common.processkey(key, keyfile)
    # Raised explicitly (same exception type as before) so the check is not
    # stripped when Python runs with -O.
    if not key_tuple:
        raise AssertionError(
            "ERROR: Key %s is not found in the keyfile %s" % (key, keyfile))
    authid, authpw, server = key_tuple
    keypair = (authid, authpw)

    # Lazy logger arguments: formatting cannot fail on tuple inputs.
    logger.info("reads1: %s", reads1)
    logger.info("reads2: %s", reads2)

    paired_end = bool(reads2)
    if paired_end and len(reads1) != len(reads2):
        raise AssertionError(
            "Paired-end and unequal numbers of read1 and read2 identifiers: %s %s"
            % (reads1, reads2))
    # Fail here with a message instead of an anonymous IndexError further down.
    if not reads1:
        raise IndexError("reads1 must contain at least one file identifier")

    reads1_files = [resolve_file(read, server, keypair) for read in reads1]
    if paired_end:
        reads2_files = [resolve_file(read, server, keypair) for read in reads2]
    else:
        reads2_files = []

    # pooling multiple fastqs
    if len(reads1_files) > 1:
        reads1_file = pooled(reads1_files)
    else:
        reads1_file = reads1_files[0]

    if len(reads2_files) > 1:
        reads2_file = pooled(reads2_files)
    elif len(reads2_files) == 1:
        reads2_file = reads2_files[0]
    else:
        reads2_file = None

    reference_tar_file = resolve_file(reference_tar, server, keypair)

    logger.info('Resolved reads1 to %s', reads1_file)
    if reads2_file:
        logger.info('Resolved reads2 to %s', reads2_file)
    logger.info('Resolved reference_tar to %s', reference_tar_file)

    output = {
        "reads1": reads1_file,
        "reference_tar": reference_tar_file,
        "crop_length": crop_length,
        "bwa_aln_params": bwa_aln_params,
        "bwa_version": bwa_version,
        "samtools_version": samtools_version,
        "debug": debug
    }
    if reads2_file:
        output.update({"reads2": reads2_file})

    logger.info('Exiting with output: %s', output)
    return output
def main(reads1, reads2, crop_length, reference_tar, bwa_aln_params,
         bwa_version, samtools_version, keyfile, debug, key=None):
    """Resolve the read and reference inputs and return them with the mapping parameters.

    ``reads1`` and ``reads2`` are expected to be arrays of file identifiers;
    identifiers can be DNAnexus files or ENCODE file accession numbers.
    For single-end (SE) data ``reads2`` is empty; for paired-end (PE) data
    ``len(reads1) == len(reads2)``. Multiple PE pairs or SE files are pooled
    into one file per read end before mapping. Mixed SE/PE input is an error.

    TODO: Add option to down-sample mixed PE/SE to SE

    Args:
        reads1: identifiers of the read-1 (or SE) fastq files.
        reads2: identifiers of the read-2 fastq files; empty for SE.
        crop_length: passed through to the output unchanged.
        reference_tar: identifier of the reference tar file.
        bwa_aln_params: passed through to the output unchanged.
        bwa_version: passed through to the output unchanged.
        samtools_version: passed through to the output unchanged.
        keyfile: keyfile in which the credentials key is looked up.
        debug: when true, log at DEBUG level instead of INFO; also passed
            through to the output.
        key: optional credentials key. Empty, ``'www'``, ``'submit'`` and
            ``'production'`` select the calling user's ID; ``'test'``
            selects the user's ID with ``-test`` appended; any other value
            is used as given.

    Returns:
        dict: ``reads1``, ``reference_tar``, ``crop_length``,
        ``bwa_aln_params``, ``bwa_version``, ``samtools_version`` and
        ``debug``, plus ``reads2`` for paired-end input.

    Raises:
        AssertionError: the key is not in the keyfile, or the paired-end
            read lists differ in length.
        IndexError: ``reads1`` is empty.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # fetch the credentials from the DCC Credentials project
    dxpy.download_folder(DCC_CREDENTIALS_PROJECT,
                         '.',
                         folder=DCC_CREDENTIALS_FOLDER)
    if not key or key in ['www', 'submit', 'production']:
        key = dxpy.api.system_whoami()['id']
    elif key == 'test':
        key = dxpy.api.system_whoami()['id'] + "-test"
    key_tuple = common.processkey(key, keyfile)
    # Explicit raise (same exception type as the former assert) so the
    # credential check still runs under "python -O".
    if not key_tuple:
        raise AssertionError(
            "ERROR: Key %s is not found in the keyfile %s" % (key, keyfile))
    authid, authpw, server = key_tuple
    keypair = (authid, authpw)

    # Let the logger do the formatting; "%" would fail on tuple inputs.
    logger.info("reads1: %s", reads1)
    logger.info("reads2: %s", reads2)

    paired_end = bool(reads2)
    if paired_end and len(reads1) != len(reads2):
        raise AssertionError(
            "Paired-end and unequal numbers of read1 and read2 identifiers: %s %s"
            % (reads1, reads2))
    # Report an empty reads1 clearly, before any file is resolved.
    if not reads1:
        raise IndexError("reads1 must contain at least one file identifier")

    reads1_files = [resolve_file(read, server, keypair) for read in reads1]
    if paired_end:
        reads2_files = [resolve_file(read, server, keypair) for read in reads2]
    else:
        reads2_files = []

    # pooling multiple fastqs
    if len(reads1_files) > 1:
        reads1_file = pooled(reads1_files)
    else:
        reads1_file = reads1_files[0]

    if len(reads2_files) > 1:
        reads2_file = pooled(reads2_files)
    elif len(reads2_files) == 1:
        reads2_file = reads2_files[0]
    else:
        reads2_file = None

    reference_tar_file = resolve_file(reference_tar, server, keypair)

    logger.info('Resolved reads1 to %s', reads1_file)
    if reads2_file:
        logger.info('Resolved reads2 to %s', reads2_file)
    logger.info('Resolved reference_tar to %s', reference_tar_file)

    output = {
        "reads1": reads1_file,
        "reference_tar": reference_tar_file,
        "crop_length": crop_length,
        "bwa_aln_params": bwa_aln_params,
        "bwa_version": bwa_version,
        "samtools_version": samtools_version,
        "debug": debug
    }
    if reads2_file:
        output.update({"reads2": reads2_file})

    logger.info('Exiting with output: %s', output)
    return output