import os


def get_pipeline_inputs(job, input_flag, input_file, encryption_key=None,
                        per_file_encryption=False, gdc_download_token=None):
    """
    Get the input file from S3, a URL, the GDC, or local disk and write it to the file store.

    :param str input_flag: The name of the flag
    :param str input_file: The value passed in the config file
    :param str encryption_key: Path to the encryption key if encrypted with sse-c
    :param bool per_file_encryption: If encrypted, was the file encrypted using the per-file
           method?
    :param str gdc_download_token: The download token to obtain files from the GDC
    :return: fsID for the file
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster('Obtaining file (%s) to the file job store' % input_flag)
    if input_file.startswith(('http', 'https', 'ftp')):
        input_file = get_file_from_url(job, input_file, encryption_key=encryption_key,
                                       per_file_encryption=per_file_encryption,
                                       write_to_jobstore=True)
    elif input_file.startswith(('S3', 's3')):
        input_file = get_file_from_s3(job, input_file, encryption_key=encryption_key,
                                      per_file_encryption=per_file_encryption,
                                      write_to_jobstore=True)
    elif input_file.startswith(('GDC', 'gdc')):
        input_file = get_file_from_gdc(job, input_file, gdc_download_token=gdc_download_token,
                                       write_to_jobstore=True)
    else:
        assert os.path.exists(input_file), 'Bogus Input : ' + input_file
        input_file = job.fileStore.writeGlobalFile(input_file)
    return input_file
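

# A minimal usage sketch (an assumption, not part of the pipeline): one way get_pipeline_inputs
# could be driven from a single Toil job to stage every input named in a config dict. The
# function name, config keys, and locations below are hypothetical and only for illustration.
def _example_stage_inputs(job, input_config, encryption_key=None):
    """
    Toy job that pulls every entry of ``input_config`` into the job store.

    :param dict input_config: Maps an input flag to its location (local path, http(s)/ftp url,
           S3 url, or GDC uuid).
    :return: Dict mapping each input flag to its FileID
    :rtype: dict
    """
    return {flag: get_pipeline_inputs(job, flag, location, encryption_key=encryption_key)
            for flag, location in input_config.items()}

# From a Toil runner this sketch would be wrapped with something like
#     Job.wrapJobFn(_example_stage_inputs, {'tumor_dna_fastq_1': 'S3://my-bucket/tumor_1.fq.gz'})
# where the flag name and bucket are made up.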


def _get_MHC_file(job):
    """
    Get the MHC file and write to jobstore

    :return: FSID for the MHC file
    """
    mhc_file = get_file_from_s3(
        job, 'S3://cgl-protect-data/hg19_references/mhc_pathway_genes.json.tar.gz',
        write_to_jobstore=False)
    return {'genes_file': job.fileStore.writeGlobalFile(mhc_file)}


def _get_test_rsem_file(job, test_src_folder):
    """
    Get the test rsem file and write to jobstore

    :return: FSID for the rsem file
    """
    rsem_file = get_file_from_s3(job, 'S3://cgl-pipeline-inputs/protect/unit_results/expression/'
                                      'rsem.genes.results',
                                 write_to_jobstore=False)
    return job.fileStore.writeGlobalFile(rsem_file)
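

# A minimal sketch (assumed, not part of the test suite) of how the two helpers above might be
# consumed: a toy job that stages the MHC gene list and the unit-test rsem results, then reads
# both back out of the job store. The function name and the final check are hypothetical; only
# get_file_from_s3 and the Toil fileStore calls come from the surrounding code.
def _example_check_test_inputs(job, test_src_folder=None):
    """
    Toy job verifying that the staged test inputs can be read back from the job store.
    """
    mhc_fsids = _get_MHC_file(job)
    rsem_fsid = _get_test_rsem_file(job, test_src_folder)
    # readGlobalFile returns a local path to a copy of the staged file.
    genes_tarball = job.fileStore.readGlobalFile(mhc_fsids['genes_file'])
    rsem_results = job.fileStore.readGlobalFile(rsem_fsid)
    assert os.path.exists(genes_tarball) and os.path.exists(rsem_results)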


def _download_files(job):
    """
    Attempts to download an unencrypted file, a file encrypted with a key, and a file encrypted
    with a hash of a master key.
    """
    keyfile = os.path.abspath('test.key')
    with open(keyfile, 'w') as k_f:
        k_f.write('protectwillhelpwithimmunotherapy')
    http_base = 'https://s3-us-west-2.amazonaws.com/pimmuno-test-data/CI_test_input/'
    s3_base = 'S3://pimmuno-test-data/CI_test_input/'
    unencrypted = 'unencrypted.file'
    encrypted_with_key = 'encrypted_with_test_key.file'
    encrypted_with_hash = 'encrypted_with_key_hash.file'
    # Download with https schema
    get_file_from_s3(job, http_base + unencrypted, write_to_jobstore=False)
    get_file_from_s3(job, http_base + encrypted_with_key, encryption_key=keyfile,
                     per_file_encryption=False, write_to_jobstore=False)
    get_file_from_s3(job, http_base + encrypted_with_hash, encryption_key=keyfile,
                     write_to_jobstore=False)
    # Download with S3 schema
    get_file_from_s3(job, s3_base + unencrypted, write_to_jobstore=False)
    # Test wrong schema
    try:
        get_file_from_s3(job, 's' + s3_base + encrypted_with_hash, encryption_key=keyfile,
                         write_to_jobstore=False)
    except RuntimeError as err:
        if 'Unexpected url scheme' not in err.message:
            raise
    # Test downloading an encrypted file without a key
    try:
        get_file_from_s3(job, s3_base + encrypted_with_hash, write_to_jobstore=False)
    except RuntimeError as err:
        if '400' not in err.message:
            raise
    # Test downloading the file encrypted with the key hash using the master key (this emulates
    # downloading a file with the wrong key)
    try:
        get_file_from_s3(job, s3_base + encrypted_with_hash, encryption_key=keyfile,
                         per_file_encryption=False, write_to_jobstore=False)
    except RuntimeError as err:
        if '403' not in err.message:
            raise
    # Test downloading an unencrypted file with a key
    try:
        get_file_from_s3(job, s3_base + unencrypted, encryption_key=keyfile,
                         per_file_encryption=False, write_to_jobstore=False)
    except RuntimeError as err:
        if '400' not in err.message:
            raise
    # Test downloading a non-existent file
    try:
        get_file_from_s3(job, s3_base + unencrypted + 'xx', write_to_jobstore=False)
    except RuntimeError as err:
        if 'exist on s3?' not in err.message:
            raise
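

# A minimal driver sketch (an assumption, not part of the test suite): one way the
# _download_files job could be launched against a throwaway local job store. The job store
# path, options, and function name are made up for illustration.
def _example_run_download_test():
    from toil.common import Toil
    from toil.job import Job
    options = Job.Runner.getDefaultOptions('./test_download_jobstore')
    options.logLevel = 'INFO'
    options.clean = 'always'
    with Toil(options) as workflow:
        # Run _download_files as the root (and only) job of the workflow.
        workflow.start(Job.wrapJobFn(_download_files))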