def main(workflow):
    """Assemble and launch the metabolomics (MBX) workflow.

    Reads the ``MBX`` section of the config file and the submission
    manifest, then chains the task-building helpers: stage the manifest
    and the submitted spreadsheets, convert Excel files to CSV, attach
    metadata, and stage the resulting files to the public directory.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='MBX')
    manifest = parse_cfg_file(args.manifest_file)

    data_files = manifest.get('submitted_files')
    project = manifest.get('project')
    creation_date = manifest.get('submission_date')
    dataset_cfg = manifest.get('config')

    # Nothing to do unless the manifest lists metabolomics files.
    # NOTE(review): the flattened original does not show whether go() also
    # ran in this case; here it only runs when there is work -- confirm.
    if not (data_files and data_files.get('MBX')):
        return

    input_files = data_files.get('MBX').get('input')
    sample_names = get_sample_names(input_files)  # result not used below

    dir_roots = [conf.get('deposition_dir'),
                 conf.get('processing_dir'),
                 conf.get('public_dir')]
    (deposition_dir, processing_dir, public_dir) = create_project_dirs(
        dir_roots, project, creation_date, 'Metabolomics')

    # The manifest is staged one level above the dataset deposition folder.
    base_depo_dir = os.path.abspath(os.path.join(deposition_dir, '..'))
    manifest_file = stage_files(workflow, [args.manifest_file], base_depo_dir)
    deposited_files = stage_files(workflow, input_files, deposition_dir)

    # The metabolite data are a series of spreadsheets that get metadata
    # appended to them; Excel inputs are converted to CSV first.
    processed_files = excel_to_csv(workflow, deposited_files, processing_dir)
    pcl_files = add_metadata_to_tsv(
        workflow,
        processed_files,
        args.metadata_file,
        'metabolomics',
        conf.get('metadata_id_col'),
        metadata_rows=dataset_cfg.get('metadata_rows'),
        col_offset=dataset_cfg.get('col_offset'),
        target_cols=conf.get('target_metadata_cols', None))

    public_files = stage_files(workflow, pcl_files, public_dir)

    workflow.go()
def main(workflow):
    """Assemble and launch the MVX workflow.

    Reads the ``MVX`` config section and the submission manifest, stages
    the submitted files, queues quality control and FASTQ de-interleaving,
    and bundles each mate pair into an uncompressed tar in the last
    project directory.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='MVX')
    knead_human_genome_db = conf.get('databases').get('knead_dna')

    # Parse the manifest file containing all data files from this submission
    manifest = parse_cfg_file(args.manifest_file)
    project = manifest.get('project')
    data_files = manifest.get('submitted_files')
    submission_date = manifest.get('submission_date')

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no MVX input was listed; here it only runs when there is
    # work -- confirm.
    if not (data_files and data_files.get('MVX', {}).get('input')):
        return

    mvx_entry = data_files.get('MVX')
    input_files = mvx_entry.get('input')
    input_file_ext = mvx_entry.get('input_file_extension')
    pair_identifier = mvx_entry.get('pair_identifier')

    project_dirs = create_project_dirs(
        [conf.get('deposition_dir'),
         conf.get('processing_dir'),
         conf.get('public_dir')],
        project, submission_date, 'MVX')
    deposition_dir = project_dirs[0]
    processing_dir = project_dirs[1]
    public_dir = project_dirs[-1]

    deposited_files = stage_files(workflow, input_files, deposition_dir,
                                  symlink=True)

    mvx_qc_output = shotgun.quality_control(
        workflow,
        input_files,
        processing_dir,
        args.threads,
        [knead_human_genome_db],
        pair_identifier=pair_identifier,
        remove_intermediate_output=True)

    paired_fastq_files = deinterleave_fastq(workflow, input_files,
                                            processing_dir)
    forward_reads = paired_fastq_files[0]
    reverse_reads = paired_fastq_files[1]

    # One uncompressed tar per sample, holding both mates.
    paired_fastq_tars = []
    for (mate_1, mate_2) in zip(forward_reads, reverse_reads):
        sample_name = sample_names(mate_1, input_file_ext,
                                   pair_identifier=pair_identifier)
        tar_path = os.path.join(public_dir, "%s.tar" % sample_name)
        paired_fastq_tars.append(
            tar_files(workflow,
                      [mate_1, mate_2],
                      tar_path,
                      depends=[mate_1, mate_2],
                      compress=False))

    workflow.go()
def main(workflow):
    """Assemble and launch the HG workflow.

    Reads the ``HG`` config section and the submission manifest, stages
    the submitted files, converts them to paired FASTQ files, bundles each
    mate pair into a tar in the last project directory, and queues MD5
    checksum generation for the tars.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='HG')

    # Parse the manifest file containing all data files from this submission
    manifest = parse_cfg_file(args.manifest_file)
    project = manifest.get('project')
    data_files = manifest.get('submitted_files')
    submission_date = manifest.get('submission_date')

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no HG input was listed; here it only runs when there is
    # work -- confirm.
    if not (data_files and data_files.get('HG', {}).get('input')):
        return

    input_files = data_files.get('HG').get('input')

    project_dirs = create_project_dirs(
        [conf.get('deposition_dir'),
         conf.get('processing_dir'),
         conf.get('public_dir')],
        project, submission_date, 'HG')

    deposited_files = stage_files(workflow, input_files, project_dirs[0],
                                  symlink=True)

    fastq_files = bam_to_fastq(workflow,
                               input_files,
                               project_dirs[1],
                               paired_end=True,
                               threads=args.threads,
                               compress=False)
    paired_fastq_files = paired_files(fastq_files, '_R1')

    paired_fastq_tars = []
    for (mate_1, mate_2) in zip(paired_fastq_files[0], paired_fastq_files[1]):
        # The sample name is derived from the first mate of the pair (the
        # original referenced an undefined `mate_4` here).
        sample_name = sample_names(mate_1, pair_identifier="_R1")
        tar_path = os.path.join(project_dirs[-1], "%s.tar" % sample_name)
        paired_fastq_tar = tar_files(workflow,
                                     [mate_1, mate_2],
                                     tar_path,
                                     depends=[mate_1, mate_2])
        paired_fastq_tars.append(paired_fastq_tar)

    md5sum_files = generate_md5_checksums(workflow, paired_fastq_tars)

    workflow.go()
def main(workflow):
    """Assemble and launch the 16S workflow.

    Reads the ``16S`` config section and the submission manifest, stages
    the manifest and sequence files, then either takes the CMMR branch
    (OTU table plus centroid FASTA supplied) or demultiplexes/merges the
    submitted reads, before queuing quality control and, when no OTU
    table was supplied, taxonomic and functional profiling.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.

    Raises:
        ValueError: If an OTU table and centroid FASTA are supplied but
            the number of staged sequence files is not exactly one.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='16S')

    manifest = parse_cfg_file(args.manifest_file)
    data_files = manifest.get('submitted_files')
    project = manifest.get('project')
    creation_date = manifest.get('submission_date')

    gg_tax = conf.get('databases').get('gg_taxonomy')
    gg_usearch = conf.get('databases').get('gg_usearch')
    gg_fasta = conf.get('databases').get('gg_fasta')

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no 16S input was listed; here it only runs when there is
    # work -- confirm.
    if not (data_files and data_files.get('16S', {}).get('input')):
        return

    input_files = data_files.get('16S').get('input')
    input_extension = data_files.get('16S').get('file_extension')
    barcode_file = data_files.get('16S').get('barcode_file')
    pair_identifier = data_files.get('16S').get('pair_identifier')
    index_identifier = data_files.get('16S').get('index_identifier')

    # Default to no index files so demultiplexing below never sees an
    # unbound name when a barcode file is given without an index identifier.
    index_files = []
    if index_identifier:
        index_files = [in_file for in_file in input_files
                       if index_identifier in in_file]
        input_files = set(input_files) - set(index_files)

    project_dirs = create_project_dirs([conf.get('deposition_dir'),
                                        conf.get('processing_dir'),
                                        conf.get('public_dir')],
                                       project,
                                       creation_date,
                                       '16S')

    base_depo_dir = os.path.abspath(os.path.join(project_dirs[0], '..'))
    manifest_file = stage_files(workflow, [args.manifest_file], base_depo_dir)
    sequence_files = stage_files(workflow, input_files, project_dirs[0],
                                 symlink=True)

    # An entry point into this pipeline is any analysis conducted by
    # Baylor/CMMR, which requires a slight branching of the pipeline.
    #
    # We are making a very fraught assumption here: if exactly one sequence
    # file is passed in alongside a centroid FASTA file, we are dealing
    # with files generated by CMMR.
    otu_table = data_files.get('16S').get('otu_table')
    centroid_fasta = data_files.get('16S').get('centroid_fasta')

    if otu_table and centroid_fasta:
        # The original tested `< 1` and then indexed element 0, which can
        # only raise; the comment above describes the single-file case.
        if len(sequence_files) != 1:
            raise ValueError(
                "Expected exactly one sequence file alongside the OTU "
                "table and centroid FASTA, got %d" % len(sequence_files))

        merged_fastq = sequence_files[0]
        # NOTE(review): the corrected OTU table is not consumed by any
        # later step visible here -- confirm intended use.
        fixed_otu_table = fix_CMMR_OTU_table_taxonomy_labels(
            workflow, otu_table, project_dirs[1])
    else:
        if barcode_file:
            sequence_files = demultiplex(workflow,
                                         input_files,
                                         project_dirs[1],
                                         barcode_file,
                                         index_files,
                                         conf.get('min_pred_qc_score'),
                                         pair_identifier)

        merged_fastq = merge_samples_and_rename(workflow,
                                                sequence_files,
                                                input_extension,
                                                project_dirs[1],
                                                pair_identifier,
                                                args.threads)

    qc_fasta_outs = quality_control(workflow,
                                    merged_fastq,
                                    project_dirs[1],
                                    args.threads,
                                    conf.get('maxee'),
                                    conf.get('min_trunc_len_max'))

    if not otu_table:
        closed_ref_tsv = taxonomic_profile(workflow,
                                           qc_fasta_outs[0],
                                           qc_fasta_outs[1],
                                           qc_fasta_outs[2],
                                           project_dirs[1],
                                           args.threads,
                                           conf.get('percent_identity'),
                                           gg_usearch,
                                           gg_fasta,
                                           gg_tax,
                                           conf.get('min_size'))

        # NOTE(review): the flattened source leaves it ambiguous whether
        # functional profiling was nested under this check; it is nested
        # here because `closed_ref_tsv` only exists on this path.
        predict_metagenomes_tsv = functional_profile(workflow,
                                                     closed_ref_tsv,
                                                     project_dirs[1])

    workflow.go()
def main(workflow):
    """Assemble and launch the proteomics workflow.

    Reads the ``proteomics`` config section and the submission manifest,
    verifies the submitted files against the supplied checksums, creates
    date-stamped deposition/processing/public folders, stages files
    through them, optionally attaches dataset-specific metadata to the
    output files, and makes the staged files web-visible.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='proteomics')

    # Parse the manifest file containing all data files from this submission
    manifest = parse_cfg_file(args.manifest_file)
    project = manifest.get('project')
    data_files = manifest.get('submitted_files')

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no proteomics files were listed; here it only runs when
    # there is work -- confirm.
    if not (data_files and data_files.get('proteomics')):
        return

    # NOTE(review): this unpack relies on the manifest entry holding
    # exactly two values, in input-then-output order -- confirm.
    (input_files, output_files) = data_files.get('proteomics').values()

    # Step 1 - Verify MD5sums of all input data provided to IBDMDB.
    #
    # The proteomics files come from the PNNL, so they are not in the same
    # location as the Broad files and their MD5s are supplied manually.
    validated_files = verify_files(workflow, input_files, args.checksums_file)

    # Set up the date-stamped directories that files are deposited into.
    date_stamp = datetime.date.today().isoformat()

    base_deposition_dir = os.path.join(conf.get('deposition_dir'),
                                       project, date_stamp)
    deposition_dir = os.path.join(base_deposition_dir, 'proteomics')
    create_folders(deposition_dir)

    processing_dir = os.path.join(conf.get('processing_dir'),
                                  project, date_stamp, 'proteomics')
    create_folders(processing_dir)

    public_dir = os.path.join(conf.get('public_dir'),
                              project, date_stamp, 'proteomics')
    create_folders(public_dir)

    # Copy the manifest over so the deposition directory records what was
    # in this batch of data.
    manifest_file = stage_files(workflow, [args.manifest_file],
                                base_deposition_dir)

    # Step 2 - Move files over to the deposition directory.
    deposited_files = stage_files(workflow, validated_files, deposition_dir)

    # Step 3 - Stage files to the processing directory.
    #
    # Symlinking from the deposition folder is fine for proteomics data:
    # the files are not actually processed, but they need to be in place
    # here to show up on the website.
    files_to_process = stage_files(workflow, deposited_files, processing_dir,
                                   symlink=True)

    if not output_files:
        output_files = []

    # A dataset-specific metadata file, when given, is incorporated into
    # the analysis output.
    if output_files and args.data_specific_metadata:
        output_files = add_metadata_to_tsv(
            workflow,
            output_files,
            args.data_specific_metadata,
            conf.get('metadata_id_col'),
            conf.get('target_metadata_cols', []))

    # Step 4 - Stage output files to the public folder.
    public_files = stage_files(workflow, output_files, public_dir)

    # TODO: Generate metadata files for the output files included with
    #       this dataset. Need to talk to George about getting the
    #       ID-mapped version of these files since they will be needed.

    # Step 5 - Make files web-visible by creating the complete.html file
    #          in each of the output directories.
    make_files_web_visible(workflow, [files_to_process, public_files])

    # Step 6 - Once all files are staged, the raw files and the MANIFEST
    #          can be deleted from their original directory (no task for
    #          this is queued in this function).
    workflow.go()
def main(workflow):
    """Assemble and launch the metatranscriptomics (MTX) workflow.

    Reads the ``MTX`` and ``MGX`` config sections and the submission
    manifest. MTX reads are staged and quality-controlled; if matching
    metagenomic (MGX) sequences are supplied they are quality-controlled
    and profiled as well, and MGX taxonomic profiles (supplied directly or
    produced here) are matched to MTX samples for functional profiling.
    Remaining MTX samples get their own taxonomic and functional profile.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf_mtx = parse_cfg_file(args.config_file, section='MTX')
    conf_mgx = parse_cfg_file(args.config_file, section='MGX')
    manifest = parse_cfg_file(args.manifest_file)

    data_files = manifest.get('submitted_files')
    project = manifest.get('project')
    creation_date = manifest.get('submission_date')
    adapters_file = manifest.get('adapters_file')

    contaminate_db = conf_mtx.get('databases').get('knead_dna')
    mtx_db = conf_mtx.get('databases').get('knead_mtx')
    rrna_db = conf_mtx.get('databases').get('knead_rrna')

    # Tool-specific thread counts fall back to the general setting.
    qc_threads = args.threads_kneaddata or args.threads
    tax_threads = args.threads_metaphlan or args.threads
    func_threads = args.threads_humann or args.threads

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no MTX input was listed; here it only runs when there is
    # work -- confirm.
    if not (data_files and data_files.get('MTX', {}).get('input')):
        return

    input_files_mtx = data_files.get('MTX').get('input')
    file_extension_mtx = data_files.get('MTX').get('input_extension', '.fastq')
    pair_identifier_mtx = data_files.get('MTX').get('pair_identifier')
    input_file_tags = data_files.get('MTX').get('tags')
    input_tax_profiles = []

    project_dirs_mtx = create_project_dirs([conf_mtx.get('deposition_dir'),
                                            conf_mtx.get('processing_dir'),
                                            conf_mtx.get('public_dir')],
                                           project,
                                           creation_date,
                                           'MTX')
    public_dir_mtx = project_dirs_mtx[-1]
    base_depo_dir = os.path.abspath(os.path.join(project_dirs_mtx[0], '..'))

    manifest_file = stage_files(workflow, [args.manifest_file], base_depo_dir)
    deposited_files_mtx = stage_files(workflow, input_files_mtx,
                                      project_dirs_mtx[0], symlink=True)

    if file_extension_mtx == ".bam":
        # Need to sort our BAM files to be sure here...
        paired_end_seqs = bam_to_fastq(workflow,
                                       deposited_files_mtx,
                                       project_dirs_mtx[1],
                                       paired_end=True,
                                       compress=False,
                                       threads=args.threads)
        pair_identifier_mtx = "_R1"
    else:
        paired_end_seqs = deposited_files_mtx

    # Adapter trimming options are only passed when the manifest names an
    # adapters file; previously the option variable was unbound otherwise.
    qc_options = dict(databases=[contaminate_db, rrna_db, mtx_db],
                      pair_identifier=pair_identifier_mtx,
                      remove_intermediate_output=True)
    if adapters_file:
        qc_options['additional_options'] = (
            " --trimmomatic-options \"ILLUMINACLIP:%s:2:30:10:8:TRUE "
            "SLIDINGWINDOW:4:20 MINLEN:50\"" % adapters_file)

    (cleaned_fastqs_mtx, read_counts_mtx) = quality_control(
        workflow,
        paired_end_seqs,
        file_extension_mtx,
        project_dirs_mtx[1],
        qc_threads,
        **qc_options)

    sample_names_mtx = sample_names(cleaned_fastqs_mtx, file_extension_mtx)

    ##########################################
    #          MGX FILE PROCESSING           #
    ##########################################
    # Ideally we would be passed a set of corresponding metagenome
    # sequence(s) to go with our metatranscriptomic files, but there are
    # two other scenarios:
    #
    #   1.) No accompanying metagenomic sequences exist; in this case we
    #       proceed using just the metatranscriptomic data.
    #   2.) Taxonomic profiles are passed directly in the MANIFEST file;
    #       these are removed from the input files so they do not run
    #       through the kneaddata -> metaphlan2 portion of the pipeline.
    func_outs_mgx = None  # stays None when no MGX sequences are processed

    if data_files.get('MGX', {}).get('input'):
        input_files_mgx = data_files.get('MGX').get('input')
        file_extension_mgx = data_files.get('MGX').get('file_ext')
        pair_identifier_mgx = data_files.get('MGX').get('pair_identifier')

        input_tax_profiles = [in_file for in_file in input_files_mgx
                              if 'taxonomic_profile.tsv' in in_file]
        input_files_mgx = set(input_files_mgx) - set(input_tax_profiles)

        if input_files_mgx:
            # The original passed the file extension a second time in the
            # pair-identifier position.
            sample_names_mgx = sample_names(
                input_files_mgx,
                file_extension_mgx,
                pair_identifier=pair_identifier_mgx)

            project_dirs_mgx = create_project_dirs(
                [conf_mgx.get('deposition_dir'),
                 conf_mgx.get('processing_dir'),
                 conf_mgx.get('public_dir')],
                project,
                creation_date,
                'WGS')
            public_dir_mgx = project_dirs_mgx[-1]

            deposited_files_mgx = stage_files(workflow, input_files_mgx,
                                              project_dirs_mgx[0],
                                              symlink=True)

            if file_extension_mgx == ".bam":
                # Need to sort our BAM files to be sure here...
                # (The original bound this result to the MTX variable,
                # leaving the MGX one undefined.)
                paired_end_seqs_mgx = bam_to_fastq(workflow,
                                                   deposited_files_mgx,
                                                   project_dirs_mgx[1],
                                                   paired_end=True,
                                                   compress=False,
                                                   threads=args.threads)
                pair_identifier_mgx = "_R1"
            else:
                paired_end_seqs_mgx = paired_files(deposited_files_mgx,
                                                   pair_identifier_mgx)

            # NOTE(review): this call and the MGX functional_profile call
            # below omit the file-extension argument that the MTX calls
            # pass -- confirm against the helper signatures.
            (cleaned_fastqs_mgx, read_counts_mgx) = quality_control(
                workflow,
                paired_end_seqs_mgx,
                project_dirs_mgx[1],
                qc_threads,
                [contaminate_db, rrna_db],
                remove_intermediate_output=True)

            tax_outs_mgx = taxonomic_profile(workflow,
                                             cleaned_fastqs_mgx,
                                             project_dirs_mgx[1],
                                             tax_threads,
                                             '*.fastq')

            func_outs_mgx = functional_profile(
                workflow,
                cleaned_fastqs_mgx,
                project_dirs_mgx[1],
                func_threads,
                tax_outs_mgx[1],
                remove_intermediate_output=True)

            input_tax_profiles.extend(tax_outs_mgx[1])

            pub_wgs_raw_dir = os.path.join(public_dir_mgx, 'raw')
            pub_wgs_tax_profile_dir = os.path.join(public_dir_mgx,
                                                   'tax_profile')
            pub_wgs_func_profile_dir = os.path.join(public_dir_mgx,
                                                    'func_profile')
            # Explicit loop: map() is lazy on Python 3 and would never
            # call create_folders.
            for pub_dir in (pub_wgs_raw_dir,
                            pub_wgs_tax_profile_dir,
                            pub_wgs_func_profile_dir):
                create_folders(pub_dir)

            # These previously received the `sample_names` helper itself
            # instead of the MGX sample-name list.
            norm_genefamilies_mgx = name_files(sample_names_mgx,
                                               project_dirs_mgx[1],
                                               subfolder='genes',
                                               tag='genefamilies_relab',
                                               extension='tsv')
            norm_ecs_files_mgx = name_files(sample_names_mgx,
                                            project_dirs_mgx[1],
                                            subfolder='ecs',
                                            tag='genefamilies_ecs_relab',
                                            extension='tsv')
            norm_path_files_mgx = name_files(sample_names_mgx,
                                             project_dirs_mgx[1],
                                             subfolder='pathways',
                                             tag='pathabundance_relab',
                                             extension='tsv')

            # NOTE(review): no metadata file is passed in this call --
            # confirm against the add_metadata_to_tsv signature.
            pcl_files = add_metadata_to_tsv(
                workflow,
                [tax_outs_mgx[1]] + func_outs_mgx,
                'metagenomics',
                conf_mgx.get('metadata_id_col'),
                conf_mgx.get('analysis_col_patterns'),
                conf_mgx.get('target_metadata_cols'))

            func_tar_files_wgs = []
            for (sample, gene_file, ecs_file, path_file) in zip(
                    sample_names_mgx,
                    norm_genefamilies_mgx,
                    norm_ecs_files_mgx,
                    norm_path_files_mgx):
                tar_path = os.path.join(pub_wgs_func_profile_dir,
                                        "%s_humann2.tgz" % sample)
                func_tar_file = tar_files(workflow,
                                          [gene_file, ecs_file, path_file],
                                          tar_path,
                                          depends=func_outs_mgx)
                func_tar_files_wgs.append(func_tar_file)

    ##########################################
    #          MTX FILE PROCESSING           #
    ##########################################
    # Try to match cleaned MTX files to corresponding MGX taxonomic
    # profiles. Matched files are functionally profiled with the MGX
    # taxonomic profile; the rest are taxonomically profiled themselves
    # and functionally profiled with the profile that produces.
    func_outs_match_mtx = []
    if input_tax_profiles:
        (matched_fqs, matched_tax_profiles) = match_tax_profiles(
            cleaned_fastqs_mtx,
            '.fastq',
            data_files.get('MTX').get('metadata_id_col', 'External ID'),
            input_tax_profiles,
            data_files.get('MGX').get('tax_profile_id', 'External ID'),
            args.metadata_file,
            tags=input_file_tags)

        func_outs_match_mtx = functional_profile(
            workflow,
            matched_fqs,
            project_dirs_mtx[1],
            func_threads,
            matched_tax_profiles,
            remove_intermediate_output=True)

        # Only the unmatched MTX files continue through the
        # metaphlan2 -> humann2 pipeline below.
        cleaned_fastqs_mtx = set(cleaned_fastqs_mtx) - set(matched_fqs)

    if cleaned_fastqs_mtx:
        tax_outs_mtx = taxonomic_profile(workflow,
                                         cleaned_fastqs_mtx,
                                         project_dirs_mtx[1],
                                         tax_threads,
                                         '*.fastq')
        func_outs_mtx = functional_profile(
            workflow,
            cleaned_fastqs_mtx,
            file_extension_mtx,
            project_dirs_mtx[1],
            func_threads,
            tax_outs_mtx[1],
            remove_intermediate_output=True)
        # list.extend() returns None, so build the combined list directly.
        func_outs_mtx = list(func_outs_mtx) + list(func_outs_match_mtx)
    else:
        func_outs_mtx = func_outs_match_mtx

    # DNA/RNA normalized files for the visualization output need MGX
    # functional profiles, so skip them when no MGX data was processed.
    if func_outs_mgx is not None:
        (norm_gene_ratio, norm_ecs_ratio, norm_path_ratio) = norm_ratio(
            workflow,
            func_outs_mgx[0],
            func_outs_mgx[1],
            func_outs_mgx[2],
            func_outs_mtx[0],
            func_outs_mtx[1],
            func_outs_mtx[2],
            project_dirs_mtx[1])

    pub_mtx_raw_dir = os.path.join(public_dir_mtx, 'raw')
    pub_mtx_tax_profile_dir = os.path.join(public_dir_mtx, 'tax_profile')
    pub_mtx_func_profile_dir = os.path.join(public_dir_mtx, 'func_profile')
    for pub_dir in (pub_mtx_raw_dir,
                    pub_mtx_tax_profile_dir,
                    pub_mtx_func_profile_dir):
        create_folders(pub_dir)

    norm_genefamilies_mtx = name_files(sample_names_mtx,
                                       project_dirs_mtx[1],
                                       subfolder='genes',
                                       tag='genefamilies_relab',
                                       extension='tsv')
    norm_ecs_files_mtx = name_files(sample_names_mtx,
                                    project_dirs_mtx[1],
                                    subfolder='ecs',
                                    tag='genefamilies_ecs_relab',
                                    extension='tsv')
    norm_path_files_mtx = name_files(sample_names_mtx,
                                     project_dirs_mtx[1],
                                     subfolder='pathways',
                                     tag='pathabundance_relab',
                                     extension='tsv')

    func_tar_files_mtx = []
    for (sample, gene_file, ecs_file, path_file) in zip(
            sample_names_mtx,
            norm_genefamilies_mtx,
            norm_ecs_files_mtx,
            norm_path_files_mtx):
        tar_path = os.path.join(pub_mtx_func_profile_dir,
                                "%s_humann2.tgz" % sample)
        func_tar_file = tar_files(workflow,
                                  [gene_file, ecs_file, path_file],
                                  tar_path,
                                  depends=func_outs_mtx)
        func_tar_files_mtx.append(func_tar_file)

    workflow.go()
def main(workflow):
    """Assemble and launch the metagenomics (MGX) workflow.

    Reads the ``MGX`` config section and the submission manifest, stages
    the manifest and input files, queues quality control, taxonomic and
    functional profiling, converts taxonomic profiles to BIOM, attaches
    metadata to the merged profiles, stages results into the public
    ``raw``/``tax_profile``/``func_profile`` folders, and bundles the
    per-sample normalized functional files into tarballs.

    Args:
        workflow: Workflow object; supplies ``parse_args()`` and ``go()``
            and is passed to every task-building helper.
    """
    args = workflow.parse_args()
    conf = parse_cfg_file(args.config_file, section='MGX')
    manifest = parse_cfg_file(args.manifest_file)

    data_files = manifest.get('submitted_files')
    project = manifest.get('project')
    creation_date = manifest.get('submission_date')
    contaminate_db = conf.get('databases').get('knead_dna')

    # NOTE(review): the flattened original does not show whether go() also
    # ran when no MGX files were listed; here it only runs when there is
    # work -- confirm.
    if not (data_files and data_files.get('MGX')):
        return

    input_files = data_files.get('MGX').get('input')
    pair_identifier = data_files.get('MGX').get('pair_identifier')
    file_extension = data_files.get('MGX', {}).get('input_extension',
                                                   '.fastq')
    sample_names = get_sample_names(input_files, file_extension)

    project_dirs = create_project_dirs([conf.get('deposition_dir'),
                                        conf.get('processing_dir'),
                                        conf.get('public_dir')],
                                       project,
                                       creation_date,
                                       'WGS')
    (deposition_dir, processing_dir, public_dir) = project_dirs
    base_depo_dir = os.path.abspath(os.path.join(deposition_dir, '..'))

    manifest_file = stage_files(workflow, [args.manifest_file], base_depo_dir)
    deposited_files = stage_files(workflow, input_files, deposition_dir,
                                  symlink=True)

    if file_extension == ".bam":
        # Need to sort our BAM files to be sure here...
        paired_end_seqs = bam_to_fastq(workflow,
                                       deposited_files,
                                       processing_dir,
                                       paired_end=True,
                                       compress=False,
                                       threads=args.threads)
        pair_identifier = "_R1"
    else:
        paired_end_seqs = input_files

    # Tool-specific thread counts fall back to the general setting.
    qc_threads = args.threads_kneaddata or args.threads
    (cleaned_fastqs, read_counts) = quality_control(
        workflow,
        paired_end_seqs,
        '.fastq',
        processing_dir,
        qc_threads,
        contaminate_db,
        pair_identifier=pair_identifier,
        remove_intermediate_output=True)

    # Generate taxonomic profile output. Outputs are stored in a list and
    # are the following:
    #
    #   * Merged taxonomic profile
    #   * Individual taxonomic files
    #   * metaphlan2 SAM files
    tax_threads = args.threads_metaphlan or args.threads
    tax_profile_outputs = taxonomic_profile(workflow,
                                            cleaned_fastqs,
                                            processing_dir,
                                            tax_threads,
                                            '.fastq')

    # Generate functional profile output using humann2. Outputs are the
    # following:
    #
    #   * Merged normalized genefamilies
    #   * Merged normalized ecs
    #   * Merged normalized pathways
    #   * Merged genefamilies
    #   * Merged ecs
    #   * Merged pathways
    func_threads = args.threads_humann or args.threads
    func_profile_outputs = functional_profile(
        workflow,
        cleaned_fastqs,
        '.fastq',
        processing_dir,
        func_threads,
        tax_profile_outputs[1],
        remove_intermediate_output=True)

    # The current biobakery workflows do not generate KO's from our
    # genefamilies, so we want to do that ourselves. The KO generation
    # call is currently disabled, so the per-sample path lists below are
    # not consumed yet.
    humann2_dir = os.path.join(processing_dir, 'humann2')
    # Gene families are looked up under the humann2 folder like the other
    # functional outputs (the original pointed at 'metaphlan2').
    genefamilies = name_files(sample_names,
                              humann2_dir,
                              subfolder='main',
                              tag='genefamilies',
                              extension='tsv')
    pathways = name_files(sample_names,
                          os.path.join(processing_dir, 'humann2'),
                          subfolder='main',
                          tag='pathabundance',
                          extension='tsv')
    ecs = name_files(sample_names,
                     os.path.join(processing_dir, 'humann2'),
                     subfolder='regrouped',
                     tag='ecs',
                     extension='tsv')
    kos = name_files(sample_names,
                     os.path.join(processing_dir, 'humann2'),
                     subfolder='regrouped',
                     tag='kos',
                     extension='tsv')

    #(merged_norm_kos, merged_kos) = generate_ko_files(workflow,
    #                                                  genefamilies,
    #                                                  processing_dir)

    biom_files = batch_convert_tsv_to_biom(workflow, tax_profile_outputs[1])
    tax_biom_files = stage_files(workflow, biom_files, processing_dir)

    kneaddata_log_files = name_files(sample_names,
                                     os.path.join(processing_dir, 'kneaddata'),
                                     subfolder='main',
                                     extension='log')

    pub_raw_dir = os.path.join(public_dir, 'raw')
    pub_tax_profile_dir = os.path.join(public_dir, 'tax_profile')
    pub_func_profile_dir = os.path.join(public_dir, 'func_profile')
    # Explicit loop: map() is lazy on Python 3 and would never call
    # create_folders.
    for pub_dir in (pub_raw_dir, pub_tax_profile_dir, pub_func_profile_dir):
        create_folders(pub_dir)

    knead_read_counts = os.path.join(processing_dir,
                                     'counts',
                                     'merged',
                                     'kneaddata_read_count_table.tsv')

    tax_profile_pcl = add_metadata_to_tsv(
        workflow,
        [tax_profile_outputs[0]],
        args.metadata_file,
        'metagenomics',
        id_col=conf.get('metadata_id_col'),
        col_replace=conf.get('analysis_col_patterns'),
        target_cols=conf.get('target_metadata_cols'),
        aux_files=[knead_read_counts])
    func_profile_pcl = add_metadata_to_tsv(
        workflow,
        [func_profile_outputs[0]],
        args.metadata_file,
        'metagenomics',
        id_col=conf.get('metadata_id_col'),
        col_replace=conf.get('analysis_col_patterns'),
        target_cols=conf.get('target_metadata_cols'),
        aux_files=[knead_read_counts])

    # (files, destination) pairs staged into the public folders, in order.
    staging_plan = [
        (cleaned_fastqs, pub_raw_dir),
        ([tax_profile_outputs[0]], pub_tax_profile_dir),
        (tax_profile_outputs[1], pub_tax_profile_dir),
        (tax_biom_files, pub_tax_profile_dir),
        (tax_profile_pcl, pub_tax_profile_dir),
        (func_profile_outputs, pub_func_profile_dir),
        (func_profile_pcl, pub_func_profile_dir),
        (kneaddata_log_files, pub_raw_dir),
    ]
    pub_files = [stage_files(workflow, files, target_dir)
                 for (files, target_dir) in staging_plan]

    relab_dir = os.path.join(processing_dir, 'humann2', 'relab')
    norm_genefamilies = name_files(sample_names,
                                   relab_dir,
                                   subfolder='genes',
                                   tag='genefamilies_relab',
                                   extension='tsv')
    norm_ecs_files = name_files(sample_names,
                                relab_dir,
                                subfolder='ecs',
                                tag='ecs_relab',
                                extension='tsv')
    norm_path_files = name_files(sample_names,
                                 relab_dir,
                                 subfolder='pathways',
                                 tag='pathabundance_relab',
                                 extension='tsv')
    norm_kos_files = name_files(sample_names,
                                relab_dir,
                                subfolder='kos_relab',
                                extension='tsv')

    # One tarball per sample with its normalized gene/EC/pathway files.
    func_tar_files = []
    for (sample, gene_file, ecs_file, path_file) in zip(sample_names,
                                                        norm_genefamilies,
                                                        norm_ecs_files,
                                                        norm_path_files):
        tar_path = os.path.join(pub_func_profile_dir,
                                "%s_humann2.tgz" % sample)
        func_tar_file = tar_files(workflow,
                                  [gene_file, ecs_file, path_file],
                                  tar_path,
                                  depends=func_profile_outputs)
        func_tar_files.append(func_tar_file)

    workflow.go()