def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
    self.init_workflow_super_class(args, workflow_name='pangenomics')

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.rules.extend(['anvi_gen_genomes_storage', 'anvi_pan_genome'])

    self.general_params.extend(["project_name", "fasta_txt", "internal_genomes", "external_genomes"])

    self.dirs_dict.update({"FASTA_DIR": "01_FASTA",
                           "CONTIGS_DIR": "02_CONTIGS",
                           "PAN_DIR": "03_PAN"})

    self.default_config.update({"fasta_txt": "fasta.txt",
                                "anvi_pan_genome": {"threads": 7}})

    pan_params = ["--project-name", "--genome-names", "--skip-alignments",
                  "--align-with", "--exclude-partial-gene-calls", "--use-ncbi-blast",
                  "--minbit", "--mcl-inflation", "--min-occurrence",
                  "--min-percent-identity", "--sensitive", "--description",
                  "--overwrite-output-destinations", "--skip-hierarchical-clustering",
                  "--enforce-hierarchical-clustering", "--distance", "--linkage"]
    self.rule_acceptable_params_dict['anvi_pan_genome'] = pan_params

    storage_params = ["--gene-caller"]
    self.rule_acceptable_params_dict['anvi_gen_genomes_storage'] = storage_params
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    self.args = args
    self.run = run
    self.progress = progress

    # know thyself.
    self.name = 'pangenomics'

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.rules.extend(['gen_external_genome_file', 'anvi_gen_genomes_storage', 'anvi_pan_genome'])

    self.general_params.extend(["project_name", "fasta_txt"])

    self.dirs_dict.update({"FASTA_DIR": "01_FASTA",
                           "CONTIGS_DIR": "02_CONTIGS",
                           "PAN_DIR": "03_PAN"})

    self.default_config.update({"fasta_txt": "fasta.txt",
                                "anvi_pan_genome": {"threads": 20}})

    pan_params = ["--project-name", "--genome-names", "--skip-alignments",
                  "--align-with", "--exclude-partial-gene-calls", "--use-ncbi-blast",
                  "--minbit", "--mcl-inflation", "--min-occurrence",
                  "--min-percent-identity", "--sensitive", "--description",
                  "--overwrite-output-destinations", "--skip-hierarchical-clustering",
                  "--enforce-hierarchical-clustering", "--distance", "--linkage"]
    self.rule_acceptable_params_dict['anvi_pan_genome'] = pan_params

    storage_params = ["--internal-genomes", "--external-genomes", "--gene-caller"]
    self.rule_acceptable_params_dict['anvi_gen_genomes_storage'] = storage_params
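# A minimal sketch (not anvi'o source) of a workflow config that stays within the
# parameters declared above: the general params plus the 'anvi_pan_genome' and
# 'anvi_gen_genomes_storage' entries of rule_acceptable_params_dict. Values not taken
# from default_config, and the output file name, are illustrative assumptions.
import json

example_pan_config = {
    "project_name": "MY_PAN_PROJECT",        # general param declared above; name is illustrative
    "fasta_txt": "fasta.txt",                # matches the default_config entry
    "anvi_pan_genome": {
        "threads": 7,                        # matches the default_config entry
        "--min-occurrence": 2,               # listed in pan_params; value is illustrative
        "--mcl-inflation": 10,               # listed in pan_params; value is illustrative
    },
    "anvi_gen_genomes_storage": {
        "--gene-caller": "prodigal",         # listed in storage_params; value is illustrative
    },
}

if __name__ == '__main__':
    with open('pan-config.json', 'w') as f:  # hypothetical output path
        json.dump(example_pan_config, f, indent=4)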
def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
    self.init_workflow_super_class(args, workflow_name='phylogenomics')

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.input_for_anvi_get_sequences_for_hmm_hits = {}
    self.internal_genomes_file = ''
    self.external_genomes_file = ''

    self.rules.extend(['anvi_get_sequences_for_hmm_hits', 'trimal', 'iqtree'])

    self.general_params.extend(['project_name', 'internal_genomes', 'external_genomes'])

    self.dirs_dict.update({"PHYLO_DIR": "01_PHYLOGENOMICS"})

    self.default_config.update({'anvi_get_sequences_for_hmm_hits': {'--return-best-hit': True,
                                                                    '--align-with': 'famsa',
                                                                    '--concatenate-genes': True,
                                                                    '--get-aa-sequences': True,
                                                                    '--hmm-sources': 'Campbell_et_al'},
                                'trimal': {'-gt': 0.5},
                                'iqtree': {'threads': 8,
                                           '-m': 'WAG',
                                           '-bb': 1000}})

    get_sequences_params = ['--return-best-hit', '--separator', '--align-with',
                            '--min-num-bins-gene-occurs', '--max-num-genes-missing-from-bin',
                            '--concatenate-genes', '--get-aa-sequences', '--gene-names',
                            '--hmm-sources']
    self.rule_acceptable_params_dict['anvi_get_sequences_for_hmm_hits'] = get_sequences_params
    self.rule_acceptable_params_dict['trimal'] = ['-gt', 'additional_params']
    self.rule_acceptable_params_dict['iqtree'] = ['-m', '-bb', 'additional_params']
def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
    self.init_workflow_super_class(args, workflow_name='phylogenomics')

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.input_for_anvi_get_sequences_for_hmm_hits = {}
    self.internal_genomes_file = ''
    self.external_genomes_file = ''
    self.phylogenomics_sequence_file = None
    self.project_name = None
    self.use_hmms_for_phylogeny = True

    self.rules.extend(['anvi_get_sequences_for_hmm_hits', 'trimal', 'iqtree'])

    self.general_params.extend(['project_name', 'internal_genomes', 'external_genomes'])

    self.dirs_dict.update({"PHYLO_DIR": "01_PHYLOGENOMICS"})

    self.default_config.update({'anvi_get_sequences_for_hmm_hits': {'--return-best-hit': True,
                                                                    '--align-with': 'famsa',
                                                                    '--concatenate-genes': True,
                                                                    '--get-aa-sequences': True,
                                                                    '--hmm-sources': 'Bacteria_71'},
                                'trimal': {'-gt': 0.5},
                                'iqtree': {'threads': 8,
                                           '-m': 'WAG',
                                           '-bb': 1000}})

    get_sequences_params = ['--return-best-hit', '--separator', '--align-with',
                            '--min-num-bins-gene-occurs', '--max-num-genes-missing-from-bin',
                            '--concatenate-genes', '--get-aa-sequences', '--gene-names',
                            '--hmm-sources']
    self.rule_acceptable_params_dict['anvi_get_sequences_for_hmm_hits'] = get_sequences_params
    self.rule_acceptable_params_dict['trimal'] = ['-gt', 'additional_params']
    self.rule_acceptable_params_dict['iqtree'] = ['-m', '-bb', 'additional_params']
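# A minimal sketch (not anvi'o source) of a phylogenomics config assembled from the
# defaults and acceptable params declared above. File names and any value not present
# in default_config are illustrative assumptions.
import json

example_phylo_config = {
    "project_name": "MY_PHYLOGENOMICS",                 # general param declared above; name is illustrative
    "external_genomes": "external-genomes.txt",         # general param declared above; path is illustrative
    "anvi_get_sequences_for_hmm_hits": {
        "--return-best-hit": True,
        "--align-with": "famsa",
        "--concatenate-genes": True,
        "--get-aa-sequences": True,
        "--hmm-sources": "Bacteria_71",
        "--gene-names": "Ribosomal_L1,Ribosomal_L2",    # listed in get_sequences_params; gene names are illustrative
    },
    "trimal": {"-gt": 0.5},
    "iqtree": {"threads": 8, "-m": "WAG", "-bb": 1000},
}

if __name__ == '__main__':
    with open('phylogenomics-config.json', 'w') as f:   # hypothetical output path
        json.dump(example_phylo_config, f, indent=4)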
def __init__(self, config):
    ContigsDBWorkflow.__init__(self, config)

    self.rules = ['iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report', 'gzip_fastqs',
                  'fq2fa', 'merge_fastas_for_co_assembly', 'megahit', 'anvi_script_reformat_fasta',
                  'anvi_gen_contigs_database', 'export_gene_calls', 'centrifuge',
                  'anvi_import_taxonomy', 'anvi_run_hmms', 'anvi_run_ncbi_cogs',
                  'bowtie_build', 'bowtie', 'samtools_view', 'anvi_init_bam',
                  'anvi_profile', 'annotate_contigs_database', 'anvi_merge']

    rule_acceptable_params_dict = {}

    # defining the accessible params per rule
    rule_acceptable_params_dict['iu_gen_configs'] = ["--r1-prefix", "--r2-prefix"]
    rule_acceptable_params_dict['iu_filter_quality_minoche'] = ['visualize_quality_curves', 'ignore_deflines',
                                                                'limit_num_pairs', 'print_qual_scores',
                                                                'store_read_fate']
    rule_acceptable_params_dict['gzip_fastqs'] = ["run"]
    rule_acceptable_params_dict['megahit'] = []
    rule_acceptable_params_dict['anvi_script_reformat_fasta'] = []
    rule_acceptable_params_dict['anvi_gen_contigs_database'] = []
    rule_acceptable_params_dict['export_gene_calls'] = []
    rule_acceptable_params_dict['centrifuge'] = []
    rule_acceptable_params_dict['anvi_import_taxonomy'] = []
    rule_acceptable_params_dict['anvi_run_hmms'] = []
    rule_acceptable_params_dict['anvi_run_ncbi_cogs'] = []
    rule_acceptable_params_dict['bowtie_build'] = []
    rule_acceptable_params_dict['bowtie'] = []
    rule_acceptable_params_dict['samtools_view'] = []
    rule_acceptable_params_dict['anvi_init_bam'] = []
    rule_acceptable_params_dict['anvi_profile'] = []
    rule_acceptable_params_dict['annotate_contigs_database'] = []
    rule_acceptable_params_dict['anvi_merge'] = []

    self.rule_acceptable_params_dict = rule_acceptable_params_dict
def __init__(self, config):
    ContigsDBWorkflow.__init__(self, config)

    self.rules.extend(['gen_external_genome_file', 'anvi_gen_genomes_storage', 'anvi_pan_genome'])

    self.general_params.extend(["project_name", "samples_txt"])

    pan_params = ["--project-name", "--output-dir", "--genome-names", "--skip-alignments",
                  "--align-with", "--exclude-partial-gene-calls", "--use-ncbi-blast",
                  "--minbit", "--mcl-inflation", "--min-occurrence",
                  "--min-percent-identity", "--sensitive", "--description",
                  "--overwrite-output-destinations", "--skip-hierarchical-clustering",
                  "--enforce-hierarchical-clustering", "--distance", "--linkage"]
    self.rule_acceptable_params_dict['anvi_pan_genome'] = pan_params

    storage_params = ["--internal-genomes", "--external-genomes", "--gene-caller"]
    self.rule_acceptable_params_dict['anvi_gen_genomes_storage'] = storage_params
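# A standalone sketch (not anvi'o code) of how rule_acceptable_params_dict can be used
# to screen a user config: any key under a rule that is neither an accepted parameter
# nor a generic key such as 'threads' gets flagged. The helper name and the set of
# generic keys are assumptions made for illustration only.
def find_unacceptable_params(config, rule_acceptable_params_dict, generic_keys=('threads',)):
    """Return {rule: [param, ...]} for config params not declared as acceptable."""
    problems = {}
    for rule, params in config.items():
        if rule not in rule_acceptable_params_dict or not isinstance(params, dict):
            continue
        accepted = set(rule_acceptable_params_dict[rule]) | set(generic_keys)
        bad = [p for p in params if p not in accepted]
        if bad:
            problems[rule] = bad
    return problems


# '--output-dir' is acceptable for 'anvi_pan_genome' in the version above, while a
# made-up flag would be reported.
assert find_unacceptable_params(
    {"anvi_pan_genome": {"threads": 7, "--not-a-real-flag": True}},
    {"anvi_pan_genome": ["--project-name", "--output-dir"]}) == {"anvi_pan_genome": ["--not-a-real-flag"]}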
def __init__(self, args, run=terminal.Run(), progress=terminal.Progress()):
    self.args = args
    self.run = run
    self.progress = progress

    # know thyself.
    self.name = 'metagenomics'

    self.samples_information = {}
    self.kraken_annotation_dict = {}
    self.run_metaspades = None
    self.use_scaffold_from_metaspades = None
    self.remove_short_reads_based_on_references = None
    self.references_for_removal_txt = None
    self.references_for_removal = {}
    self.references_mode = None
    self.fasta_txt_file = None
    self.samples_txt_file = None
    self.sample_names = None
    self.group_sizes = None

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.rules.extend(['iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report', 'gzip_fastqs',
                       'merge_fastqs_for_co_assembly', 'megahit', 'merge_fastas_for_co_assembly',
                       'anvi_gen_contigs_database', 'anvi_export_gene_calls', 'centrifuge',
                       'anvi_import_taxonomy', 'anvi_run_hmms', 'anvi_run_ncbi_cogs',
                       'bowtie_build', 'bowtie', 'samtools_view', 'anvi_init_bam', 'idba_ud',
                       'anvi_profile', 'annotate_contigs_database', 'anvi_merge', 'import_percent_of_reads_mapped',
                       'krakenhll', 'krakenhll_mpa_report', 'import_kraken_hll_taxonomy', 'metaspades',
                       'remove_short_reads_based_on_references'])

    self.general_params.extend(['samples_txt', "references_mode", "all_against_all", "kraken_txt"])

    rule_acceptable_params_dict = {}

    # defining the accessible params per rule
    rule_acceptable_params_dict['iu_gen_configs'] = ["--r1-prefix", "--r2-prefix"]
    rule_acceptable_params_dict['iu_filter_quality_minoche'] = ['run', '--visualize-quality-curves',
                                                                '--ignore-deflines', '--limit-num-pairs',
                                                                '--print-qual-scores', '--store-read-fate']
    rule_acceptable_params_dict['gzip_fastqs'] = ["run"]
    rule_acceptable_params_dict['metaspades'] = ["run", "additional_params", "use_scaffolds"]
    rule_acceptable_params_dict['megahit'] = ["run", "--min-contig-len", "--min-count", "--k-min", "--k-max",
                                              "--k-step", "--k-list", "--no-mercy", "--no-bubble", "--merge-level",
                                              "--prune-level", "--prune-depth", "--low-local-ratio", "--max-tip-len",
                                              "--no-local", "--kmin-1pass", "--presets", "--memory", "--mem-flag",
                                              "--use-gpu", "--gpu-mem", "--keep-tmp-files", "--tmp-dir", "--continue",
                                              "--verbose"]
    rule_acceptable_params_dict['idba_ud'] = ["run", "--mink", "--maxk", "--step", "--inner_mink", "--inner_step",
                                              "--prefix", "--min_count", "--min_support", "--seed_kmer",
                                              "--min_contig", "--similar", "--max_mismatch", "--min_pairs",
                                              "--no_bubble", "--no_local", "--no_coverage", "--no_correct",
                                              "--pre_correction"]
    rule_acceptable_params_dict['bowtie'] = ["additional_params"]
    rule_acceptable_params_dict['samtools_view'] = ["additional_params"]
    rule_acceptable_params_dict['anvi_profile'] = ["--overwrite-output-destinations", "--sample-name",
                                                   "--report-variability-full", "--skip-SNV-profiling",
                                                   "--profile-SCVs", "--description",
                                                   "--skip-hierarchical-clustering", "--distance", "--linkage",
                                                   "--min-contig-length", "--min-mean-coverage",
                                                   "--min-coverage-for-variability", "--cluster-contigs",
                                                   "--contigs-of-interest", "--queue-size", "--write-buffer-size",
                                                   "--max-contig-length"]
    rule_acceptable_params_dict['annotate_contigs_database'] = []
    rule_acceptable_params_dict['merge_fastas_for_co_assembly'] = []
    rule_acceptable_params_dict['merge_fastqs_for_co_assembly'] = []
    rule_acceptable_params_dict['anvi_merge'] = ["--sample-name", "--description", "--skip-hierarchical-clustering",
                                                 "--enforce-hierarchical-clustering", "--distance", "--linkage",
                                                 "--skip-concoct-binning", "--overwrite-output-destinations"]
    rule_acceptable_params_dict['import_percent_of_reads_mapped'] = ["run"]
    rule_acceptable_params_dict['krakenhll'] = ["additional_params", "run", "--db", "--gzip-compressed"]
    rule_acceptable_params_dict['krakenhll_mpa_report'] = ["additional_params"]
    rule_acceptable_params_dict['import_kraken_hll_taxonomy'] = ["--min-abundance"]
    rule_acceptable_params_dict['remove_short_reads_based_on_references'] = ["dont_remove_just_map",
                                                                             "references_for_removal_txt",
                                                                             "delimiter-for-iu-remove-ids-from-fastq"]

    self.rule_acceptable_params_dict.update(rule_acceptable_params_dict)

    forbidden_params = {}
    forbidden_params['krakenhll'] = ['--fastq-input', '--paired', '--output']

    self.forbidden_params.update(forbidden_params)

    self.dirs_dict.update({"QC_DIR": "01_QC",
                           "FASTA_DIR": "02_FASTA",
                           "CONTIGS_DIR": "03_CONTIGS",
                           "MAPPING_DIR": "04_MAPPING",
                           "PROFILE_DIR": "05_ANVIO_PROFILE",
                           "MERGE_DIR": "06_MERGED",
                           "TAXONOMY_DIR": "07_TAXONOMY"})

    self.default_config.update({'samples_txt': "samples.txt",
                                'metaspades': {"additional_params": "--only-assembler", "threads": 7},
                                'megahit': {"--min-contig-len": min_contig_length_for_assembly, "--memory": 0.4, "threads": 7},
                                'idba_ud': {"--min_contig": min_contig_length_for_assembly, "threads": 7},
                                'iu_filter_quality_minoche': {"run": True, "--ignore-deflines": True},
                                "gzip_fastqs": {"run": True},
                                "bowtie": {"additional_params": "--no-unal", "threads": 3},
                                "samtools_view": {"additional_params": "-F 4"},
                                "anvi_profile": {"threads": 3, "--sample-name": "{sample}", "--overwrite-output-destinations": True},
                                "anvi_merge": {"--sample-name": "{group}", "--overwrite-output-destinations": True},
                                "import_percent_of_reads_mapped": {"run": True},
                                "krakenhll": {"threads": 3, "--gzip-compressed": True, "additional_params": "--preload"},
                                "remove_short_reads_based_on_references": {"delimiter-for-iu-remove-ids-from-fastq": " "}})
def __init__(self, args=None, run=terminal.Run(), progress=terminal.Progress()):
    self.init_workflow_super_class(args, workflow_name='metagenomics')

    self.target_files = []  # TODO: Once we update all other workflows then this will be initiated in WorkflowSuperClass
    self.samples_information = {}
    self.kraken_annotation_dict = {}
    self.run_krakenuniq = None
    self.run_metaspades = None
    self.use_scaffold_from_metaspades = None
    self.remove_short_reads_based_on_references = None
    self.references_for_removal_txt = None
    self.references_for_removal = {}
    self.references_mode = None
    self.fasta_txt_file = None
    self.samples_txt_file = None
    self.sample_names = None
    self.group_sizes = None
    self.collections_txt = None
    self.collections = None

    # initialize the base class
    ContigsDBWorkflow.__init__(self)

    self.rules.extend(['iu_gen_configs', 'iu_filter_quality_minoche', 'gen_qc_report', 'gzip_fastqs',
                       'merge_fastqs_for_co_assembly', 'megahit', 'merge_fastas_for_co_assembly',
                       'anvi_gen_contigs_database', 'anvi_export_gene_calls', 'centrifuge',
                       'anvi_import_taxonomy', 'anvi_run_hmms', 'anvi_run_ncbi_cogs',
                       'bowtie_build', 'bowtie', 'samtools_view', 'anvi_init_bam', 'idba_ud',
                       'anvi_profile', 'annotate_contigs_database', 'anvi_merge', 'import_percent_of_reads_mapped',
                       'krakenuniq', 'krakenuniq_mpa_report', 'import_krakenuniq_taxonomy', 'metaspades',
                       'remove_short_reads_based_on_references', 'anvi_summarize', 'anvi_split'])

    self.general_params.extend(['samples_txt', "references_mode", "all_against_all",
                                "kraken_txt", "collections_txt"])

    rule_acceptable_params_dict = {}

    # defining the accessible params per rule
    rule_acceptable_params_dict['iu_gen_configs'] = ["--r1-prefix", "--r2-prefix"]
    rule_acceptable_params_dict['iu_filter_quality_minoche'] = ['run', '--visualize-quality-curves',
                                                                '--ignore-deflines', '--limit-num-pairs',
                                                                '--print-qual-scores', '--store-read-fate']
    rule_acceptable_params_dict['gzip_fastqs'] = ["run"]
    rule_acceptable_params_dict['anvi_summarize'] = ["additional_params", "run"]
    rule_acceptable_params_dict['anvi_split'] = ["additional_params", "run"]
    rule_acceptable_params_dict['metaspades'] = ["run", "additional_params", "use_scaffolds"]
    rule_acceptable_params_dict['megahit'] = ["run", "--min-contig-len", "--min-count", "--k-min", "--k-max",
                                              "--k-step", "--k-list", "--no-mercy", "--no-bubble", "--merge-level",
                                              "--prune-level", "--prune-depth", "--low-local-ratio", "--max-tip-len",
                                              "--no-local", "--kmin-1pass", "--presets", "--memory", "--mem-flag",
                                              "--use-gpu", "--gpu-mem", "--keep-tmp-files", "--tmp-dir", "--continue",
                                              "--verbose"]
    rule_acceptable_params_dict['idba_ud'] = ["run", "--mink", "--maxk", "--step", "--inner_mink", "--inner_step",
                                              "--prefix", "--min_count", "--min_support", "--seed_kmer",
                                              "--min_contig", "--similar", "--max_mismatch", "--min_pairs",
                                              "--no_bubble", "--no_local", "--no_coverage", "--no_correct",
                                              "--pre_correction"]
    rule_acceptable_params_dict['bowtie'] = ["additional_params"]
    rule_acceptable_params_dict['samtools_view'] = ["additional_params"]
    rule_acceptable_params_dict['anvi_profile'] = ["--overwrite-output-destinations", "--sample-name",
                                                   "--report-variability-full", "--skip-SNV-profiling",
                                                   "--profile-SCVs", "--description",
                                                   "--skip-hierarchical-clustering", "--distance", "--linkage",
                                                   "--min-contig-length", "--min-mean-coverage",
                                                   "--min-coverage-for-variability", "--cluster-contigs",
                                                   "--contigs-of-interest", "--queue-size", "--write-buffer-size",
                                                   "--max-contig-length", "--max-coverage-depth"]
    rule_acceptable_params_dict['annotate_contigs_database'] = []
    rule_acceptable_params_dict['merge_fastas_for_co_assembly'] = []
    rule_acceptable_params_dict['merge_fastqs_for_co_assembly'] = []
    rule_acceptable_params_dict['anvi_merge'] = ["--sample-name", "--description", "--skip-hierarchical-clustering",
                                                 "--enforce-hierarchical-clustering", "--distance", "--linkage",
                                                 "--skip-concoct-binning", "--overwrite-output-destinations"]
    rule_acceptable_params_dict['import_percent_of_reads_mapped'] = ["run"]
    rule_acceptable_params_dict['krakenuniq'] = ["additional_params", "run", "--db", "--gzip-compressed"]
    rule_acceptable_params_dict['import_krakenuniq_taxonomy'] = ["--min-abundance"]
    rule_acceptable_params_dict['remove_short_reads_based_on_references'] = ["dont_remove_just_map",
                                                                             "references_for_removal_txt",
                                                                             "delimiter-for-iu-remove-ids-from-fastq"]

    self.rule_acceptable_params_dict.update(rule_acceptable_params_dict)

    forbidden_params = {}
    forbidden_params['krakenuniq'] = ['--fastq-input', '--paired', '--output']

    self.forbidden_params.update(forbidden_params)

    self.dirs_dict.update({"QC_DIR": "01_QC",
                           "FASTA_DIR": "02_FASTA",
                           "CONTIGS_DIR": "03_CONTIGS",
                           "MAPPING_DIR": "04_MAPPING",
                           "PROFILE_DIR": "05_ANVIO_PROFILE",
                           "MERGE_DIR": "06_MERGED",
                           "TAXONOMY_DIR": "07_TAXONOMY",
                           "SUMMARY_DIR": "08_SUMMARY",
                           "SPLIT_PROFILES_DIR": "09_SPLIT_PROFILES"})

    self.default_config.update({'samples_txt': "samples.txt",
                                'metaspades': {"additional_params": "--only-assembler", "threads": 7},
                                'megahit': {"--min-contig-len": min_contig_length_for_assembly, "--memory": 0.4, "threads": 7},
                                'idba_ud': {"--min_contig": min_contig_length_for_assembly, "threads": 7},
                                'iu_filter_quality_minoche': {"run": True, "--ignore-deflines": True},
                                "gzip_fastqs": {"run": True},
                                "bowtie": {"additional_params": "--no-unal", "threads": 3},
                                "samtools_view": {"additional_params": "-F 4"},
                                "anvi_profile": {"threads": 3, "--sample-name": "{sample}", "--overwrite-output-destinations": True},
                                "anvi_merge": {"--sample-name": "{group}", "--overwrite-output-destinations": True},
                                "import_percent_of_reads_mapped": {"run": True},
                                "krakenuniq": {"threads": 3, "--gzip-compressed": True, "additional_params": ""},
                                "remove_short_reads_based_on_references": {"delimiter-for-iu-remove-ids-from-fastq": " "}})
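# A minimal sketch (not anvi'o source) of a metagenomics config exercising the defaults
# and acceptable params declared above. Sample file names, the assembler choice, and any
# value not taken from default_config are illustrative assumptions (e.g. 1000 stands in
# for min_contig_length_for_assembly, which is defined elsewhere in the module).
import json

example_metagenomics_config = {
    "samples_txt": "samples.txt",              # matches the default_config entry
    "all_against_all": False,                  # general param declared above; value is illustrative
    "megahit": {"run": True,                   # 'run' is an acceptable param for the megahit rule
                "--min-contig-len": 1000,
                "--memory": 0.4,
                "threads": 7},
    "anvi_profile": {"threads": 3,
                     "--sample-name": "{sample}",
                     "--overwrite-output-destinations": True},
    "anvi_merge": {"--sample-name": "{group}",
                   "--overwrite-output-destinations": True},
    "krakenuniq": {"run": False,               # acceptable param for the krakenuniq rule
                   "threads": 3,
                   "--gzip-compressed": True},
}

if __name__ == '__main__':
    with open('metagenomics-config.json', 'w') as f:   # hypothetical output path
        json.dump(example_metagenomics_config, f, indent=4)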