Ejemplo n.º 1
0
def get_extract_otu_tax_rank_command(config, fixed_rank, taxa_otu_rank, otu_taxa_labels, abund_table,
                                     significance_threshold, rdp_database, rdp_depth):
    """Build the command for the OTU taxonomic-rank extraction script.

    Args:
        config: Nested mapping; ``config['scripts']['taxrank_extractor']`` is
            used as the executable.
        fixed_rank: Value passed after ``--input``.
        taxa_otu_rank: Value passed after ``--output``.
        otu_taxa_labels: Value passed after ``--out_taxlabel``.
        abund_table: Value passed after ``--abundance_table``.
        significance_threshold: Value passed after ``--significance_threshold``.
        rdp_database: ``'16S'`` or ``'18S'``; any other value results in no
            ``--depth`` argument being added.
        rdp_depth: ``'phylum'`` or ``'class'``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.

    Raises:
        Exception: If ``rdp_depth`` is neither ``'phylum'`` nor ``'class'``.
    """

    arguments = [
        config['scripts']['taxrank_extractor'],
        '--input', fixed_rank,
        '--output', taxa_otu_rank,
        '--significance_threshold', significance_threshold,
        '--out_taxlabel', otu_taxa_labels,
        '--abundance_table', abund_table,
    ]

    uses_16s = rdp_database == '16S'

    # Only the 16S/phylum combination requests the extra proteobacteria level.
    if uses_16s and rdp_depth == 'phylum':
        arguments.extend(['--deeper_taxa', 'proteobacteria'])

    # Translate the rank name into its numeric base depth.
    for rank_name, rank_depth in (('phylum', 1), ('class', 2)):
        if rdp_depth == rank_name:
            base_depth = rank_depth
            break
    else:
        raise Exception('Unknown RDP depth encountered: {}'.format(rdp_depth))

    # The 18S database gets a depth one step larger than 16S for the same rank.
    if uses_16s:
        arguments.extend(['--depth', base_depth])
    elif rdp_database == '18S':
        arguments.extend(['--depth', base_depth + 1])

    return program_module.ProgramCommand('OTU taxextr', 'te', arguments)
Ejemplo n.º 2
0
def get_prinseq_command(config_file, input_file, good_output, bad_output):
    """Build the Prinseq read-cleaning command.

    The argument list requests:
        - quality trimming on both ends (``TRIM_QUAL``)
        - a minimum remaining length (``MIN_LEN``)
        - a minimum mean quality (``MIN_QUAL``)
        - a maximum percentage of Ns (``MAX_NS``)

    No phred64 or dereplication flag is passed.

    Args:
        config_file: Nested mapping; ``config_file['programs']['prinseq']`` is
            used as the executable.
        input_file: Value passed after ``-fastq``.
        good_output: Value passed after ``-out_good``.
        bad_output: Value passed after ``-out_bad``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config_file['programs']['prinseq'],
        '-fastq', input_file,
        '-out_good', good_output,
        '-out_bad', bad_output,
        '-trim_qual_left', str(TRIM_QUAL),
        '-trim_qual_right', str(TRIM_QUAL),
        '-min_len', str(MIN_LEN),
        '-min_qual_mean', str(MIN_QUAL),
        '-ns_max_p', str(MAX_NS),
    ]

    return program_module.ProgramCommand('Prinseq', 'pq', arguments)
Ejemplo n.º 3
0
def get_timeplot_command(config, log_table_fp, timeplot_fp):
    """Build the command for the time-plot script.

    Args:
        config: Nested mapping; ``config['scripts']['time_script']`` is used
            as the executable.
        log_table_fp: Value passed after ``-i``.
        timeplot_fp: Value passed after ``-o``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    script_path = config['scripts']['time_script']
    arguments = [script_path, '-i', log_table_fp, '-o', timeplot_fp]

    return program_module.ProgramCommand('Timeplot', 'tp', arguments)
Ejemplo n.º 4
0
def get_generate_otu_names_command(config, raw_otus, otu_name_map):
    """Build the command that maps old OTU names to newly generated ones.

    Args:
        config: Nested mapping; ``config['scripts']['generate_otu_names']`` is
            used as the executable.
        raw_otus: Value passed after ``--input``.
        otu_name_map: Value passed after ``--output``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    script_path = config['scripts']['generate_otu_names']
    io_arguments = ['--input', raw_otus, '--output', otu_name_map]

    return program_module.ProgramCommand(
        'Generate otu names', 'GOn', [script_path] + io_arguments)
Ejemplo n.º 5
0
def get_convert_to_phylip_command(config, pynast_alignment_fasta,
                                  pynast_alignment_phylip):
    """Build the command converting a fasta alignment to phylip format.

    Args:
        config: Nested mapping; ``config['scripts']['fasta_to_phylip']`` is
            used as the executable.
        pynast_alignment_fasta: Value passed after ``--input_fasta``.
        pynast_alignment_phylip: Value passed after ``--output_phylip``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['fasta_to_phylip'],
        '--input_fasta', pynast_alignment_fasta,
        '--output_phylip', pynast_alignment_phylip,
    ]

    return program_module.ProgramCommand('Convert fas/phy', 'Cfp', arguments)
Ejemplo n.º 6
0
def get_fast_tree_command(config, input_alignment_fp, output_tree_fp):
    """Build the command producing a tree file from an alignment via FastTree.

    Args:
        config: Nested mapping; ``config['scripts']['fasttree']`` is used as
            the executable and ``config['programs']['fasttree']`` is handed
            to it through ``--fasttree_path``.
        input_alignment_fp: Value passed after ``--input``.
        output_tree_fp: Value passed after ``--output``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    wrapper_script = config['scripts']['fasttree']
    fasttree_binary = config['programs']['fasttree']

    arguments = [wrapper_script]
    arguments += ['--input', input_alignment_fp]
    arguments += ['--output', output_tree_fp]
    arguments += ['--fasttree_path', fasttree_binary]

    return program_module.ProgramCommand('FastTree', 'FT', arguments)
Ejemplo n.º 7
0
def get_create_color_tables_command(config, fixed_rdp_output_fp, otu_color_taxa_table, taxa_color_table):
    """Build the command producing the color-strap and color-definition tables.

    Args:
        config: Nested mapping; ``config['scripts']['colors_from_phyla']`` is
            used as the executable.
        fixed_rdp_output_fp: Value passed after ``--input``.
        otu_color_taxa_table: Value passed after ``--otu_color_taxa``.
        taxa_color_table: Value passed after ``--taxa_color``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    return program_module.ProgramCommand(
        'Create color-tab',
        'cct',
        [
            config['scripts']['colors_from_phyla'],
            '--input', fixed_rdp_output_fp,
            '--otu_color_taxa', otu_color_taxa_table,
            '--taxa_color', taxa_color_table,
        ],
    )
Ejemplo n.º 8
0
def get_reduce_phylip_command(config, phylip_alignment,
                              phylip_alignment_reduced):
    """Build the command removing empty columns from a phylip alignment.

    Args:
        config: Nested mapping; ``config['scripts']['reduce_phylip']`` is used
            as the executable.
        phylip_alignment: Value passed after ``-i``.
        phylip_alignment_reduced: Value passed after ``-o``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    script_path = config['scripts']['reduce_phylip']

    return program_module.ProgramCommand(
        'Reduce phylip',
        'rp',
        [script_path, '-i', phylip_alignment, '-o', phylip_alignment_reduced],
    )
Ejemplo n.º 9
0
def get_decompression_command(config_file, compressed_input_fp,
                              decompressed_output_base):
    """Build the decompression-script command, always in ``gz`` mode.

    Args:
        config_file: Nested mapping;
            ``config_file['scripts']['decompression_script']`` is used as the
            executable.
        compressed_input_fp: Value passed after ``--input``.
        decompressed_output_base: Value passed after ``--output_base``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [config_file['scripts']['decompression_script']]
    arguments.extend(['--input', compressed_input_fp])
    arguments.extend(['--output_base', decompressed_output_base])
    # The mode is fixed: only gz is ever requested from this builder.
    arguments.extend(['--decompression_mode', 'gz'])

    return program_module.ProgramCommand('decompress', 'dc', arguments)
Ejemplo n.º 10
0
def get_derep_command(config, raw_reads_fp, dereplicated_fp):
    """Build the command for the configured dereplication program.

    The program is called with two positional arguments and no flags: the
    reads to dereplicate followed by the output path.

    Args:
        config: Nested mapping; ``config['programs']['dereplicate']`` is used
            as the executable.
        raw_reads_fp: First positional argument.
        dereplicated_fp: Second positional argument.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    derep_program = config['programs']['dereplicate']

    return program_module.ProgramCommand(
        'Dereplicate', 'dr', [derep_program, raw_reads_fp, dereplicated_fp])
Ejemplo n.º 11
0
def get_merge_command(config_file, input_fastq_files_fp, merged_output_fp,
                      labels):
    """Build the command that runs the configured merge script.

    Args:
        config_file: Nested mapping; ``config_file['scripts']['merge']`` is
            used as the executable.
        input_fastq_files_fp: Value passed after ``--input_files``.
        merged_output_fp: Value passed after ``--output``.
        labels: Value passed after ``--labels``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """
    import logging

    description = 'merge'
    short = 'mr'

    # Diagnostics go through logging at DEBUG level so that building a
    # command never writes to stdout unless debugging is enabled.
    logging.getLogger(__name__).debug('input labels %s', labels)

    command = [
        config_file['scripts']['merge'],
        '--input_files', input_fastq_files_fp,
        '--output', merged_output_fp,
        '--labels', labels,
    ]

    return program_module.ProgramCommand(description, short, command)
Ejemplo n.º 12
0
def get_fastq_to_fasta_command(config, fastq_fp, fasta_fp):
    """Build the command converting the fastq input into fasta format.

    The fasta format is what the following steps expect.

    Args:
        config: Nested mapping; ``config['scripts']['fasta_to_fastq']`` is
            used as the executable.
        fastq_fp: Value passed after ``--input``.
        fasta_fp: Value passed after ``--output``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    # NOTE(review): the config key reads 'fasta_to_fastq' although this step
    # goes from fastq to fasta -- confirm the key points at the intended script.
    converter_script = config['scripts']['fasta_to_fastq']

    arguments = [converter_script, '--input', fastq_fp, '--output', fasta_fp]
    arguments.append('--extract_label')

    return program_module.ProgramCommand('Fq to Fa', 'FQA', arguments)
Ejemplo n.º 13
0
def get_create_barplot_data_command(config, taxa_otu_rank, otu_sample_table,
                                    tax_count_table, tax_abund_table):
    """Build the command extracting taxa barplot data from the OTU-taxa table.

    Args:
        config: Nested mapping; ``config['scripts']['create_barplot_table']``
            is used as the executable.
        taxa_otu_rank: Value passed after ``--taxa_table``.
        otu_sample_table: Value passed after ``--otu_sample_table``.
        tax_count_table: Value passed after ``--barplot_cluster``.
        tax_abund_table: Value passed after ``--barplot_abund``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['create_barplot_table'],
        '--taxa_table', taxa_otu_rank,
        '--otu_sample_table', otu_sample_table,
        '--barplot_cluster', tax_count_table,
        '--barplot_abund', tax_abund_table,
    ]

    return program_module.ProgramCommand('Create bar-data', 'cbd', arguments)
Ejemplo n.º 14
0
def get_generate_alpha_plots_command(config, plot_rar, plot_chao, otu_mapping_table):
    """Build the command for the chao1/rarefaction plotting script.

    The sample-point and replicate counts come from the module-level
    ``SAMPLE_STEPS`` and ``SAMPLE_REPLICATES`` values.

    Args:
        config: Nested mapping; ``config['scripts']['alpha_plots']`` is used
            as the executable.
        plot_rar: Value passed after ``--plot_rarefaction``.
        plot_chao: Value passed after ``--plot_chao``.
        otu_mapping_table: Value passed after ``--otu_mapping_table``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    plot_outputs = ['--plot_rarefaction', plot_rar, '--plot_chao', plot_chao]
    sampling = ['--samplepoints', SAMPLE_STEPS, '--replicates', SAMPLE_REPLICATES]
    table_input = ['--otu_mapping_table', otu_mapping_table]

    arguments = [config['scripts']['alpha_plots']] + plot_outputs + sampling + table_input

    return program_module.ProgramCommand('Chao1', 'PC', arguments)
Ejemplo n.º 15
0
def get_filter_otu_command(config, complete_otu_fp, otu_abundancy_fp,
                           output_fp, abund_filt_fp, filter_threshold):
    """Build the command filtering OTUs on mapped read count.

    Args:
        config: Nested mapping; ``config['scripts']['filter_otus']`` is used
            as the executable.
        complete_otu_fp: Value passed after ``-i``.
        otu_abundancy_fp: Value passed after ``-m``.
        output_fp: Value passed after ``-o``.
        abund_filt_fp: Value passed after ``-O``.
        filter_threshold: Value passed after ``-t``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['filter_otus'],
        '-i', complete_otu_fp,
        '-m', otu_abundancy_fp,
        '-o', output_fp,
        '-t', filter_threshold,
        '-O', abund_filt_fp,
    ]

    return program_module.ProgramCommand('Abund. filtering', 'af', arguments)
Ejemplo n.º 16
0
def get_create_otu_table_command(config, cluster_mapping, derep_mapping, name_mapping_table, otu_table):
    """Build the command creating the OTU table.

    The table maps the counts in the separate samples to the different OTUs.

    Args:
        config: Nested mapping; ``config['scripts']['create_otu_table']`` is
            used as the executable.
        cluster_mapping: Value passed after ``--cluster_mapping``.
        derep_mapping: Value passed after ``--derep_mapping``.
        name_mapping_table: Value passed after ``--name_mapping``.
        otu_table: Value passed after ``--output``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    script_path = config['scripts']['create_otu_table']
    mapping_inputs = [
        '--cluster_mapping', cluster_mapping,
        '--derep_mapping', derep_mapping,
        '--name_mapping', name_mapping_table,
    ]

    return program_module.ProgramCommand(
        'OTU-table', 'OT', [script_path] + mapping_inputs + ['--output', otu_table])
Ejemplo n.º 17
0
def get_ete_command(config, input_tree_fp, output_tree_pic_fp,
                    otu_abund_fp, color_strap_fp, labels_fp):
    """Build the command rendering a tree with the Python ETE module.

    The configured script is launched through ``xvfb-run``.

    Args:
        config: Nested mapping; ``config['scripts']['ete']`` is the script
            handed to ``xvfb-run``.
        input_tree_fp: Value passed after ``--input``.
        output_tree_pic_fp: Value passed after ``--output``.
        otu_abund_fp: Value passed after ``--abundancies``.
        color_strap_fp: Value passed after ``--color_taxa``.
        labels_fp: Value passed after ``--labels``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    launcher = ['xvfb-run', config['scripts']['ete']]
    script_options = [
        '--input', input_tree_fp,
        '--output', output_tree_pic_fp,
        '--labels', labels_fp,
        '--abundancies', otu_abund_fp,
        '--color_taxa', color_strap_fp,
    ]

    return program_module.ProgramCommand('ETE', 'ete', launcher + script_options)
Ejemplo n.º 18
0
def get_chimera_checking_command(config, unchecked_fp, non_chimeric_fp):
    """Build the vsearch reference-based chimera checking command.

    Non-chimeric reads are written to ``non_chimeric_fp``; the uchime report
    goes to the same path with ``.OUTPUT`` appended.

    Args:
        config: Nested mapping; ``config['programs']['vsearch']`` is used as
            the executable and ``config['databases']['vsearch_16S_ref']`` as
            the reference database.
        unchecked_fp: Value passed after ``-uchime_ref``.
        non_chimeric_fp: Value passed after ``-nonchimeras``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    vsearch_binary = config['programs']['vsearch']
    reference_db = config['databases']['vsearch_16S_ref']
    uchime_report_fp = str(non_chimeric_fp + '.OUTPUT')

    arguments = [
        vsearch_binary,
        '-uchime_ref', unchecked_fp,
        '-db', reference_db,
        '-uchimeout', uchime_report_fp,
        '-nonchimeras', non_chimeric_fp,
    ]

    return program_module.ProgramCommand('Chim. checking', 'cc', arguments)
Ejemplo n.º 19
0
def get_cdhit_parser_command(config, input_mapping_matrix_fp,
                             output_mapping_table_fp, cluster_mapping_fp):
    """Build the command turning a CD-HIT mapping table into an abundancy matrix.

    The ``--count_dereplicated`` switch is always passed.

    Args:
        config: Nested mapping; ``config['scripts']['cdhit_output_parser']``
            is used as the executable.
        input_mapping_matrix_fp: Value passed after ``-i``.
        output_mapping_table_fp: Value passed after ``-o``.
        cluster_mapping_fp: Value passed after ``--seq_matrix``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['cdhit_output_parser'],
        '-i', input_mapping_matrix_fp,
        '-o', output_mapping_table_fp,
        '--count_dereplicated',
        '--seq_matrix', cluster_mapping_fp,
    ]

    return program_module.ProgramCommand('Extract otu table', 'eOt', arguments)
Ejemplo n.º 20
0
def get_rename_otus_command(config, input_otus, input_table, renamed_fasta,
                            renamed_table, name_mapping):
    """Build the command renaming OTUs in a fasta file and abundancy matrix.

    Args:
        config: Nested mapping; ``config['scripts']['rename_otus']`` is used
            as the executable.
        input_otus: Value passed after ``--fasta``.
        input_table: Value passed after ``--table``.
        renamed_fasta: Value passed after ``--output_fasta``.
        renamed_table: Value passed after ``--output_table``.
        name_mapping: Value passed after ``--name_mapping``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    script_path = config['scripts']['rename_otus']
    inputs = ['--fasta', input_otus, '--table', input_table]
    outputs = ['--output_fasta', renamed_fasta, '--output_table', renamed_table]
    mapping = ['--name_mapping', name_mapping]

    return program_module.ProgramCommand(
        'Rename otus', 'rO', [script_path] + inputs + outputs + mapping)
Ejemplo n.º 21
0
def get_label_fasta_header_command(config, raw_reads_fp, labelled_fp):
    """Build the command appending ``;size=1;`` to fasta headers.

    Used to add the size label when the derep command is not run.

    Args:
        config: Nested mapping; ``config['scripts']['label_fasta_headers']``
            is used as the executable.
        raw_reads_fp: Value passed after ``-i``.
        labelled_fp: Value passed after ``-o``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    size_label = ';size=1;'
    arguments = [
        config['scripts']['label_fasta_headers'],
        '-i', raw_reads_fp,
        '-o', labelled_fp,
        '-l', size_label,
    ]

    return program_module.ProgramCommand('Label reads', 'lr', arguments)
Ejemplo n.º 22
0
def get_script_dereplicator_command(config, raw_reads_fp, dereplicated_fp,
                                    mapping_fp):
    """Build the command for the in-house dereplication script.

    Args:
        config: Nested mapping; ``config['scripts']['script_dereplicator']``
            is used as the executable.
        raw_reads_fp: Value passed after ``--input``.
        dereplicated_fp: Value passed after ``--output``.
        mapping_fp: Value passed after ``--mapping_file``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [config['scripts']['script_dereplicator']]
    arguments += ['--input', raw_reads_fp]
    arguments += ['--output', dereplicated_fp]
    arguments += ['--mapping_file', mapping_fp]

    return program_module.ProgramCommand('Dereplicate script', 'dr', arguments)
Ejemplo n.º 23
0
def get_annotate_otus_command(config, raw_otus, raw_abundance, annotated_otus,
                              annotated_abundance, taxa_otu_rank,
                              fixed_rank_annotation):
    """Build the command annotating an OTU fasta file and abundancy matrix.

    Args:
        config: Nested mapping; ``config['scripts']['annotate_otu']`` is used
            as the executable.
        raw_otus: Value passed after ``--input_fasta``.
        raw_abundance: Value passed after ``--input_abundancy``.
        annotated_otus: Value passed after ``--annotated_fasta``.
        annotated_abundance: Value passed after ``--annotated_abundancy``.
        taxa_otu_rank: Value passed after ``--input_taxa``.
        fixed_rank_annotation: Value passed after ``--fixed_rank_annotation``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['annotate_otu'],
        '--input_fasta', raw_otus,
        '--input_abundancy', raw_abundance,
        '--input_taxa', taxa_otu_rank,
        '--annotated_fasta', annotated_otus,
        '--annotated_abundancy', annotated_abundance,
        '--fixed_rank_annotation', fixed_rank_annotation,
    ]

    return program_module.ProgramCommand('Annotate otu', 'ao', arguments)
Ejemplo n.º 24
0
def get_filter_bad_taxa_command(config, raw_otus, abundancy_matrix,
                                otu_taxa_table, output_dir):
    """Build the command dropping OTUs with low-confidence RDP taxa.

    The filtering affects both the OTU fasta file and the abundancy table.
    The output suffix comes from the module-level ``TAX_FILTER_SUFFIX``.

    Args:
        config: Nested mapping; ``config['scripts']['filter_poor_taxa']`` is
            used as the executable.
        raw_otus: Value passed after ``--input``.
        abundancy_matrix: Value passed after ``--abund_matrix``.
        otu_taxa_table: Value passed after ``--taxa_table``.
        output_dir: Value passed after ``--output_dir``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['filter_poor_taxa'],
        '--input', raw_otus,
        '--taxa_table', otu_taxa_table,
        '--suffix', TAX_FILTER_SUFFIX,
        '--abund_matrix', abundancy_matrix,
        '--output_dir', output_dir,
    ]

    return program_module.ProgramCommand('Filter taxa', 'ft', arguments)
Ejemplo n.º 25
0
def get_raxml_command(config, input_alignment_fp, output_dir):
    """Build the RAxML command producing a tree from an alignment.

    The seed (``12345``), substitution model (``GTRGAMMA``) and run name
    (``raxml_tree.tre``) are fixed inside this function.

    Args:
        config: Nested mapping; ``config['programs']['raxml']`` is used as the
            executable.
        input_alignment_fp: Value passed after ``-s``.
        output_dir: Value passed after ``-w``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    fixed_settings = ['-p', 12345, '-m', 'GTRGAMMA']
    run_specific = [
        '-s', input_alignment_fp,
        '-n', 'raxml_tree.tre',
        '-w', output_dir,
    ]

    arguments = [config['programs']['raxml']] + fixed_settings + run_specific

    return program_module.ProgramCommand('RAxML', 'Rx', arguments)
Ejemplo n.º 26
0
def get_taxa_barplot_command(config,
                             tax_count_table_fp,
                             matlibplot_out_fp,
                             taxa_color_table,
                             plot_relative_abundance=False,
                             title='MODTITL',
                             ylabel='MODYLABL'):
    """Build the command for the taxa barplot script.

    Args:
        config: Nested mapping; ``config['scripts']['make_barplot']`` is used
            as the executable.
        tax_count_table_fp: Value passed after ``--input``.
        matlibplot_out_fp: Value passed after ``--output``.
        taxa_color_table: Value passed after ``--color_table``.
        plot_relative_abundance: When truthy, ``--relative_abundance`` is
            appended as the last argument.
        title: Value passed after ``--title``.
        ylabel: Value passed after ``--ylabel``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['scripts']['make_barplot'],
        '--input', tax_count_table_fp,
        '--output', matlibplot_out_fp,
        '--title', title,
        '--ylabel', ylabel,
        '--color_table', taxa_color_table,
    ]

    optional_flags = ['--relative_abundance'] if plot_relative_abundance else []

    return program_module.ProgramCommand(
        'Taxa-barplot', 'tb', arguments + optional_flags)
Ejemplo n.º 27
0
def get_rdp_command(config, input_fp, fixed_rank_fp, significant_taxa_fp, significance_threshold, chosen_database):
    """Build the java command running the RDP classifier.

    The JVM heap limit is taken from the module-level ``MEMORY_SIZE_GB``.
    The input file is always the last argument.

    Args:
        config: Nested mapping; ``config['programs']['rdpclassifier']`` is the
            jar to run and ``config['databases']['rdpclassifier_18S']`` is
            read only when the 18S database is chosen.
        input_fp: Final positional argument.
        fixed_rank_fp: Value passed after ``-o``.
        significant_taxa_fp: Value passed after ``-h``.
        significance_threshold: Value passed after ``-c``.
        chosen_database: ``'18S'`` adds a ``-t`` option; any other value adds
            nothing.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    # Only the 18S database needs an explicit training set.
    training_args = (
        ['-t', config['databases']['rdpclassifier_18S']]
        if chosen_database == '18S'
        else []
    )

    classify_call = [
        'java', '-Xmx' + str(MEMORY_SIZE_GB) + 'g',
        '-jar', config['programs']['rdpclassifier'],
        'classify',
        '-c', significance_threshold,
        '-f', 'fixrank',
        '-o', fixed_rank_fp,
        '-h', significant_taxa_fp,
    ]

    return program_module.ProgramCommand(
        'RDP Classifier', 'RDP', classify_call + training_args + [input_fp])
Ejemplo n.º 28
0
def get_run_cdhit_command(config, input_reads_fp, output_otus_fp,
                          clustering_identity):
    """Build the CD-HIT command clustering sequences into OTUs.

    Word size, thread count and memory limit come from the module-level
    ``CDHIT_WORD_SIZE``, ``CDHIT_THREADS`` and ``CDHIT_MEMORY``. When the
    module-level ``ACCURATE_CLUSTERING`` is truthy, ``-g 1`` is appended and
    the resulting argument list is printed.

    Args:
        config: Nested mapping; ``config['programs']['cdhit']`` is used as the
            executable.
        input_reads_fp: Value passed after ``-i``.
        output_otus_fp: Value passed after ``-o``.
        clustering_identity: Value passed after ``-c``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.
    """

    arguments = [
        config['programs']['cdhit'],
        '-i', input_reads_fp,
        '-o', output_otus_fp,
        '-c', clustering_identity,
        '-n', CDHIT_WORD_SIZE,
        '-T', CDHIT_THREADS,
        '-M', CDHIT_MEMORY,
        '-d', 0,
    ]

    if ACCURATE_CLUSTERING:
        arguments.extend(['-g', 1])
        print('CD-HIT command: {}'.format(arguments))

    return program_module.ProgramCommand('CD-HIT', 'CH', arguments)
Ejemplo n.º 29
0
def get_pynast_command(config, filtered_otus, pynast_alignment_fasta,
                       pynast_log, pynast_failed,
                       min_alignment_similarity_percentage, rdp_database):
    """Build the PyNAST alignment command.

    The minimum input length comes from the module-level
    ``MIN_INPUT_TO_ALIGNMENT_LENGTH``.

    Args:
        config: Nested mapping; ``config['programs']['pynast']`` is used as
            the executable and ``config['databases']['pynast_16S']`` as the
            template passed after ``-t``.
        filtered_otus: Value passed after ``-i``.
        pynast_alignment_fasta: Value passed after ``-a``.
        pynast_log: Value passed after ``-g``.
        pynast_failed: Value passed after ``-f``.
        min_alignment_similarity_percentage: Value passed after ``-p``.
        rdp_database: Must be ``'16S'`` or ``'18S'``.

    Returns:
        The ``program_module.ProgramCommand`` created from the description,
        short label and argument list.

    Raises:
        AttributeError: If ``rdp_database`` is neither ``'16S'`` nor ``'18S'``.
    """

    if rdp_database not in ('16S', '18S'):
        raise AttributeError(
            "Chosen RDP database: {} doesn't exist!".format(rdp_database))

    # NOTE(review): both 16S and 18S use the 'pynast_16S' template here --
    # confirm whether 18S was meant to read a dedicated config entry.
    template_db = config['databases']['pynast_16S']

    arguments = [
        config['programs']['pynast'],
        '-i', filtered_otus,
        '-t', template_db,
        '-l', MIN_INPUT_TO_ALIGNMENT_LENGTH,
        '-p', min_alignment_similarity_percentage,
        '-a', pynast_alignment_fasta,
        '-g', pynast_log,
        '-f', pynast_failed,
    ]

    return program_module.ProgramCommand('PyNAST', 'PN', arguments)