def __init__(self, output_dir):
        """Initialization."""
        
        check_dependencies(['blastn', 'makeblastdb'])
        
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
            
        self.output_dir = output_dir
        
        logger_setup(output_dir, "in_silico_probes.log", "in_silico_probes", __version__, False)
        self.logger = logging.getLogger('timestamp')
        
        self.output_fmt = '6 qseqid qlen qseq sseqid slen sseq length mismatch gaps pident bitscore evalue'

        self.BlastHit = namedtuple('BlastHit', """query_id
                                                query_len
                                                query_aln_seq
                                                subject_id
                                                subject_len
                                                subject_aln_seq
                                                aln_len
                                                mismatch
                                                gaps
                                                perc_identity
                                                bitscore
                                                evalue""")
Example #2
    def __init__(self, tmp_dir, output_dir, cpus):
        """Initialization."""
        
        self.tmp_dir = tmp_dir
        self.output_dir = output_dir
        self.cpus = cpus
        
        check_dependencies(['prodigal', 'hmmsearch', 'pfam_search.pl', 'genometk'])

        self.tigrfam_hmms = '/srv/whitlam/bio/db/tigrfam/15.0/TIGRFAMs_15.0_HMM/tigrfam.hmm'
        self.tigrfam_ext = '_tigrfam.tsv'
        
        self.pfam_hmm_dir = '/srv/db/pfam/27/'
        self.pfam_ext = '_pfam.tsv'
        
        self.protein_file_ext = '_protein.faa'
        
        logger_setup(output_dir, "gtdb_protein_pipeline.log", "gtdb_protein_pipeline", __version__, False)
        self.logger = logging.getLogger('timestamp')
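# Both constructors above call check_dependencies() on external binaries. A
# minimal sketch of what such a helper presumably does (verify each program is
# on the PATH and abort otherwise); this is an assumption for illustration,
# not the project's actual implementation.
import shutil
import sys

def check_dependencies_sketch(programs):
    """Exit if any required external program cannot be found on the PATH."""
    missing = [prog for prog in programs if shutil.which(prog) is None]
    if missing:
        sys.stderr.write('Missing dependencies: %s\n' % ', '.join(missing))
        sys.exit(1)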
Example #3
def main():

    # initialize the option parser
    parser = argparse.ArgumentParser(
        add_help=False,
        description=
        "BAM-Tk is a software toolkit for dealing with Binary Alignment Map (BAM) files.",
        epilog="Written by Corentin Hochart ([email protected]), "
        + "UMR CNRSS 6023 Laboratoire Genome et Environement (LMGE), " +
        "as part of the [ANR Eureka](https://anr.fr/Projet-ANR-14-CE02-0004) project."
        + "Released under the terms of the GNU General Public License v3. " +
        "bamtk version %s." % version())
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    # pathway reconstruction
    mm_featuresparser = subparsers.add_parser('mm_features', description='')
    mm_featuresparser.add_argument(
        'faidx', help='samtools fasta index of the reference')
    mm_featuresparser.add_argument(
        'bam_list', help='list of BAM format alignment file path(s)')
    mm_featuresparser.add_argument('output_dir',
                                   help='directory to write output files')
    mm_featuresinput_argument = mm_featuresparser.add_argument_group(
        'optional input arguments')
    mm_featuresinput_argument.add_argument('-x',
                                           '--extension',
                                           help='bam file extension',
                                           default='bam')
    mm_featuresinput_argument.add_argument('-fx',
                                           '--faidx_extension',
                                           help='faidx file extension',
                                           default='fasta.fai')
    mm_featuresinput_argument.add_argument(
        '-t',
        '--threads',
        help='number of threads for "samtools view"',
        default='2')
    mm_featuresinput_argument.add_argument(
        '-Q',
        '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    mm_featuresinput_argument.add_argument(
        '-i',
        '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)
    mm_featuresinput_argument.add_argument(
        '-m',
        '--merge',
        help='merge features abundance by field',
        action='store_true')
    mm_featuresinput_argument.add_argument(
        '-s',
        '--separator',
        help='field separator for the -m/--merge argument',
        default='.')
    mm_featuresinput_argument.add_argument(
        '-g',
        '--genome',
        help='sum abundance of all features',
        action='store_true')
    mm_featuresoutput_argument = mm_featuresparser.add_argument_group(
        'optional output arguments')
    mm_featuresoutput_argument.add_argument(
        '-n',
        '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_featuresoutput_argument.add_argument(
        '-sn',
        '--feature_size_normalisation',
        help="get the number of features per X bases [Default: 1000]",
        default=1000,
        type=int)
    mm_featuresoutput_argument.add_argument(
        '-f',
        '--discard_feature_length_normalisation',
        help=
        "discard feature length normalisation for base count abundance output",
        action='store_true')
    mm_featuresoutput_argument.add_argument(
        '-l',
        '--discard_library_size_normalisation',
        help=
        "discard library size normalisation for reads and bases count abundance output",
        action='store_true')
    mm_featuresoutput_argument.add_argument(
        '-lsn',
        '--library_size_normalisation',
        help=
        "library size normalisation by total number of reads count or by number of aligned reads ",
        choices=['total', 'aligned'],
        default='total')
    mm_featuresoutput_argument.add_argument(
        '--removed',
        help=
        "removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')
    mm_featuresparser.add_argument('--silent',
                                   help='suppress output of logger',
                                   action='store_true')
    mm_featuresparser.add_argument(
        '--force_overwrite',
        help='force overwriting of output directory',
        action="store_true",
        default=False)
    mm_featuresparser.add_argument('--version',
                                   help='print version and exit',
                                   action='version',
                                   version='bamtk ' + version())

    mm_annotated_features_parser = subparsers.add_parser(
        'mm_annotated_features', description='')
    mm_annotated_features_parser.add_argument(
        'features_dir', help='directory specified during features command')
    mm_annotated_features_parser.add_argument(
        'features_annotation',
        help='features annotation file in tabular format')
    mm_annotated_features_parser.add_argument(
        'annotation_description',
        help='annotation description file in tabular format')
    mm_annotated_features_input_argument = mm_annotated_features_parser.add_argument_group(
        'optional input arguments')
    mm_annotated_features_input_argument.add_argument(
        '--library_size',
        help=
        "Tabular file with sample library size to produce normalised count matrix"
    )
    mm_annotated_features_output_argument = mm_annotated_features_parser.add_argument_group(
        'optional output arguments')
    mm_annotated_features_output_argument.add_argument(
        '-f',
        '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_annotated_features_output_argument.add_argument(
        '--removed',
        help=
        "removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    mm_annotated_features_parser.add_argument('--silent',
                                              help='suppress output of logger',
                                              action='store_true')
    mm_annotated_features_parser.add_argument(
        '--force_overwrite',
        help='force overwriting of output directory',
        action="store_true",
        default=False)

    mm_wf_parser = subparsers.add_parser(
        'mm_wf',
        description='Run the mm_features and mm_annotated_features commands',
        epilog=
        'bamtk mm_wf ./file.fai ./bam_list.tsv ./features2annotation.tsv ./annotationDescription.tsv ./output'
    )
    mm_wf_parser.add_argument('faidx',
                              help='samtools fasta index of the reference')
    mm_wf_parser.add_argument(
        'bam_list', help='list of BAM format alignment file path(s)')
    mm_wf_parser.add_argument(
        'features_annotation',
        help='features annotation file in tabular format')
    mm_wf_parser.add_argument(
        'annotation_description',
        help='annotation description file in tabular format')
    mm_wf_parser.add_argument('output_dir',
                              help='directory to write output files')
    mm_wf_input_argument = mm_wf_parser.add_argument_group(
        'optional input arguments')
    mm_wf_input_argument.add_argument('-x',
                                      '--extension',
                                      help='bam file extension',
                                      default='bam')
    mm_wf_input_argument.add_argument('-fx',
                                      '--faidx_extension',
                                      help='faidx file extension',
                                      default='fasta.fai')
    mm_wf_input_argument.add_argument(
        '-t',
        '--threads',
        help='number of threads for "samtools view"',
        default='2')
    mm_wf_input_argument.add_argument(
        '-Q',
        '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    mm_wf_input_argument.add_argument(
        '-i',
        '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)
    mm_wf_output_argument = mm_wf_parser.add_argument_group(
        'optional output arguments')
    mm_wf_output_argument.add_argument(
        '-f',
        '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_wf_output_argument.add_argument(
        '-g',
        '--discard_gene_length_normalisation',
        help=
        "discard gene length normalisation for base count abundance output",
        action='store_true')
    mm_wf_output_argument.add_argument(
        '--removed',
        help=
        "removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')
    mm_wf_parser.add_argument('--silent',
                              help='suppress output of logger',
                              action='store_true')
    mm_wf_parser.add_argument('--force_overwrite',
                              help='force overwriting of output directory',
                              action="store_true",
                              default=False)
    mm_wf_parser.add_argument('--version',
                              help='print version and exit',
                              action='version',
                              version='bamtk ' + version())

    # get and check options
    args = None
    if (len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help'):
        print_help()
        sys.exit(0)
    else:
        args = parser.parse_args()

    try:
        logger_setup(args.output_dir, "bamtk.log", "bamtk", version(),
                     args.silent)
    except Exception:
        logger_setup(None, "bamtk.log", "bamtk", version(), args.silent)

    try:
        parser = OptionsParser()
        if (False):
            import cProfile
            cProfile.run('parser.parse_options(args)')
        else:
            parser.parse_options(args)
    except SystemExit:
        print('Unrecoverable error.')
    except Exception:
        print("\nUnexpected error:", sys.exc_info()[0])
        raise
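# The main() above delegates the real work to OptionsParser().parse_options(args).
# A hypothetical sketch of that dispatch step, routing on the subparser name
# stored via dest='subparser_name'; the handler method names are assumptions.
class OptionsParserSketch(object):
    def parse_options(self, args):
        if args.subparser_name == 'mm_features':
            self.mm_features(args)  # assumed handler
        elif args.subparser_name == 'mm_annotated_features':
            self.mm_annotated_features(args)  # assumed handler
        elif args.subparser_name == 'mm_wf':
            self.mm_wf(args)  # assumed handler
        else:
            raise ValueError('Unknown command: %s' % args.subparser_name)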
Example #4
def main():

    parser = argparse.ArgumentParser(
        description="This script allow the construction of abundance" +
        "matrix from a list of bam file.",
        epilog="Written by Corentin Hochart ([email protected]), " +
        "UMR CNRSS 6023 Laboratoire Genome et Environement (LMGE). " +
        "Released under the terms of the GNU General Public License v3. " +
        "MAMa version %s." % version())

    parser.add_argument('faidx', help='samtools fasta index of the reference')
    parser.add_argument('bam_list',
                        help='list of BAM format alignment file path(s)')

    input_argument = parser.add_argument_group('optional input arguments')
    input_argument.add_argument('-x',
                                '--extension',
                                help='bam file extension',
                                default='bam')
    input_argument.add_argument('-t',
                                '--threads',
                                help='number of threads for "samtools view"',
                                default='2')
    input_argument.add_argument(
        '-Q',
        '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    input_argument.add_argument(
        '-i',
        '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)

    output_argument = parser.add_argument_group('optional output arguments')
    output_argument.add_argument('-a',
                                 '--abundance',
                                 help="reads count abundance output")
    output_argument.add_argument(
        '-n',
        '--normalised',
        help=
        "reads count normalised abundance output (feature per X reads ; see '-f' argument)"
    )
    output_argument.add_argument('-r',
                                 '--relative',
                                 help="reads count relative abundance output")
    output_argument.add_argument('-ba',
                                 '--base_abundance',
                                 help="base count abundance output")
    output_argument.add_argument(
        '-bn',
        '--base_normalised',
        help=
        "base count normalised abundance output (feature per X reads ; see '-f' argument)"
    )
    output_argument.add_argument('-br',
                                 '--base_relative',
                                 help="base count relative abundance output")
    output_argument.add_argument(
        '-f',
        '--feature_normalisation',
        help="get the numer of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    output_argument.add_argument(
        '-g',
        '--discard_gene_length_normalisation',
        help=
        "discard gene length normalisation for base count abundance output",
        action='store_true')
    output_argument.add_argument(
        '--removed',
        help=
        "removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    parser.add_argument('--silent',
                        help='suppress output of logger',
                        action='store_true')
    parser.add_argument('--version',
                        help='print version and exit',
                        action='version',
                        version='MAMa ' + version())

    args = parser.parse_args()

    try:
        logger_setup('log', "MAMa.log", "MAMa", version(), args.silent)
    except Exception:
        logger_setup(None, "MAMa.log", "MAMa", version(), args.silent)

    if not args.abundance and not args.normalised and not args.relative:
        parser.error(
            '''At least one output file name must be specified with '--relative' and/or '--normalised' and/or '--abundance'.'''
        )

    matrix_maker(args.faidx, args.bam_list, args.extension, args.threads,
                 args.mapQ, args.id_cutoff, args.abundance, args.normalised,
                 args.relative, args.base_abundance, args.base_normalised,
                 args.base_relative, args.feature_normalisation,
                 args.discard_gene_length_normalisation, args.removed)
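# A hedged sketch of the normalisation suggested by the options above (not the
# actual matrix_maker() internals): read counts are scaled to counts per
# `feature_normalisation` reads of library size, and base counts are divided
# by feature length unless that normalisation is discarded.
def normalised_count_sketch(read_count, library_size, feature_normalisation=1000000):
    """Scale a raw read count to counts per X reads of library size."""
    return read_count * feature_normalisation / float(library_size)

def base_abundance_sketch(base_count, feature_length, discard_length_normalisation=False):
    """Optionally normalise a base count by feature length."""
    if discard_length_normalisation:
        return float(base_count)
    return base_count / float(feature_length)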
Example #5
    unroot_tree_parser.add_argument('input_tree', help='')
    unroot_tree_parser.add_argument('output_tree', help='')
    unroot_tree_parser.add_argument('--silent',
                                    help="suppress output",
                                    action='store_true')

    # get and check options
    args = None
    if (len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv[1] == '--help'):
        print_help()
        sys.exit(0)
    else:
        args = parser.parse_args()

    try:
        logger_setup(args.output_dir, 'gtdbtk_toolset.log',
                     'GTDB Tk converter', version(), args.silent)
    except Exception:
        logger_setup(None, 'gtdbtk_toolset.log', 'GTDB Tk converter',
                     __version__, args.silent)

    # do what we came here to do
    try:
        parser = OptionsParser()
        if False:
            # import pstats
            # p = pstats.Stats('prof')
            # p.sort_stats('cumulative').print_stats(10)
            # p.sort_stats('time').print_stats(10)
            import cProfile

            cProfile.run('parser.parse_options(args)', 'prof')
        else:
            parser.parse_options(args)
    except SystemExit:
        print('\nControlled exit resulting from an unrecoverable error or warning.')
    except Exception:
        print('\nUnexpected error:', sys.exc_info()[0])
        raise
Example #6
        type=float,
        default=0.25,
        help=
        'minimum percentage of the same amino acid required to retain column')
    parser.add_argument(
        '--max_consensus',
        type=float,
        default=0.95,
        help=
        'maximum percentage of the same amino acid required to retain column')
    parser.add_argument(
        '--min_perc_taxa',
        type=float,
        default=0.50,
        help='minimum percentage of taxa required to retain column')
    parser.add_argument('--out_dir', help='output directory')

    args = parser.parse_args()

    logger_setup(args.out_dir, "trim_msa.log", "trim_msa", __version__, False)

    try:
        p = TrimMSA(args.cols_per_gene, args.min_perc_aa, args.min_consensus,
                    args.max_consensus, args.min_perc_taxa, args.out_dir)
        p.run(args.msa, args.marker_list)
    except SystemExit:
        print("\nControlled exit resulting from an unrecoverable error or warning.")
    except Exception:
        print("\nUnexpected error:", sys.exc_info()[0])
        raise
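# A hedged sketch of the per-column test implied by the trim_msa options above
# (the TrimMSA class itself is not shown): keep a column when enough taxa have
# a residue in it and its consensus fraction lies between min_consensus and
# max_consensus. Whether the real tool computes the fraction over all taxa or
# only over non-gap residues is an assumption here.
from collections import Counter

def keep_column_sketch(column, min_consensus=0.25, max_consensus=0.95, min_perc_taxa=0.50):
    """Decide whether an alignment column passes the occupancy and consensus filters."""
    residues = [aa for aa in column if aa not in '-.']
    if not residues or len(residues) < min_perc_taxa * len(column):
        return False
    most_common_frac = Counter(residues).most_common(1)[0][1] / float(len(column))
    return min_consensus <= most_common_frac <= max_consensus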