def __init__(self, output_dir):
    """Prepare the output directory, logging and BLAST hit record type.

    Parameters
    ----------
    output_dir : str
        Directory that receives the log file; it is created when it
        does not exist yet.
    """
    # Dependency check comes first, before the output directory is
    # created on disk.
    check_dependencies(['blastn', 'makeblastdb'])

    self.output_dir = output_dir
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir)

    logger_setup(self.output_dir,
                 "in_silico_probes.log",
                 "in_silico_probes",
                 __version__,
                 False)
    self.logger = logging.getLogger('timestamp')

    # Tabular output specification with twelve columns (presumably handed
    # to blastn's -outfmt option -- confirm at the call site).
    self.output_fmt = ('6 qseqid qlen qseq sseqid slen sseq '
                       'length mismatch gaps pident bitscore evalue')

    # Record type with one field per column of output_fmt, in the same
    # order.
    hit_fields = ('query_id query_len query_aln_seq '
                  'subject_id subject_len subject_aln_seq '
                  'aln_len mismatch gaps perc_identity bitscore evalue')
    self.BlastHit = namedtuple('BlastHit', hit_fields)
def __init__(self, tmp_dir, output_dir, cpus):
    """Store run settings, database locations and set up logging.

    Parameters
    ----------
    tmp_dir : str
        Stored as ``self.tmp_dir``.
    output_dir : str
        Stored as ``self.output_dir`` and passed to ``logger_setup``.
    cpus : int
        Stored as ``self.cpus`` (presumably the number of CPUs to use --
        confirm against callers).
    """
    check_dependencies(['prodigal',
                        'hmmsearch',
                        'pfam_search.pl',
                        'genometk'])

    # Run settings supplied by the caller.
    self.cpus = cpus
    self.output_dir = output_dir
    self.tmp_dir = tmp_dir

    # Hard-coded TIGRFAM / Pfam database locations and the suffixes used
    # for the corresponding result files.
    self.tigrfam_hmms = '/srv/whitlam/bio/db/tigrfam/15.0/TIGRFAMs_15.0_HMM/tigrfam.hmm'
    self.tigrfam_ext = '_tigrfam.tsv'
    self.pfam_hmm_dir = '/srv/db/pfam/27/'
    self.pfam_ext = '_pfam.tsv'
    self.protein_file_ext = '_protein.faa'

    logger_setup(self.output_dir,
                 "gtdb_protein_pipeline.log",
                 "gtdb_protein_pipeline",
                 __version__,
                 False)
    self.logger = logging.getLogger('timestamp')
def main():
    """Command-line entry point for bamtk.

    Builds the argument parser with the ``mm_features``,
    ``mm_annotated_features`` and ``mm_wf`` sub-commands, prints the custom
    help when no argument or a help flag is given, configures logging and
    hands the parsed arguments to ``OptionsParser.parse_options``.

    A ``SystemExit`` raised while the options are processed is reported
    with a short message and not propagated; any other exception is
    reported and re-raised.
    """
    # The top-level parser has its own help disabled: help is rendered by
    # print_help() below.
    parser = argparse.ArgumentParser(
        add_help=False,
        description="BAM-Tk is a software toolkit for dealing with Binary Alignment Map (BAM) files.",
        epilog="Written by Corentin Hochart ([email protected]), " +
        "UMR CNRSS 6023 Laboratoire Genome et Environement (LMGE), " +
        "as part of the [ANR Eureka](https://anr.fr/Projet-ANR-14-CE02-0004) project. " +
        "Released under the terms of the GNU General Public License v3. " +
        "bamtk version %s." % version())
    subparsers = parser.add_subparsers(help="--", dest='subparser_name')

    # ------------------------------------------------------------------
    # mm_features
    # ------------------------------------------------------------------
    mm_featuresparser = subparsers.add_parser('mm_features', description='')
    mm_featuresparser.add_argument(
        'faidx',
        help='samtools fasta index of the reference')
    mm_featuresparser.add_argument(
        'bam_list',
        help='list of bam format alignement file(s) path')
    mm_featuresparser.add_argument(
        'output_dir',
        help='directory to write output files')

    mm_featuresinput_argument = mm_featuresparser.add_argument_group(
        'optional input arguments')
    mm_featuresinput_argument.add_argument(
        '-x', '--extension',
        help='bam file prefix',
        default='bam')
    mm_featuresinput_argument.add_argument(
        '-fx', '--faidx_extension',
        help='faidx file prefix',
        default='fasta.fai')
    mm_featuresinput_argument.add_argument(
        '-t', '--threads',
        help='threads number for "samtools view"',
        default='2')
    mm_featuresinput_argument.add_argument(
        '-Q', '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    mm_featuresinput_argument.add_argument(
        '-i', '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)
    mm_featuresinput_argument.add_argument(
        '-m', '--merge',
        help='merge features abundance by field',
        action='store_true')
    mm_featuresinput_argument.add_argument(
        '-s', '--separator',
        help='filed separator for -m/--merge argument',
        default='.')
    mm_featuresinput_argument.add_argument(
        '-g', '--genome',
        help='sum abundance of all features',
        action='store_true')

    mm_featuresoutput_argument = mm_featuresparser.add_argument_group(
        'optional output arguments')
    mm_featuresoutput_argument.add_argument(
        '-n', '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_featuresoutput_argument.add_argument(
        '-sn', '--feature_size_normalisation',
        help="get the number of features per X bases [Default: 1000]",
        default=1000,
        type=int)
    mm_featuresoutput_argument.add_argument(
        '-f', '--discard_feature_length_normalisation',
        help="discard feature length normalisation for base count abundance output",
        action='store_true')
    mm_featuresoutput_argument.add_argument(
        '-l', '--discard_library_size_normalisation',
        help="discard library size normalisation for reads and bases count abundance output",
        action='store_true')
    mm_featuresoutput_argument.add_argument(
        '-lsn', '--library_size_normalisation',
        help="library size normalisation by total number of reads count or by number of aligned reads ",
        choices=['total', 'aligned'],
        default='total')
    mm_featuresoutput_argument.add_argument(
        '--removed',
        help="removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    mm_featuresparser.add_argument(
        '--silent',
        help='suppress output of logger',
        action='store_true')
    mm_featuresparser.add_argument(
        '--force_overwrite',
        help='force overwriting of output directory',
        action="store_true",
        default=False)
    mm_featuresparser.add_argument(
        '--version',
        help='print version and exit',
        action='version',
        version='bamtk ' + version())

    # ------------------------------------------------------------------
    # mm_annotated_features
    # ------------------------------------------------------------------
    mm_annotated_features_parser = subparsers.add_parser(
        'mm_annotated_features', description='')
    mm_annotated_features_parser.add_argument(
        'features_dir',
        help='directory specified during features command')
    mm_annotated_features_parser.add_argument(
        'features_annotation',
        help='features annotation file in tabular format')
    mm_annotated_features_parser.add_argument(
        'annotation_description',
        help='annotation description file in tabular format')

    mm_annotated_features_input_argument = mm_annotated_features_parser.add_argument_group(
        'optional input arguments')
    mm_annotated_features_input_argument.add_argument(
        '--library_size',
        help="Tabular file with sample library size to produce normalised count matrix")

    mm_annotated_features_output_argument = mm_annotated_features_parser.add_argument_group(
        'optional output arguments')
    mm_annotated_features_output_argument.add_argument(
        '-f', '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_annotated_features_output_argument.add_argument(
        '--removed',
        help="removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    mm_annotated_features_parser.add_argument(
        '--silent',
        help='suppress output of logger',
        action='store_true')
    mm_annotated_features_parser.add_argument(
        '--force_overwrite',
        help='force overwriting of output directory',
        action="store_true",
        default=False)

    # ------------------------------------------------------------------
    # mm_wf
    # ------------------------------------------------------------------
    mm_wf_parser = subparsers.add_parser(
        'mm_wf',
        description='Run features and annotate_features command',
        epilog='bamtk mm_wf ./file.fai ./bam_list.tsv ./features2annotation.tsv ./annotationDescription.tsv ./output')
    mm_wf_parser.add_argument(
        'faidx',
        help='samtools fasta index of the reference')
    mm_wf_parser.add_argument(
        'bam_list',
        help='list of bam format alignement file(s) path ')
    mm_wf_parser.add_argument(
        'features_annotation',
        help='features annotation file in tabular format')
    mm_wf_parser.add_argument(
        'annotation_description',
        help='annotation description file in tabular format')
    mm_wf_parser.add_argument(
        'output_dir',
        help='directory to write output files')

    mm_wf_input_argument = mm_wf_parser.add_argument_group(
        'optional input arguments')
    mm_wf_input_argument.add_argument(
        '-x', '--extension',
        help='bam file prefix',
        default='bam')
    mm_wf_input_argument.add_argument(
        '-fx', '--faidx_extension',
        help='faidx file prefix',
        default='fasta.fai')
    mm_wf_input_argument.add_argument(
        '-t', '--threads',
        help='threads number for "samtools view"',
        default='2')
    mm_wf_input_argument.add_argument(
        '-Q', '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    mm_wf_input_argument.add_argument(
        '-i', '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)

    mm_wf_output_argument = mm_wf_parser.add_argument_group(
        'optional output arguments')
    mm_wf_output_argument.add_argument(
        '-f', '--feature_normalisation',
        help="get the number of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    mm_wf_output_argument.add_argument(
        '-g', '--discard_gene_length_normalisation',
        help="discard gene length normalisation for base count abundance output",
        action='store_true')
    mm_wf_output_argument.add_argument(
        '--removed',
        help="removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    mm_wf_parser.add_argument(
        '--silent',
        help='suppress output of logger',
        action='store_true')
    mm_wf_parser.add_argument(
        '--force_overwrite',
        help='force overwriting of output directory',
        action="store_true",
        default=False)
    mm_wf_parser.add_argument(
        '--version',
        help='print version and exit',
        action='version',
        version='bamtk ' + version())

    # ------------------------------------------------------------------
    # get and check options
    # ------------------------------------------------------------------
    # Test the first argument itself: comparing the whole sys.argv list to
    # '--help' is always False, which made '--help' skip the custom help.
    if len(sys.argv) == 1 or sys.argv[1] in ('-h', '--help'):
        print_help()
        sys.exit(0)

    args = parser.parse_args()

    # Not every sub-command defines output_dir (mm_annotated_features does
    # not), and logger_setup itself may fail; in either case fall back to
    # a logger without a log directory.
    try:
        logger_setup(args.output_dir, "bamtk.log", "bamtk", version(),
                     args.silent)
    except Exception:
        logger_setup(None, "bamtk.log", "bamtk", version(), args.silent)

    # do what we came here to do
    try:
        options_parser = OptionsParser()
        options_parser.parse_options(args)
    except SystemExit:
        print('Unrecoverable error.')
    except BaseException:
        # Report and propagate everything else unchanged.
        print("\nUnexpected error:", sys.exc_info()[0])
        raise
def main():
    """Command-line entry point for MAMa.

    Parses the command line, configures logging, checks that at least one
    of the read-count output files was requested and then calls
    ``matrix_maker`` with the parsed options.
    """
    parser = argparse.ArgumentParser(
        # The two halves used to be joined without a separating space,
        # which rendered as "abundancematrix" in the --help output.
        description="This script allow the construction of abundance " +
        "matrix from a list of bam file.",
        epilog="Written by Corentin Hochart ([email protected]), " +
        "UMR CNRSS 6023 Laboratoire Genome et Environement (LMGE). " +
        "Released under the terms of the GNU General Public License v3. " +
        "MAMa version %s." % version())

    parser.add_argument(
        'faidx',
        help='samtools fasta index of the reference')
    parser.add_argument(
        'bam_list',
        help='list of bam format alignement file(s) path ')

    input_argument = parser.add_argument_group('optional input arguments')
    input_argument.add_argument(
        '-x', '--extension',
        help='bam file prefix',
        default='bam')
    input_argument.add_argument(
        '-t', '--threads',
        help='threads number for "samtools view"',
        default='2')
    input_argument.add_argument(
        '-Q', '--mapQ',
        help='only include reads with mapping quality >= INT [10]',
        default='10')
    input_argument.add_argument(
        '-i', '--id_cutoff',
        help='only include reads with identity >= INT [0]',
        default=0)

    output_argument = parser.add_argument_group('optional output arguments')
    output_argument.add_argument(
        '-a', '--abundance',
        help="reads count abundance output")
    output_argument.add_argument(
        '-n', '--normalised',
        help="reads count normalised abundance output (feature per X reads ; see '-f' argument)")
    output_argument.add_argument(
        '-r', '--relative',
        help="reads count relative abundance output")
    output_argument.add_argument(
        '-ba', '--base_abundance',
        help="base count abundance output")
    output_argument.add_argument(
        '-bn', '--base_normalised',
        help="base count normalised abundance output (feature per X reads ; see '-f' argument)")
    output_argument.add_argument(
        '-br', '--base_relative',
        help="base count relative abundance output")
    output_argument.add_argument(
        '-f', '--feature_normalisation',
        help="get the numer of features per X reads [Default: 1000000]",
        default=1000000,
        type=int)
    output_argument.add_argument(
        '-g', '--discard_gene_length_normalisation',
        help="discard gene length normalisation for base count abundance output",
        action='store_true')
    output_argument.add_argument(
        '--removed',
        help="removed features who do not appears in samples (sum of abundance through sample = 0)",
        action='store_true')

    parser.add_argument(
        '--silent',
        help='suppress output of logger',
        action='store_true')
    parser.add_argument(
        '--version',
        help='print version and exit',
        action='version',
        version='MAMa ' + version())

    args = parser.parse_args()

    # Best effort: try to log into the 'log' directory first and fall back
    # to a logger without a log directory when that fails.
    try:
        logger_setup('log', "MAMa.log", "MAMa", version(), args.silent)
    except Exception:
        logger_setup(None, "MAMa.log", "MAMa", version(), args.silent)

    # NOTE(review): only the read-count outputs are considered here; a run
    # that requests nothing but base-count outputs is rejected -- confirm
    # that this is intended.
    if not (args.abundance or args.normalised or args.relative):
        parser.error(
            "At least one output file name must be specified with "
            "'--relative' and/or '--normalised' and/or '--abundance'.")

    matrix_maker(args.faidx,
                 args.bam_list,
                 args.extension,
                 args.threads,
                 args.mapQ,
                 args.id_cutoff,
                 args.abundance,
                 args.normalised,
                 args.relative,
                 args.base_abundance,
                 args.base_normalised,
                 args.base_relative,
                 args.feature_normalisation,
                 args.discard_gene_length_normalisation,
                 args.removed)
unroot_tree_parser.add_argument('input_tree', help='') unroot_tree_parser.add_argument('output_tree', help='') unroot_tree_parser.add_argument('--silent', help="suppress output", action='store_true') # get and check options args = None if (len(sys.argv) == 1 or sys.argv[1] == '-h' or sys.argv == '--help'): print_help() sys.exit(0) else: args = parser.parse_args() try: logger_setup(args.output_dir, 'gtdbtk_toolset.log', 'GTDB Tk converter', version(), args.silent) except: logger_setup(None, 'gtdbtk_toolset.log', 'GTDB Tk converter', __version__, args.silent) # do what we came here to do try: parser = OptionsParser() if False: # import pstats # p = pstats.Stats('prof') # p.sort_stats('cumulative').print_stats(10) # p.sort_stats('time').print_stats(10) import cProfile cProfile.run('parser.parse_options(args)', 'prof')
type=float, default=0.25, help= 'minimum percentage of the same amino acid required to retain column') parser.add_argument( '--max_consensus', type=float, default=0.95, help= 'maximum percentage of the same amino acid required to retain column') parser.add_argument( '--min_perc_taxa', type=float, default=0.50, help='minimum percentage of taxa required to retain column') parser.add_argument('--out_dir', help='output directory') args = parser.parse_args() logger_setup(args.out_dir, "trim_msa.log", "trim_msa", __version__, False) try: p = TrimMSA(args.cols_per_gene, args.min_perc_aa, args.min_consensus, args.max_consensus, args.min_perc_taxa, args.out_dir) p.run(args.msa, args.marker_list) except SystemExit: print "\nControlled exit resulting from an unrecoverable error or warning." except: print "\nUnexpected error:", sys.exc_info()[0] raise