def check_dependencies():
    """Verify that the external ``bzip2`` program can be executed.

    Runs ``bzip2 --help`` with both output streams discarded. Any exception
    raised by the call is reported through ``error`` with ``exit=True``.
    """
    # NOTE: an equivalent probe for samtools ("samtools tview") is currently
    # disabled in this function.
    probe = ["bzip2", "--help"]
    try:
        sb.check_call(probe, stdout=sb.DEVNULL, stderr=sb.DEVNULL)
    except Exception as exc:
        message = 'Program not installed or not present in the system path\n' + str(exc)
        error(message, init_new_line=True, exit=True)
def extract_markers(database, clade, output_dir):
    """Export the marker sequences of one clade to ``<output_dir><clade>.fna``.

    A FASTA with all database markers is generated via
    ``generate_markers_fasta``, the bz2-compressed pickled database is loaded
    to find the markers whose ``'clade'`` entry equals ``clade``, and the
    matching records are copied to the output file. The intermediate FASTA is
    removed afterwards.

    Args:
        database: path to the bz2-compressed pickle database.
        clade: clade name compared against each marker's ``'clade'`` value.
        output_dir: output location; it is concatenated directly with the
            file name, so it is expected to end with a path separator.

    Returns:
        The path of the written FASTA file.
    """
    info('\tGenerating DB markers FASTA...', init_new_line=True)
    fasta_markers = generate_markers_fasta(database, output_dir)
    info('\tDone.', init_new_line=True)

    info('\tLoading MetaPhlan ' + __version__ + ' database...', init_new_line=True)
    # SECURITY NOTE: unpickling can execute arbitrary code; only load
    # databases obtained from a trusted source.
    with bz2.BZ2File(database) as db_handle:
        db = pickle.load(db_handle)
    info('\tDone.', init_new_line=True)

    markers = {
        marker
        for marker, entry in db['markers'].items()
        if entry['clade'] == clade
    }
    if not markers:
        error("No markers were found for the clade \"" + clade + "\" in the database",
              exit=True, init_new_line=True)
    info('\tNumber of markers for the clade \"' + clade + "\": " + str(len(markers)),
         init_new_line=True)

    output_file = output_dir + clade + ".fna"
    info('\tExporting markers...', init_new_line=True)
    # Both handles are managed by the ``with`` so they are closed even if
    # parsing or writing fails part-way through.
    with open(output_file, 'w') as ofile, open(fasta_markers, 'r') as ifile:
        for rec in SeqIO.parse(ifile, 'fasta'):
            if rec.name in markers:
                SeqIO.write(rec, ofile, 'fasta')
    info('\tDone.', init_new_line=True)

    os.remove(fasta_markers)
    return output_file
def check_dependencies():
    """Verify that the external ``bowtie2-inspect`` program can be executed.

    The program is invoked without arguments and with both output streams
    discarded. Any exception raised by the call is reported through ``error``
    with ``exit=True``.
    """
    program = "bowtie2-inspect"
    try:
        sb.check_call(program, stdout=sb.DEVNULL, stderr=sb.DEVNULL)
    except Exception as exc:
        message = 'Program not installed or not present in the system path\n' + str(exc)
        error(message, init_new_line=True, exit=True)
def check_input_files(input, input_format):
    """Check that every input file exists and has the expected extension.

    Args:
        input: iterable of file paths.
        input_format: expected extension without the leading dot; compared
            case-insensitively against each file's extension.

    Returns:
        ``True`` once all files have been checked. Problems are reported
        through ``error`` with ``exit=True``.
    """
    for path in input:
        if not os.path.exists(path):
            error('The input file \"' + path + '\" does not exist',
                  exit=True, init_new_line=True)
        elif os.path.splitext(path)[1][1:].lower() != input_format.lower():
            # Message fixed: the original read "The the input file ...".
            error('The input file \"' + path + '\" must be in \"' +
                  input_format.upper() + '\" format',
                  exit=True, init_new_line=True)
    return True
def samples_to_markers(input, sorted, input_format, output_dir, breath_threshold, nprocs):
    """Convert the input samples and run CMSeq on them.

    A ``tmp/`` folder is created under ``output_dir`` (which is concatenated
    directly, so it is expected to end with a path separator), the inputs are
    converted into it with ``convert_inputs``, ``execute_cmseq`` is run on the
    converted files, and the temporary folder is deleted at the end.

    Any failure to create the temporary folder is reported through ``error``
    with ``exit=True``.
    """
    scratch_dir = output_dir + 'tmp/'

    try:
        os.mkdir(scratch_dir)
    except Exception as exc:
        error('Folder \"' + scratch_dir + '\" already exists!\n' + str(exc),
              exit=True, init_new_line=True)

    converted = convert_inputs(input, sorted, input_format, scratch_dir, nprocs)
    execute_cmseq(converted, output_dir, breath_threshold, nprocs)

    # Defaults (ignore_errors=False, no error callback) match the explicit
    # arguments used previously.
    shutil.rmtree(scratch_dir)
def execute_pool(args, nprocs):
    """Run ``parallel_execution`` over ``args`` in a pool of ``nprocs`` workers.

    Each worker is initialised with ``init_terminating`` and a shared
    ``Event``. Results are collected in completion order (unordered map).

    Returns:
        A list with one result per element of ``args``. If the parallel run
        raises, the failure is reported through ``error`` with ``exit=True``.
    """
    terminating = Event()
    pool_options = dict(initializer=init_terminating,
                        initargs=(terminating, ),
                        processes=nprocs)

    with Pool(**pool_options) as pool:
        try:
            outcomes = pool.imap_unordered(parallel_execution, args, chunksize=CHUNKSIZE)
            return list(outcomes)
        except Exception as exc:
            error('Parallel execution fails: ' + str(exc), init_new_line=True, exit=True)
def execute(cmd):
    """Run an external command described by the ``cmd`` dictionary.

    Args:
        cmd: mapping with the keys ``'command_line'`` (argument list passed to
            ``subprocess.run``), ``'stdin'`` (path of a file to feed to the
            process, or a falsy value) and ``'stdout'`` (path of a file that
            receives the process output, or a falsy value to discard it).

    A return code of 1 is reported through ``error`` with ``exit=True``.
    Files opened for redirection are always closed, including when the
    subprocess call raises or the error handler exits.
    """
    inp_f = None
    out_handle = None
    try:
        if cmd['stdin']:
            inp_f = open(cmd['stdin'], 'r')
        if cmd['stdout']:
            out_handle = open(cmd['stdout'], 'w')

        # Without an output file the process output is discarded.
        out_f = out_handle if out_handle is not None else sb.DEVNULL
        exec_res = sb.run(cmd['command_line'], stdin=inp_f, stdout=out_f)

        # NOTE(review): only exit status 1 is treated as a failure; other
        # non-zero codes pass silently -- confirm this is intended.
        if exec_res.returncode == 1:
            error("An error was ocurred executing a external tool, exiting...",
                  init_new_line=True, exit=True)
    finally:
        if inp_f is not None:
            inp_f.close()
        if out_handle is not None:
            out_handle.close()
def decompress_from_bz2(input, tmp_dir, nprocs):
    """Decompress the BZ2 inputs in parallel and determine their common format.

    Each input is handed to ``decompress_bz2_file`` through ``execute_pool``.
    Every result is indexed as ``r[0]`` (collected into the returned list) and
    ``r[1]`` (a format string whose first character is skipped before the
    comparison -- presumably a file extension with its leading dot).

    Args:
        input: iterable of compressed input files.
        tmp_dir: folder passed to ``decompress_bz2_file``.
        nprocs: number of worker processes.

    Returns:
        A tuple ``(decompressed, fmt)`` where ``fmt`` is ``"sam"`` or
        ``"bam"``. An empty result set, mixed formats, or any other format is
        reported through ``error`` with ``exit=True``.
    """
    results = execute_pool(((decompress_bz2_file, i, tmp_dir) for i in input), nprocs)
    decompressed = [r[0] for r in results]
    decompressed_format = [r[1] for r in results]

    if not decompressed_format:
        # Previously an empty result list crashed with IndexError below.
        error("No input files were decompressed", exit=True, init_new_line=True)
    elif decompressed_format[1:] != decompressed_format[:-1]:
        # Shifted-slice comparison: unequal means not all formats are equal.
        error("Decompressed files have different formats", exit=True, init_new_line=True)
    else:
        common_format = decompressed_format[0][1:].lower()
        if common_format in ("sam", "bam"):
            return decompressed, common_format
        error("Decompressed files are not in SAM or BAM format",
              exit=True, init_new_line=True)
def check_params(args):
    """Validate the parsed command-line arguments.

    Requires at least one of ``args.tree`` / ``args.dist``, an ``output_dir``
    that exists on disk, and ``args.metadata``. Each violation is reported
    through ``error`` with ``exit=True``.
    """
    tree_or_dist_given = bool(args.tree or args.dist)
    if not tree_or_dist_given:
        error('-t (or --tree) must be specified', exit=True, init_new_line=True)

    if not args.output_dir:
        error('-o (or --output_dir) must be specified', exit=True, init_new_line=True)
    else:
        if not os.path.exists(args.output_dir):
            error('The directory {} does not exist'.format(args.output_dir),
                  exit=True, init_new_line=True)

    if not args.metadata:
        error('-m (or --metadata) must be specified', exit=True, init_new_line=True)
def check_params(args):
    """Validate the parsed command-line arguments and normalise ``output_dir``.

    The checks run in order and stop at the first failure: input given, input
    format given, output directory given, input format one of SAM/BAM/BZ2
    (case-insensitive). When all pass, the input files themselves are checked
    with ``check_input_files``. Failures are reported through ``error`` with
    ``exit=True``.

    Returns:
        ``args``, with ``output_dir`` guaranteed to end in ``'/'``.
    """
    accepted_formats = ("bam", "sam", "bz2")

    if not args.input:
        error('-i (or --input) must be specified', exit=True, init_new_line=True)
    elif not args.input_format:
        error('-f (or --input_format) must be specified', exit=True, init_new_line=True)
    elif not args.output_dir:
        error('-o (or --output_dir) must be specified', exit=True, init_new_line=True)
    elif args.input_format.lower() not in accepted_formats:
        error('The input format must be SAM, BAM, or compressed in BZ2 format',
              exit=True, init_new_line=True)
    else:
        check_input_files(args.input, args.input_format)

    # Later path building concatenates onto output_dir, so keep the separator.
    if not args.output_dir.endswith('/'):
        args.output_dir = args.output_dir + '/'
    return args
def check_params(args):
    """Validate the parsed command-line arguments and normalise ``output_dir``.

    The checks run in order and stop at the first failure: clade given,
    output directory given, output directory exists, database path exists.
    Failures are reported through ``error`` with ``exit=True``.

    Returns:
        ``args``, with ``output_dir`` guaranteed to end in ``'/'``.
    """
    if not args.clade:
        error('-c (or --clade) must be specified', exit=True, init_new_line=True)
    elif not args.output_dir:
        error('-o (or --output_dir) must be specified', exit=True, init_new_line=True)
    elif not os.path.exists(args.output_dir):
        error('The directory {} does not exist'.format(args.output_dir),
              exit=True, init_new_line=True)
    elif not os.path.exists(args.database):
        error('The database does not exist', exit=True, init_new_line=True)

    has_trailing_slash = args.output_dir.endswith('/')
    if not has_trailing_slash:
        args.output_dir = args.output_dir + '/'
    return args
def check_params(args):
    """Validate the parsed command-line arguments.

    Requires at least one of ``args.tree`` / ``args.dist``, plus
    ``args.output_dir`` and ``args.metadata``. Each missing value is reported
    through ``error`` with ``exit=True``.
    """
    tree_or_dist_given = bool(args.tree or args.dist)
    if not tree_or_dist_given:
        error('-t (or --tree) must be specified', exit=True, init_new_line=True)

    output_dir_given = bool(args.output_dir)
    if not output_dir_given:
        error('-o (or --output_dir) must be specified', exit=True, init_new_line=True)

    metadata_given = bool(args.metadata)
    if not metadata_given:
        error('-m (or --metadata) must be specified', exit=True, init_new_line=True)
def compose_command(params, check=False, input_file=None, database=None,
                    output_path=None, output_file=None, nproc=1):
    """Build the argument list and redirections for an external command.

    ``params['command_line']`` is a template containing placeholders such as
    ``#program_name#``, ``#params#``, ``#threads#``, ``#input#``,
    ``#database#``, ``#output#`` and ``#output_path#``. Each placeholder is
    replaced by the option stored under the matching ``params`` key followed
    by the corresponding argument value.

    Args:
        params: mapping with the template (``'command_line'``), the
            mandatory ``'program_name'`` and the optional keys ``'version'``,
            ``'params'``, ``'threads'``, ``'input'``, ``'database'``,
            ``'output'``, ``'output_path'`` and ``'environment'``
            (comma-separated ``VAR=value`` pairs).
        check: when true, only ``<program_name> [<version option>]`` is built.
        input_file: input file; used as stdin if the template contains ``<``.
        database: database argument, used only if ``params`` has
            ``'database'``.
        output_path: output folder; when ``params`` has no ``'output_path'``
            option it is joined in front of ``output_file`` instead.
        output_file: output file; used as stdout if the template contains
            ``>``.
        nproc: value appended to the ``'threads'`` option.

    Returns:
        A dict with the keys ``'command_line'`` (list of arguments),
        ``'stdin'``, ``'stdout'``, ``'env'``, ``'output_path'`` and
        ``'output_file'``.
    """
    program_name = None
    stdin = None
    stdout = None
    environment = os.environ.copy()
    r_output_path = None
    r_output_file = None
    command_line = params['command_line']

    if 'program_name' in params:
        program_name = params['program_name']
        command_line = command_line.replace('#program_name#', program_name)
    else:
        # The original concatenated program_name (still None here) into the
        # message, raising TypeError instead of reporting the problem.
        error('Error: something wrong... program_name not found!', exit=True)

    if check:
        command_line = program_name

        if 'version' in params:
            command_line = '{} {}'.format(program_name, params['version'])
    else:
        if 'params' in params:
            command_line = command_line.replace('#params#', params['params'])

        if 'threads' in params:
            command_line = command_line.replace(
                '#threads#', '{} {}'.format(params['threads'], nproc))

        if output_path:
            r_output_path = output_path

            if 'output_path' in params:
                command_line = command_line.replace(
                    '#output_path#', '{} {}'.format(params['output_path'], output_path))
            elif output_file is not None:
                # Guarded: joining with a missing output file raised TypeError.
                output_file = os.path.join(output_path, output_file)

        if input_file:
            inp = input_file

            if 'input' in params:
                inp = '{} {}'.format(params['input'], input_file)

            if '<' in command_line:
                # Template asks for redirection: feed the input through stdin.
                command_line = command_line.replace('<', '')
                command_line = command_line.replace('#input#', '')
                stdin = inp
            else:
                command_line = command_line.replace('#input#', inp)

        if database and ('database' in params):
            command_line = command_line.replace(
                '#database#', '{} {}'.format(params['database'], database))

        if output_file:
            out = output_file
            r_output_file = output_file

            if 'output' in params:
                out = '{} {}'.format(params['output'], output_file)

            if '>' in command_line:
                # Template asks for redirection: capture the output from stdout.
                command_line = command_line.replace('>', '')
                command_line = command_line.replace('#output#', '')
                stdout = out
            else:
                command_line = command_line.replace('#output#', out)

        if 'environment' in params:
            for assignment in params['environment'].split(','):
                assignment = assignment.strip()
                if not assignment:
                    # Tolerate blank entries such as a trailing comma.
                    continue
                # Split on the first '=' only so values may contain '='.
                var, val = assignment.split('=', 1)
                environment[var.strip()] = val.strip()

    # Find strings surrounded by double quotes and keep each as one argument:
    # their spaces are temporarily encoded as '#' so the split below does not
    # break them apart.
    quotes = [j for j, e in enumerate(command_line) if e == '"']

    for s, e in zip(quotes[0::2], quotes[1::2]):
        command_line = command_line.replace(
            command_line[s + 1:e], command_line[s + 1:e].replace(' ', '#'))

    arguments = [
        str(a).replace('#', ' ')
        for a in re.sub(' +', ' ', command_line.replace('"', '')).split(' ')
        if a
    ]

    return {
        'command_line': arguments,
        'stdin': stdin,
        'stdout': stdout,
        'env': environment,
        'output_path': r_output_path,
        'output_file': r_output_file
    }
'Duy Tin Truong ([email protected]), ' 'Francesco Asnicar ([email protected]), ' 'Moreno Zolfo ([email protected]), ' 'Francesco Beghini ([email protected])') __version__ = '3.0.11' __date__ = '07 Jul 2021' import sys try: from .util_fun import info, error except ImportError: from util_fun import info, error if sys.version_info[0] < 3: error("StrainPhlAn " + __version__ + " requires Python 3, your current Python version is {}.{}.{}".format( sys.version_info[0], sys.version_info[1], sys.version_info[2]), exit=True) import pickle, bz2, os, time import subprocess as sb import argparse as ap from Bio import SeqIO, Seq, SeqRecord try: from .external_exec import generate_markers_fasta except ImportError: from external_exec import generate_markers_fasta # get the directory that contains this script metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__)) DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "../metaphlan_databases")