Ejemplo n.º 1
0
def check_dependencies():
    """Verify that the external ``bzip2`` program can be launched.

    Runs ``bzip2 --help`` with both output streams discarded. Any exception
    raised by that call is reported through ``error`` (called with
    ``exit=True``), with the exception text appended to the message.
    """
    probe_command = ["bzip2", "--help"]
    try:
        sb.check_call(probe_command, stdout=sb.DEVNULL, stderr=sb.DEVNULL)
    except Exception as exc:
        message = ('Program not installed or not present in the system path\n'
                   + str(exc))
        error(message, init_new_line=True, exit=True)
Ejemplo n.º 2
0
def extract_markers(database, clade, output_dir):
    """Export the marker sequences of one clade to ``<output_dir><clade>.fna``.

    A temporary FASTA with the database markers is produced by
    ``generate_markers_fasta``; the records whose name belongs to a marker of
    ``clade`` are copied to the output file and the temporary FASTA is then
    removed.

    Args:
        database: path to the bz2-compressed pickle holding the database.
        clade: clade name compared against each marker's ``'clade'`` entry.
        output_dir: prefix prepended verbatim to the output file name (it is
            concatenated, not joined, so it should end with a path separator).

    Returns:
        The path of the written FASTA file.

    ``error`` is called with ``exit=True`` when no marker matches ``clade``.
    """
    info('\tGenerating DB markers FASTA...', init_new_line=True)
    fasta_markers = generate_markers_fasta(database, output_dir)
    info('\tDone.', init_new_line=True)
    info('\tLoading MetaPhlan ' + __version__ + ' database...',
         init_new_line=True)
    # NOTE(review): unpickling runs arbitrary code embedded in the file; only
    # load databases that come from a trusted source.
    # The context manager closes the compressed file once it has been read.
    with bz2.BZ2File(database) as db_handle:
        db = pickle.load(db_handle)
    info('\tDone.', init_new_line=True)

    db_markers = db['markers']
    markers = {
        marker for marker in db_markers
        if db_markers[marker]['clade'] == clade
    }
    if not markers:
        error("No markers were found for the clade \"" + clade +
              "\" in the database",
              exit=True,
              init_new_line=True)
    info('\tNumber of markers for the clade \"' + clade + "\": " +
         str(len(markers)),
         init_new_line=True)

    output_file = output_dir + clade + ".fna"
    info('\tExporting markers...', init_new_line=True)
    # Both handles are closed on exit, including when parsing fails midway.
    with open(output_file, 'w') as ofile, open(fasta_markers, 'r') as ifile:
        for rec in SeqIO.parse(ifile, 'fasta'):
            if rec.name in markers:
                SeqIO.write(rec, ofile, 'fasta')
    info('\tDone.', init_new_line=True)

    os.remove(fasta_markers)
    return output_file
Ejemplo n.º 3
0
def check_dependencies():
    """Verify that the external ``bowtie2-inspect`` program can be launched.

    Invokes ``bowtie2-inspect`` without arguments and with both output
    streams discarded. Any exception raised by that call is reported through
    ``error`` (called with ``exit=True``), with the exception text appended.
    """
    program = "bowtie2-inspect"
    try:
        sb.check_call(program, stdout=sb.DEVNULL, stderr=sb.DEVNULL)
    except Exception as exc:
        message = ('Program not installed or not present in the system path\n'
                   + str(exc))
        error(message, init_new_line=True, exit=True)
Ejemplo n.º 4
0
def check_input_files(input, input_format):
    """Check that every input file exists and has the expected extension.

    Args:
        input: iterable of file paths.
        input_format: expected extension without the leading dot; compared
            case-insensitively with each file's extension.

    Returns:
        True once all files have been checked.

    ``error`` is called with ``exit=True`` for a missing file or for a file
    whose extension differs from ``input_format``.
    """
    for s in input:
        _, extension = os.path.splitext(s)
        if not os.path.exists(s):
            error('The input file \"' + s + '\" does not exist',
                  exit=True, init_new_line=True)
        # extension[1:] drops the leading dot returned by splitext
        elif input_format.lower() != extension[1:].lower():
            error('The input file \"' + s + '\" must be in \"' +
                  input_format.upper() + '\" format',
                  exit=True, init_new_line=True)
    return True
Ejemplo n.º 5
0
def samples_to_markers(input, sorted, input_format, output_dir, breath_threshold, nprocs):
    """Convert the input samples and run CMSeq on them via a scratch folder.

    A ``tmp/`` folder is created under ``output_dir`` (which is concatenated
    verbatim, so it should end with a path separator), handed to
    ``convert_inputs`` as working directory, and removed once
    ``execute_cmseq`` has finished.

    ``error`` is called with ``exit=True`` if the scratch folder cannot be
    created.
    """
    tmp_dir = output_dir + 'tmp/'
    try:
        os.mkdir(tmp_dir)
    except Exception as exc:
        message = 'Folder \"' + tmp_dir + '\" already exists!\n' + str(exc)
        error(message, exit=True, init_new_line=True)

    converted = convert_inputs(input, sorted, input_format, tmp_dir, nprocs)
    execute_cmseq(converted, output_dir, breath_threshold, nprocs)

    # Remove the scratch folder; failures while deleting are not ignored.
    shutil.rmtree(tmp_dir)
Ejemplo n.º 6
0
def execute_pool(args, nprocs):
    """Run ``parallel_execution`` over ``args`` in a pool of ``nprocs`` workers.

    Each worker is initialised with ``init_terminating`` and a shared
    ``Event``. Results are collected in completion order, not input order.

    Returns:
        A list with one result per element of ``args``.

    Any exception raised while collecting results is reported through
    ``error`` (called with ``exit=True``).
    """
    terminating = Event()
    pool_options = dict(initializer=init_terminating,
                        initargs=(terminating, ),
                        processes=nprocs)
    with Pool(**pool_options) as pool:
        try:
            pending = pool.imap_unordered(parallel_execution,
                                          args,
                                          chunksize=CHUNKSIZE)
            return list(pending)
        except Exception as exc:
            error('Parallel execution fails: ' + str(exc),
                  init_new_line=True,
                  exit=True)
Ejemplo n.º 7
0
def execute(cmd):
    """Run an external command described by the ``cmd`` mapping.

    Args:
        cmd: mapping with the keys
            ``'command_line'`` - argument list passed to ``subprocess.run``;
            ``'stdin'`` - path of a file to feed as standard input, or a
            falsy value to leave standard input untouched;
            ``'stdout'`` - path of a file receiving standard output, or a
            falsy value to discard it.

    ``error`` is called with ``exit=True`` when the command's return code
    is 1.
    """
    inp_f = None
    out_f = sb.DEVNULL
    opened = []

    try:
        if cmd['stdin']:
            inp_f = open(cmd['stdin'], 'r')
            opened.append(inp_f)
        if cmd['stdout']:
            out_f = open(cmd['stdout'], 'w')
            opened.append(out_f)

        exec_res = sb.run(cmd['command_line'], stdin=inp_f, stdout=out_f)
    finally:
        # Close the redirection files even if the command could not be
        # started, and before any exit triggered by the error report below.
        for handle in opened:
            handle.close()

    # NOTE(review): only return code 1 is treated as a failure; other
    # non-zero codes pass silently - confirm this is intended.
    if exec_res.returncode == 1:
        error("An error was ocurred executing a external tool, exiting...",
              init_new_line=True,
              exit=True)
Ejemplo n.º 8
0
def decompress_from_bz2(input, tmp_dir, nprocs):
    """Decompress the BZ2 inputs in parallel and report their common format.

    One ``decompress_bz2_file`` job per input is dispatched through
    ``execute_pool``; each result supplies the decompressed item and its
    format (the first character of the format is skipped before comparing,
    presumably a leading dot - verify against ``decompress_bz2_file``).

    Returns:
        ``(decompressed, "sam")`` or ``(decompressed, "bam")`` when every
        result shares that format.

    ``error`` is called with ``exit=True`` when the formats differ or the
    shared format is neither SAM nor BAM.
    """
    jobs = ((decompress_bz2_file, path, tmp_dir) for path in input)
    decompressed, decompressed_format = [], []
    for result in execute_pool(jobs, nprocs):
        decompressed.append(result[0])
        decompressed_format.append(result[1])

    # Shifting the list by one and comparing detects any differing entry.
    if decompressed_format[1:] != decompressed_format[:-1]:
        error("Decompressed files have different formats",
            exit=True, init_new_line=True)
        return None

    common_format = decompressed_format[0][1:].lower()
    if common_format in ("sam", "bam"):
        return decompressed, common_format
    error("Decompressed files are not in SAM or BAM format",
        exit=True, init_new_line=True)
Ejemplo n.º 9
0
def check_params(args):
    """Validate the parsed command-line arguments.

    Requires at least one of ``args.tree`` / ``args.dist``, an
    ``args.output_dir`` that exists on disk, and ``args.metadata``. Each
    violation is reported through ``error`` (called with ``exit=True``).
    """
    if not (args.tree or args.dist):
        error('-t (or --tree) must be specified', exit=True,
            init_new_line=True)

    if args.output_dir:
        if not os.path.exists(args.output_dir):
            missing_dir = 'The directory {} does not exist'.format(args.output_dir)
            error(missing_dir, exit=True, init_new_line=True)
    else:
        error('-o (or --output_dir) must be specified', exit=True,
            init_new_line=True)

    if not args.metadata:
        error('-m (or --metadata) must be specified', exit=True,
            init_new_line=True)
Ejemplo n.º 10
0
def check_params(args):
    """Validate the parsed command-line arguments and normalise the output dir.

    Checks, in order, that ``args.input``, ``args.input_format`` and
    ``args.output_dir`` are set and that the format is one of SAM, BAM or
    BZ2 (case-insensitive). The first failing check is reported through
    ``error`` (called with ``exit=True``); when all pass, the input files are
    verified with ``check_input_files``.

    Returns:
        ``args``, with a trailing ``/`` appended to ``args.output_dir`` if it
        was missing.
    """
    accepted_formats = ("bam", "sam", "bz2")

    problem = None
    if not args.input:
        problem = '-i (or --input) must be specified'
    elif not args.input_format:
        problem = '-f (or --input_format) must be specified'
    elif not args.output_dir:
        problem = '-o (or --output_dir) must be specified'
    elif args.input_format.lower() not in accepted_formats:
        problem = 'The input format must be SAM, BAM, or compressed in BZ2 format'

    if problem is None:
        check_input_files(args.input, args.input_format)
    else:
        error(problem, exit=True, init_new_line=True)

    if not args.output_dir.endswith('/'):
        args.output_dir = args.output_dir + '/'
    return args
Ejemplo n.º 11
0
def check_params(args):
    """Validate the parsed command-line arguments and normalise the output dir.

    Checks, in order, that ``args.clade`` and ``args.output_dir`` are set and
    that both ``args.output_dir`` and ``args.database`` exist on disk. The
    first failing check is reported through ``error`` (called with
    ``exit=True``).

    Returns:
        ``args``, with a trailing ``/`` appended to ``args.output_dir`` if it
        was missing.
    """
    problem = None
    if not args.clade:
        problem = '-c (or --clade) must be specified'
    elif not args.output_dir:
        problem = '-o (or --output_dir) must be specified'
    elif not os.path.exists(args.output_dir):
        problem = 'The directory {} does not exist'.format(args.output_dir)
    elif not os.path.exists(args.database):
        problem = 'The database does not exist'

    if problem is not None:
        error(problem, exit=True, init_new_line=True)

    if not args.output_dir.endswith('/'):
        args.output_dir = args.output_dir + '/'
    return args
Ejemplo n.º 12
0
def check_params(args):
    """Validate the parsed command-line arguments.

    Requires at least one of ``args.tree`` / ``args.dist``, plus
    ``args.output_dir`` and ``args.metadata``. Each missing requirement is
    reported through ``error`` (called with ``exit=True``).
    """
    # Each entry: attributes of which at least one must be set, and the
    # message reported when none of them is.
    requirements = (
        (('tree', 'dist'), '-t (or --tree) must be specified'),
        (('output_dir', ), '-o (or --output_dir) must be specified'),
        (('metadata', ), '-m (or --metadata) must be specified'),
    )
    for attributes, message in requirements:
        if not any(getattr(args, name) for name in attributes):
            error(message, exit=True, init_new_line=True)
Ejemplo n.º 13
0
def compose_command(params,
                    check=False,
                    input_file=None,
                    database=None,
                    output_path=None,
                    output_file=None,
                    nproc=1):
    """Build the argument list and I/O settings for an external program.

    ``params['command_line']`` is a template whose ``#program_name#``,
    ``#params#``, ``#threads#``, ``#input#``, ``#database#``, ``#output#`` and
    ``#output_path#`` placeholders are filled from ``params`` and from the
    arguments. A ``<`` or ``>`` in the template means the input/output is
    passed through stdin/stdout instead of on the command line.

    Args:
        params: mapping with ``'command_line'`` and ``'program_name'`` plus
            the optional keys ``'version'``, ``'params'``, ``'threads'``,
            ``'input'``, ``'database'``, ``'output'``, ``'output_path'`` and
            ``'environment'`` (comma-separated ``VAR=value`` pairs).
        check: when true, only the program name (followed by
            ``params['version']`` if present) forms the command line.
        input_file: input file path, if any.
        database: database path, used only when ``params`` has ``'database'``.
        output_path: output directory; joined with ``output_file`` when
            ``params`` has no ``'output_path'`` option.
        output_file: output file name/path, if any.
        nproc: value placed after the ``params['threads']`` option.

    Returns:
        A dict with ``'command_line'`` (list of arguments), ``'stdin'``,
        ``'stdout'``, ``'env'`` (copy of ``os.environ`` plus overrides),
        ``'output_path'`` and ``'output_file'``.

    ``error`` is called with ``exit=True`` when ``params`` has no
    ``'program_name'``.
    """
    program_name = None
    stdin = None
    stdout = None
    environment = os.environ.copy()
    r_output_path = None
    r_output_file = None
    command_line = params['command_line']

    if 'program_name' in params:
        program_name = params['program_name']
        command_line = command_line.replace('#program_name#', program_name)
    else:
        # program_name is still None here, so it cannot be concatenated into
        # the message; name the missing key instead.
        error('Error: something wrong... "program_name" not found!',
              exit=True)

    if check:
        command_line = program_name

        if 'version' in params:
            command_line = '{} {}'.format(program_name, params['version'])
    else:
        if 'params' in params:
            command_line = command_line.replace('#params#', params['params'])

        if 'threads' in params:
            command_line = command_line.replace(
                '#threads#', '{} {}'.format(params['threads'], nproc))

        if output_path:
            r_output_path = output_path

            if 'output_path' in params:
                command_line = command_line.replace(
                    '#output_path#', '{} {}'.format(params['output_path'],
                                                    output_path))
            else:
                # No dedicated option: fold the directory into the file path.
                output_file = os.path.join(output_path, output_file)

        if input_file:
            inp = input_file

            if 'input' in params:
                inp = '{} {}'.format(params['input'], input_file)

            if '<' in command_line:
                # Input is redirected: drop the marker and feed it via stdin.
                command_line = command_line.replace('<', '')
                command_line = command_line.replace('#input#', '')
                stdin = inp
            else:
                command_line = command_line.replace('#input#', inp)

        if database and ('database' in params):
            command_line = command_line.replace(
                '#database#', '{} {}'.format(params['database'], database))

        if output_file:
            out = output_file
            r_output_file = output_file

            if 'output' in params:
                out = '{} {}'.format(params['output'], output_file)

            if '>' in command_line:
                # Output is redirected: drop the marker and capture stdout.
                command_line = command_line.replace('>', '')
                command_line = command_line.replace('#output#', '')
                stdout = out
            else:
                command_line = command_line.replace('#output#', out)

        if 'environment' in params:
            for assignment in params['environment'].split(','):
                # Split on the first '=' only so values may contain '='.
                var, val = assignment.strip().split('=', 1)
                environment[var.strip()] = val.strip()

    # Spaces inside double-quoted spans are temporarily replaced with '#' so
    # that each quoted span survives the split below as a single argument.
    quotes = [j for j, e in enumerate(command_line) if e == '"']

    for s, e in zip(quotes[0::2], quotes[1::2]):
        quoted = command_line[s + 1:e]
        command_line = command_line.replace(quoted, quoted.replace(' ', '#'))

    arguments = [
        str(a).replace('#', ' ')
        for a in re.sub(' +', ' ', command_line.replace('"', '')).split(' ')
        if a
    ]

    return {
        'command_line': arguments,
        'stdin': stdin,
        'stdout': stdout,
        'env': environment,
        'output_path': r_output_path,
        'output_file': r_output_file,
    }
Ejemplo n.º 14
0
              'Duy Tin Truong ([email protected]), '
              'Francesco Asnicar ([email protected]), '
              'Moreno Zolfo ([email protected]), '
              'Francesco Beghini ([email protected])')
__version__ = '3.0.11'
__date__ = '07 Jul 2021'

import sys
try:
    from .util_fun import info, error
except ImportError:
    from util_fun import info, error

# Refuse to run under Python 2: report the interpreter's major.minor.micro
# version through error(), which is called with exit=True.
if sys.version_info[0] < 3:
    error("StrainPhlAn " + __version__ +
          " requires Python 3, your current Python version is {}.{}.{}".format(
              sys.version_info[0], sys.version_info[1], sys.version_info[2]),
          exit=True)

import pickle, bz2, os, time
import subprocess as sb
import argparse as ap
from Bio import SeqIO, Seq, SeqRecord
try:
    from .external_exec import generate_markers_fasta
except ImportError:
    from external_exec import generate_markers_fasta

# Absolute path of the directory that contains this script.
metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
# Default database location: the "metaphlan_databases" folder one level above
# the script directory (the ".." component is left unresolved in the path).
DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder,
                                 "../metaphlan_databases")