Code example #1
File: graphsample.py  Project: VDBWRAIR/ngs_mapper
import sys
import os
from os.path import *
import subprocess
import argparse

import bqd, graph_qualdepth as qd
import samtools
from bam_to_qualdepth import set_unmapped_mapped_reads
import json
import log

logc = log.get_config( 'graphsample.log' )
logger = log.setup_logger( 'graphsample', logc )

def main():
    args = parse_args()
    args = handle_args( args )
    if not args.qualdepth:
        jfile = make_json( args.bamfile, args.outpath )
    else:
        jfile = args.qualdepth
    pngfile = make_image( jfile, args.outpath )

def make_json( bamfile, outpathprefix ):
    pileup = samtools.nogap_mpileup(bamfile)
    stats = bqd.parse_pileup( pileup )
    set_unmapped_mapped_reads( bamfile, stats )
    outfile = outpathprefix + '.qualdepth.json'
    with open( outfile, 'w' ) as fh:
        json.dump( stats, fh )
    return outfile
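
A minimal usage sketch with made-up paths (nothing here comes from the project); make_json writes <prefix>.qualdepth.json for the given prefix and returns that path:

# Hypothetical invocation, illustration only
jfile = make_json('/data/sample01.bam', '/tmp/sample01')
# jfile -> '/tmp/sample01.qualdepth.json'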
Code example #2
File: data.py  Project: pirekupcode/ngs_mapper
from glob import glob
from os.path import *
import os
import sys
import re
import log
from Bio import SeqIO
import gzip

logger = log.setup_logger(__name__, log.get_config())

ROCHE_FILE = '\S+?(?:__[0-9]){0,1}__(?:TI|RL)\d+__\d{4}_\d{2}_\d{2}__\w+.(sff|fastq)'
'''
Matches Roche sff or fastq files

sample__region__barcode__year_month_day__type.filetype
'''
ROCHE_ID = '[A-Z0-9]{14}'
'''
Matches Roche accessions, which are just 14 uppercase alphanumeric characters

@AAAAAAAAAAAAAA
'''
IONTORRENT_FILE = '\S+?__[0-9]__IX\d{3}__\d{4}_\d{2}_\d{2}__\w+.(sff|fastq)'
'''
Matches IonTorrent file names (essentially the same as Roche)

sample__region__barcode__year_month_day__type.filetype
'''
IONTORRENT_ID = '[A-Z]{5}:[0-9]+:[0-9]+'
'''
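
As a quick illustration of the ROCHE_FILE pattern above, a made-up filename that follows the documented naming scheme would match like this (the sample name and barcode are invented):

import re

# Hypothetical filename: sample__region__barcode__year_month_day__type.filetype
name = 'sample01__1__TI86__2013_04_01__Unk.sff'
m = re.match(ROCHE_FILE, name)
if m:
    print(m.group(1))  # -> 'sff'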
Code example #3
File: bqd.py  Project: VDBWRAIR/ngs_mapper
#!/usr/bin/env python

from subprocess import Popen, PIPE
import sys
import json

from collections import namedtuple
from itertools import izip

from matplotlib.lines import Line2D

import log
import samtools
logger = log.setup_logger(__name__, log.get_config())

# Alias our region strings
G = 'Gap'
N = 'Normal'
LC = 'LowCoverage'
LQ = 'LowQuality'
LCQ = 'LowCovQual'

# As a list
REGIONTYPES = [
    G, N, LC, LQ, LCQ
]

def parse_pileup( pileup ):
    '''
    Parses the raw pileup output from samtools mpileup and returns a dictionary
    with stats for every reference in the pileup
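
parse_pileup is cut off above. For orientation, each line of default samtools mpileup output is tab-separated: reference name, 1-based position, reference base, depth, read bases, and base qualities. A small helper to pull those columns apart might look like this (a sketch for illustration, not part of bqd.py):

def split_pileup_columns(line):
    # reference, position, reference base, depth, read bases, base-quality string
    ref, pos, refbase, depth, bases, quals = line.rstrip('\n').split('\t')[:6]
    return ref, int(pos), refbase, int(depth), bases, quals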
Code example #4
File: tagreads.py  Project: pirekupcode/ngs_mapper
import argparse
import sys
import samtools
import re
import shutil
import os.path
from ngs_mapper.bam import sortbam, indexbam

import log
logger = log.setup_logger('tagreads', log.get_config())


# Exception for when headers exist
class HeaderExists(Exception):
    pass


# The next 3 tuples must be the same length; each index in one corresponds to the same index in the others
# AKA zip( IDS, PLATFORMS, ID_MAP ) should work as expected
# Read group ID list
IDS = ('Roche454', 'IonTorrent', 'MiSeq', 'Sanger')
# Valid platforms for read groups
PLATFORMS = ('L454', 'IONTORRENT', 'ILLUMINA', 'CAPILLARY')
# Read name map to ID name
ID_MAP = (re.compile('[0-9A-Z]{14}'), re.compile('[A-Z0-9]{5}:\d{1,}:\d{1,}'),
          re.compile('M[0-9]{5}:\d+:[\w\d-]+:\d:\d{4}:\d{4,5}:\d{4,5}'),
          re.compile('.*'))
# Read Group Template
RG_TEMPLATE = {'SM': None, 'ID': None, 'PL': None, 'CN': None}
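
A minimal sketch of how the three parallel tuples might be combined to pick a read group for a read name, as the comment above suggests (the read name below is invented; the catch-all Sanger pattern matches anything the others do not):

def guess_read_group(read_name):
    # Walk the parallel tuples in order; ID_MAP's '.*' pattern acts as the fallback
    for rg_id, platform, pattern in zip(IDS, PLATFORMS, ID_MAP):
        if pattern.match(read_name):
            return rg_id, platform

# Hypothetical IonTorrent-style read name
print(guess_read_group('ABCDE:123:456'))  # -> ('IonTorrent', 'IONTORRENT')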

Code example #5
File: runsample.py  Project: gitter-badger/ngs_mapper
def main():
    args, qsubargs = parse_args()
    # Qsub job?
    if qsubargs:
        runsampleargs, _ = split_args(' '.join(sys.argv[1:]))
        print pbs_job(runsampleargs, qsubargs)
        sys.exit(1)
    # So we can set the global logger
    global logger
    # Setup analysis directory
    if os.path.isdir(args.outdir):
        if os.listdir(args.outdir):
            raise AlreadyExists("{0} already exists and is not empty".format(
                args.outdir))
    else:
        os.makedirs(args.outdir)

    # tempdir root will be TMPDIR environ variable if it exists
    # unless outdir is set
    # allows user to specify TMPDIR somewhere else if they want such as
    # /dev/shm
    tmpdir = args.outdir
    # Directory analysis is run in will be inside of tmpdir
    tdir = tempfile.mkdtemp('runsample', args.prefix, dir=tmpdir)
    os.environ['TMPDIR'] = tdir

    bamfile = os.path.join(tdir, args.prefix + '.bam')
    flagstats = os.path.join(tdir, 'flagstats.txt')
    consensus = bamfile + '.consensus.fasta'
    vcf = bamfile + '.vcf'
    bwalog = os.path.join(tdir, 'bwa.log')
    stdlog = os.path.join(tdir, args.prefix + '.std.log')
    logfile = os.path.join(tdir, args.prefix + '.log')
    CN = args.CN

    # Set the global logger
    config = log.get_config(logfile)
    logger = log.setup_logger('runsample', config)

    #make_project_repo( tdir )

    logger.info("--- Starting {0} --- ".format(args.prefix))
    if args.config:
        logger.info("--- Using custom config from {0} ---".format(args.config))
    # Write all stdout/stderr to a logfile from the various commands
    with open(stdlog, 'wb') as lfile:
        cmd_args = {
            'samplename': args.prefix,
            'tdir': tdir,
            'readsdir': args.readsdir,
            'reference': os.path.join(tdir, os.path.basename(args.reference)),
            'bamfile': bamfile,
            'flagstats': flagstats,
            'consensus': consensus,
            'vcf': vcf,
            'CN': CN,
            'trim_qual': args.trim_qual,
            'trim_outdir': os.path.join(tdir, 'trimmed_reads'),
            'filtered_dir': os.path.join(tdir, 'filtered'),
            'head_crop': args.head_crop,
            'minth': args.minth,
            'config': args.config,
            'platforms': args.platforms,
            'drop_ns': args.drop_ns,
            'index_min': args.index_min,
            'primer_info': (args.primer_file, args.primer_seed,
                            args.palindrom_clip, args.simple_clip)
        }

        # Best not to run across multiple cpu/core/threads on any of the pipeline steps
        # as multiple samples may be running concurrently already

        logger.debug("Copying reference file {0} to {1}".format(
            args.reference, cmd_args['reference']))
        shutil.copy(args.reference, cmd_args['reference'])

        # Return code list
        rets = []
        logger.debug(cmd_args)

        #Filter
        def select_keys(d, keys):
            return dict(((k, v) for k, v in d.items() if k in keys))

        #convert sffs to fastq

        print sh.convert_formats(cmd_args['readsdir'],
                                 _out=sys.stdout,
                                 _err=sys.stderr)
        #print sh.sff_to_fastq(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        try:
            if cmd_args['config']:
                __result = sh.ngs_filter(cmd_args['readsdir'],
                                         config=cmd_args['config'],
                                         outdir=cmd_args['filtered_dir'])
            else:
                filter_args = select_keys(
                    cmd_args, ["drop_ns", "platforms", "index_min"])
                __result = sh.ngs_filter(cmd_args['readsdir'],
                                         outdir=cmd_args['filtered_dir'],
                                         **filter_args)
            logger.debug('ngs_filter: %s' % __result)
        except sh.ErrorReturnCode, e:
            logger.error(e.stderr)
            sys.exit(1)

        #Trim reads
        cmd = 'trim_reads {filtered_dir} -q {trim_qual} -o {trim_outdir} --head-crop {head_crop}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        primer_info = cmd_args['primer_info']
        if primer_info[0]:
            cmd += " --primer-file %s --primer-seed %s --palindrome-clip %s --simple-clip %s " % primer_info
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        rets.append(p.wait())
        if rets[-1] != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))

        # Filter on index quality and Ns

        # Mapping
        with open(bwalog, 'wb') as blog:
            cmd = 'run_bwa_on_samplename {trim_outdir} {reference} -o {bamfile}'
            if cmd_args['config']:
                cmd += ' -c {config}'
            p = run_cmd(cmd.format(**cmd_args),
                        stdout=blog,
                        stderr=subprocess.STDOUT)
            # Wait for the sample to map
            rets.append(p.wait())
            # Everything else depends on bwa finishing, so we might as well die here
            if rets[-1] != 0:
                cmd = cmd.format(**cmd_args)
                logger.critical(
                    "{0} failed to complete successfully. Please check the log file {1} for more details"
                    .format(cmd, bwalog))
                sys.exit(1)

        # Tag Reads
        cmd = 'tagreads {bamfile} -CN {CN}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # Variant Calling
        cmd = 'base_caller {bamfile} {reference} {vcf} -minth {minth}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)
        if rets[-1] != 0:
            cmd = cmd.format(**cmd_args)
            logger.critical('{0} failed to complete successfully'.format(cmd))

        # Flagstats
        with open(flagstats, 'wb') as flagstats:
            cmd = 'samtools flagstat {bamfile}'
            p = run_cmd(cmd.format(**cmd_args),
                        stdout=flagstats,
                        stderr=lfile,
                        script_dir='')
            r = p.wait()
            if r != 0:
                logger.critical("{0} did not exit successfully".format(
                    cmd.format(**cmd_args)))
            rets.append(r)

        # Graphics
        cmd = 'graphsample {bamfile} -od {tdir}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # Read Graphics
        fastqs = ' '.join(
            glob.glob(os.path.join(cmd_args['trim_outdir'], '*.fastq')))
        cmd = 'fqstats -o {0}.reads.png {1}'.format(
            cmd_args['bamfile'].replace('.bam', ''), fastqs)
        p = run_cmd(cmd, stdout=lfile, stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(cmd))
        rets.append(r)

        # Consensus
        cmd = 'vcf_consensus {vcf} -i {samplename} -o {consensus}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # If sum is > 0 then one of the commands failed
        if sum(rets) != 0:
            logger.critical(
                "!!! There was an error running part of the pipeline !!!")
            logger.critical("Please check the logfile {0}".format(logfile))
            sys.exit(1)
        logger.info("--- Finished {0} ---".format(args.prefix))

        #subprocess.call( 'git add -A', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )
        #subprocess.call( 'git commit -am \'runsample\'', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )

        logger.debug("Moving {0} to {1}".format(tdir, args.outdir))
        # Cannot log any more below this line as the log file will be moved in the following code
        if not os.path.isdir(args.outdir):
            shutil.move(tdir, args.outdir)
        else:
            file_list = [os.path.join(tdir, m) for m in os.listdir(tdir)]
            for f in file_list:
                shutil.move(f, args.outdir)
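
run_cmd is not defined in this excerpt. Judging only from how it is called here (a command string, stdout/stderr handles, an optional script_dir, and a returned object that supports .wait()), a rough stand-in could look like the following; this is an assumption for illustration, not the project's implementation:

import os
import shlex
import subprocess

def run_cmd(cmdstr, stdout=None, stderr=None, script_dir=None):
    # Split the command, optionally prefix the executable with script_dir,
    # and return the Popen object so the caller can .wait() on it.
    parts = shlex.split(cmdstr)
    if script_dir:
        parts[0] = os.path.join(script_dir, parts[0])
    return subprocess.Popen(parts, stdout=stdout, stderr=stderr)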
Code example #6
File: trim_reads.py  Project: pirekupcode/ngs_mapper
"""

import subprocess
import os
import argparse
import sys
from os.path import basename, join, isdir, dirname, expandvars
from glob import glob
import tempfile
import reads
import shlex
import data
from ngs_mapper import compat

import log
lconfig = log.get_config()
logger = log.setup_logger( 'trim_reads', lconfig )

def main():
    args = parse_args()
    trim_reads_in_dir(
        args.readsdir,
        args.q,
        args.outputdir,
        head_crop=args.headcrop,
        platforms=args.platforms,
        primer_info=[args.primer_file, args.primer_seed, args.palindrom_clip, args.simple_clip]
    )

def trim_reads_in_dir( *args, **kwargs ):
    '''
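
A minimal usage sketch mirroring the call in main() above; every value here is made up for illustration:

# Hypothetical arguments, same positional/keyword layout as main() uses
trim_reads_in_dir(
    'ReadsBySample/sample01',      # readsdir
    20,                            # quality threshold (args.q)
    'trimmed_reads',               # outputdir
    head_crop=0,
    platforms=['MiSeq', 'Sanger'],
    primer_info=[None, None, None, None],
)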
Code example #7
File: sanger_sync.py  Project: pirekupcode/ngs_mapper
    #. Symlink all original .ab1 files into this directory
    #. Convert all .ab1 to .fastq 
#. Parse the sanger filename and create ReadsBySample/samplename directory
#. Symlink all .fastq and .ab1 files for that samplename from ReadData into Samplename directory

"""
import shutil
from os.path import *
import os
from glob import glob
from Bio import SeqIO
import re
import sys

import log
logger = log.setup_logger(basename(__file__), log.get_config())


# For invalid formatted filenames
class InvalidFormat(Exception):
    pass


def sync_sanger(runpath, ngsdata):
    rund = basename(runpath)
    rawd = join(ngsdata, 'RawData', 'Sanger', rund)
    readd = join(ngsdata, 'ReadData', 'Sanger', rund)

    sync_run(runpath, ngsdata)
    sync_readdata(rawd, ngsdata)
    link_reads(readd, ngsdata)
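
For orientation, the paths assembled in sync_sanger imply an NGSData layout roughly like this (run and sample names are hypothetical):

# <ngsdata>/
#   RawData/Sanger/<run>/          <- rawd: synced copy of runpath
#   ReadData/Sanger/<run>/         <- readd: .fastq conversions plus .ab1 symlinks
#   ReadsBySample/<samplename>/    <- per-sample symlinks created by link_reads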
Code example #8
File: runsample.py  Project: gitter-badger/ngs_mapper
def main():
    args,qsubargs = parse_args()
    # Qsub job?
    if qsubargs:
        runsampleargs, _ = split_args(' '.join(sys.argv[1:]))
        print pbs_job(runsampleargs, qsubargs)
        sys.exit(1)
    # So we can set the global logger
    global logger
    # Setup analysis directory
    if os.path.isdir( args.outdir ):
        if os.listdir( args.outdir ):
            raise AlreadyExists( "{0} already exists and is not empty".format(args.outdir) )
    else:
        os.makedirs(args.outdir)

    # tempdir root will be TMPDIR environ variable if it exists
    # unless outdir is set
    # allows user to specify TMPDIR somewhere else if they want such as
    # /dev/shm
    tmpdir = args.outdir
    # Directory analysis is run in will be inside of tmpdir
    tdir = tempfile.mkdtemp('runsample', args.prefix, dir=tmpdir)
    os.environ['TMPDIR'] = tdir

    bamfile = os.path.join( tdir, args.prefix + '.bam' )
    flagstats = os.path.join( tdir, 'flagstats.txt' )
    consensus = bamfile+'.consensus.fasta'
    vcf = bamfile+'.vcf'
    bwalog = os.path.join( tdir, 'bwa.log' )
    stdlog = os.path.join( tdir, args.prefix + '.std.log' )
    logfile = os.path.join( tdir, args.prefix + '.log' )
    CN = args.CN

    # Set the global logger
    config = log.get_config( logfile )
    logger = log.setup_logger( 'runsample', config )

    #make_project_repo( tdir )

    logger.info( "--- Starting {0} --- ".format(args.prefix) )
    if args.config:
        logger.info( "--- Using custom config from {0} ---".format(args.config) )
    # Write all stdout/stderr to a logfile from the various commands
    with open(stdlog,'wb') as lfile:
        cmd_args = {
            'samplename': args.prefix,
            'tdir': tdir,
            'readsdir': args.readsdir,
            'reference': os.path.join(tdir, os.path.basename(args.reference)),
            'bamfile': bamfile,
            'flagstats': flagstats,
            'consensus': consensus,
            'vcf': vcf,
            'CN': CN,
            'trim_qual': args.trim_qual,
            'trim_outdir': os.path.join(tdir,'trimmed_reads'),
            'filtered_dir' : os.path.join(tdir, 'filtered'),
            'head_crop': args.head_crop,
            'minth': args.minth,
            'config': args.config,
            'platforms': args.platforms,
            'drop_ns': args.drop_ns,
            'index_min': args.index_min,
            'primer_info' : (args.primer_file, args.primer_seed, args.palindrom_clip, args.simple_clip)
        }

        # Best not to run across multiple cpu/core/threads on any of the pipeline steps
        # as multiple samples may be running concurrently already

        logger.debug( "Copying reference file {0} to {1}".format(args.reference,cmd_args['reference']) )
        shutil.copy( args.reference, cmd_args['reference'] )

        # Return code list
        rets = []
        logger.debug(cmd_args)
        #Filter
        def select_keys(d, keys):
            return dict( ((k, v) for k, v in d.items() if k in keys))

        #convert sffs to fastq

        print sh.convert_formats(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        #print sh.sff_to_fastq(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        try:
            if cmd_args['config']:
                __result = sh.ngs_filter(cmd_args['readsdir'], config=cmd_args['config'], outdir=cmd_args['filtered_dir'])
            else:
                filter_args = select_keys(cmd_args, ["drop_ns", "platforms", "index_min"])
                __result = sh.ngs_filter(cmd_args['readsdir'], outdir=cmd_args['filtered_dir'], **filter_args)
            logger.debug( 'ngs_filter: %s' % __result )
        except sh.ErrorReturnCode, e:
            logger.error(e.stderr)
            sys.exit(1)

        #Trim reads
        cmd = 'trim_reads {filtered_dir} -q {trim_qual} -o {trim_outdir} --head-crop {head_crop}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        primer_info = cmd_args['primer_info']
        if primer_info[0]:
            cmd += " --primer-file %s --primer-seed %s --palindrome-clip %s --simple-clip %s " % primer_info
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        rets.append( p.wait() )
        if rets[-1] != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )

        # Filter on index quality and Ns

        # Mapping
        with open(bwalog, 'wb') as blog:
            cmd = 'run_bwa_on_samplename {trim_outdir} {reference} -o {bamfile}'
            if cmd_args['config']:
                cmd += ' -c {config}'
            p = run_cmd( cmd.format(**cmd_args), stdout=blog, stderr=subprocess.STDOUT )
            # Wait for the sample to map
            rets.append( p.wait() )
            # Everything else depends on bwa finishing, so we might as well die here
            if rets[-1] != 0:
                cmd = cmd.format(**cmd_args)
                logger.critical( "{0} failed to complete successfully. Please check the log file {1} for more details".format(cmd,bwalog) )
                sys.exit(1)

        # Tag Reads
        cmd = 'tagreads {bamfile} -CN {CN}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # Variant Calling
        cmd = 'base_caller {bamfile} {reference} {vcf} -minth {minth}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )
        if rets[-1] != 0:
            cmd = cmd.format(**cmd_args)
            logger.critical( '{0} failed to complete successfully'.format(cmd) )

        # Flagstats
        with open(flagstats,'wb') as flagstats:
            cmd = 'samtools flagstat {bamfile}'
            p = run_cmd( cmd.format(**cmd_args), stdout=flagstats, stderr=lfile, script_dir='' )
            r = p.wait()
            if r != 0:
                logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
            rets.append( r )

        # Graphics
        cmd = 'graphsample {bamfile} -od {tdir}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # Read Graphics
        fastqs = ' '.join( glob.glob( os.path.join( cmd_args['trim_outdir'], '*.fastq' ) ) )
        cmd = 'fqstats -o {0}.reads.png {1}'.format(cmd_args['bamfile'].replace('.bam',''),fastqs)
        p = run_cmd( cmd, stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd) )
        rets.append( r )

        # Consensus
        cmd = 'vcf_consensus {vcf} -i {samplename} -o {consensus}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # If sum is > 0 then one of the commands failed
        if sum(rets) != 0:
            logger.critical( "!!! There was an error running part of the pipeline !!!" )
            logger.critical( "Please check the logfile {0}".format(logfile) )
            sys.exit( 1 )
        logger.info( "--- Finished {0} ---".format(args.prefix) )

        #subprocess.call( 'git add -A', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )
        #subprocess.call( 'git commit -am \'runsample\'', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )

        logger.debug( "Moving {0} to {1}".format( tdir, args.outdir ) )
        # Cannot log any more below this line as the log file will be moved in the following code
        if not os.path.isdir( args.outdir ):
            shutil.move( tdir, args.outdir )
        else:
            file_list = [os.path.join(tdir,m) for m in os.listdir(tdir)]
            for f in file_list:
                shutil.move( f, args.outdir )
Code example #9
File: sanger_sync.py  Project: VDBWRAIR/ngs_mapper
    #. Symlink all original .ab1 files into this directory
    #. Convert all .ab1 to .fastq 
#. Parse the sanger filename and create ReadsBySample/samplename directory
#. Symlink all .fastq and .ab1 files for that samplename from ReadData into Samplename directory

"""
import shutil
from os.path import *
import os
from glob import glob
from Bio import SeqIO
import re
import sys

import log
logger = log.setup_logger( basename(__file__), log.get_config() )

# For invalid formatted filenames
class InvalidFormat(Exception): pass

def sync_sanger( runpath, ngsdata ):
    rund = basename( runpath )
    rawd = join( ngsdata, 'RawData', 'Sanger', rund )
    readd = join( ngsdata, 'ReadData', 'Sanger', rund )

    sync_run( runpath, ngsdata )
    sync_readdata( rawd, ngsdata )
    link_reads( readd, ngsdata )

def sync_run( runpath, ngsdata ):
    '''
Code example #10
import sys
import os
from os.path import *
import subprocess
import argparse

import bqd, graph_qualdepth as qd
import samtools
from bam_to_qualdepth import set_unmapped_mapped_reads
import json
import log

logc = log.get_config('graphsample.log')
logger = log.setup_logger('graphsample', logc)


def main():
    args = parse_args()
    args = handle_args(args)
    if not args.qualdepth:
        jfile = make_json(args.bamfile, args.outpath)
    else:
        jfile = args.qualdepth
    pngfile = make_image(jfile, args.outpath)


def make_json(bamfile, outpathprefix):
    pileup = samtools.nogap_mpileup(bamfile)
    stats = bqd.parse_pileup(pileup)
    set_unmapped_mapped_reads(bamfile, stats)
    outfile = outpathprefix + '.qualdepth.json'
Code example #11
File: tagreads.py  Project: VDBWRAIR/ngs_mapper
import argparse
import sys
import samtools
import re
import shutil
import os.path
from ngs_mapper.bam import sortbam, indexbam

import log
logger = log.setup_logger('tagreads',log.get_config())

# Exception for when headers exist
class HeaderExists(Exception): pass

# The next 3 tuples must be the same length; each index in one corresponds to the same index in the others
# AKA zip( IDS, PLATFORMS, ID_MAP ) should work as expected
# Read group ID list
IDS = ('Roche454', 'IonTorrent', 'MiSeq', 'Sanger')
# Valid platforms for read groups
PLATFORMS = ('L454', 'IONTORRENT', 'ILLUMINA', 'CAPILLARY')
# Read name map to ID name
ID_MAP = (
    re.compile( '[0-9A-Z]{14}' ),
    re.compile( '[A-Z0-9]{5}:\d{1,}:\d{1,}' ),
    re.compile( 'M[0-9]{5}:\d+:[\w\d-]+:\d:\d{4}:\d{4,5}:\d{4,5}' ),
    re.compile( '.*' )
)
# Read Group Template
RG_TEMPLATE = {
    'SM': None,
    'ID': None,
Code example #12
from glob import glob
from os.path import *
import numpy as np
import matplotlib.pyplot as plt

from nose.tools import ok_, eq_

from datetime import datetime
import log

logc = log.get_config()
logger = log.setup_logger( 'graph_times', logc )

def main():
    ss = start_stop( 'Projects' )
    logger.info( "Plotting all projects inside of {0}".format('Projects') )
    x,y = [],[]
    samplenames = sorted(ss.keys())
    for sn in samplenames:
        x.append( sn )
        y.append( ss[sn] )
    fig = plt.figure()
    fig.set_size_inches( 20.0, 8.0 )
    fig.suptitle( 'Pipeline Time per Sample' )
    ax = plt.gca()
    ax.plot( range(len(x)), y )
    ax.set_xlim([0,len(x)-1])
    ax.set_ylim([0,max(y)])
    ax.set_xticks( range(0,len(x)) )
    ax.set_xticklabels( x, rotation='vertical' )
    ax.set_ylabel( 'Seconds' )
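
The snippet is cut off here; a typical continuation would simply write the figure to disk, for example (filename hypothetical):

    # Hypothetical: save the finished plot
    fig.savefig('PipelineTimes.png', dpi=100)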