Example #1
import sys
import os
from os.path import *
import subprocess
import argparse

import bqd, graph_qualdepth as qd
import samtools
from bam_to_qualdepth import set_unmapped_mapped_reads
import json
import log

logc = log.get_config( 'graphsample.log' )
logger = log.setup_logger( 'graphsample', logc )

def main():
    args = parse_args()
    args = handle_args( args )
    if not args.qualdepth:
        jfile = make_json( args.bamfile, args.outpath )
    else:
        jfile = args.qualdepth
    pngfile = make_image( jfile, args.outpath )

def make_json( bamfile, outpathprefix ):
    pileup = samtools.nogap_mpileup(bamfile)
    stats = bqd.parse_pileup( pileup )
    set_unmapped_mapped_reads( bamfile, stats )
    outfile = outpathprefix + '.qualdepth.json'
    with open( outfile, 'w' ) as fh:
        json.dump( stats, fh )
    return outfile
Example #2
from glob import glob
from os.path import *
import os
import sys
import re
import log
from Bio import SeqIO
import gzip

logger = log.setup_logger(__name__, log.get_config())

ROCHE_FILE = '\S+?(?:__[0-9]){0,1}__(?:TI|RL)\d+__\d{4}_\d{2}_\d{2}__\w+.(sff|fastq)'
'''
Matches Roche sff or fastq files

sample__region__barcode__year_month_day__type.filetype
'''
ROCHE_ID = '[A-Z0-9]{14}'
'''
Matches Roche accessions, which are just 14 uppercase alphanumeric characters

@AAAAAAAAAAAAAA
'''
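
# A minimal usage sketch (not from the source); the filename and accession
# below are hypothetical examples of strings these patterns are meant to match.
import re
assert re.match(ROCHE_FILE, 'sample01__1__RL12__2013_05_06__unk.sff')
assert re.match(ROCHE_ID, 'ABCDE012345678')
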
IONTORRENT_FILE = '\S+?__[0-9]__IX\d{3}__\d{4}_\d{2}_\d{2}__\w+.(sff|fastq)'
'''
Matches IonTorrent file names (essentially the same as Roche)

sample__region__barcode__year_month_day__type.filetype
'''
IONTORRENT_ID = '[A-Z]{5}:[0-9]+:[0-9]+'
'''
Example #3
#!/usr/bin/env python

from subprocess import Popen, PIPE
import sys
import json

from collections import namedtuple
from itertools import izip

from matplotlib.lines import Line2D

import log
import samtools
logger = log.setup_logger(__name__, log.get_config())

# Alias our region strings
G = 'Gap'
N = 'Normal'
LC = 'LowCoverage'
LQ = 'LowQuality'
LCQ = 'LowCovQual'

# As a list
REGIONTYPES = [
    G, N, LC, LQ, LCQ
]
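
# Illustrative only -- an assumption, not the module's actual classification
# logic: the aliases above could tag a position from its depth and average
# quality, using hypothetical thresholds of 10x coverage and Q25.
def _example_region_type(depth, avgqual, mind=10, minq=25.0):
    if depth == 0:
        return G
    if depth < mind and avgqual < minq:
        return LCQ
    if depth < mind:
        return LC
    if avgqual < minq:
        return LQ
    return N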

def parse_pileup( pileup ):
    '''
    Parses the raw pileup output from samtools mpileup and returns a dictionary
    with stats for every reference in the pileup
Example #4
import argparse
import sys
import samtools
import re
import shutil
import os.path
from ngs_mapper.bam import sortbam, indexbam

import log
logger = log.setup_logger('tagreads', log.get_config())


# Exception for when headers exist
class HeaderExists(Exception):
    pass


# The next 3 tuples must be the same length; the entry at each index in one corresponds to the entry at the same index in the others
# AKA zip( IDS, PLATFORMS, ID_MAP ) should work as expected
# Read group ID list
IDS = ('Roche454', 'IonTorrent', 'MiSeq', 'Sanger')
# Valid platforms for read groups
PLATFORMS = ('L454', 'IONTORRENT', 'ILLUMINA', 'CAPILLARY')
# Read name map to ID name
ID_MAP = (re.compile('[0-9A-Z]{14}'), re.compile('[A-Z0-9]{5}:\d{1,}:\d{1,}'),
          re.compile('M[0-9]{5}:\d+:[\w\d-]+:\d:\d{4}:\d{4,5}:\d{4,5}'),
          re.compile('.*'))
# Read Group Template
RG_TEMPLATE = {'SM': None, 'ID': None, 'PL': None, 'CN': None}
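
# A minimal sketch (not part of the source) of how the three parallel tuples
# line up, as the zip(IDS, PLATFORMS, ID_MAP) comment suggests: take the first
# read group whose pattern matches a read name. The read name shown is a
# hypothetical MiSeq-style example.
def _guess_read_group(readname):
    for rg_id, platform, pattern in zip(IDS, PLATFORMS, ID_MAP):
        if pattern.match(readname):
            return rg_id, platform

# _guess_read_group('M00001:10:000000000-A1B2C:1:1101:12345:6789')
# -> ('MiSeq', 'ILLUMINA'); the Roche454 and IonTorrent patterns fail to match first.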

def main():
    args, qsubargs = parse_args()
    # Qsub job?
    if qsubargs:
        runsampleargs, _ = split_args(' '.join(sys.argv[1:]))
        print pbs_job(runsampleargs, qsubargs)
        sys.exit(1)
    # So we can set the global logger
    global logger
    # Setup analysis directory
    if os.path.isdir(args.outdir):
        if os.listdir(args.outdir):
            raise AlreadyExists("{0} already exists and is not empty".format(
                args.outdir))
    else:
        os.makedirs(args.outdir)

    # tempdir root will be TMPDIR environ variable if it exists
    # unless outdir is set
    # allows user to specify TMPDIR somewhere else if they want such as
    # /dev/shm
    tmpdir = args.outdir
    # Directory analysis is run in will be inside of tmpdir
    tdir = tempfile.mkdtemp('runsample', args.prefix, dir=tmpdir)
    os.environ['TMPDIR'] = tdir

    bamfile = os.path.join(tdir, args.prefix + '.bam')
    flagstats = os.path.join(tdir, 'flagstats.txt')
    consensus = bamfile + '.consensus.fasta'
    vcf = bamfile + '.vcf'
    bwalog = os.path.join(tdir, 'bwa.log')
    stdlog = os.path.join(tdir, args.prefix + '.std.log')
    logfile = os.path.join(tdir, args.prefix + '.log')
    CN = args.CN

    # Set the global logger
    config = log.get_config(logfile)
    logger = log.setup_logger('runsample', config)

    #make_project_repo( tdir )

    logger.info("--- Starting {0} --- ".format(args.prefix))
    if args.config:
        logger.info("--- Using custom config from {0} ---".format(args.config))
    # Write all stdout/stderr to a logfile from the various commands
    with open(stdlog, 'wb') as lfile:
        cmd_args = {
            'samplename': args.prefix,
            'tdir': tdir,
            'readsdir': args.readsdir,
            'reference': os.path.join(tdir, os.path.basename(args.reference)),
            'bamfile': bamfile,
            'flagstats': flagstats,
            'consensus': consensus,
            'vcf': vcf,
            'CN': CN,
            'trim_qual': args.trim_qual,
            'trim_outdir': os.path.join(tdir, 'trimmed_reads'),
            'filtered_dir': os.path.join(tdir, 'filtered'),
            'head_crop': args.head_crop,
            'minth': args.minth,
            'config': args.config,
            'platforms': args.platforms,
            'drop_ns': args.drop_ns,
            'index_min': args.index_min,
            'primer_info': (args.primer_file, args.primer_seed,
                            args.palindrom_clip, args.simple_clip)
        }

        # Best not to run across multiple cpu/core/threads on any of the pipeline steps
        # as multiple samples may be running concurrently already

        logger.debug("Copying reference file {0} to {1}".format(
            args.reference, cmd_args['reference']))
        shutil.copy(args.reference, cmd_args['reference'])

        # Return code list
        rets = []
        logger.debug(cmd_args)

        #Filter
        def select_keys(d, keys):
            return dict(((k, v) for k, v in d.items() if k in keys))

        #convert sffs to fastq

        print sh.convert_formats(cmd_args['readsdir'],
                                 _out=sys.stdout,
                                 _err=sys.stderr)
        #print sh.sff_to_fastq(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        try:
            if cmd_args['config']:
                __result = sh.ngs_filter(cmd_args['readsdir'],
                                         config=cmd_args['config'],
                                         outdir=cmd_args['filtered_dir'])
            else:
                filter_args = select_keys(
                    cmd_args, ["drop_ns", "platforms", "index_min"])
                __result = sh.ngs_filter(cmd_args['readsdir'],
                                         outdir=cmd_args['filtered_dir'],
                                         **filter_args)
            logger.debug('ngs_filter: %s' % __result)
        except sh.ErrorReturnCode, e:
            logger.error(e.stderr)
            sys.exit(1)

        #Trim reads
        cmd = 'trim_reads {filtered_dir} -q {trim_qual} -o {trim_outdir} --head-crop {head_crop}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        primer_info = cmd_args['primer_info']
        if primer_info[0]:
            cmd += " --primer-file %s --primer-seed %s --palindrome-clip %s --simple-clip %s " % primer_info
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        rets.append(p.wait())
        if rets[-1] != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))

        # Filter on index quality and Ns

        # Mapping
        with open(bwalog, 'wb') as blog:
            cmd = 'run_bwa_on_samplename {trim_outdir} {reference} -o {bamfile}'
            if cmd_args['config']:
                cmd += ' -c {config}'
            p = run_cmd(cmd.format(**cmd_args),
                        stdout=blog,
                        stderr=subprocess.STDOUT)
            # Wait for the sample to map
            rets.append(p.wait())
            # Everything else is dependent on bwa finishing so might as well die here
            if rets[-1] != 0:
                cmd = cmd.format(**cmd_args)
                logger.critical(
                    "{0} failed to complete successfully. Please check the log file {1} for more details"
                    .format(cmd, bwalog))
                sys.exit(1)

        # Tag Reads
        cmd = 'tagreads {bamfile} -CN {CN}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # Variant Calling
        cmd = 'base_caller {bamfile} {reference} {vcf} -minth {minth}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)
        if rets[-1] != 0:
            cmd = cmd.format(**cmd_args)
            logger.critical('{0} failed to complete successfully'.format(
                cmd.format(**cmd_args)))

        # Flagstats
        with open(flagstats, 'wb') as flagstats:
            cmd = 'samtools flagstat {bamfile}'
            p = run_cmd(cmd.format(**cmd_args),
                        stdout=flagstats,
                        stderr=lfile,
                        script_dir='')
            r = p.wait()
            if r != 0:
                logger.critical("{0} did not exit successfully".format(
                    cmd.format(**cmd_args)))
            rets.append(r)

        # Graphics
        cmd = 'graphsample {bamfile} -od {tdir}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # Read Graphics
        fastqs = ' '.join(
            glob.glob(os.path.join(cmd_args['trim_outdir'], '*.fastq')))
        cmd = 'fqstats -o {0}.reads.png {1}'.format(
            cmd_args['bamfile'].replace('.bam', ''), fastqs)
        p = run_cmd(cmd, stdout=lfile, stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(cmd))
        rets.append(r)

        # Consensus
        cmd = 'vcf_consensus {vcf} -i {samplename} -o {consensus}'
        p = run_cmd(cmd.format(**cmd_args),
                    stdout=lfile,
                    stderr=subprocess.STDOUT)
        r = p.wait()
        if r != 0:
            logger.critical("{0} did not exit successfully".format(
                cmd.format(**cmd_args)))
        rets.append(r)

        # If sum is > 0 then one of the commands failed
        if sum(rets) != 0:
            logger.critical(
                "!!! There was an error running part of the pipeline !!!")
            logger.critical("Please check the logfile {0}".format(logfile))
            sys.exit(1)
        logger.info("--- Finished {0} ---".format(args.prefix))

        #subprocess.call( 'git add -A', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )
        #subprocess.call( 'git commit -am \'runsample\'', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )

        logger.debug("Moving {0} to {1}".format(tdir, args.outdir))
        # Cannot log any more below this line as the log file will be moved in the following code
        if not os.path.isdir(args.outdir):
            shutil.move(tdir, args.outdir)
        else:
            file_list = [os.path.join(tdir, m) for m in os.listdir(tdir)]
            for f in file_list:
                shutil.move(f, args.outdir)
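
# A minimal sketch (an assumption, not code from this project) of the pattern
# each pipeline step above repeats: format the command, run it with output
# captured to the shared log file, wait, log a critical message on a non-zero
# exit, and record the return code so the final sum(rets) check catches failures.
import shlex
import subprocess

def _run_step(cmd_template, cmd_args, lfile, rets, logger):
    cmd = cmd_template.format(**cmd_args)
    proc = subprocess.Popen(shlex.split(cmd), stdout=lfile, stderr=subprocess.STDOUT)
    ret = proc.wait()
    if ret != 0:
        logger.critical("{0} did not exit successfully".format(cmd))
    rets.append(ret)
    return ret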
Example #6
"""

import subprocess
import os
import argparse
import sys
from os.path import basename, join, isdir, dirname, expandvars
from glob import glob
import tempfile
import reads
import shlex
import data
from ngs_mapper import compat

import log
lconfig = log.get_config()
logger = log.setup_logger( 'trim_reads', lconfig )

def main():
    args = parse_args()
    trim_reads_in_dir(
        args.readsdir,
        args.q,
        args.outputdir,
        head_crop=args.headcrop,
        platforms=args.platforms,
        primer_info=[args.primer_file, args.primer_seed, args.palindrom_clip, args.simple_clip]
    )

def trim_reads_in_dir( *args, **kwargs ):
    '''
    #. Symlink all original .ab1 files into this directory
    #. Convert all .ab1 to .fastq 
#. Parse the sanger filename and create ReadsBySample/samplename directory
#. Symlink all .fastq and .ab1 files for that samplename from ReadData into Samplename directory

"""
import shutil
from os.path import *
import os
from glob import glob
from Bio import SeqIO
import re
import sys

import log
logger = log.setup_logger(basename(__file__), log.get_config())


# For invalid formatted filenames
class InvalidFormat(Exception):
    pass


def sync_sanger(runpath, ngsdata):
    rund = basename(runpath)
    rawd = join(ngsdata, 'RawData', 'Sanger', rund)
    readd = join(ngsdata, 'ReadData', 'Sanger', rund)

    sync_run(runpath, ngsdata)
    sync_readdata(rawd, ngsdata)
    link_reads(readd, ngsdata)
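
# A minimal sketch (an assumption, not this module's implementation) of the
# ".ab1 to .fastq" conversion step listed in the docstring above, using
# Biopython's SeqIO.convert on a hypothetical trace file path.
def _ab1_to_fastq(ab1path):
    fastqpath = splitext(ab1path)[0] + '.fastq'
    SeqIO.convert(ab1path, 'abi', fastqpath, 'fastq')
    return fastqpath
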
def main():
    args,qsubargs = parse_args()
    # Qsub job?
    if qsubargs:
        runsampleargs, _ = split_args(' '.join(sys.argv[1:]))
        print pbs_job(runsampleargs, qsubargs)
        sys.exit(1)
    # So we can set the global logger
    global logger
    # Setup analysis directory
    if os.path.isdir( args.outdir ):
        if os.listdir( args.outdir ):
            raise AlreadyExists( "{0} already exists and is not empty".format(args.outdir) )
    else:
        os.makedirs(args.outdir)

    # tempdir root will be TMPDIR environ variable if it exists
    # unless outdir is set
    # allows user to specify TMPDIR somewhere else if they want such as
    # /dev/shm
    tmpdir = args.outdir
    # Directory analysis is run in will be inside of tmpdir
    tdir = tempfile.mkdtemp('runsample', args.prefix, dir=tmpdir)
    os.environ['TMPDIR'] = tdir

    bamfile = os.path.join( tdir, args.prefix + '.bam' )
    flagstats = os.path.join( tdir, 'flagstats.txt' )
    consensus = bamfile+'.consensus.fasta'
    vcf = bamfile+'.vcf'
    bwalog = os.path.join( tdir, 'bwa.log' )
    stdlog = os.path.join( tdir, args.prefix + '.std.log' )
    logfile = os.path.join( tdir, args.prefix + '.log' )
    CN = args.CN

    # Set the global logger
    config = log.get_config( logfile )
    logger = log.setup_logger( 'runsample', config )

    #make_project_repo( tdir )

    logger.info( "--- Starting {0} --- ".format(args.prefix) )
    if args.config:
        logger.info( "--- Using custom config from {0} ---".format(args.config) )
    # Write all stdout/stderr to a logfile from the various commands
    with open(stdlog,'wb') as lfile:
        cmd_args = {
            'samplename': args.prefix,
            'tdir': tdir,
            'readsdir': args.readsdir,
            'reference': os.path.join(tdir, os.path.basename(args.reference)),
            'bamfile': bamfile,
            'flagstats': flagstats,
            'consensus': consensus,
            'vcf': vcf,
            'CN': CN,
            'trim_qual': args.trim_qual,
            'trim_outdir': os.path.join(tdir,'trimmed_reads'),
            'filtered_dir' : os.path.join(tdir, 'filtered'),
            'head_crop': args.head_crop,
            'minth': args.minth,
            'config': args.config,
            'platforms': args.platforms,
            'drop_ns': args.drop_ns,
            'index_min': args.index_min,
            'primer_info' : (args.primer_file, args.primer_seed, args.palindrom_clip, args.simple_clip)
        }

        # Best not to run across multiple cpu/core/threads on any of the pipeline steps
        # as multiple samples may be running concurrently already

        logger.debug( "Copying reference file {0} to {1}".format(args.reference,cmd_args['reference']) )
        shutil.copy( args.reference, cmd_args['reference'] )

        # Return code list
        rets = []
        logger.debug(cmd_args)
        #Filter
        def select_keys(d, keys):
            return dict( ((k, v) for k, v in d.items() if k in keys))

        #convert sffs to fastq

        print sh.convert_formats(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        #print sh.sff_to_fastq(cmd_args['readsdir'], _out=sys.stdout, _err=sys.stderr)
        try:
            if cmd_args['config']:
                __result = sh.ngs_filter(cmd_args['readsdir'], config=cmd_args['config'], outdir=cmd_args['filtered_dir'])
            else:
                filter_args = select_keys(cmd_args, ["drop_ns", "platforms", "index_min"])
                __result = sh.ngs_filter(cmd_args['readsdir'], outdir=cmd_args['filtered_dir'], **filter_args)
            logger.debug( 'ngs_filter: %s' % __result )
        except sh.ErrorReturnCode, e:
            logger.error(e.stderr)
            sys.exit(1)

        #Trim reads
        cmd = 'trim_reads {filtered_dir} -q {trim_qual} -o {trim_outdir} --head-crop {head_crop}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        primer_info = cmd_args['primer_info']
        if primer_info[0]:
            cmd += " --primer-file %s --primer-seed %s --palindrome-clip %s --simple-clip %s " % primer_info
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        rets.append( p.wait() )
        if rets[-1] != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )

        # Filter on index quality and Ns

        # Mapping
        with open(bwalog, 'wb') as blog:
            cmd = 'run_bwa_on_samplename {trim_outdir} {reference} -o {bamfile}'
            if cmd_args['config']:
                cmd += ' -c {config}'
            p = run_cmd( cmd.format(**cmd_args), stdout=blog, stderr=subprocess.STDOUT )
            # Wait for the sample to map
            rets.append( p.wait() )
            # Everything else is dependent on bwa finishing so might as well die here
            if rets[-1] != 0:
                cmd = cmd.format(**cmd_args)
                logger.critical( "{0} failed to complete successfully. Please check the log file {1} for more details".format(cmd,bwalog) )
                sys.exit(1)

        # Tag Reads
        cmd = 'tagreads {bamfile} -CN {CN}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # Variant Calling
        cmd = 'base_caller {bamfile} {reference} {vcf} -minth {minth}'
        if cmd_args['config']:
            cmd += ' -c {config}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )
        if rets[-1] != 0:
            cmd = cmd.format(**cmd_args)
            logger.critical( '{0} failed to complete successfully'.format(cmd.format(**cmd_args)) )

        # Flagstats
        with open(flagstats,'wb') as flagstats:
            cmd = 'samtools flagstat {bamfile}'
            p = run_cmd( cmd.format(**cmd_args), stdout=flagstats, stderr=lfile, script_dir='' )
            r = p.wait()
            if r != 0:
                logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
            rets.append( r )

        # Graphics
        cmd = 'graphsample {bamfile} -od {tdir}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # Read Graphics
        fastqs = ' '.join( glob.glob( os.path.join( cmd_args['trim_outdir'], '*.fastq' ) ) )
        cmd = 'fqstats -o {0}.reads.png {1}'.format(cmd_args['bamfile'].replace('.bam',''),fastqs)
        p = run_cmd( cmd, stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd) )
        rets.append( r )

        # Consensus
        cmd = 'vcf_consensus {vcf} -i {samplename} -o {consensus}'
        p = run_cmd( cmd.format(**cmd_args), stdout=lfile, stderr=subprocess.STDOUT )
        r = p.wait()
        if r != 0:
            logger.critical( "{0} did not exit successfully".format(cmd.format(**cmd_args)) )
        rets.append( r )

        # If sum is > 0 then one of the commands failed
        if sum(rets) != 0:
            logger.critical( "!!! There was an error running part of the pipeline !!!" )
            logger.critical( "Please check the logfile {0}".format(logfile) )
            sys.exit( 1 )
        logger.info( "--- Finished {0} ---".format(args.prefix) )

        #subprocess.call( 'git add -A', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )
        #subprocess.call( 'git commit -am \'runsample\'', cwd=tdir, shell=True, stdout=lfile, stderr=subprocess.STDOUT )

        logger.debug( "Moving {0} to {1}".format( tdir, args.outdir ) )
        # Cannot log any more below this line as the log file will be moved in the following code
        if not os.path.isdir( args.outdir ):
            shutil.move( tdir, args.outdir )
        else:
            file_list = [os.path.join(tdir,m) for m in os.listdir(tdir)]
            for f in file_list:
                shutil.move( f, args.outdir )
Example #9
    #. Symlink all original .ab1 files into this directory
    #. Convert all .ab1 to .fastq 
#. Parse the sanger filename and create ReadsBySample/samplename directory
#. Symlink all .fastq and .ab1 files for that samplename from ReadData into Samplename directory

"""
import shutil
from os.path import *
import os
from glob import glob
from Bio import SeqIO
import re
import sys

import log
logger = log.setup_logger( basename(__file__), log.get_config() )

# For invalid formatted filenames
class InvalidFormat(Exception): pass

def sync_sanger( runpath, ngsdata ):
    rund = basename( runpath )
    rawd = join( ngsdata, 'RawData', 'Sanger', rund )
    readd = join( ngsdata, 'ReadData', 'Sanger', rund )

    sync_run( runpath, ngsdata )
    sync_readdata( rawd, ngsdata )
    link_reads( readd, ngsdata )

def sync_run( runpath, ngsdata ):
    '''
Example #10
import sys
import os
from os.path import *
import subprocess
import argparse

import bqd, graph_qualdepth as qd
import samtools
from bam_to_qualdepth import set_unmapped_mapped_reads
import json
import log

logc = log.get_config('graphsample.log')
logger = log.setup_logger('graphsample', logc)


def main():
    args = parse_args()
    args = handle_args(args)
    if not args.qualdepth:
        jfile = make_json(args.bamfile, args.outpath)
    else:
        jfile = args.qualdepth
    pngfile = make_image(jfile, args.outpath)


def make_json(bamfile, outpathprefix):
    pileup = samtools.nogap_mpileup(bamfile)
    stats = bqd.parse_pileup(pileup)
    set_unmapped_mapped_reads(bamfile, stats)
    outfile = outpathprefix + '.qualdepth.json'
Example #11
import argparse
import sys
import samtools
import re
import shutil
import os.path
from ngs_mapper.bam import sortbam, indexbam

import log
logger = log.setup_logger('tagreads',log.get_config())

# Exception for when headers exist
class HeaderExists(Exception): pass

# The next 3 tuples must be the same length; the entry at each index in one corresponds to the entry at the same index in the others
# AKA zip( IDS, PLATFORMS, ID_MAP ) should work as expected
# Read group ID list
IDS = ('Roche454', 'IonTorrent', 'MiSeq', 'Sanger')
# Valid platforms for read groups
PLATFORMS = ('L454', 'IONTORRENT', 'ILLUMINA', 'CAPILLARY')
# Read name map to ID name
ID_MAP = (
    re.compile( '[0-9A-Z]{14}' ),
    re.compile( '[A-Z0-9]{5}:\d{1,}:\d{1,}' ),
    re.compile( 'M[0-9]{5}:\d+:[\w\d-]+:\d:\d{4}:\d{4,5}:\d{4,5}' ),
    re.compile( '.*' )
)
# Read Group Template
RG_TEMPLATE = {
    'SM': None,
    'ID': None,
Example #12
from glob import glob
from os.path import *
import numpy as np
import matplotlib.pyplot as plt

from nose.tools import ok_, eq_

from datetime import datetime
import log

logc = log.get_config()
logger = log.setup_logger( 'graph_times', logc )

def main():
    ss = start_stop( 'Projects' )
    logger.info( "Plotting all projects inside of {0}".format('Projects') )
    x,y = [],[]
    samplenames = sorted(ss.keys())
    for sn in samplenames:
        x.append( sn )
        y.append( ss[sn] )
    fig = plt.figure()
    fig.set_size_inches( 20.0, 8.0 )
    fig.suptitle( 'Pipeline Time per Sample' )
    ax = plt.gca()
    ax.plot( range(len(x)), y )
    ax.set_xlim([0,len(x)-1])
    ax.set_ylim([0,max(y)])
    ax.set_xticks( range(0,len(x)) )
    ax.set_xticklabels( x, rotation='vertical' )
    ax.set_ylabel( 'Seconds' )