Ejemplo n.º 1
0
# Names of the callable attributes of `_group_fns`, skipping any name that
# starts or ends with a double underscore.
_group_fn_list = [
    _attr
    for _attr in dir(_group_fns)
    if not (_attr.startswith('__') or _attr.endswith('__'))
    if callable(getattr(_group_fns, _attr))
]


@command.add_sub
@argument('-c', '--columns', nargs='+', help='column specs')
@argument('--no-header', '-H', dest='use_header', action='store_false')
@argument('--default-fn',
          '-f',
          default='first',
          choices=_group_fn_list,
          help='default group function')
@argument.exclusive(argument('-i', '--inputs', nargs='+', help='input files'),
                    argument('-l',
                             '--input-list',
                             help='list of input file names'),
                    required=True)
def summarize_row(args):
    """
    -c {col1} {col2} ...
    Availble specifiers :
        col1:col2               ---  concat(col1)
        col1::first
        col1:col1-sum:sum
        col1::concat            ---  concat(col1) as col1
        col1::length            ---  length(col1) as col1
        0                       ---  default_fn(filelist) (1st column) as filelist
        +n:sample               ---  (n+1)-th column of filelist
Ejemplo n.º 2
0
                seq = cigar.hard_clip_seq(seq)
                if qual is not None:
                    qual = cigar.hard_clip_seq(qual)
                rec.query_sequence = seq  # refill
                rec.query_qualities = qual

        out.write(rec)


@command.add_sub
@argument('bam')
@argument('-r', '--region', help='region of target bam file')
@argument('--max-nm', default=4, type=int)
@argument('--max-depth', default=8000, type=int)
@argument.exclusive(
    argument('--summary', action='store_true'),
    argument('--read-count', action='store_true'),
)
def bam_depth_with_nm(args):
    """
    * unmapped is discarded
    * both clipped is discarded
    * end clipped is included
    * multimap is included
    * stratified with NM

    default mode:
        pos is 1-based

    summary mode:
        covered
Ejemplo n.º 3
0
    '-0.301,0,-0.301,0,0,-0.301'
    """
    het_gl = '0'
    hom_gl = '{0:.3f}'.format(-np.log10(2))
    gt_num = (alt_num + 1) * (alt_num + 2) // 2
    gl = [het_gl] * gt_num
    hom_idxs = get_gt_hom_idxs(alt_num)
    for idx in hom_idxs:
        gl[idx] = hom_gl
    return ','.join(gl)


@command.add_sub
@argument('vcf', help='VCF or gzipped VCF')
@argument.exclusive(
    argument('-s', '--samples', help='comma separated sample list'),
    argument('-S', '--samples-file', help='sample list file'),
)
def vcf_reset_gl(args):
    """ whitening format tag for specified samples

    # TODO also reset PL
    """
    vcffile = '/dev/stdin' if args.vcf == '-' else args.vcf
    import pysam
    reader = pysam.VariantFile(vcffile)
    sample_flags = _get_sample_flags(reader, args)
    print(reader.header, end='')
    sample_offset = 9

    # hom: log10 p**2 = 2 log10 p
Ejemplo n.º 4
0
    """
    sam = pysam.Samfile(args.bamfile, 'rb')
    totlen = sum(sam.lengths)  # reference length (contains N)
    cov = 1. * sam.mapped / totlen  # (mapped may contains secondary or supplementary alignments)
    print(cov)


@command.add_sub
@argument('bamfile')
@argument('-r', '--rsample', type=int, default=1000)
@argument('--pacbio',
          action='store_true',
          help='use suplementary alignemnt, use only aligned length')
@argument('--nbed', help='N bed regions')
@argument.exclusive(
    argument('--chroms', nargs='+'),
    argument('--autosomal', action='store_true'),
)
def bam_mean_depth(args):
    """ Roughly estimate bam depth

    - estimate mean read length by scanning top {rsample} reads
    - divide the total length by total reference lengths

    # TODO autosomal option
    """
    sam = pysam.Samfile(args.bamfile, 'rb')
    samm = SamMethods(sam)
    if args.autosomal:
        if 'chr1' in sam.references:
            chroms = ['chr' + str(i + 1) for i in range(22)]
Ejemplo n.º 5
0
def test_subcommand_usage():
    """Exercise subcommand registration and dispatch on a fresh ``Command``.

    Three subcommands are registered: ``sub1`` through the builder style
    (``command.add_sub()`` + ``add_arg``), ``sub2`` and ``sub3`` through
    stacked decorators. Every handler appends a dict of the parsed values to
    ``run_tracks``; after each ``run`` call the test pops that dict and
    compares it with the expected values.
    """
    command = Command()
    run_tracks = []  # one dict appended per handler invocation

    # create common argument
    # (a positional `file` plus a `--some-flag` switch, attached to both
    # sub1 and sub2 below)
    common1 = argument()
    common1.add_arg('file', help='this is common option')
    common1.add_arg(argument('--some-flag', action='store_true'))

    # setup subcommand "sub1" (basic style)
    sub = command.add_sub()
    sub.add_arg(common1)
    sub.add_arg('-t', '--test', action='store_true', default=False)

    @sub
    def sub1(args):
        """ This line is used as description

        Write details (show in epilog).
        """
        # NOTE(review): `verbose` is not declared by this test; presumably
        # Command provides a counting -v option by default -- confirm.
        run_tracks.append({
            'file': args.file,
            'test': args.test,
            'verbose': args.verbose,
            'some_flag': args.some_flag,
        })


    # setup subcommand "sub2" (decorator style)

    @command.add_sub
    @argument(common1)
    @argument('-t', '--times', action='count', default=0)
    def sub2(args):
        """ This line is used as description

        Write details here (show in epilog).
        """
        run_tracks.append({
            'file': args.file,
            'times': args.times,
            'verbose': args.verbose,
            'some_flag': args.some_flag,
        })


    # with group, exclusive
    # (sub3 combines a titled argument group with a required exclusive pair;
    # --tsv and --csv both store into `filetype`)

    @command.add_sub
    @argument('-a')
    @argument.group(
       'Counting options',
       'Description of counting options',
       argument('-c', '--counts', action='count', default=0),
       argument('-t', '--times', action='count', default=0),
    )
    @argument.exclusive(
       argument('--tsv', action='store_const', const='tsv', dest='filetype'),
       argument('--csv', action='store_const', const='csv', dest='filetype'),
       required=True,
    )
    def sub3(args):
        """ This line is used as description

        Write details here (show in epilog).
        """
        run_tracks.append({
            'a': args.a,
            'counts': args.counts,
            'times': args.times,
            'filetype': args.filetype,
        })


    # root command option (currently disabled)
    #command.add_arg('--detail', help='this is root only option')

    # run sub1 through the root command; -v is not passed, so verbose stays 0
    command.run(args=['sub1', 'foo.txt', '-t'])
    eq_(run_tracks.pop(), {'file': 'foo.txt', 'test': True, 'verbose': 0, 'some_flag': False})

    # run sub2: '-t' and '--times' are the same counting option, hence times == 2
    # disabled variant that also passed -v:
    #command.run(args=['sub2', 'bar.txt', '-t', '--times', '-v'])
    command.run(args=['sub2', 'bar.txt', '-t', '--times', '--some-flag'])
    eq_(run_tracks.pop(), {'file': 'bar.txt', 'times': 2, 'verbose': 0, 'some_flag': True})

    # run sub3: '-t' + '--times' count to 2, '-cc' + '--counts' count to 3,
    # '-a' is omitted (None) and '--csv' selects the filetype
    command.run(args=['sub3', '-t', '--times', '-cc', '--counts', '--csv'])
    eq_(run_tracks.pop(), {'times': 2, 'counts': 3, 'a': None, 'filetype': 'csv'})

    # run subcommand directly (no subcommand name in args); here -v is
    # passed and the handler sees verbose == 1
    sub1.run(args=['foo.txt', '-t', '-v', '--some-flag'])
    eq_(run_tracks.pop(), {'file': 'foo.txt', 'test': True, 'verbose': 1, 'some_flag': True})
Ejemplo n.º 6
0
        locus = '{0}:{1}-{2}'.format(reg.contig, left, right)
        print ('<tr>')
        print ('''<td><a href="{link}">igv_link</a></td>'''.format(link='http://localhost:{igv_port}/goto?locus={locus}'.format(igv_port=args.igv_port, locus=locus)))
        for term in row:  # TODO html escape
            print ('<td>{0}</td>'.format(term))
        print ('</tr>')
    print ('</tbody>')
    print ('</table>')
    print ('''</body>''')
    print ('''</html>''')


@command.add_sub
@argument('files', nargs='+', help='URL or file path')
@argument.exclusive(
    argument('-l', '--loci', nargs='+'),
    argument('-L', '--loci-file'),
)
@argument('-P', '--paths', nargs='+')
@argument('-g', '--genome')
@argument('-p', '--port', type=int)
@argument('-o', '--outdir')
@argument('-m', '--view-mode', choices=['expand', 'collapse', 'squished'])
@argument('--view-as-pairs', action='store_true')
@argument('--height', type=int)
@argument('--run-server', action='store_true', help='Run IGV server during snapshot')
@argument('--no-reset', dest='reset_on_end', action='store_false', help='Reset IGV server status on end')
def igv_snapshot(args):
    """ Taking IGV snapshot
    """
    if not args.loci and not args.loci_file:
Ejemplo n.º 7
0

def get_sample_name(fname):
    """Derive a sample name from a SAM/BAM file path.

    Takes the basename of `fname` and passes it through `remove_suffix`
    once per suffix, in this fixed order: '.sam', '.bam', '.sorted', '.sort'.
    The order matters: the file-type extension is handled before the
    sort markers (presumably so that e.g. 'x.sorted.bam' ends up as 'x';
    exact semantics depend on `remove_suffix` -- confirm there).
    """
    sample = os.path.basename(fname)
    for suffix in ('.sam', '.bam', '.sorted', '.sort'):
        sample = remove_suffix(sample, suffix)
    return sample


# TODO density or read depth
@command.add_sub
@argument('sam', nargs='+')
@argument.exclusive(
    argument('-r', '--regions', nargs='+'),
    argument('-R', '--region-file'),
    required=True,
)
@argument('--sample', nargs='+')
@argument('--gtf', help='need tabix')
@argument('--show-clips', action='store_true')
@argument('-o', '--output', default='bam_bp_plot.pdf')
@argument('--sam-per-page', default=10, type=int)
@argument('--bin-size', default=1000, type=int)
@argument('--njobs', default=8, type=int)
@argument('--style', default='darkgrid')
@argument('--max-depth', type=int)
@argument('--xlab-rot', type=int)
@argument('--region-propto', action='store_true', help='set column width to that proportional to each region length')
#@argument('--skip-flag', type=lambda x: int(x, 16), default=0x904)
Ejemplo n.º 8
0
            tokens = line.rstrip('\r\n').split(' ', 1)
            name = tokens[0][1:]
            rest = ''.join(tokens[1:])
            if is_target(name):
                print('>', name, ' ', rest, sep='')
                emit = True
            else:
                emit = False
        elif emit:
            print(line, end='')


@command.add_sub
@argument('fasta')
@argument.exclusive(
    argument('-n', '--names', nargs='*', help='list of fasta names to select'),
    argument('-N', '--name-file', help='list of fasta names to select'),
    argument('-r', '--regexp', help='match patterns for name'),
)
def fa_select(args):
    """ Select sequences by names

    O(n) but not required faidx
    """
    # NOTE(review): the docstring above is kept verbatim because it is
    # presumably surfaced as the subcommand's help text -- confirm before
    # rewording it.
    #
    # The names to select come either from --name-file (one name per line,
    # trailing whitespace stripped) or from --names; --regexp is forwarded
    # unchanged. The options are declared mutually exclusive above.
    if args.name_file:
        # Use a context manager so the file handle is closed as soon as the
        # names are read, instead of being left to the garbage collector.
        with open(args.name_file) as name_fp:
            names = [line.rstrip() for line in name_fp]
    else:
        names = args.names
    _select_or_omit(args.fasta, names=names, regexp=args.regexp)

Ejemplo n.º 9
0
                   ','.join(str(base_counts[b]) for b in cand_bases),
                   ref_base,
                   zygosity,
                   '|'.join(best_bases),
                   mean_depths and '|'.join('{0:.1f}'.format(d) for d in mean_depths),
                   um_depths and '|'.join('{0:.1f}'.format(d) for d in um_depths),
                   hap_prob,
                   sep='\t',
                   file=stdout)


@command.add_sub
@argument('bam')
@argument('-r', '--regions', nargs='*')
@argument.exclusive(
    (argument('-c', '--copy-number', type=int, default=2)
    .add_argument('--hap-depth', type=float, default=20.)),
    argument('-t', '--table', help='contig, hap_depth, and copy_number are required')
)
@argument('-g', '--gref', required=True)
@argument('--no-phase', action='store_true')
@argument('-m', '--min-second-bases', type=int, default=2)
def mc_path_call6(args):
    with open(args.gref) as fp:
        fasta = Fasta(fp)
        contigs = {contig.name: contig.seq.upper() for contig in fasta.contigs}

    with Samfile(args.bam) as sam:
        smb = SamModelBuilder2(sam, regions=args.regions, min_second_bases=args.min_second_bases, contigs=contigs)

    if not args.table:
        hap_depths = {ref.name: args.hap_depth for ref in smb.model.refs}
Ejemplo n.º 10
0
from __future__ import print_function
from argtools import command, argument
import logging
import pysam
from operator import attrgetter
from ..samutil import ReadCountGenerator, sam_intervals
from ..interval import Interval, parse_bed


@command.add_sub
@argument('sam')
@argument.exclusive(
    argument(
        '-r',
        '--regions',
        nargs='*',
        help='region e.g. one based indexes chr1, chr1:1001, or chr1:1001-2000'
    ), argument('-b', '--bed', help='region defined bed file'))
@argument('-w', '--window', type=int, default=50, help='width of counter')
@argument('--offset',
          type=int,
          default=0,
          help='offset of regions (or bed file)')
@argument('-H',
          '--no-header',
          dest='header',
          action='store_false',
          default=True,
          help='add header')
@argument('--skip-flag', type=lambda x: int(x, 16), default=0x904)
@argument(