Exemple #1
0
    parser.add_argument('-o', '--outfile', help="Output file name.")
    parser.add_argument('-H',
                        '--histogram_file',
                        help="Save score histograms to FILE")
    parser.add_argument('-j',
                        '--nthreads',
                        help="Number of threads.",
                        type=int,
                        default=16)

    args = parser.parse_args()

    if args.top:
        import psutil

    hra = HiriseAssembly()
    hra.load_assembly(args.infile)

    hra.merge_masked_regions(debug=args.debug)

    print(len(hra.layout_lines))
    if len(hra.layout_lines) == 0:
        print("#make trivial layout")
        hra.make_trivial_layout_lines(debug=args.debug)

    ces.set_exp_insert_size_dist_fit_params(hra.model_params)
    model = ces.model

    nbams = len(hra.bams)
    inq = JoinableQueue(maxsize=0)
    readers = []
    parser.add_argument('-i',
                        '--infile',
                        default=False,
                        help="Filename for serialised assembly input file.")
    parser.add_argument(
        '-o',
        '--outfile',
        default=False,
        help=
        "Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking."
    )
    # -m 2 -w 1000 -M $( cat {input.threshold} )
    args = parser.parse_args()

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)

    for segments_file in args.mask:
        asf.add_mask_regions(filename=segments_file)
        asf.merge_masked_regions()

    if args.outfile:
        of = open(args.outfile, "wt")
    else:
        of = sys.stdout
    for ocontig in asf.ocontigs_iter():
        chunk = struct.unpack(
            "<L",
            hashlib.md5(
                ocontig.encode("utf-8")).digest()[:4])[0] % args.nchunks
from __future__ import division
from __future__ import print_function
from builtins import range
from past.utils import old_div
from hirise_assembly import HiriseAssembly

if __name__=="__main__":
    # Command-line entry point: load a serialised assembly and write one
    # tab-separated "contig<TAB>length" line per contig to --outfile.
    import sys
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d','--debug',default=False  ,action="store_true",help="Turn on debugging ouput")
    parser.add_argument('-L','--layout',default=False ,help="A file containing a layout of contigs.")
    parser.add_argument('-i','--infile',default=False ,help="Filename for serialised assembly input file.")
    parser.add_argument('-o','--outfile',default=False,help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.")

    args = parser.parse_args()

    # Writing the table needs a loaded assembly; fail with a usage message
    # instead of a NameError on the never-assigned `asf`.
    if args.outfile and not args.infile:
        parser.error("--outfile requires --infile: no assembly to read contigs from")

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)

    if args.outfile:
        # The context manager guarantees the table is flushed and the handle
        # closed even if iterating the contigs raises.
        with open(args.outfile, "wt") as f:
            for contig in asf.contigs_iter():
                f.write("{}\t{}\n".format(contig, asf.contig_length(contig)))



    parser.add_argument("-m", "--min_links", default=2, type=int, help="Min links to another contig to count.")
    parser.add_argument("-M", "--max_others", default=4, type=int, help="Max other contigs in the window.")
    parser.add_argument("-c", "--chunk", default=0, type=int, help="This chunk.")
    parser.add_argument("-q", "--mapq", default=50, type=int, help="Min map quality score.")
    parser.add_argument("-i", "--infile", default=False, help="Filename for serialised assembly input file.")
    parser.add_argument(
        "-o",
        "--outfile",
        default=False,
        help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.",
    )
    # -m 2 -w 1000 -M $( cat {input.threshold} )
    args = parser.parse_args()

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)

    for segments_file in args.mask:
        asf.add_mask_regions(filename=segments_file)
        asf.merge_masked_regions()

    if args.outfile:
        of = open(args.outfile, "wt")
    else:
        of = sys.stdout
    for ocontig in asf.ocontigs_iter():
        chunk = struct.unpack("<L", hashlib.md5(ocontig.encode("utf-8")).digest()[:4])[0] % args.nchunks
        if not chunk == args.chunk:
            continue
        links = {}
if __name__=="__main__":
     import sys
     import argparse

     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-d','--debug',default=False  ,action="store_true",help="Turn on debugging ouput")
     parser.add_argument('-b','--breaks',default=False ,help="File containing breaks")
     parser.add_argument('-i','--infile',default=False ,help="Input layout in p: lines format.")
     parser.add_argument('-o','--outfile',default=False,help="Filename for output.")

     args = parser.parse_args()


     if args.infile:
          asf = HiriseAssembly()
          asf.load_assembly(args.infile)


#     asf = HiriseAssembly()
#     asf.load_playout(args.infile)

     breaks=[]
     scores={}
     for l in open(args.breaks):
          if l[0]=="#": continue
#Scaffold102239 741 1379 5097 -5.421961663655971
          scaffold,a,b,slen,score = l.strip().split()
          a=int(a) #start
          b=int(b) #end
#          c=int(c) #lowpoint
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-d','--debug',default=False  ,action="store_true",help="Turn on debugging ouput")
#     parser.add_argument('-L','--layout',default=False ,help="A file containing a layout of contigs.")
     parser.add_argument('-T','--test',  default=False ,help="Compute stats for an explicitly specified region.")
     parser.add_argument('-B','--bin',  default=False ,action="store_true",help="Bin reference sequences.")
     parser.add_argument('-N','--nsamples',default=10000,type=int ,help="Number of winows to sample.")
     parser.add_argument('-w','--window',default=1000, type=int ,help="Size of windows to examine.")
     parser.add_argument('-W','--binwindow',default=100000, type=int ,help="Size of windows to break reference contigs into.")
     parser.add_argument('-i','--infile',default=False ,help="Filename for serialised assembly input file.")
#     parser.add_argument('-o','--outfile',default=False,help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.")

     args = parser.parse_args()

     if args.infile:
          asf = HiriseAssembly()
          asf.load_assembly(args.infile)
     n=0

     asf.binsize = args.binwindow

     while n<args.nsamples:
          try:
               c,a,b=asf.random_window(wlen=args.window)
               if args.test:
                    m=re.match("^(.*):(\d+)-(\d+)$",args.test)
                    c=m.group(1)
                    a=int(m.group(2))
                    b=int(m.group(3))
               asf.window_stats(c,a,b,debug=args.debug,bins=args.bin)
               n+=1
     parser.add_argument('-b','--breaks',default=False ,help="File containing breaks")
     parser.add_argument('-B','--bams',default=[],action="append" ,help="File containing breaks")
     parser.add_argument('-i','--infile',default=False ,help="Input layout in p: lines format.")
     parser.add_argument('-j','--nprocesses',default=16,type=int ,help="Number of subprocesses to use when making PDF reports.")
     parser.add_argument('-H','--head',default=False,type=int ,help="Maximum number of breaks to make visualizations for.")
     parser.add_argument('-o','--outfile',default=False,help="Filename for output.")
     parser.add_argument('-R','--report',default=False ,metavar="FILE", help="Generate a PDF report with visualizations of the breaks made.")
     parser.add_argument('-t','--threshold',default=5.0,type=float,help="Minimum acceptable LLR support.")
     parser.add_argument('-T','--contig_break_threshold',type=float,help="Minimum acceptable LLR support within contig sequence.")
     parser.add_argument('-q','--mapq',default=10.0,type=float,help="Mapping quality score cutoff.")
     parser.add_argument('--mask',default=False,help="File with segments to mask for dot-plot visualization.")

     args = parser.parse_args()
     if args.contig_break_threshold==None: args.contig_break_threshold = args.threshold

     asf = HiriseAssembly()
     asf.load_playout(args.infile)

     breaks=[]
     scores={}
     stype={}
     for l in open(args.breaks):
          if l[0]=="#": continue
          fields = l.strip().split()
          scaffold,a,b,c,score,slen = fields[:6]

          lowest_raw_score=args.threshold+5
          clipped=False
          if "clippedLLR" in fields:
               clipped=True
               if not fields[6]=="False":
from builtins import range
from past.utils import old_div
from hirise_assembly import HiriseAssembly

# import struct
# import hashlib
import re

if __name__ == "__main__":
    # Command-line entry point: load a serialised assembly and have it report
    # its "read deserts" to --outfile (or stdout when no output file is given).
    import sys
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("-d", "--debug", default=False, action="store_true", help="Turn on debugging ouput")
    parser.add_argument("-q", "--mapq", default=55, type=float, help="Minimum map quality threshold.")
    parser.add_argument("-i", "--infile", default=False, help="Filename for serialised assembly input file.")
    parser.add_argument("-o", "--outfile", default=False, help="Filename for bedfile output.")

    args = parser.parse_args()

    # The assembly is mandatory for the call below; report that as a usage
    # error rather than crashing later with a NameError on `hra`.
    if not args.infile:
        parser.error("--infile is required")

    hra = HiriseAssembly()
    hra.load_assembly(args.infile)

    of = open(args.outfile, "wt") if args.outfile else sys.stdout
    try:
        # Honour the -q/--mapq option; its default (55) matches the value that
        # used to be hard-coded here, so default behaviour is unchanged.
        hra.read_deserts(mapq=args.mapq, outfile=of, min_len=1000)
    finally:
        # Close only a file we opened ourselves; never close stdout.
        if of is not sys.stdout:
            of.close()
Exemple #9
0
#

from __future__ import division
from __future__ import print_function
from builtins import range
from past.utils import old_div
from hirise_assembly import HiriseAssembly

if __name__=="__main__":
    # Command-line entry point: load a serialised assembly, optionally apply a
    # layout file to it, and optionally save the result to --outfile.
    import sys
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d','--debug',default=False  ,action="store_true",help="Turn on debugging ouput")
    parser.add_argument('-L','--layout',default=False ,help="A file containing a layout of contigs.")
    parser.add_argument('-i','--infile',default=False ,help="Filename for serialised assembly input file.")
    parser.add_argument('-o','--outfile',default=False,help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.")

    args = parser.parse_args()

    # Both --layout and --outfile operate on the loaded assembly; without
    # --infile `asf` is never created, so reject that combination up front
    # instead of failing with a NameError.
    if (args.layout or args.outfile) and not args.infile:
        parser.error("--layout/--outfile require --infile: no assembly was loaded")

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)

    if args.layout:
        asf.load_playout(args.layout)

    if args.outfile:
        asf.save_assembly(args.outfile)

Exemple #10
0
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-d','--debug',default=False,action="store_true",help="Turn on debugging ouput")
     parser.add_argument('-C','--nchunks' ,default=1,type=int,help="Number of chunks.")
     
     parser.add_argument('-m','--mask'   ,default=[], action="append",help="Name of a file containing regions of the input assembly to mask out")
     parser.add_argument('-c','--chunk',default=0,  type=int,help="This chunk.")
     parser.add_argument('-K','--contig',default=False,help="Just this contig.")
     parser.add_argument('-q','--mapq',default=10,  type=float,help="Minimum map quality threshold.")
     parser.add_argument('-i','--infile',default=False,help="Filename for serialised assembly input file.")
     parser.add_argument('-o','--outfile',default=False,help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.")
# -m 2 -w 1000 -M $( cat {input.threshold} ) 
     args = parser.parse_args()

     if args.infile:
          asf = HiriseAssembly()
          asf.load_assembly(args.infile)
     
     for segments_file in args.mask:
         asf.add_mask_regions(filename=segments_file)
         asf.merge_masked_regions()

     
     if args.contig:
          contig_iter=iter([args.contig])
     else:
          contig_iter = asf.ocontigs_iter()

     if args.outfile:
          of=open(args.outfile,"wt")
     else:
                        default=False,
                        help="Filename for serialised assembly input file.")
    parser.add_argument('-f',
                        '--fasta',
                        default=False,
                        help="Filename for original contigs fasta.")
    parser.add_argument(
        '-o',
        '--outfile',
        default=False,
        help=
        "Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking."
    )

    args = parser.parse_args()

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)

    asf.ocontig_fasta = args.fasta

    if args.outfile:
        outfasta = fastaWriter(args.outfile)
        for contig in asf.contigs_iter():
            outfasta.next(contig)
            outfasta.write(asf.get_seq(contig))

        outfasta.flush()
        outfasta.close()
    parser.add_argument(
        '-W',
        '--binwindow',
        default=100000,
        type=int,
        help="Size of windows to break reference contigs into.")
    parser.add_argument('-i',
                        '--infile',
                        default=False,
                        help="Filename for serialised assembly input file.")
    #     parser.add_argument('-o','--outfile',default=False,help="Filename for writing a list of segments on the raw contigs to mask for being promiscuous in linking.")

    args = parser.parse_args()

    if args.infile:
        asf = HiriseAssembly()
        asf.load_assembly(args.infile)
    n = 0

    asf.binsize = args.binwindow

    while n < args.nsamples:
        try:
            c, a, b = asf.random_window(wlen=args.window)
            if args.test:
                m = re.match("^(.*):(\d+)-(\d+)$", args.test)
                c = m.group(1)
                a = int(m.group(2))
                b = int(m.group(3))
            asf.window_stats(c, a, b, debug=args.debug, bins=args.bin)
            n += 1
     parser.add_argument('-q','--mapq',type=int,default=55)

     parser.add_argument('-S','--slice',default=False)

     parser.add_argument('-i','--infile',help="File containing .hra formatted hirise assembly.")
     parser.add_argument('-o','--outfile',help="Output file name.")
     parser.add_argument('-H','--histogram_file',help="Save score histograms to FILE")
     parser.add_argument('-j','--nthreads',help="Number of threads.",type=int,default=16)
     

     args = parser.parse_args()

     if args.top:
          import psutil

     hra = HiriseAssembly()
     hra.load_assembly(args.infile)

     hra.merge_masked_regions(debug=args.debug)

     print(len(hra.layout_lines))
     if len(hra.layout_lines)==0: 
          print("#make trivial layout")
          hra.make_trivial_layout_lines(debug=args.debug)

     ces.set_exp_insert_size_dist_fit_params(hra.model_params)
     model=ces.model

     nbams = len(hra.bams)
     inq = JoinableQueue(maxsize=0)
     readers=[]
from __future__ import print_function
from builtins import range
from past.utils import old_div
from hirise_assembly import HiriseAssembly
#import struct
#import hashlib
import re    

if __name__=="__main__":
    # Command-line entry point: load a serialised assembly and have it report
    # its "read deserts" to --outfile (or stdout when no output file is given).
    import sys
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d','--debug',default=False,action="store_true",help="Turn on debugging ouput")
    parser.add_argument('-q','--mapq',default=55,  type=float,help="Minimum map quality threshold.")
    parser.add_argument('-i','--infile',default=False,help="Filename for serialised assembly input file.")
    parser.add_argument('-o','--outfile',default=False,help="Filename for bedfile output.")

    args = parser.parse_args()

    # The assembly is mandatory for the call below; report that as a usage
    # error rather than crashing later with a NameError on `hra`.
    if not args.infile:
        parser.error("--infile is required")

    hra = HiriseAssembly()
    hra.load_assembly(args.infile)

    if args.outfile:
        of = open(args.outfile, "wt")
    else:
        of = sys.stdout

    try:
        # Use the parsed -q/--mapq value; the option's default of 55 equals the
        # constant previously hard-coded here, so default runs are unchanged.
        hra.read_deserts(mapq=args.mapq, outfile=of, min_len=1000)
    finally:
        # Close only a file we opened ourselves; never close stdout.
        if of is not sys.stdout:
            of.close()