Example #1
0
def main(argv):

    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 5000
    max_isize = pairing_batch_size = 1000
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    read_flow = Bio.SeqIO.parse(open(read_fname), 'fastq-illumina')
    mate_flow = Bio.SeqIO.parse(open(mate_fname), 'fastq-illumina')
    pairs_flow = it.izip(read_flow, mate_flow)
    res = []
    while 1:
        pairs = list(it.islice(pairs_flow, 0, seq_list_len))
        if len(pairs) == 0:
            break
        bwts = bwa.restore_index(refseq_fname)
        bnsp, pacseq = bwa.restore_reference(refseq_fname)

        l = len(pairs)
        bwsa = bwa.build_bws_array(pairs)

        logger = logging.getLogger("test")
        logger.setLevel(logging.DEBUG)
        counters = get_counters()
        ctx = ContextStub()
        visitor = MRVisitor(logger, ctx, counters)

        bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                                   pairing_batch_size, visitor)
        for read, mate in bwa_iterator.analyze(bwsa, l):
            print read.get_name(), mate.get_name()

        for j in 0, 1:
            bwa.free_seq(l, bwsa[j])
        bwa.bns_destroy(bwa_iterator.bnsp)

    for cn, c in counters.iteritems():
        sys.stderr.write("%s = %d\n" % (cn, c.value))
def main(argv):

  try:
    refseq_fname = argv[1]
    read_fname = argv[2]
    mate_fname = argv[3]
  except IndexError:
    sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

  seq_list_len = 5000
  max_isize = pairing_batch_size = 1000
  gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

  read_flow = Bio.SeqIO.parse(open(read_fname), 'fastq-illumina')
  mate_flow = Bio.SeqIO.parse(open(mate_fname), 'fastq-illumina')
  pairs_flow = it.izip(read_flow, mate_flow)
  res = []
  while 1:
    pairs = list(it.islice(pairs_flow, 0, seq_list_len))
    if len(pairs) == 0:
      break
    bwts = bwa.restore_index(refseq_fname)
    bnsp, pacseq = bwa.restore_reference(refseq_fname)

    l = len(pairs)
    bwsa = bwa.build_bws_array(pairs)

    logger = logging.getLogger("test")
    logger.setLevel(logging.DEBUG)
    counters = get_counters()
    ctx = ContextStub()
    visitor = MRVisitor(logger, ctx, counters)

    bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                               pairing_batch_size, visitor)
    for read, mate in bwa_iterator.analyze(bwsa, l):
      print read.get_name(), mate.get_name()

    for j in 0, 1:
      bwa.free_seq(l, bwsa[j])
    bwa.bns_destroy(bwa_iterator.bnsp)

  for cn, c in counters.iteritems():
    sys.stderr.write("%s = %d\n" % (cn, c.value))
Example #3
0
def main(argv):
  try:
    refseq_fname = argv[1]
    read_fname = argv[2]
    mate_fname = argv[3]
  except IndexError:
    sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

  seq_list_len = 10000
  max_isize = pairing_batch_size = 10000
  gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

  read_flow = Bio.SeqIO.parse(open(read_fname), 'fastq-illumina')
  mate_flow = Bio.SeqIO.parse(open(mate_fname), 'fastq-illumina')
  pairs_flow = it.izip(read_flow, mate_flow)
  pairs = list(it.islice(pairs_flow, 0, seq_list_len))
  bwts = bwa.restore_index(refseq_fname)
  bnsp, pacseq = bwa.restore_reference(refseq_fname)

  l = len(pairs)
  bwsa = bwa.build_bws_array(pairs)

  bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                             pairing_batch_size)
  pairs = [p for p in bwa_iterator.analyze(bwsa, l)]
  print "READ POS GAPO GAPE MM STRAND SCORE CIGAR"
  for read, mate in pairs:
    if read.n_multi > 0:
      print
      multi_list = [m for m in read.itermulti()]
      for m in multi_list:
        print read.get_name(), m.pos, m.n_gapo, m.n_gape, m.n_mm, m.strand, \
               m.score, m.get_cigar(read.len)

  for j in 0, 1:
    bwa.free_seq(l, bwsa[j])
  bwa.bns_destroy(bwa_iterator.bnsp)
Example #4
0
def main(argv):
    try:
        refseq_fname = argv[1]
        read_fname = argv[2]
        mate_fname = argv[3]
    except IndexError:
        sys.exit("Usage: %s REFSEQ_FN READ_FN MATE_FN" % sys.argv[0])

    seq_list_len = 10000
    max_isize = pairing_batch_size = 10000
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()

    read_flow = Bio.SeqIO.parse(open(read_fname), 'fastq-illumina')
    mate_flow = Bio.SeqIO.parse(open(mate_fname), 'fastq-illumina')
    pairs_flow = it.izip(read_flow, mate_flow)
    pairs = list(it.islice(pairs_flow, 0, seq_list_len))
    bwts = bwa.restore_index(refseq_fname)
    bnsp, pacseq = bwa.restore_reference(refseq_fname)

    l = len(pairs)
    bwsa = bwa.build_bws_array(pairs)

    bwa_iterator = BWAIterator(refseq_fname, gopt, popt, max_isize,
                               pairing_batch_size)
    pairs = [p for p in bwa_iterator.analyze(bwsa, l)]
    print "READ POS GAPO GAPE MM STRAND SCORE CIGAR"
    for read, mate in pairs:
        if read.n_multi > 0:
            print
            multi_list = [m for m in read.itermulti()]
            for m in multi_list:
                print read.get_name(), m.pos, m.n_gapo, m.n_gape, m.n_mm, m.strand, \
                       m.score, m.get_cigar(read.len)

    for j in 0, 1:
        bwa.free_seq(l, bwsa[j])
    bwa.bns_destroy(bwa_iterator.bnsp)