Esempio n. 1
0
def overlapbatch(args):
    """
    %prog overlapbatch ctgfasta poolfasta

    Fish out the sequences in `poolfasta` that overlap with `ctgfasta`.
    Mix and combine using `minimus2`.
    """
    # args: command-line tokens, expected to be exactly [ctgfasta, poolfasta].
    # The usage text must come from this command's own docstring; it was
    # previously taken from `overlap`, so the help described the wrong command.
    p = OptionParser(overlapbatch.__doc__)
    _, args = p.parse_args(args)
    if len(args) != 2:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    ctgfasta, poolfasta = args
    f = Fasta(ctgfasta)
    for k, rec in f.iteritems_ordered():
        # Each record is written to its own single-record FASTA file named
        # after the record key, in the current working directory.
        fastafile = k + ".fasta"
        # The context manager closes the handle even if the write fails.
        with open(fastafile, "w") as fw:
            SeqIO.write([rec], fw, "fasta")

        overlap([fastafile, poolfasta])
Esempio n. 2
0
def overlapbatch(args):
    """
    %prog overlapbatch ctgfasta poolfasta

    Fish out the sequences in `poolfasta` that overlap with `ctgfasta`.
    Mix and combine using `minimus2`.
    """
    # args: command-line tokens, expected to be exactly [ctgfasta, poolfasta].
    # Build the parser from this command's own docstring so that the help
    # output shows the `overlapbatch` usage line rather than that of `overlap`.
    p = OptionParser(overlapbatch.__doc__)
    _, args = p.parse_args(args)
    if len(args) != 2:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    ctgfasta, poolfasta = args
    f = Fasta(ctgfasta)
    for k, rec in f.iteritems_ordered():
        # One single-record FASTA file per entry, named after the record key.
        fastafile = k + ".fasta"
        # `with` guarantees the handle is closed even when the write raises.
        with open(fastafile, "w") as fw:
            SeqIO.write([rec], fw, "fasta")

        # Run the pairwise `overlap` command for this record against the pool.
        overlap([fastafile, poolfasta])
Esempio n. 3
0
def circular(args):
    """
    %prog circular fastafile startpos

    Make circular genome, startpos is the place to start the sequence. This can
    be determined by mapping to a reference. Self overlaps are then resolved.
    Startpos is 1-based.
    """
    # NOTE: the docstring above doubles as the command's help text, so it is
    # left untouched; implementation notes live in comments instead.
    from jcvi.assembly.goldenpath import overlap

    p = OptionParser(circular.__doc__)
    p.add_option(
        "--flip",
        default=False,
        action="store_true",
        help="Reverse complement the sequence",
    )
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    fastafile, startpos = args
    startpos = int(startpos)
    # Only the first record yielded by parse_fasta() is used.
    key, seq = next(parse_fasta(fastafile))
    # NOTE(review): the help text says startpos is 1-based, but the slices
    # below use it directly as a 0-based offset, so the rotated sequence starts
    # at 1-based position startpos + 1. Confirm the intended convention before
    # changing; behaviour is deliberately left as-is here.
    aseq = seq[startpos:]
    bseq = seq[:startpos]
    aseqfile, bseqfile = "a.seq", "b.seq"

    try:
        # Write both halves as single-record FASTA files in the current
        # working directory so that overlap() can align them.
        for f, s in zip((aseqfile, bseqfile), (aseq, bseq)):
            fw = must_open(f, "w")
            try:
                print(">{0}\n{1}".format(f, s), file=fw)
            finally:
                fw.close()

        o = overlap([aseqfile, bseqfile])
    finally:
        # Always remove the scratch files, including when overlap() fails;
        # previously they were left behind on any error.
        for f in (aseqfile, bseqfile):
            if os.path.exists(f):
                os.remove(f)

    # Join the halves at the coordinates reported by overlap(): the head of
    # the first half up to qstop, then the second half from sstop onward.
    seq = aseq[:o.qstop] + bseq[o.sstop:]
    seq = Seq(seq)

    if opts.flip:
        seq = seq.reverse_complement()

    rec = SeqRecord(seq, id=key, description="")
    fw = must_open(opts.outfile, "w")
    try:
        SeqIO.write([rec], fw, "fasta")
    finally:
        fw.close()
Esempio n. 4
0
def circular(args):
    """
    %prog circular fastafile startpos

    Make circular genome, startpos is the place to start the sequence. This can
    be determined by mapping to a reference. Self overlaps are then resolved.
    Startpos is 1-based.
    """
    # NOTE: the docstring above doubles as the command's help text, so it is
    # left untouched; implementation notes live in comments instead.
    from jcvi.assembly.goldenpath import overlap

    p = OptionParser(circular.__doc__)
    p.add_option("--flip", default=False, action="store_true",
                 help="Reverse complement the sequence")
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        # print_help() returns None, so this exits with a non-zero status.
        sys.exit(not p.print_help())

    fastafile, startpos = args
    startpos = int(startpos)
    # The builtin next() works on Python 2.6+ and Python 3, unlike the
    # Python-2-only generator method `.next()`. Only the first record is used.
    key, seq = next(parse_fasta(fastafile))
    # NOTE(review): the help text says startpos is 1-based, but the slices
    # below use it directly as a 0-based offset. Confirm the intended
    # convention before changing; behaviour is deliberately left as-is here.
    aseq = seq[startpos:]
    bseq = seq[:startpos]
    aseqfile, bseqfile = "a.seq", "b.seq"

    try:
        # Write both halves as single-record FASTA files in the current
        # working directory so that overlap() can align them.
        for f, s in zip((aseqfile, bseqfile), (aseq, bseq)):
            fw = must_open(f, "w")
            try:
                # fw.write() replaces the Python-2-only `print >> fw`; the
                # explicit trailing newline keeps the written bytes identical.
                fw.write(">{0}\n{1}\n".format(f, s))
            finally:
                fw.close()

        o = overlap([aseqfile, bseqfile])
    finally:
        # Always remove the scratch files, including when overlap() fails;
        # previously they were left behind on any error.
        for f in (aseqfile, bseqfile):
            if os.path.exists(f):
                os.remove(f)

    # Join the halves at the coordinates reported by overlap(): the head of
    # the first half up to qstop, then the second half from sstop onward.
    seq = aseq[:o.qstop] + bseq[o.sstop:]
    seq = Seq(seq)

    if opts.flip:
        seq = seq.reverse_complement()

    rec = SeqRecord(seq, id=key, description="")
    fw = must_open(opts.outfile, "w")
    try:
        SeqIO.write([rec], fw, "fasta")
    finally:
        fw.close()