예제 #1
0
def blat(args):
    """
    %prog blat map1.txt ref.fasta

    Make ALLMAPS input csv based on sequences. The tab-delimited txt file
    include: name, LG, position, sequence.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it is kept verbatim; implementation notes live in comments instead.
    #
    # args: list of command-line tokens; exactly two positionals are required
    #       (the marker table and the reference FASTA), otherwise the help is
    #       printed and the process exits.
    # Returns None. Writes "<map prefix>.fasta" and
    # "<map basename>.<ref basename>.csv"; the blat/best/bed helpers are
    # presumably responsible for their own intermediate files.
    from jcvi.formats.base import is_number
    from jcvi.formats.blast import best as blast_best, bed as blast_bed
    from jcvi.apps.align import blat as blat_align

    p = OptionParser(blat.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    maptxt, ref = args
    fastafile = maptxt.rsplit(".", 1)[0] + ".fasta"

    # Pass 1: write every marker that has a numeric position as a FASTA
    # record. Rows with a non-numeric position (e.g. a header) are skipped.
    # Context managers guarantee both handles are closed, even on error.
    with open(maptxt) as fp, open(fastafile, "w") as fw:
        for row in fp:
            if not row.strip():
                # Tolerate blank lines instead of failing the 4-way unpack.
                continue
            name, lg, pos, seq = row.split()
            if not is_number(pos):
                continue
            # The trailing "\n" plus print's own newline leaves a blank line
            # after each record; kept as-is to preserve the output format.
            print(">{0}\n{1}\n".format(name, seq), file=fw)

    # Align the marker sequences to the reference, keep the best hits and
    # convert them to BED; `order` is then looked up by marker name.
    blatfile = blat_align([ref, fastafile])
    bestfile = blast_best([blatfile])
    bedfile = blast_bed([bestfile])
    b = Bed(bedfile).order

    # Output name is built from the part of each basename before its first dot.
    pf = ".".join(
        (op.basename(maptxt).split(".")[0], op.basename(ref).split(".")[0]))
    csvfile = pf + ".csv"

    # Pass 2: for every marker that received a hit, emit
    # scaffold,scaffold_pos,LG,position.
    with open(maptxt) as fp, open(csvfile, "w") as fw:
        for row in fp:
            if not row.strip():
                continue
            name, lg, pos, seq = row.split()
            if name not in b:
                continue
            bbi, bb = b[name]
            scaffold, scaffold_pos = bb.seqid, bb.start
            print(",".join(str(x) for x in
                           (scaffold, scaffold_pos, lg, pos)), file=fw)
예제 #2
0
def blat(args):
    """
    %prog blat map1.txt ref.fasta

    Make ALLMAPS input csv based on sequences. The tab-delimited txt file
    include: name, LG, position, sequence.
    """
    # The docstring is reused as the OptionParser usage message, so its text
    # is left untouched and the details are documented here.
    #
    # Parameters
    #   args: command-line tokens; two positionals are expected, the marker
    #         table (name, LG, position, sequence per row) and the reference
    #         FASTA. Any other count prints the help and exits.
    # Side effects
    #   Creates "<map prefix>.fasta" next to the map file and
    #   "<map basename>.<ref basename>.csv" in the working directory.
    # Returns
    #   None.
    from jcvi.formats.base import is_number
    from jcvi.formats.blast import best as blast_best, bed as blast_bed
    from jcvi.apps.align import blat as blat_align

    p = OptionParser(blat.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    maptxt, ref = args
    pf = maptxt.rsplit(".", 1)[0]
    fastafile = pf + ".fasta"

    # Step 1: turn the marker table into FASTA. `with` closes the input
    # handle too, which the previous open()/close() pairing never did.
    with open(maptxt) as fp, open(fastafile, "w") as fw:
        for row in fp:
            fields = row.split()
            if not fields:
                # Blank line: nothing to unpack, move on.
                continue
            name, lg, pos, seq = fields
            if not is_number(pos):
                # Header or otherwise non-numeric position: not a marker.
                continue
            # Format string kept byte-for-byte (it yields a blank line after
            # every record because print appends one more newline).
            print(">{0}\n{1}\n".format(name, seq), file=fw)

    # Step 2: align against the reference, reduce to best hits, convert to
    # BED, and index the BED records by marker name.
    blatfile = blat_align([ref, fastafile])
    bestfile = blast_best([blatfile])
    bedfile = blast_bed([bestfile])
    b = Bed(bedfile).order

    pf = ".".join((op.basename(maptxt).split(".")[0],
                   op.basename(ref).split(".")[0]))
    csvfile = pf + ".csv"

    # Step 3: re-read the table and write one CSV line per placed marker:
    # scaffold, start on scaffold, linkage group, genetic position.
    with open(maptxt) as fp, open(csvfile, "w") as fw:
        for row in fp:
            fields = row.split()
            if not fields:
                continue
            name, lg, pos, seq = fields
            if name not in b:
                continue
            bbi, bb = b[name]
            record = (bb.seqid, bb.start, lg, pos)
            print(",".join(str(x) for x in record), file=fw)
예제 #3
0
파일: tgbs.py 프로젝트: Nicholas-NVS/jcvi
def synteny(args):
    """
    %prog synteny mstmap.out novo.final.fasta reference.fasta

    Plot MSTmap against reference genome.
    """
    # The docstring is passed to OptionParser as the usage text, hence it is
    # reproduced verbatim. Note that no plotting call is made in this
    # function; it writes "<map prefix>.bed", "<map prefix>.ids",
    # "<map prefix>.fasta", "<ref prefix>.bed" and
    # "<map prefix>.<ref prefix>.anchors". Returns None.
    from jcvi.assembly.geneticmap import bed as geneticmap_bed
    from jcvi.apps.align import blat
    from jcvi.formats.blast import bed as blast_bed, best

    parser = OptionParser(synteny.__doc__)
    opts, args = parser.parse_args(args)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    mstmapout, novo, ref = args
    # Prefixes are everything before the first dot of each given path.
    map_prefix = mstmapout.split(".")[0]
    ref_prefix = ref.split(".")[0]

    # Convert the map output to BED, truncate every line at its first dot,
    # and store the result under the map prefix.
    map_bed = geneticmap_bed([mstmapout])
    trimmed_bed = map_bed + ".tmp"
    sh("cut -d. -f1 {0}".format(map_bed), outfile=trimmed_bed)
    os.rename(trimmed_bed, map_prefix + ".bed")

    # Unique identifiers taken from column 4 (again cut at the first dot).
    ids_path = map_prefix + ".ids"
    sh("cut -f4 {0} | cut -d. -f1 | sort -u".format(map_bed),
       outfile=ids_path)

    # External tool; presumably pulls the listed records out of `novo`.
    subset_fasta = map_prefix + ".fasta"
    sh("faSomeRecords {0} {1} {2}".format(novo, ids_path, subset_fasta))

    # blat -> best hits -> BED, then file the BED under the reference prefix.
    hits_bed = blast_bed([best([blat([ref, subset_fasta])])])
    os.rename(hits_bed, ref_prefix + ".bed")

    # Anchors file: each identifier paired with itself.
    anchors_path = "{0}.{1}.anchors".format(map_prefix, ref_prefix)
    sh("paste {0} {0}".format(ids_path), outfile=anchors_path)
예제 #4
0
파일: tgbs.py 프로젝트: zhaotao1987/jcvi
def synteny(args):
    """
    %prog synteny mstmap.out novo.final.fasta reference.fasta

    Plot MSTmap against reference genome.
    """
    # Usage text above is consumed by OptionParser and therefore unchanged.
    # What the body does: builds BED files for the map and for the reference
    # hits, plus an identity anchors file; it does not itself draw anything.
    from jcvi.assembly.geneticmap import bed as geneticmap_bed
    from jcvi.apps.align import blat
    from jcvi.formats.blast import bed as blast_bed, best

    p = OptionParser(synteny.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    mstmapout, novo, ref = args
    pf, rf = (path.split(".")[0] for path in (mstmapout, ref))

    def with_suffix(stem, suffix):
        # Small naming helper: "<stem>.<suffix>".
        return stem + "." + suffix

    def run(template, *values, **kwargs):
        # Fill the command template and hand it to the shell wrapper.
        return sh(template.format(*values), **kwargs)

    # 1. Map BED, each line truncated at its first dot, saved as <pf>.bed.
    mstmapbed = geneticmap_bed([mstmapout])
    scratch = mstmapbed + ".tmp"
    run("cut -d. -f1 {0}", mstmapbed, outfile=scratch)
    os.rename(scratch, with_suffix(pf, "bed"))

    # 2. Sorted, de-duplicated identifiers from the fourth BED column.
    idsfile = with_suffix(pf, "ids")
    run("cut -f4 {0} | cut -d. -f1 | sort -u", mstmapbed, outfile=idsfile)

    # 3. Sequence subset via the external faSomeRecords command
    #    (assumed to select the records named in idsfile -- TODO confirm).
    fastafile = with_suffix(pf, "fasta")
    run("faSomeRecords {0} {1} {2}", novo, idsfile, fastafile)

    # 4. Align to the reference, keep best hits, convert to BED, and move
    #    the result to <rf>.bed.
    alignment = blat([ref, fastafile])
    top_hits = best([alignment])
    os.rename(blast_bed([top_hits]), with_suffix(rf, "bed"))

    # 5. Anchors: the ids file pasted next to itself.
    run("paste {0} {0}", idsfile,
        outfile="{0}.{1}.anchors".format(pf, rf))