Python Gff 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: jcvi.formats.gff

클래스/타입: Gff

hotexamples.com에서의 예제들: 7

Python Gff - 7개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 jcvi.formats.gff.Gff에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Gff(7)

자주 사용되는 메소드들

Gff (7)

예제 #1

파일 보기

파일: reformat.py 프로젝트: zhaotao1987/jcvi

def augustus(args):
    """
    %prog augustus augustus.gff3 > reformatted.gff3

    AUGUSTUS does generate a gff3 (--gff3=on) but need some refinement.
    """
    from jcvi.formats.gff import Gff

    p = OptionParser(augustus.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    ingff3, = args
    gff = Gff(ingff3)
    fw = must_open(opts.outfile, "w")
    seen = defaultdict(int)
    for g in gff:
        if g.type not in ("gene", "transcript", "CDS"):
            continue

        if g.type == "transcript":
            g.type = "mRNA"

        prefix = g.seqid + "_"
        pid = prefix + g.id
        newid = "{0}-{1}".format(pid, seen[pid]) if pid in seen else pid
        seen[pid] += 1
        g.attributes["ID"] = [newid]
        g.attributes["Parent"] = [(prefix + x) for x in g.attributes["Parent"]]
        g.update_attributes()
        print >> fw, g
    fw.close()

예제 #2

파일 보기

def trimUTR(args):
    """
    %prog trimUTR gffile

    Remove UTRs in the annotation set.
    """
    p = OptionParser(trimUTR.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    gffile, = args
    g = make_index(gffile)
    gff = Gff(gffile)
    mRNA_register = {}
    fw = must_open(opts.outfile, "w")
    for c in gff:
        cid, ctype = c.accn, c.type
        if ctype == "gene":
            start, end = get_cds_minmax(g, cid)
            trim(c, start, end)
        elif ctype == "mRNA":
            start, end = get_cds_minmax(g, cid, level=1)
            trim(c, start, end)
            mRNA_register[cid] = (start, end)
        elif ctype != "CDS":
            start, end = mRNA_register[c.parent]
            trim(c, start, end)
        if c.start > c.end:
            print >> sys.stderr, cid, \
                    "destroyed [{0} > {1}]".format(c.start, c.end)
        else:
            print >> fw, c

예제 #3

파일 보기

def pasa(args):
    """
    %prog ${pasadb}.assemblies.fasta ${pasadb}.pasa_assemblies.gff3

    Wraps `pasa_asmbls_to_training_set.dbi`.
    """
    from jcvi.formats.base import SetFile
    from jcvi.formats.gff import Gff

    p = OptionParser(pasa.__doc__)
    p.set_home("pasa")
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, gffile = args
    transcodergff = fastafile + ".transdecoder.gff3"
    transcodergenomegff = fastafile + ".transdecoder.genome.gff3"
    if need_update((fastafile, gffile), (transcodergff, transcodergenomegff)):
        cmd = "{0}/scripts/pasa_asmbls_to_training_set.dbi".format(
            opts.pasa_home)
        cmd += " --pasa_transcripts_fasta {0} --pasa_transcripts_gff3 {1}".\
                format(fastafile, gffile)
        sh(cmd)

    completeids = fastafile.rsplit(".", 1)[0] + ".complete.ids"
    if need_update(transcodergff, completeids):
        cmd = "grep complete {0} | cut -f1 | sort -u".format(transcodergff)
        sh(cmd, outfile=completeids)

    complete = SetFile(completeids)
    seen = set()
    completegff = transcodergenomegff.rsplit(".", 1)[0] + ".complete.gff3"
    fw = open(completegff, "w")
    gff = Gff(transcodergenomegff)
    for g in gff:
        a = g.attributes
        if "Parent" in a:
            id = a["Parent"][0]
        else:
            id = a["ID"][0]
        asmbl_id = id.split("|")[0]
        if asmbl_id not in complete:
            continue
        print >> fw, g
        if g.type == "gene":
            seen.add(id)

    fw.close()
    logging.debug("A total of {0} complete models extracted to `{1}`.".\
                    format(len(seen), completegff))

예제 #4

파일 보기

def yeasttruth(args):
    """
    %prog yeasttruth Pillars.tab *.gff

    Prepare pairs data for 14 yeasts.
    """
    p = OptionParser(yeasttruth.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) < 2:
        sys.exit(not p.print_help())

    pillars = args[0]
    gffiles = args[1:]
    aliases = {}
    pivot = {}
    for gffile in gffiles:
        is_pivot = op.basename(gffile).startswith("Saccharomyces_cerevisiae")
        gff = Gff(gffile)
        for g in gff:
            if g.type != "gene":
                continue
            for a in g.attributes["Alias"]:
                aliases[a] = g.accn
                if is_pivot:
                    pivot[a] = g.accn
    logging.debug("Aliases imported: {0}".format(len(aliases)))
    logging.debug("Pivot imported: {0}".format(len(pivot)))
    fw = open("yeast.aliases", "w")
    for k, v in sorted(aliases.items()):
        print("\t".join((k, v)), file=fw)
    fw.close()

    fp = open(pillars)
    pairs = set()
    fw = must_open(opts.outfile, "w")
    for row in fp:
        atoms = [x for x in row.split() if x != "---"]
        pps = [pivot[x] for x in atoms if x in pivot]
        atoms = [aliases[x] for x in atoms if x in aliases]
        for p in pps:
            for a in atoms:
                if p == a:
                    continue
                pairs.add(tuple(sorted((p, a))))

    for a, b in sorted(pairs):
        print("\t".join((a, b)), file=fw)
    fw.close()

예제 #5

파일 보기

def uniq(args):
    """
    %prog uniq gffile cdsfasta

    Remove overlapping gene models. Similar to formats.gff.uniq(), overlapping
    'piles' are processed, one by one.

    Here, we use a different algorithm, that retains the best non-overlapping
    subset witin each pile, rather than single best model. Scoring function is
    also different, rather than based on score or span, we optimize for the
    subset that show the best combined score. Score is defined by:

    score = (1 - AED) * length
    """

    p = OptionParser(uniq.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gffile, cdsfasta = args
    gff = Gff(gffile)
    sizes = Sizes(cdsfasta).mapping
    gene_register = {}
    for g in gff:
        if g.type != "mRNA":
            continue
        aed = float(g.attributes["_AED"][0])
        gene_register[g.parent] = (1 - aed) * sizes[g.accn]

    allgenes = import_feats(gffile)
    g = get_piles(allgenes)

    bestids = set()
    for group in g:
        ranges = [
            to_range(x, score=gene_register[x.accn], id=x.accn) for x in group
        ]
        selected_chain, score = range_chain(ranges)
        bestids |= set(x.id for x in selected_chain)

    removed = set(x.accn for x in allgenes) - bestids
    fw = open("removed.ids", "w")
    print("\n".join(sorted(removed)), file=fw)
    fw.close()
    populate_children(opts.outfile, bestids, gffile, "gene")

예제 #6

파일 보기

def get_cds_beds(gffile, noUTR=False):
    from jcvi.formats.gff import Gff

    mrnabed = None
    cdsbeds = []
    gf = Gff(gffile)
    for g in gf:
        if g.type == "mRNA":
            mrnabed = g.bedline
        elif g.type == "CDS":
            cdsbeds.append(g.bedline)

    if noUTR:
        mrnabed.start = min(x.start for x in cdsbeds)
        mrnabed.end = max(x.end for x in cdsbeds)

    return mrnabed, cdsbeds

예제 #7

파일 보기

def augustus(args):
    """
    %prog augustus augustus.gff3 > reformatted.gff3

    AUGUSTUS does generate a gff3 (--gff3=on) but need some refinement.
    """
    p = OptionParser(augustus.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    ingff3, = args
    gff = Gff(ingff3)
    for g in gff:
        if g.type not in ("gene", "transcript", "CDS"):
            continue

        if g.type == "transcript":
            g.type = "mRNA"

        print g