Exemple #1
0
def make_index(gff_file):
    """
    Open the sqlite feature database that belongs to `gff_file`.

    The database lives next to the GFF file at `gff_file + ".db"`. When
    need_update(gff_file, <db path>) is true, any existing database file is
    removed and a new one is created with GFFutils.create_gffdb() before it
    is opened.

    Returns a GFFutils.GFFDB object for the database.
    """
    import GFFutils

    index_path = gff_file + ".db"
    stale = need_update(gff_file, index_path)

    # Remove any existing database file before rebuilding it.
    if stale and op.exists(index_path):
        os.remove(index_path)
    if stale:
        GFFutils.create_gffdb(gff_file, index_path)

    return GFFutils.GFFDB(index_path)
def main(gff_file, fasta_file, parents, children):
    """
    Write one FASTA record per parent feature, assembled from its children.

    For every feature in the GFF database whose type is listed in `parents`,
    the features returned by g.children(feat.id, 1) are filtered to the types
    listed in `children`, their sequences are fetched from `fasta_file`,
    ordered by start coordinate (order reversed when the parent is on the
    '-' strand) and concatenated. Each result is written to stdout as a
    ">id" line followed by the sequence line. Parents without any matching
    child are reported on stderr and skipped.

    gff_file   -- path of the GFF file; the database is read from
                  gff_file + ".db" and created first if that file is missing
    fasta_file -- path handed to Fasta()
    parents    -- comma-separated parent feature types
    children   -- comma-separated child feature types
    """
    db_file = gff_file + ".db"

    if not op.exists(db_file):
        GFFutils.create_gffdb(gff_file, db_file)

    f = Fasta(fasta_file)
    g = GFFutils.GFFDB(db_file)

    parent_types = set(parents.split(','))
    child_types = set(children.split(','))
    parent_feats = itertools.chain(
        *[g.features_of_type(x) for x in parent_types])

    for feat in parent_feats:

        # (sequence, feature) pairs; kept under a name of its own so the
        # `children` argument is not shadowed inside the loop.
        pieces = []
        for c in g.children(feat.id, 1):
            if c.featuretype not in child_types:
                continue
            seq = f.sequence(
                dict(chr=c.chrom, start=c.start, stop=c.stop, strand=c.strand))
            pieces.append((seq, c))

        if not pieces:
            # write() instead of the Python-2-only `print >>` statement so the
            # function also parses and runs on Python 3.
            sys.stderr.write("[warning] %s has no children with type %s\n"
                             % (feat.id, ','.join(child_types)))
            continue

        # sort children in incremental position
        pieces.sort(key=lambda x: x[1].start)
        # reverse children if negative strand
        if feat.strand == '-':
            pieces.reverse()
        feat_seq = ''.join(x[0] for x in pieces)

        sys.stdout.write(">%s\n" % feat.id)
        sys.stdout.write("%s\n" % feat_seq)
#!/usr/bin/python

import sys
import numpy as np
np.set_printoptions(threshold=np.inf)
#from itertools import count, tee, izip, islice
import GFFutils
import pyBigWig
import matplotlib.pyplot as plt

# Feature database opened through GFFutils; the path is hard-coded and
# machine-specific.
G = GFFutils.GFFDB("/home/user/dm1.db")
# bigWig file opened with pyBigWig. NOTE(review): the file name suggests a
# treated-vs-control signal in 50 bp bins -- confirm against how it was made.
C_bw = pyBigWig.open("/home/user/Symb_treatedVscontrol_50bin.bw")
# Starts empty; nothing in the visible part of the script appends to it.
Gene_final = []


def separate_exon_intron(EI_list, EI1, EI2, EI3):
    """
    Record the lengths of the first three items of EI_list.

    len() of the 1st, 2nd and 3rd item is appended to EI1, EI2 and EI3
    respectively. When EI_list holds fewer than three items, only the lists
    matching the items that exist are extended; items after the third are
    ignored. The three lists are modified in place and nothing is returned.
    """
    # zip() stops at the shorter input, so at most three items are read and
    # len() is never evaluated for items that would be discarded anyway.
    for bucket, item in zip((EI1, EI2, EI3), EI_list):
        bucket.append(len(item))


# Declared up front; nothing in the visible part of the script fills it.
exon_len = []

# One independent, initially empty list per name. NOTE(review): the names
# suggest flanking regions plus the first three (exon1..3 / intron1..3) and
# last three (exon_1..3 / intron_1..3) exons and introns -- confirm against
# the code that fills them.
upstream_3000, downstream_3000 = [], []
after_1000, before_1000 = [], []
exon1, exon2, exon3 = [], [], []
intron1, intron2, intron3 = [], [], []
exon_1, exon_2, exon_3 = [], [], []
intron_1, intron_2, intron_3 = [], [], []

# For every mRNA feature in the database, collect its exon children.
for mRNA in G.features_of_type('mRNA'):
    exons = list(G.children(mRNA, featuretype='exon'))
Exemple #4
0
import GFFutils

# Feature database opened through GFFutils (path relative to the working
# directory).
G = GFFutils.GFFDB('dm3.db')

# Module-level accumulators, all starting empty. Each name is bound to its
# own list object. NOTE(review): the naming suggests exonN / intronN for the
# first three and exon_N / intron_N for the last three of each transcript --
# confirm against the loop that fills them.
exon_len = []
exon1, exon2, exon3 = [], [], []
exon_3, exon_2, exon_1 = [], [], []
intron1, intron2, intron3 = [], [], []
intron_3, intron_2, intron_1 = [], [], []

gene_count = 0
for mRNA in G.features_of_type('mRNA'):
    #    print(mRNA)
    exons = list(G.children(mRNA, featuretype='exon'))
    introns = list(G.interfeatures(exons))
    if mRNA.strand == "-":
        first_3_exons = exons[-3:]
        last_3_exons = exons[:3]
        first_3_introns = introns[-3:]
        last_3_introns = introns[:3]
    else:
        first_3_exons = exons[:3]
        last_3_exons = exons[-3:]
        first_3_introns = introns[:3]