def make_index(gff_file):
    """
    Open the sqlite feature database that belongs to `gff_file`.

    The database path is `gff_file + ".db"`. When
    `need_update(gff_file, db_path)` is true, any existing file at that
    path is removed and the database is rebuilt with
    `GFFutils.create_gffdb` before it is opened.

    Returns the `GFFutils.GFFDB` object opened on the database path.
    """
    import GFFutils

    index_path = gff_file + ".db"

    # Fast path: the existing database is still usable as it is.
    if not need_update(gff_file, index_path):
        return GFFutils.GFFDB(index_path)

    # Rebuild from scratch; drop the old file first so none of it is reused.
    if op.exists(index_path):
        os.remove(index_path)
    GFFutils.create_gffdb(gff_file, index_path)

    return GFFutils.GFFDB(index_path)
def main(gff_file, fasta_file, parents, children):
    """
    Print one FASTA record per parent feature, built from its children.

    For every feature whose type is listed in `parents`, the level-1
    children whose type is listed in `children` are collected, the
    sequence of each child is fetched through `Fasta(fasta_file)`, and
    the pieces are joined in ascending start order (in descending order
    when the parent is on the '-' strand). The record header is the
    parent's id.

    gff_file   -- path of the GFF file; the database `gff_file + ".db"`
                  is created only when that file does not exist yet
    fasta_file -- path passed to `Fasta`
    parents    -- comma-separated feature types to report
    children   -- comma-separated feature types to concatenate

    Parents without a matching child produce a warning on stderr and no
    record. Output uses forms that behave the same on Python 2 and 3.
    """
    db_file = gff_file + ".db"
    if not op.exists(db_file):
        GFFutils.create_gffdb(gff_file, db_file)

    f = Fasta(fasta_file)
    g = GFFutils.GFFDB(db_file)

    parent_types = set(parents.split(','))
    child_types = set(children.split(','))

    # One iterator per requested parent type, walked back to back.
    per_type = [g.features_of_type(x) for x in parent_types]
    parent_feats = itertools.chain.from_iterable(per_type)

    for feat in parent_feats:
        # (sequence, feature) pairs; the feature is kept for sorting.
        pieces = []
        for c in g.children(feat.id, 1):
            if c.featuretype not in child_types:
                continue
            seq = f.sequence(
                dict(chr=c.chrom, start=c.start, stop=c.stop, strand=c.strand))
            pieces.append((seq, c))

        if not pieces:
            msg = "[warning] %s has no children with type %s" \
                % (feat.id, ','.join(child_types))
            sys.stderr.write(msg + "\n")
            continue

        # sort children in incremental position
        pieces.sort(key=lambda x: x[1].start)
        # reverse children if negative strand
        if feat.strand == '-':
            pieces.reverse()

        feat_seq = ''.join(x[0] for x in pieces)

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(">%s" % feat.id)
        print(feat_seq)
#!/usr/bin/python
import sys
import numpy as np
# threshold=inf: print arrays in full instead of summarizing them.
np.set_printoptions(threshold=np.inf)
#from itertools import count, tee, izip, islice
import GFFutils
import pyBigWig
import matplotlib.pyplot as plt

# Feature database and bigWig track opened once at start-up.
G = GFFutils.GFFDB("/home/user/dm1.db")
C_bw = pyBigWig.open("/home/user/Symb_treatedVscontrol_50bin.bw")
Gene_final = []


def separate_exon_intron(EI_list, EI1, EI2, EI3):
    """
    Record the lengths of the first three entries of `EI_list`.

    The length of entry 0 is appended to `EI1`, of entry 1 to `EI2` and
    of entry 2 to `EI3`. Later entries still have `len()` taken but the
    result is not stored. The three lists are modified in place; nothing
    is returned.
    """
    targets = (EI1, EI2, EI3)
    for position, segment in enumerate(EI_list):
        size = len(segment)
        if position < len(targets):
            targets[position].append(size)


exon_len = []

# Sixteen independent, initially empty lists, one per name.
(upstream_3000, exon1, intron1, exon2, intron2, exon3, intron3, after_1000,
 before_1000, intron_3, exon_3, intron_2, exon_2, intron_1, exon_1,
 downstream_3000) = ([] for _ in range(16))

for mRNA in G.features_of_type('mRNA'):
    exons = list(G.children(mRNA, featuretype='exon'))
import GFFutils G = GFFutils.GFFDB('dm3.db') exon_len = [] exon1 = [] exon2 = [] exon3 = [] exon_3 = [] exon_2 = [] exon_1 = [] intron1 = [] intron2 = [] intron3 = [] intron_3 = [] intron_2 = [] intron_1 = [] gene_count = 0 for mRNA in G.features_of_type('mRNA'): # print(mRNA) exons = list(G.children(mRNA, featuretype='exon')) introns = list(G.interfeatures(exons)) if mRNA.strand == "-": first_3_exons = exons[-3:] last_3_exons = exons[:3] first_3_introns = introns[-3:] last_3_introns = introns[:3] else: first_3_exons = exons[:3] last_3_exons = exons[-3:] first_3_introns = introns[:3]