def get_scaff_from_minimus(contig):
    '''Parse a minimus contig file into a GFF.File of scaffolding regions.

    takes <contig> filename, returns GFF.File of scaffolding, e.g.
    LS146000 . . 1 946 . - . contig_end=1904;contig=8;contig_start=1004
    (example kept from the original note; the keys set here are
    'attribute_contig', 'attribute_cstart' and 'attribute_cend' -- how
    GFF.Region serialises them is not visible from this function.)

    A line starting with '##' sets the current contig id: the first
    whitespace-separated token with the leading '##' removed.  Every other
    line matching the read pattern
    ``#<seqid>(<n>) <strand> ... {<sstart> <send>} <<cstart> <cend>>``
    yields one GFF.Region with:

      * 'seqid'  -- the read name
      * 'start', 'end' -- sstart/send as strings, smaller value first
      * 'attribute_contig' -- the current contig id
      * 'attribute_cstart', 'attribute_cend' -- cstart/cend as strings,
        smaller value first
      * 'strand' -- '-' when the strand field is '[RC]', otherwise '+'

    Raises ValueError if a read line appears before any '##' header.
    '''
    # Compiled once per call instead of once per line; the raw string holds
    # exactly the same pattern text as before.
    read_line = re.compile(
        r'#(?P<seqid>.+?)\(\d+\)\s(?P<strand>.+?)\s.+?'
        r'\{(?P<sstart>\d+)\s(?P<send>\d+)\}\s'
        r'\<(?P<cstart>\d+)\s(?P<cend>\d+)\>')

    contigs = GFF.File()
    num = None  # id of the contig the following read lines belong to

    handle = open(contig)
    # try/finally (rather than `with`) so the handle is always closed while
    # staying valid on older Python 2 interpreters.
    try:
        for line in handle:
            if line.startswith('##'):
                num = line.split()[0][2:]
                continue

            match = read_line.search(line)
            if not match:
                continue

            if num is None:
                raise ValueError(
                    'scaffold line found before any "##" contig header '
                    'in %s: %r' % (contig, line))

            fields = match.groupdict()
            region = GFF.Region()
            region['seqid'] = fields['seqid']

            # Coordinates may be listed high-to-low; store low first.
            start, end = sorted((int(fields['sstart']), int(fields['send'])))
            region['start'], region['end'] = str(start), str(end)

            region['attribute_contig'] = num

            cstart, cend = sorted((int(fields['cstart']), int(fields['cend'])))
            region['attribute_cstart'], region['attribute_cend'] = \
                str(cstart), str(cend)

            if fields['strand'] == '[RC]':
                region['strand'] = '-'
            else:
                region['strand'] = '+'

            contigs.append(region)
    finally:
        handle.close()

    return contigs
def draw_bs_plot(sites,sp_order,site_styles,seq_lens,offsets=None,maxheight=0.8,minheight=0.4,
                 fig=1,subpl=111,clear_plot=True,filename=None,**figargs):
    '''Draw sites as ellipses along one horizontal line per sequence.

    sites       -- iterable of regions; each is read via the keys 'source',
                   'score', 'seqid' and 'start', and via len().
                   NOTE: the 'score' of every plotted region is overwritten
                   in place with its normalised value.
    sp_order    -- list of seqids to draw; the first entry gets the highest
                   y position (rank = len(sp_order) - index).
    site_styles -- dict: source -> dict with 'cut' (only sites whose score is
                   below it are kept) and 'color' (ellipse face/edge colour).
    seq_lens    -- dict: seqid -> sequence length.
    offsets     -- optional list indexed by rank (1..len(sp_order)) giving
                   the x offset of each sequence line.  Entries that are None
                   are filled in so the sequence is centred on the midpoint
                   of the longest plotted sequence; a list passed by the
                   caller is modified in place.
    maxheight, minheight -- passed to Util.normalize as the target range for
                   the scores (used as ellipse height and alpha).
    fig, figargs -- passed to pylab.figure.
    subpl       -- passed to Figure.add_subplot.
    clear_plot  -- clear the figure before drawing.
    filename    -- if given, autoscale and save the figure there; otherwise
                   pylab.plot() is called.

    Returns None.  Prints debugging output to stdout.
    '''
    # One (initially empty) GFF.File per distinct source.
    by_factor = dict((source, GFF.File())
                     for source in set(r['source'] for r in sites))

    # Keep only sites scoring below their source's cut, on a plotted sequence.
    for r in sites:
        cut = site_styles[r['source']]['cut']
        if r['score'] < cut and r['seqid'] in sp_order:
            by_factor[r['source']].append(r)
    print(by_factor)

    # Rescale the scores of each source independently (mutates the regions).
    for source, regions in by_factor.items():
        normscores = Util.normalize([r['score'] for r in regions],
                                    minheight, maxheight, to_abs=1)
        for i, height in enumerate(normscores):
            regions[i]['score'] = height

    sites_to_plot = []
    for regions in by_factor.values():
        sites_to_plot.extend(regions)

    figo = pylab.figure(fig, **figargs)
    if clear_plot:
        figo.clf()
        figo = pylab.figure(fig, **figargs)
    ax = figo.add_subplot(subpl)
    ax.set_yticks([])

    #calc offsets, draw lines
    if offsets is None:
        # Index 0 is unused: ranks run from 1 to len(sp_order).
        offsets = [None] * (len(sp_order) + 1)
    midpt = max([v for k, v in seq_lens.items() if k in sp_order]) / 2
    for i, sp in enumerate(sp_order):
        rank = len(sp_order) - i
        if offsets[rank] is None:
            off = midpt - seq_lens[sp] / 2
            offsets[rank] = off
            # Same text the Python 2 statement `print a,b,c,d` produced.
            print("%s %s %s %s" % (off, rank, seq_lens[sp] + off, rank))
        ax.text(5, rank, sp)
        ax.add_line(matplotlib.lines.Line2D(
            (offsets[rank], seq_lens[sp] + offsets[rank]),
            (rank, rank),
            color='k', alpha=0.25, lw=5))

    for site in sites_to_plot:
        fc = site_styles[site['source']]['color']
        ec = fc
        rank = len(sp_order) - sp_order.index(site['seqid'])
        # Normalised score doubles as ellipse height and opacity.
        ax.add_patch(matplotlib.patches.Ellipse(
            (site['start'] + offsets[rank], rank),
            len(site),
            site['score'],
            fc=fc, ec=ec, alpha=site['score']))

    if filename:
        ax.autoscale_view()
        figo.savefig(filename)
    else:
        # NOTE(review): an argument-less plot() call; presumably meant to
        # trigger a redraw in interactive mode -- confirm.
        pylab.plot()
# Fasta class smoke test: load a file, edit every record, write it out, reload it.
test_fasta = Fasta("/home/brant/py_util/unit_test_data/seq.fasta")
print("%s\n%s" % (test_fasta.filename, test_fasta.seq_len()))

# Append the same suffix to every sequence in the loaded file.
for seq_name in test_fasta.iterkeys():
    test_fasta[seq_name] = test_fasta[seq_name] + "TGGCG"

test_fasta.write_to_file("/home/brant/temp/temp.fa", 1)
print("%s\n%s" % (test_fasta.filename, test_fasta.seq_len()))

# Read the written copy back in and report on it.
other_test_fasta = Fasta("/home/brant/temp/temp.fa")
print(other_test_fasta.seq_len())
# end of the Fasta test

print(other_test_fasta.order)

print("test substr_from_gff\n")
import GFF

testdata_dir = paths['py_testdata']
seqfile = os.path.join(testdata_dir, "eve.ceratitis_capitata.fa")
gfffile = os.path.join(testdata_dir, "eve.ceratitis_capitata.fa.gff3")
seq = Fasta(seqfile)
gff = GFF.File(gfffile)

# Collect the regions annotated with gene_name == 'eve'.
eve_regions = []
for region in gff:
    if 'gene_name' not in region['attributes'].keys():
        continue
    if region['attributes']['gene_name'] == 'eve':
        eve_regions.append(region)

evegene = seq.substr_from_gff(eve_regions, name_key='gene_name', plus_strand=1)
print(evegene)
#gff sqlite action import GFF, os, sqlite3 def InsertGFFRegion(curobj, vals): curobj.execute('INSERT INTO gff VALUES (null,?,?,?,?,?,?,?,?,?,?)', vals) gff_filename = r"G:\AllBrantsStuff\python\ephinaroun\sqlite\dmel-all-r4.3.filtered.gff" DB_filename = os.path.join(os.path.dirname(gff_filename), '.' + os.path.basename(gff_filename) + '.DB') gff = GFF.File(gff_filename) connection = sqlite3.connect(DB_filename) cursor = connection.cursor() try: cursor.execute('drop table gff') connection.commit() except: pass cursor.execute('''CREATE TABLE gff ( id INTEGER PRIMARY KEY AUTOINCREMENT, sequence_name TEXT NOT NULL, source TEXT NOT NULL, type TEXT NOT NULL, start INTEGER NOT NULL, end INTEGER NOT NULL, score REAL NOT NULL, strand TEXT NOT NULL,