Exemple #1
0
def get_scaff_from_minimus(contig):
    '''Parse the file named <contig> and return a GFF.File of scaffolding.

    The file is read line by line:

    * a line starting with '##' opens a new contig; the first
      whitespace-separated token, minus the leading '##', becomes the
      contig identifier for the placement lines that follow;
    * a line matching the placement pattern
      ``#<seqid>(<n>) <strand> ... {<sstart> <send>} <<cstart> <cend>>``
      yields one GFF.Region.

    For each region, 'start'/'end' are the sorted {sstart send} pair and
    'attribute_cstart'/'attribute_cend' are the sorted <cstart cend> pair
    (all stored as strings).  'strand' is '-' when the strand token is
    '[RC]' and '+' otherwise.  'attribute_contig' is the current contig
    identifier.

    Example of the intended output, kept from the original docstring
    (NOTE(review): attribute names here differ from the keys set below;
    confirm how GFF.Region serialises 'attribute_*' keys):

    LS146000  .  .  1  946  .  -  .  contig_end=1904;contig=8;contig_start=1004

    Raises ValueError if a placement line appears before any '##' header.
    '''
    # Compiled once rather than on every line; raw string so the
    # backslashes reach the regex engine unchanged.
    placement_re = re.compile(
        r'#(?P<seqid>.+?)\(\d+\)\s(?P<strand>.+?)\s.+?\{(?P<sstart>\d+)\s(?P<send>\d+)\}\s\<(?P<cstart>\d+)\s(?P<cend>\d+)\>'
    )

    contigs = GFF.File()
    num = None  # identifier of the contig currently being read

    # 'with' guarantees the handle is closed even if parsing fails.
    with open(contig) as handle:
        for l in handle:
            if l.startswith('##'):
                num = l.split()[0][2:]
                continue

            match = placement_re.search(l)
            if not match:
                continue
            if num is None:
                raise ValueError(
                    "placement line found before any '##' contig header "
                    "in %s: %r" % (contig, l))

            m = match.groupdict()
            r = GFF.Region()
            r['seqid'] = m['seqid']
            # Coordinates may be given in either order; store low, high.
            r['start'], r['end'] = [
                str(s) for s in sorted((int(m['sstart']), int(m['send'])))
            ]
            r['attribute_contig'] = num
            r['attribute_cstart'], r['attribute_cend'] = [
                str(s) for s in sorted((int(m['cstart']), int(m['cend'])))
            ]
            if m['strand'] == '[RC]':
                r['strand'] = '-'
            else:
                r['strand'] = '+'
            contigs.append(r)
    return contigs
Exemple #2
0
def draw_bs_plot(sites,sp_order,site_styles,seq_lens,offsets=None,maxheight=0.8,minheight=0.4,
                 fig=1,subpl=111,clear_plot=True,filename=None,**figargs):
    '''Draw sites as ellipses along one horizontal line per sequence.

    sites       -- iterable of regions indexed by 'source', 'score',
                   'seqid' and 'start'; len(region) is used as the
                   ellipse width
    sp_order    -- sequence ids in drawing order; the first gets the
                   highest rank (y position)
    site_styles -- dict keyed by source; each value provides 'cut' and
                   'color'
    seq_lens    -- dict mapping sequence id to its length
    offsets     -- optional list indexed by rank (1..len(sp_order)) of x
                   offsets; None entries are filled in so each sequence
                   is centred on the midpoint of the longest sequence.
                   A list passed by the caller is modified in place.
    maxheight, minheight -- bounds passed to Util.normalize for rescaling
                   scores
    fig, figargs -- passed to pylab.figure
    subpl       -- passed to add_subplot
    clear_plot  -- clear the figure before drawing
    filename    -- if given, the figure is saved there

    Side effects: the 'score' of every retained site is overwritten with
    its normalised value, and debugging information is printed.
    '''
    sources = set([r['source'] for r in sites])
    by_factor = dict((source, GFF.File()) for source in sources)

    # Keep sites scoring strictly below their source's cut, on sequences
    # that will actually be drawn.
    for r in sites:
        cut = site_styles[r['source']]['cut']
        if r['score'] < cut and r['seqid'] in sp_order:
            by_factor[r['source']].append(r)

    print(by_factor)

    # Rescale scores per source; the result is used below as both the
    # ellipse height and its alpha.
    for k,v in by_factor.items():
        normscores = Util.normalize([r['score'] for r in v],minheight,maxheight,to_abs=1)
        for i,vn in enumerate(normscores):
            v[i]['score'] = vn

    sites_to_plot = []
    for f in by_factor.values():
        sites_to_plot.extend(f)

    figo = pylab.figure(fig,**figargs)
    if clear_plot:
        figo.clf()
        figo = pylab.figure(fig,**figargs)

    ax = figo.add_subplot(subpl)
    ax.set_yticks([])

    #calc offsets, draw lines
    if offsets is None:
        offsets = [None]*(len(sp_order)+1)
    # The midpoint is computed on first need, so a caller-supplied offsets
    # list with None entries no longer fails on an undefined name.
    midpt = None
    for i,sp in enumerate(sp_order):
        rank = len(sp_order) - i
        if offsets[rank] is None:
            if midpt is None:
                midpt = max([v for k,v in seq_lens.items() if k in sp_order])/2
            off = midpt - seq_lens[sp]/2
            offsets[rank] = off
            print("%s %s %s %s" % (off,rank,seq_lens[sp]+off,rank))
        ax.text(5,rank,sp)
        ax.add_line(matplotlib.lines.Line2D((offsets[rank],seq_lens[sp]+offsets[rank]),(rank,rank),color='k',alpha=0.25,lw=5))

    for site in sites_to_plot:
        fc = site_styles[site['source']]['color']
        ec = fc
        rank = len(sp_order) - sp_order.index(site['seqid'])
        ax.add_patch(matplotlib.patches.Ellipse( (site['start']+offsets[rank],rank),
                                                   len(site),
                                                   site['score'],
                                                   fc=fc,ec=ec,alpha=site['score'] )
                     )

    if filename:
        ax.autoscale_view()
        figo.savefig(filename)
    else:
        # NOTE(review): plot() with no arguments draws no data; confirm
        # whether an interactive show/draw was intended here.
        pylab.plot()
Exemple #3
0
    # Self-test for the Fasta class (loading, editing, writing).
    # NOTE(review): relies on hard-coded paths under /home/brant and on
    # `os`, `paths` and `Fasta` being defined outside this fragment.
    test_fasta = Fasta("/home/brant/py_util/unit_test_data/seq.fasta")

    # Report the source filename and seq_len() before editing.
    print "%s\n%s" % (test_fasta.filename, test_fasta.seq_len())
    # Append a fixed 5-base suffix to every sequence.
    for k in test_fasta.iterkeys():
        test_fasta[k] += "TGGCG"
    # NOTE(review): meaning of the second argument (1) is not visible here;
    # confirm against Fasta.write_to_file.
    test_fasta.write_to_file("/home/brant/temp/temp.fa", 1)

    # Report again after the edit and write.
    print "%s\n%s" % (test_fasta.filename, test_fasta.seq_len())

    # Reload the written file and print its seq_len() for comparison with
    # the output above.
    other_test_fasta = Fasta("/home/brant/temp/temp.fa")
    print other_test_fasta.seq_len()
    # End of the Fasta load/edit/write test.
    print other_test_fasta.order

    # Test substr_from_gff using the eve test sequence and its GFF3 file.
    print "test substr_from_gff\n"
    import GFF
    seqfile = os.path.join(paths['py_testdata'], "eve.ceratitis_capitata.fa")
    gfffile = os.path.join(paths['py_testdata'],
                           "eve.ceratitis_capitata.fa.gff3")
    seq = Fasta(seqfile)
    gff = GFF.File(gfffile)
    # Pass only the regions whose 'gene_name' attribute equals 'eve'.
    evegene = seq.substr_from_gff([
        region for region in gff if 'gene_name' in region['attributes'].keys()
        and region['attributes']['gene_name'] == 'eve'
    ],
                                  name_key='gene_name',
                                  plus_strand=1)
    print evegene
Exemple #4
0
#gff sqlite action

import GFF, os, sqlite3


def InsertGFFRegion(curobj, vals):
    """Insert one row into the ``gff`` table through the cursor *curobj*.

    *vals* supplies the ten values bound, in order, to the statement's
    ``?`` placeholders.  The leading ``null`` is the value given for the
    table's first column.
    """
    insert_sql = 'INSERT INTO gff VALUES (null,?,?,?,?,?,?,?,?,?,?)'
    curobj.execute(insert_sql, vals)


# Input GFF file; the SQLite database is created next to it as a dot-file
# named '.<gff basename>.DB'.
gff_filename = r"G:\AllBrantsStuff\python\ephinaroun\sqlite\dmel-all-r4.3.filtered.gff"
DB_filename = os.path.join(os.path.dirname(gff_filename),
                           '.' + os.path.basename(gff_filename) + '.DB')

gff = GFF.File(gff_filename)
connection = sqlite3.connect(DB_filename)
cursor = connection.cursor()

# Remove any 'gff' table left by a previous run.  IF EXISTS makes the
# "no such table" case a no-op without a bare except, so genuine
# failures (and KeyboardInterrupt) are no longer silently swallowed.
cursor.execute('DROP TABLE IF EXISTS gff')
connection.commit()
cursor.execute('''CREATE TABLE gff (
				id INTEGER PRIMARY KEY AUTOINCREMENT,
				sequence_name TEXT NOT NULL,
				source TEXT NOT NULL,
				type TEXT NOT NULL,
				start INTEGER NOT NULL,
				end INTEGER NOT NULL,
				score REAL NOT NULL,
				strand TEXT NOT NULL,