-
Notifications
You must be signed in to change notification settings - Fork 0
/
c11.py
62 lines (31 loc) · 1.74 KB
/
c11.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!user/bin/env python
import pygr.Data
from pygr.seqdb import BlastDB, AnnotationDB
seqdb = BlastDB('/somepath/hg18.fa')
exon_slices = pygr.Data.Collection(path='exon_slices.db',mode='c',writeback=False) # Store on disk
exon_db = AnnotationDB(exon_slices, seqdb,sliceAttrDict=dict(id=0, exon_id=1, orientation=2, gene_id=3, start=4, stop=5))
from pygr.cnestedlist import NLMSA
nlmsa = NLMSA('exonAnnot','w',use_virtual_lpo=True,bidirectional=False)
ifile = file('exonslice.txt')
for line in ifile:
row = [x for x in line.split()] # CONVERT TO LIST SO MUTABLE
row[1] = int(row[1]) # CONVERT FROM STRING TO INTEGER
exon_slices[row[1]] = row
exon = exon_db[row[1]] # GET THE ANNOTATION OBJECT FOR THIS EXON
nlmsa.addAnnotation(exon) # SAVE IT TO GENOME MAPPING
exon_db.clear()
nlmsa.build() # FINALIZE GENOME ALIGNMENT INDEXES
ifile.close()
pygr.Data.Bio.Seq.Genome.hg18 = seqdb # SAVE TO pygr.Data
pygr.Data.Bio.Annotation.hg18.exons = exon_db
pygr.Data.Bio.Annotation.hg18.exonmap = nlmsa
pygr.Data.schema.Bio.Annotation.hg18.exonmap = \
pygr.Data.ManyToManyRelation(seqdb,bindAttrs=('exons',))
pygr.Data.save()
# [Note that to work with these results using the pygr.Data schema (i.e. to search a piece of genomic sequence using its "exons" attribute), you should make sure to load the data from pygr.Data. To do this, start a new Python session and run a test like the following:]
import pygr.Data
hg18 = pygr.Data.Bio.Seq.Genome.hg18()
chr16 = seqdb['chr1']
qint = chr16[10000:20000] # an arbitrary genomic interval containing genes...
for exon in qint.exons:
print(exon)