Example #1
0
 def setUp(self):
     """Build the 'ce6' assembly fixture and the expected chromosome table.

     The assembly gets an explicit GenRep handle and ``intype`` set to the
     string '0'; ``self.chromosomes`` holds the reference data the tests
     compare against.
     """
     ce6 = Assembly('ce6')
     self.assembly = ce6
     ce6.genrep = GenRep(url='http://bbcftools.epfl.ch/genrep/',
                         root='/db/genrep')
     ce6.intype = '0'

     # One row per chromosome: the three components of the dictionary key
     # (presumably id, accession, version -- confirm against GenRep),
     # followed by the chromosome length and its display name.
     rows = (
         (3066, u'NC_003279', 6, 15072421, u'chrI'),
         (3067, u'NC_003280', 7, 15279323, u'chrII'),
         (3068, u'NC_003281', 8, 13783681, u'chrIII'),
         (3069, u'NC_003282', 5, 17493785, u'chrIV'),
         (3070, u'NC_003283', 8, 20919568, u'chrV'),
         (3071, u'NC_003284', 7, 17718854, u'chrX'),
         (2948, u'NC_001328', 1, 13794, u'chrM'),
     )
     self.chromosomes = {
         (num, acc, ver): {'length': length, 'name': name}
         for num, acc, ver, length, name in rows
     }
Example #2
0
 def adn(self, ass, chr, id, **kw):
     """Return the sequence of one fixed-size chunk of a chromosome.

     :param ass: assembly identifier, passed to ``Assembly``.
     :param chr: chromosome name, compared with each chromosome's 'name'.
     :param id: zero-based chunk index; converted with ``int``.
     :param kw: extra keyword arguments, ignored.
     :returns: whatever ``GenRep.get_sequence`` returns for the range
         ``[id * chunk, id * chunk + chunk]`` of the first chromosome whose
         name matches, or ``''`` when no chromosome matches.
     :raises ValueError: if ``id`` cannot be converted to an integer.

     The parameter names shadow builtins but are kept because callers may
     pass them by keyword.
     """
     index = int(id)
     g = GenRep()
     # The previous implementation also ran a 'chromosomes' query against
     # GenRep here, but its result was never read (the name was immediately
     # reused as the loop variable), so that remote call has been removed.
     assembly = Assembly(ass)
     for chrid, meta in assembly.chromosomes.items():
         if meta['name'] == chr:
             # ``chunk`` is not defined in this method; it is expected to be
             # provided by the enclosing module.
             start = index * chunk
             end = start + chunk
             return g.get_sequence(chrid[0], [[start, end]])
     return ''
Example #3
0
def merge_junc_files(trackList, assembly):
    """Merge several junction tracks into the single text track 'all.junc'.

    For every chromosome key of the assembly, the matching selection is read
    from each track in ``trackList``; the streams are concatenated, grouped
    by ('chr', 'start', 'end') with their scores summed, and appended to the
    output track.

    :param trackList: iterable of track paths, each opened with ``track``.
    :param assembly: assembly identifier, passed to ``Assembly``.
    """
    columns = ('chr', 'start', 'end', 'strand', 'score')
    # A fresh list is handed to every ``track`` call, as the original did.
    merged_out = track('all.junc', format='txt', fields=list(columns))
    from bbcflib.genrep import Assembly
    genome = Assembly(assembly)
    for key in genome.chromosomes:
        per_file = []
        for path in trackList:
            source = track(path, fields=list(columns), format='txt')
            # Selection string has the form "<key[0]>_<key[1]>.<key[2]>".
            per_file.append(
                source.read(str(key[0]) + '_' + key[1] + '.' + str(key[2])))
        combined = concatenate(per_file,
                               group_by=['chr', 'start', 'end'],
                               aggregate={'score': lambda values: sum(values)})
        merged_out.write(combined, mode='append')
Example #4
0
def main(assembly, filename):
    """Convert a simulation report into a tab-delimited transcript count table.

    Each input line must contain exactly 13 tab-separated fields (see the
    unpacking below). The first field has the form ``<chrom>:<start>-<end><S>``
    where a trailing 'W' gives strand '1' and any other character gives '-1'.
    Rows whose transcript id is absent from the assembly's transcript mapping
    are dropped. Output goes to 'simulation/count_simulation.txt'.

    :param assembly: assembly identifier, passed to ``Assembly``.
    :param filename: path of the simulation report to read.
    :raises ValueError: if a line does not have 13 fields or a numeric field
        cannot be parsed.
    """
    tmap = Assembly(assembly).get_transcript_mapping()

    header = [
        'ID', 'Count', 'RPKM', 'Chrom', 'Start', 'End', 'Strand', 'GeneName',
        'Length', 'Type', 'Sense', 'Synonyms'
    ]

    # Context managers close both files even when a malformed line raises;
    # previously the handles leaked in that case.
    with open(filename) as f, \
            open('simulation/count_simulation.txt', 'wb') as g:
        g.write('\t'.join(header) + '\n')

        for line in f:
            (loc, tid, coding, length,
             expr_fraction, expr_number, lib_fraction, lib_number,
             seq_fraction, seq_number,
             cov_fraction, chisq, var_coeff) = line.split('\t')
            chrom, coord = loc.split(':')
            start, end = coord[:-1].split('-')
            strand = '1' if coord[-1] == 'W' else '-1'

            nreads = float(seq_number)
            if nreads != 0:
                # Total read count is recovered from this transcript's count
                # and the fraction of all reads it represents.
                ntotal = nreads / float(seq_fraction)
                rpkm = 1e9 * nreads / (float(length) * ntotal)
            else:
                rpkm = 0.0

            t = tmap.get(tid)
            if t is None:
                # Transcript unknown to the assembly: no output row.
                continue
            newline = [
                tid, seq_number,
                str(rpkm), chrom, start, end, strand, t.gene_name, length,
                'transcript', '.', '.'
            ]
            g.write('\t'.join(newline) + '\n')
Example #5
0
def add_new_sequence(sequence):
    """Create the default 'Genes' track for a newly created GDV sequence.

    Looks up the SQLite URL of the sequence's assembly, creates a ``Track``
    owned by the admin user, starts the ``tasks.new_input`` job that imports
    the file, and appends the track to the sequence's default tracks.

    :param sequence: sequence object; its ``id``, ``name`` and
        ``default_tracks`` attributes are used, and the object itself is
        passed to ``Assembly``.
    """
    # print(...) with a single argument behaves identically on Python 2 and 3.
    print('add new sequence')
    file_url = Assembly(sequence).get_sqlite_url()
    print(file_url)
    out = os.path.join(filemanager.temporary_directory(), 'Genes.sql')
    fileinfo = filemanager.FileInfo(inputtype='url',
                                    inpath=file_url,
                                    trackname='Genes',
                                    extension='sql',
                                    outpath=out,
                                    admin=True)
    print(fileinfo)
    user = DBSession.query(User).filter(
        User.key == constants.admin_user_key()).first()
    user_info = {'id': user.id, 'name': user.name, 'email': user.email}
    sequence_info = {'id': sequence.id, 'name': sequence.name}

    # Create the track and flush so that its id exists before the task is
    # started with it.
    t = Track()
    t.name = fileinfo.trackname
    t.sequence_id = sequence.id
    t.user_id = user.id
    DBSession.add(t)
    DBSession.flush()

    # Start the import task. The result used to be stored in a variable
    # named ``async``, which is a reserved keyword from Python 3.7 onwards.
    task = tasks.new_input.delay(user_info, fileinfo, sequence_info, t.id)
    t.task_id = task.task_id
    DBSession.add(t)

    sequence.default_tracks.append(t)
    DBSession.add(sequence)
    DBSession.flush()
#!/usr/bin/env python

# Rewrite the reference names in "header.sam" from accessions to the
# assembly's chromosome names and save the result as "reheader.sam".
import sys
if len(sys.argv) < 2:
    print("Usage: header_translation <assembly_name>")
    sys.exit(1)

from bbcflib.genrep import Assembly

assembly = sys.argv[1]
a = Assembly(assembly)

# Map each chromosome's accession ('ac') to its chrmeta key.
ac2name = {}
for k, v in a.chrmeta.items():
    ac2name[v['ac']] = k

# Both files are closed by the context manager. The old trailing
# ``g.close()`` referred to a handle that was never opened (NameError) and
# the output handle was never closed.
with open("header.sam") as f, open("reheader.sam", "wb") as h:
    for line in f:
        L = line.split('\t')
        # The second field has the form "<tag>:<accession>".
        chrom = L[1].split(':')[1]
        newchrom = ac2name[chrom]
        h.write(line.replace(chrom, newchrom))
Example #7
0
# Copy "<md5>_ENSEMBL.gtf" to "<md5>_REFSEQ.gtf", replacing the first column
# of every record by the 'ac' value that hg38's chrmeta gives for it.
from bbcflib.genrep import Assembly
a = Assembly('hg38')
chrmeta = a.chrmeta

# File-name prefix shared by the input and output files.
md5 = "cbcc5aeeb39d29065c6641aafd5ccaa430706008"

filename = "%s_ENSEMBL.gtf" % md5
to = "%s_REFSEQ.gtf" % md5
# Context managers close both files even if a chromosome lookup fails.
with open(filename) as f, open(to, "wb") as g:
    for line in f:
        if line.startswith('#'):
            # GTF comment/header line: there is no chromosome column to
            # translate, so copy it unchanged instead of failing the lookup.
            g.write(line)
            continue
        L = line.split('\t')
        ensembl = L[0]
        # An unknown chromosome name still raises KeyError, so records are
        # never silently dropped.
        refseq = chrmeta[ensembl]['ac']
        newline = [refseq] + L[1:]
        g.write('\t'.join(newline))
Example #8
0
 def setUp(self):
     self.assembly = Assembly('ce6')
     self.root = self.assembly.genrep.root
     self.intype = 0
     """