Esempio n. 1
0
'''
import numpy as np
import tables
import logging
import pysam

from track import Track, TrackError, parse_interval
from intervaltrack import IntervalTrack, get_base_dtype_fields, \
    REF_COL_NAME, START_COL_NAME, END_COL_NAME
from vectortrack import VectorTrack
from io.sam import BamCoverageIterator, BamCoverageStatistics

# Names used for the "junctions" and "coverage" groups.
# NOTE(review): presumably HDF5/PyTables group names under which this track
# stores its junction table and coverage data -- confirm at the use sites.
JUNCTION_GROUP = "junctions"
COVERAGE_GROUP = "coverage"

# Record layout for one junction row: the shared base fields returned by
# get_base_dtype_fields() followed by the junction-specific columns below.
# Byte-string columns use NumPy's 'S<n>' type code (fixed width of n bytes).
# It denotes the same dtype as the older 'a<n>' spelling, but the 'a' alias
# is deprecated as of NumPy 2.0, so 'S' is used here.
junction_dtype = (get_base_dtype_fields() +
                  [('id', '<u4'),
                   ('seqdata_name', 'S36'),
                   ('strand', 'S4'),
                   ('reads', '<i4'),
                   ('left_coverage', '<u4'),
                   ('right_coverage', '<u4')])
# NOTE(review): presumably the expected-row-count hint given to PyTables when
# the junction table is created -- confirm at the call site.
junction_expectedrows = 1e5

def tophat_bed_to_juncs(source, line_iter):
    '''
    chr5    180666535       180668532       JUNC00002848    36      -       180666535       180668532       255,0,0 2       47,41   0,1956
    '''
    juncarr = np.empty(1, dtype=junction_dtype)
    junc = juncarr[0]
    for line in line_iter:
Esempio n. 2
0
from track import TrackError
from intervaltrack import IntervalTrack, get_base_dtype_fields, \
    REF_COL_NAME, START_COL_NAME, END_COL_NAME

# String labels for the kinds of features handled here.
# NOTE(review): presumably the values stored in a feature record's
# 'feature_type' field -- confirm against the code that fills that column.
EXON_TYPE = "exon"
GENE_TYPE = "gene"
TRANSCRIPT_TYPE = "transcript"

# Names of the feature, alias and association tables.
# NOTE(review): presumably PyTables table names inside the track's group --
# confirm at the use sites.
FEATURE_TABLE = "features"
FEATURE_ALIAS_TABLE = "aliases"
FEATURE_ASSOC_TABLE = "feature_assoc"
ALIAS_ASSOC_TABLE = "alias_assoc"

# The numpy dtype used to represent features: the shared base fields returned
# by get_base_dtype_fields() followed by the feature-specific columns below.
# Byte-string columns use NumPy's 'S<n>' type code (fixed width of n bytes).
# It denotes the same dtype as the older 'a<n>' spelling, but the 'a' alias
# is deprecated as of NumPy 2.0, so 'S' is used here.
feature_dtype = (get_base_dtype_fields() +
                 [('id', '<u4'),
                  ('name', 'S32'),
                  ('source', 'S24'),
                  ('feature_type', 'S23'),
                  ('strand', 'S1'),
                  ('score', '<i4')])
# NOTE(review): presumably the expected-row-count hint given to PyTables when
# the feature table is created -- confirm at the call site.
feature_expectedrows = 1e5

# Record layout linking two features by id, with an unsigned 32-bit 'value'
# attached to each link.
# NOTE(review): the meaning of 'value' is not visible here -- confirm with the
# code that writes these rows.
feature_assoc_dtype = [('parent_id', '<u4'),
                       ('child_id', '<u4'),
                       ('value', '<u4')]
# Record layout for an alias: an unsigned 32-bit id plus fixed-width byte
# strings for the alias source (16 bytes) and name (128 bytes).  The 'S<n>'
# type code is the same dtype as the older 'a<n>' spelling, whose 'a' alias
# is deprecated as of NumPy 2.0.
feature_alias_dtype = [('id', '<u4'),
                       ('source', 'S16'),
                       ('name', 'S128')]
alias_assoc_dtype = [('alias_id', '<u4'),