'''
import numpy as np
import tables
import logging
import pysam

from track import Track, TrackError, parse_interval
from intervaltrack import IntervalTrack, get_base_dtype_fields, \
    REF_COL_NAME, START_COL_NAME, END_COL_NAME
from vectortrack import VectorTrack
from io.sam import BamCoverageIterator, BamCoverageStatistics

# Names of the two groups used by this module.
# NOTE(review): presumably PyTables group names inside the track's HDF5
# node -- the code that uses them is not visible here; confirm.
JUNCTION_GROUP = "junctions"
COVERAGE_GROUP = "coverage"

# Structured numpy dtype describing one junction record: the base interval
# fields returned by get_base_dtype_fields() followed by junction-specific
# columns.  '<u4' / '<i4' are little-endian unsigned / signed 32-bit
# integers; 'aN' is a fixed-width byte string of N bytes.
junction_dtype = (get_base_dtype_fields() +
                  [('id', '<u4'),
                   ('seqdata_name', 'a36'),
                   ('strand', 'a4'),
                   ('reads', '<i4'),
                   ('left_coverage', '<u4'),
                   ('right_coverage', '<u4')])
# NOTE(review): presumably the `expectedrows` sizing hint given to PyTables
# when the junction table is created -- confirm against the caller.
junction_expectedrows = 1e5

def tophat_bed_to_juncs(source, line_iter):
    '''
    Iterate over the lines in `line_iter`, reusing a single preallocated
    record of `junction_dtype`.

    NOTE(review): judging by the name and the sample below, the input is
    presumably a TopHat junctions BED file -- confirm.  The sample line
    has twelve whitespace-separated fields, which appears to match the
    standard 12-column BED layout:

    chr5 180666535 180668532 JUNC00002848 36 - 180666535 180668532 255,0,0 2 47,41 0,1956
    '''
    # allocate a one-element structured array once and keep a handle on its
    # only record
    juncarr = np.empty(1, dtype=junction_dtype)
    junc = juncarr[0]
    for line in line_iter:
from track import TrackError
from intervaltrack import IntervalTrack, get_base_dtype_fields, \
    REF_COL_NAME, START_COL_NAME, END_COL_NAME

# String labels for the kinds of feature handled by this module.
# NOTE(review): presumably the values stored in the 'feature_type' column
# of feature_dtype -- confirm.
EXON_TYPE = "exon"
GENE_TYPE = "gene"
TRANSCRIPT_TYPE = "transcript"

# Names of the tables used by this module.
# NOTE(review): presumably PyTables table names; the code that creates
# the tables is not visible here -- confirm.
FEATURE_TABLE = "features"
FEATURE_ALIAS_TABLE = "aliases"
FEATURE_ASSOC_TABLE = "feature_assoc"
ALIAS_ASSOC_TABLE = "alias_assoc"

# the numpy dtype used to represent features: the base interval fields
# returned by get_base_dtype_fields() followed by feature-specific columns.
# '<u4' / '<i4' are little-endian unsigned / signed 32-bit integers;
# 'aN' is a fixed-width byte string of N bytes.
feature_dtype = (get_base_dtype_fields() +
                 [('id', '<u4'),
                  ('name', 'a32'),
                  ('source', 'a24'),
                  ('feature_type', 'a23'),
                  ('strand', 'a1'),
                  ('score', '<i4')])
# NOTE(review): presumably the `expectedrows` sizing hint given to PyTables
# when the feature table is created -- confirm against the caller.
feature_expectedrows = 1e5

# dtype of one parent/child association row between two ids plus a value
feature_assoc_dtype = [('parent_id', '<u4'),
                       ('child_id', '<u4'),
                       ('value', '<u4')]
# dtype of one alias row: an id, a 16-byte source and a 128-byte name
feature_alias_dtype = [('id', '<u4'),
                       ('source', 'a16'),
                       ('name', 'a128')]
alias_assoc_dtype = [('alias_id', '<u4'),