def load_sample_metadata(file_):
    """Load sample metadata lines keyed by their first tab-separated field.

    A pickled copy of the mapping is looked up at ``"<file_>.pkl"`` through
    ``snaputil.load_cpickle_file``. If that call returns a truthy value it is
    returned unchanged; otherwise ``file_`` is parsed as text, the resulting
    mapping is stored at the same pickle path, and then returned.

    Args:
        file_: Path of the tab-delimited text file to read.

    Returns:
        dict mapping the first field of every line to the whole line (with
        trailing whitespace stripped). When several lines share a first
        field, the last one wins.
    """
    pickle_path = "%s.pkl" % (file_)
    cached = snaputil.load_cpickle_file(pickle_path)
    if cached:
        return cached

    # Only the raw line is kept per key; a hash on the column headers is not
    # needed yet.
    fmd = {}
    with open(file_, "r") as f:
        for line in f:
            line = line.rstrip()
            # Only the first column is needed as the key, so stop at the
            # first tab instead of splitting the entire line.
            key = line.partition("\t")[0]
            fmd[key] = line
    snaputil.store_cpickle_file(pickle_path, fmd)
    return fmd
def pack_sample_mapped_ids(line):
    """Pack one sample's mapped IDs into a bitarray and pickle it.

    ``line`` must contain exactly one ``"|"``: the sample id on the left and
    a comma-separated list of integer IDs on the right. Bit ``y`` of the
    resulting bitarray is set for every listed ID ``y``; all other bits are
    cleared. The array covers indices ``0..MAX_ID`` and is extended when an
    ID exceeds ``MAX_ID``. The result is written to ``"<path>/<sid>.pkl"``
    via ``su.store_cpickle_file`` (uncompressed).

    Args:
        line: One ``"<sid>|<id>,<id>,...,"`` record.

    Returns:
        True once the bitarray has been stored.

    Raises:
        ValueError: if ``line`` does not split into exactly two ``"|"``
            fields, an ID token is not an integer, or an ID is negative.
    """
    (sid, ids) = line.rstrip().split("|")

    # The final comma-separated token is always discarded.
    # NOTE(review): presumably the ID list ends with a trailing comma, which
    # makes that token empty -- confirm against the producer of these lines.
    tokens = ids.split(",")[:-1]

    # A set collapses repeated IDs; appending one bit per listed ID would
    # shift every later position by one for each duplicate.
    mapped = {int(x) for x in tokens}
    if mapped and min(mapped) < 0:
        raise ValueError("negative id for sample %s: %d" % (sid, min(mapped)))

    size = max(MAX_ID + 1, 0)
    if mapped:
        size = max(size, max(mapped) + 1)

    # Allocate once and clear explicitly rather than appending bit by bit.
    ba = bitarray(size)
    ba.setall(False)
    for y in mapped:
        ba[y] = True

    su.store_cpickle_file("%s/%s.pkl" % (path, sid), ba, compress=False)
    return True
def pack_sample_mapped_ids(line):
    """Pack one sample's mapped IDs into a bitarray and pickle it.

    ``line`` must contain exactly one ``"|"``: the sample id on the left and
    a comma-separated list of integer IDs on the right. Bit ``y`` of the
    resulting bitarray is set for every listed ID ``y``; all other bits are
    cleared. The array covers indices ``0..MAX_ID`` and is extended when an
    ID exceeds ``MAX_ID``. The result is written to ``"<path>/<sid>.pkl"``
    via ``su.store_cpickle_file`` (uncompressed).

    Args:
        line: One ``"<sid>|<id>,<id>,...,"`` record.

    Returns:
        True once the bitarray has been stored.

    Raises:
        ValueError: if ``line`` does not split into exactly two ``"|"``
            fields, an ID token is not an integer, or an ID is negative.
    """
    (sid, ids) = line.rstrip().split("|")

    # The final comma-separated token is always discarded.
    # NOTE(review): presumably the ID list ends with a trailing comma, which
    # makes that token empty -- confirm against the producer of these lines.
    tokens = ids.split(",")[:-1]

    # A set collapses repeated IDs; appending one bit per listed ID would
    # shift every later position by one for each duplicate.
    mapped = {int(x) for x in tokens}
    if mapped and min(mapped) < 0:
        raise ValueError("negative id for sample %s: %d" % (sid, min(mapped)))

    size = max(MAX_ID + 1, 0)
    if mapped:
        size = max(size, max(mapped) + 1)

    # Allocate once and clear explicitly rather than appending bit by bit.
    ba = bitarray(size)
    ba.setall(False)
    for y in mapped:
        ba[y] = True

    su.store_cpickle_file("%s/%s.pkl" % (path, sid), ba, compress=False)
    return True
def __init__(self, load_refseq=True, load_canonical=True, load_transcript=False):
    """Compile the transcript-id pattern and load the requested annotation maps.

    Each enabled map is first looked up in a pickle cache stored next to its
    annotation file (``"<annotation file>.pkl"``). When the cache yields a
    falsy value, the matching ``self.load_*`` method is called with the
    annotation file path and the map attribute is then written back to the
    cache.

    Args:
        load_refseq: Load ``self.gene_map`` from
            ``snapconf.REFSEQ_ANNOTATION``.
        load_canonical: Load ``self.canonical_gene_map`` from
            ``snapconf.CANONICAL_ANNOTATION``.
        load_transcript: Load ``self.transcript_map`` (per-transcript exons)
            from ``snapconf.TABIX_GENE_INTERVAL_DB``.

    A map attribute is only assigned here when its flag is true.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    self.ensembl_id_patt = re.compile(r'(ENST\d+)')

    if load_refseq:
        gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                               snapconf.REFSEQ_ANNOTATION)
        gene_pickle_file = "%s.pkl" % (gene_file)
        self.gene_map = snaputil.load_cpickle_file(gene_pickle_file)
        if not self.gene_map:
            # NOTE(review): presumably load_gene_coords fills self.gene_map
            # as a side effect -- confirm against its definition.
            self.load_gene_coords(gene_file)
            snaputil.store_cpickle_file(gene_pickle_file, self.gene_map)

    if load_canonical:
        canonical_gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                                         snapconf.CANONICAL_ANNOTATION)
        canonical_gene_pickle_file = "%s.pkl" % (canonical_gene_file)
        self.canonical_gene_map = snaputil.load_cpickle_file(
            canonical_gene_pickle_file)
        if not self.canonical_gene_map:
            self.load_canonical_gene_coords(canonical_gene_file)
            snaputil.store_cpickle_file(canonical_gene_pickle_file,
                                        self.canonical_gene_map)

    # per transcript exons
    if load_transcript:
        transcript_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                                     snapconf.TABIX_GENE_INTERVAL_DB)
        transcript_pickle_file = "%s.pkl" % (transcript_file)
        self.transcript_map = snaputil.load_cpickle_file(
            transcript_pickle_file)
        if not self.transcript_map:
            self.load_transcripts(transcript_file)
            snaputil.store_cpickle_file(transcript_pickle_file,
                                        self.transcript_map)
def __init__(self, load_refseq=True, load_canonical=True, load_transcript=False):
    """Compile the transcript-id pattern and load the requested annotation maps.

    Each enabled map is first looked up in a pickle cache stored next to its
    annotation file (``"<annotation file>.pkl"``). When the cache yields a
    falsy value, the matching ``self.load_*`` method is called with the
    annotation file path and the map attribute is then written back to the
    cache.

    Args:
        load_refseq: Load ``self.gene_map`` from
            ``snapconf.REFSEQ_ANNOTATION``.
        load_canonical: Load ``self.canonical_gene_map`` from
            ``snapconf.CANONICAL_ANNOTATION``.
        load_transcript: Load ``self.transcript_map`` (per-transcript exons)
            from ``snapconf.TABIX_GENE_INTERVAL_DB``.

    A map attribute is only assigned here when its flag is true.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    self.ensembl_id_patt = re.compile(r'(ENST\d+)')

    if load_refseq:
        gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                               snapconf.REFSEQ_ANNOTATION)
        gene_pickle_file = "%s.pkl" % (gene_file)
        self.gene_map = snaputil.load_cpickle_file(gene_pickle_file)
        if not self.gene_map:
            # NOTE(review): presumably load_gene_coords fills self.gene_map
            # as a side effect -- confirm against its definition.
            self.load_gene_coords(gene_file)
            snaputil.store_cpickle_file(gene_pickle_file, self.gene_map)

    if load_canonical:
        canonical_gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                                         snapconf.CANONICAL_ANNOTATION)
        canonical_gene_pickle_file = "%s.pkl" % (canonical_gene_file)
        self.canonical_gene_map = snaputil.load_cpickle_file(
            canonical_gene_pickle_file)
        if not self.canonical_gene_map:
            self.load_canonical_gene_coords(canonical_gene_file)
            snaputil.store_cpickle_file(canonical_gene_pickle_file,
                                        self.canonical_gene_map)

    # per transcript exons
    if load_transcript:
        transcript_file = "%s/%s" % (snapconf.TABIX_DB_PATH,
                                     snapconf.TABIX_GENE_INTERVAL_DB)
        transcript_pickle_file = "%s.pkl" % (transcript_file)
        self.transcript_map = snaputil.load_cpickle_file(
            transcript_pickle_file)
        if not self.transcript_map:
            self.load_transcripts(transcript_file)
            snaputil.store_cpickle_file(transcript_pickle_file,
                                        self.transcript_map)