Esempio n. 1
0
def load_sample_metadata(file_):
    """Load sample metadata lines keyed by their first tab-separated field.

    A pickle cache at ``<file_>.pkl`` is tried first through
    ``snaputil.load_cpickle_file``; when that yields a truthy value it is
    returned unchanged.  Otherwise ``file_`` is parsed line by line and the
    resulting dict is passed to ``snaputil.store_cpickle_file`` before being
    returned.

    Args:
        file_: Path of the tab-delimited sample metadata file.

    Returns:
        dict mapping the first column of each line to the whole line with
        trailing whitespace stripped.
    """
    pickle_path = "%s.pkl" % (file_)
    fmd = snaputil.load_cpickle_file(pickle_path)
    if fmd:
        return fmd
    fmd = {}
    # dont need the hash-on-column headers just yet
    with open(file_, "r") as f:
        for line in f:
            line = line.rstrip()
            # Only the first column is used as the key, so split at most once.
            fmd[line.split("\t", 1)[0]] = line
    snaputil.store_cpickle_file(pickle_path, fmd)
    return fmd
Esempio n. 2
0
def pack_sample_mapped_ids(line):
    """Pack one sample's mapped ids into a bitarray and pickle it.

    ``line`` is expected to look like ``<sid>|<id>,<id>,...`` (a trailing
    comma after the last id is tolerated).  Bit ``k`` of the resulting
    bitarray is set when ``k`` appears in the id list.  The array covers
    indexes ``0..MAX_ID`` and is extended if an id exceeds ``MAX_ID``.

    The result is written to ``<path>/<sid>.pkl`` via
    ``su.store_cpickle_file``; ``path``, ``MAX_ID``, ``su`` and ``bitarray``
    are names taken from the enclosing module.

    Args:
        line: One ``sid|ids`` record, optionally newline-terminated.

    Returns:
        True once the bitarray has been handed to the store helper.

    Raises:
        ValueError: If the record does not split into exactly two
            ``|``-separated parts, an id is not an integer, or an id is
            negative.
    """
    (sid, ids) = line.rstrip().split("|")
    # Skip empty tokens (such as the one after a trailing comma) instead of
    # blindly popping the last token, which silently lost a real id whenever
    # the list had no trailing comma.
    positions = [int(x) for x in ids.split(",") if x]
    if positions and min(positions) < 0:
        raise ValueError("negative id in sample %s" % sid)
    top = max(positions) if positions else -1
    # Allocate once and set bits by index: repeated ids are harmless this
    # way, whereas appending bit by bit shifted every later position.
    ba = bitarray(max(MAX_ID, top) + 1)
    ba.setall(False)
    for y in positions:
        ba[y] = True
    su.store_cpickle_file("%s/%s.pkl" % (path, sid), ba, compress=False)
    return True
Esempio n. 3
0
def pack_sample_mapped_ids(line):
    """Turn a ``sid|id,id,...`` record into a pickled bitarray of set ids.

    The bitarray spans at least indexes ``0..MAX_ID`` (longer when an id is
    larger than ``MAX_ID``) and has bit ``k`` set exactly when ``k`` occurs
    in the id list.  It is stored as ``<path>/<sid>.pkl`` through
    ``su.store_cpickle_file``.  ``path``, ``MAX_ID``, ``su`` and
    ``bitarray`` come from the surrounding module.

    Args:
        line: The record to pack; trailing whitespace is ignored and a
            trailing comma after the last id is accepted.

    Returns:
        True after the store helper has been called.

    Raises:
        ValueError: On a record without exactly one ``|``, a non-integer
            id, or a negative id.
    """
    (sid, ids) = line.rstrip().split("|")
    # A set both removes repeated ids (which used to shift all following
    # bits) and lets empty tokens be filtered without dropping the final id
    # when the list is not comma-terminated.
    wanted = set(int(tok) for tok in ids.split(",") if tok)
    highest = -1
    for y in wanted:
        if y < 0:
            raise ValueError("negative id in sample %s" % sid)
        if y > highest:
            highest = y
    # Pre-size the array rather than appending one bit at a time.
    ba = bitarray(max(MAX_ID, highest) + 1)
    ba.setall(False)
    for y in wanted:
        ba[y] = True
    su.store_cpickle_file("%s/%s.pkl" % (path, sid), ba, compress=False)
    return True
Esempio n. 4
0
def load_sample_metadata(file_):
    """Return a dict of sample metadata lines, using a pickle cache if present.

    ``snaputil.load_cpickle_file`` is asked for ``<file_>.pkl`` first and a
    truthy result is returned directly.  On a miss the tab-delimited
    ``file_`` is read, each line (trailing whitespace removed) is stored
    under its first field, and the dict is given to
    ``snaputil.store_cpickle_file`` under the same ``.pkl`` name.

    Args:
        file_: Path of the sample metadata file.

    Returns:
        dict of first-column value -> stripped line.
    """
    cache_file = "%s.pkl" % (file_)
    cached = snaputil.load_cpickle_file(cache_file)
    if cached:
        return cached
    fmd = {}
    # dont need the hash-on-column headers just yet
    with open(file_, "r") as f:
        for raw in f:
            record = raw.rstrip()
            # partition() yields the text before the first tab, i.e. the
            # same key as split("\t")[0] without building the whole list.
            key = record.partition("\t")[0]
            fmd[key] = record
    snaputil.store_cpickle_file(cache_file, fmd)
    return fmd
Esempio n. 5
0
    def __init__(self,
                 load_refseq=True,
                 load_canonical=True,
                 load_transcript=False):
        """Compile the transcript-id pattern and load the requested maps.

        Each enabled flag loads one annotation located under
        ``snapconf.TABIX_DB_PATH`` into an attribute of this instance:

        * ``load_refseq``     -> ``self.gene_map``
        * ``load_canonical``  -> ``self.canonical_gene_map``
        * ``load_transcript`` -> ``self.transcript_map``

        Attributes for disabled flags are not assigned here.

        Args:
            load_refseq: Load ``snapconf.REFSEQ_ANNOTATION``.
            load_canonical: Load ``snapconf.CANONICAL_ANNOTATION``.
            load_transcript: Load ``snapconf.TABIX_GENE_INTERVAL_DB``.
        """
        # Raw string: "\d" inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        self.ensembl_id_patt = re.compile(r'(ENST\d+)')
        if load_refseq:
            self._load_cached_map(snapconf.REFSEQ_ANNOTATION,
                                  "gene_map",
                                  self.load_gene_coords)
        if load_canonical:
            self._load_cached_map(snapconf.CANONICAL_ANNOTATION,
                                  "canonical_gene_map",
                                  self.load_canonical_gene_coords)

        #per transcript exons
        if load_transcript:
            self._load_cached_map(snapconf.TABIX_GENE_INTERVAL_DB,
                                  "transcript_map",
                                  self.load_transcripts)

    def _load_cached_map(self, annotation, attr, build):
        """Fill ``self.<attr>`` from ``<annotation>.pkl`` or by calling build.

        Args:
            annotation: File name of the annotation, relative to
                ``snapconf.TABIX_DB_PATH``.
            attr: Name of the instance attribute that holds the map.
            build: Callable taking the annotation path; presumably it
                assigns ``self.<attr>`` itself, since its return value is
                ignored -- confirm against the loader methods.
        """
        source_file = "%s/%s" % (snapconf.TABIX_DB_PATH, annotation)
        pickle_file = "%s.pkl" % (source_file)
        setattr(self, attr, snaputil.load_cpickle_file(pickle_file))
        if not getattr(self, attr):
            build(source_file)
        # NOTE(review): the store call runs even when the pickle was just
        # loaded successfully; kept as in the original -- confirm whether
        # store_cpickle_file skips existing files.
        snaputil.store_cpickle_file(pickle_file, getattr(self, attr))
Esempio n. 6
0
 def __init__(self, load_refseq=True, load_canonical=True, load_transcript=False):
     """Set up the transcript-id regex and load the selected annotation maps.

     For every enabled flag the annotation path is built from
     ``snapconf.TABIX_DB_PATH``, its ``.pkl`` companion is read with
     ``snaputil.load_cpickle_file``, the matching ``load_*`` method is
     called when that result is falsy, and the map is then passed to
     ``snaputil.store_cpickle_file``.

     Args:
         load_refseq: Populate ``self.gene_map`` from
             ``snapconf.REFSEQ_ANNOTATION``.
         load_canonical: Populate ``self.canonical_gene_map`` from
             ``snapconf.CANONICAL_ANNOTATION``.
         load_transcript: Populate ``self.transcript_map`` from
             ``snapconf.TABIX_GENE_INTERVAL_DB``.
     """
     # Raw string so that "\d" reaches the regex engine without relying on
     # Python passing an invalid string escape through unchanged.
     self.ensembl_id_patt = re.compile(r'(ENST\d+)')
     if load_refseq:
         gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH, snapconf.REFSEQ_ANNOTATION)
         gene_pickle_file = "%s.pkl" % (gene_file)
         self.gene_map = snaputil.load_cpickle_file(gene_pickle_file)
         if not self.gene_map:
             # presumably fills self.gene_map; the return value is unused
             self.load_gene_coords(gene_file)
         # NOTE(review): stored even after a successful pickle load --
         # confirm store_cpickle_file is a no-op for existing files.
         snaputil.store_cpickle_file(gene_pickle_file, self.gene_map)
     if load_canonical:
         canonical_gene_file = "%s/%s" % (snapconf.TABIX_DB_PATH, snapconf.CANONICAL_ANNOTATION)
         canonical_gene_pickle_file = "%s.pkl" % (canonical_gene_file)
         self.canonical_gene_map = snaputil.load_cpickle_file(canonical_gene_pickle_file)
         if not self.canonical_gene_map:
             # presumably fills self.canonical_gene_map
             self.load_canonical_gene_coords(canonical_gene_file)
         snaputil.store_cpickle_file(canonical_gene_pickle_file, self.canonical_gene_map)

     #per transcript exons
     if load_transcript:
         transcript_file = "%s/%s" % (snapconf.TABIX_DB_PATH, snapconf.TABIX_GENE_INTERVAL_DB)
         transcript_pickle_file = "%s.pkl" % (transcript_file)
         self.transcript_map = snaputil.load_cpickle_file(transcript_pickle_file)
         if not self.transcript_map:
             # presumably fills self.transcript_map
             self.load_transcripts(transcript_file)
         snaputil.store_cpickle_file(transcript_pickle_file, self.transcript_map)