def read_dir(self, d):
    """Load annotation data from directory *d* into this object.

    Populates:
      self.lookup - dict mapping chromosome name -> intervals.Intersecter
                    built from whitespace-delimited ``*.match`` files
                    (columns: chrom, start, end, what; '#' lines skipped),
                    or None if no match data was found.
      self.scores - dict mapping file-basename prefix -> FileBinnedArray
                    for every non-``.match`` file in the directory.

    If *d* does not exist as given, each prefix in DATA_SEARCH_PATH is
    tried and the first existing ``prefix/d`` is used instead.
    """
    lookup = dict()
    scores_by_what = dict()
    # Resolve d against the search path if it is not directly usable.
    if not os.path.exists(d):
        for p in DATA_SEARCH_PATH:
            candidate = os.path.join(p, d)
            if os.path.exists(candidate):
                d = candidate
                break
    for fname in os.listdir(d):
        if fname.endswith(".match"):
            # BUG FIX: the original opened each .match file without ever
            # closing it; use a context manager so the handle is released.
            with open(os.path.join(d, fname)) as fh:
                for line in fh:
                    if line.startswith("#"):
                        continue
                    fields = line.strip().split()
                    if len(fields) < 4:
                        # Malformed row: silently skipped (matches original
                        # behavior).
                        continue
                    chrom, start, end, what = fields[0:4]
                    if chrom not in lookup:
                        lookup[chrom] = intervals.Intersecter()
                    lookup[chrom].add_interval(
                        intervals.Interval(int(start), int(end), what))
        else:
            # Key score files by the basename up to the first dot.
            k = fname.split('.')[0]
            if k not in scores_by_what:
                # NOTE: this handle intentionally stays open --
                # FileBinnedArray reads from it lazily.
                scores_by_what[k] = FileBinnedArray(
                    open(os.path.join(d, fname)))
    # An empty lookup is exposed as None so callers can test for absence.
    self.lookup = lookup if lookup else None
    self.scores = scores_by_what
def __init__(self, qualfiles=None, qualspecies=None, minqual=None, mask="?", cache=100):
    """Initialize a quality masker.

    Parameters:
      qualfiles   - dict mapping species -> quality-file path prefix;
                    per-chromosome data is read from
                    ``<prefix>.<chrom>.bqv``. Required.
      qualspecies - dict mapping species -> iterable of chromosome names
                    to load. Required.
      minqual     - minimum acceptable quality value. Required (may be 0).
      mask        - character used to mask low-quality positions.
      cache       - nominal cache size; doubled internally and split
                    evenly across the quality files.

    Raises:
      Exception - if qualfiles, qualspecies, or minqual is missing.
    """
    if not qualfiles:
        raise Exception("No quality files.")
    if not qualspecies:
        raise Exception("No species dictionary.")
    # BUG FIX: the original used ``if not minqual``, which rejected a
    # legitimate minimum quality of 0; test for None explicitly.
    if minqual is None:
        raise Exception("No minimum quality specified.")
    # BUG FIX: removed a dead ``self.mask = "?"`` assignment that was
    # immediately overwritten by the real one below.
    self.minqual = minqual
    self.mask = mask
    self.total = 0   # positions examined so far
    self.masked = 0  # positions masked so far
    self.qualfiles = qualfiles
    self.qualspecies = qualspecies
    self.cache = cache * 2  # typical bin size is 512K
    # Load quality files into FileBinnedArray, one per (species, chrom).
    self.qualities = {}
    for species, qualfile in self.qualfiles.items():
        specdict = {}
        for chrom in self.qualspecies[species]:
            # BUG FIX: use integer division -- under Python 3 plain ``/``
            # would pass a float cache size (``//`` matches the Python 2
            # behavior the code was written for).
            specdict[chrom] = FileBinnedArray(
                open(qualfile + "." + chrom + ".bqv", "rb"),
                cache=self.cache // len(qualfiles))
        self.qualities[species] = specdict
def __getitem__(self, key):
    """Return the FileBinnedArray for *key*, loading it on first access.

    Looks up *key* in self.cache; on a miss, opens ``<key>.ba`` from
    self.dir, wraps it in a FileBinnedArray, and caches it. Misses for
    nonexistent files are NOT cached.

    Raises:
      KeyError - if no ``<key>.ba`` file exists under self.dir.
    """
    # Fast path: already loaded.
    if key in self.cache:
        return self.cache[key]
    fname = os.path.join(self.dir, "%s.ba" % key)
    if not os.path.exists(fname):
        raise KeyError("File does not exist: " + fname)
    loaded = FileBinnedArray(open(fname))
    self.cache[key] = loaded
    return loaded