def main():
    """Read a file through BedGraphReader and write it out as an array tree dict.

    Command-line arguments:
        sys.argv[1]: path of the input file handed to BedGraphReader.
        sys.argv[2]: path of the output file produced by
            FileArrayTreeDict.dict_to_file.
    """
    input_fname = sys.argv[1]
    out_fname = sys.argv[2]

    # Fill array from reader. The context manager guarantees the input
    # handle is released even if parsing raises.
    with open(input_fname) as in_file:
        reader = BedGraphReader(in_file)
        d = array_tree_dict_from_reader(reader, {}, block_size=BLOCK_SIZE)

    # values() instead of the Python 2-only itervalues(), so the script
    # runs unchanged on both Python 2 and Python 3.
    for array_tree in d.values():
        array_tree.root.build_summary()

    # Closing the output deterministically also flushes it to disk.
    with open(out_fname, "w") as out_file:
        FileArrayTreeDict.dict_to_file(d, out_file)
def setUp(self):
    """Create the fixture: an ArrayTree written to a temp file and reopened.

    Leaves the reopened dictionary on ``self.filearraytreedict`` and its
    'test' entry on ``self.filearraytree``.
    """
    source_tree = ArrayTree(10000, 10)  # max value of 10000, each block has 10 numbers
    for position in range(5000):
        source_tree[position] = position
    # Insert extra copies to test frequency
    for position in range(3000):
        source_tree[position] = position
    source_tree.set_range(5000, 9001, 100)
    source_tree.root.build_summary()

    # The temporary file is deliberately left open: the reopened
    # dictionary below keeps using it after setUp returns.
    backing_file = tempfile.TemporaryFile()
    FileArrayTreeDict.dict_to_file({'test': source_tree}, backing_file)
    backing_file.seek(0)

    reopened = FileArrayTreeDict(backing_file)
    self.filearraytreedict = reopened
    self.filearraytree = reopened['test']
def setUp(self):
    """Prepare a file-backed array tree for the tests.

    An in-memory ArrayTree is populated, summarised, dumped to a temporary
    file under the key 'test', and then read back through
    FileArrayTreeDict.
    """
    in_memory = ArrayTree(10000, 10)  # max value of 10000, each block has 10 numbers

    # First pass fills 0..4999; the second pass inserts extra copies of
    # 0..2999 to test frequency.
    for upper_bound in (5000, 3000):
        for idx in range(upper_bound):
            in_memory[idx] = idx

    in_memory.set_range(5000, 9001, 100)
    in_memory.root.build_summary()

    trees_by_name = {'test': in_memory}
    # Left open on purpose: the file-backed dictionary reads from it later.
    tmp = tempfile.TemporaryFile()
    FileArrayTreeDict.dict_to_file(trees_by_name, tmp)
    tmp.seek(0)

    file_dict = FileArrayTreeDict(tmp)
    self.filearraytreedict = file_dict
    self.filearraytree = file_dict['test']
def main():
    """Build an array tree dict from wiggle data on stdin and write it to a file.

    Command-line arguments:
        sys.argv[1]: whitespace-delimited sizes file; on each line the first
            field is used as the key and the second is parsed as an int.
        sys.argv[2]: path of the output file produced by
            FileArrayTreeDict.dict_to_file.
    """
    sizes_fname = sys.argv[1]
    out_fname = sys.argv[2]

    sizes = {}
    # Context manager so the sizes file is closed as soon as it is parsed.
    with open(sizes_fname) as sizes_file:
        for line in sizes_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of failing with IndexError.
                continue
            sizes[fields[0]] = int(fields[1])

    # Fill array from wiggle
    d = array_tree_dict_from_reader(WiggleReader(sys.stdin), sizes)

    for value in d.values():
        value.root.build_summary()

    with open(out_fname, "w") as f:
        FileArrayTreeDict.dict_to_file(d, f)
def test_big(self):
    """Round-trip a large ArrayTree through a file and check a level-1 summary.

    The range 14000000-15000000 is set to 200, so the level-1 summary
    starting at 14000000 is expected to hold 1000 averages of 200.
    """
    tree = ArrayTree(2147483647, 1000)  # What we use for tracks
    for i in range(5000):
        tree[i] = i
    # Insert extra copies to test frequency
    for i in range(3000):
        tree[i] = i
    tree.set_range(5000, 9001, 100)
    tree.set_range(14000000, 15000000, 200)
    tree.root.build_summary()
    d = {'test': tree}

    # The context manager closes the temporary file even when a read or
    # the summary lookup raises; previously the handle was never closed.
    with tempfile.TemporaryFile() as f:
        FileArrayTreeDict.dict_to_file(d, f)
        f.seek(0)
        at = FileArrayTreeDict(f)['test']
        lvl1 = at.get_summary(14000000, 1)
        # Convert to plain floats while the backing file is still open.
        avgs = [float(_) for _ in lvl1.sums / lvl1.counts]

    self.assertEqual(len(avgs), 1000)
    self.assertEqual(avgs, [200] * 1000)
def test_big(self):
    """Check level-1 summary averages of a track-sized tree read back from disk.

    Builds an ArrayTree, writes it to a temporary file under the key
    'test', reopens it with FileArrayTreeDict, and verifies that the
    level-1 summary at 14000000 contains 1000 averages equal to 200.
    """
    tree = ArrayTree(2147483647, 1000)  # What we use for tracks
    for i in range(5000):
        tree[i] = i
    # Insert extra copies to test frequency
    for i in range(3000):
        tree[i] = i
    tree.set_range(5000, 9001, 100)
    tree.set_range(14000000, 15000000, 200)
    tree.root.build_summary()
    d = {'test': tree}

    # Scope the temporary file with ``with`` so it is always closed; the
    # original left the handle open for the garbage collector.
    with tempfile.TemporaryFile() as f:
        FileArrayTreeDict.dict_to_file(d, f)
        f.seek(0)
        at = FileArrayTreeDict(f)['test']
        lvl1 = at.get_summary(14000000, 1)
        # Compute the averages before the file goes away.
        avgs = [float(_) for _ in lvl1.sums / lvl1.counts]

    self.assertEqual(len(avgs), 1000)
    self.assertEqual(avgs, [200 for i in range(0, 1000)])
def get_stats(self, chrom):
    """Return the overall minimum and maximum stored for ``chrom``.

    Args:
        chrom: key looked up in the FileArrayTreeDict stored at
            ``self.dataset.file_name``.

    Returns:
        The string "no data" when ``chrom`` is not a key of the file;
        otherwise a dict with float entries 'max' and 'min' computed from
        the summary returned by ``get_summary(0, levels)``.
    """
    # ``with`` closes the file on every path; the original leaked the
    # handle on the "no data" return and whenever get_summary raised.
    # NOTE(review): the file is opened in text mode as before -- confirm
    # whether FileArrayTreeDict needs a binary handle on Python 3.
    with open(self.dataset.file_name) as f:
        d = FileArrayTreeDict(f)
        try:
            chrom_array_tree = d[chrom]
        except KeyError:
            return "no data"

        root_summary = chrom_array_tree.get_summary(0, chrom_array_tree.levels)
        return {
            'max': float(max(root_summary.maxs)),
            'min': float(min(root_summary.mins)),
        }
def get_data(self, chrom, start, end, **kwargs):
    """Return (position, value) pairs for ``chrom`` between ``start`` and ``end``.

    Args:
        chrom: key looked up in the FileArrayTreeDict stored at
            ``self.dataset.file_name``.
        start: first position requested; converted with ``int()``.
        end: end of the requested range; converted with ``int()``.
        **kwargs: must contain 'resolution', which selects the summary
            level used (level = floor(log(resolution, block_size)),
            clamped to be at least 0).

    Returns:
        None when ``chrom`` is not a key of the file; otherwise a list of
        (index, float) tuples. At level 0 the values come from
        ``get_leaf``; at higher levels they are ``sums / counts`` of the
        summary. Blocks for which the tree returns None are skipped.

    Raises:
        KeyError: if 'resolution' is missing from ``kwargs``.
        AssertionError: if the computed level exceeds the tree's levels.
    """
    # ``with`` closes the file on every path; the original leaked the
    # handle on the early ``return None`` and on any exception.
    # NOTE(review): the file is opened in text mode as before -- confirm
    # whether FileArrayTreeDict needs a binary handle on Python 3.
    with open(self.dataset.file_name) as f:
        d = FileArrayTreeDict(f)

        # Get the right chromosome
        try:
            chrom_array_tree = d[chrom]
        except KeyError:
            return None

        block_size = chrom_array_tree.block_size
        start = int(start)
        end = int(end)
        resolution = max(1, ceil(float(kwargs['resolution'])))

        # NOTE(review): log() is floating point, so for exact powers of
        # block_size the floor may land one level low -- confirm whether
        # that matters to callers.
        level = int(floor(log(resolution, block_size)))
        level = max(level, 0)
        stepsize = block_size ** level
        # Number of positions covered by one block at this level.
        block_span = stepsize * block_size

        # Is the requested level valid?
        assert 0 <= level <= chrom_array_tree.levels

        results = []
        for block_start in range(start, end, block_span):
            # Return either data point or a summary depending on the level
            indexes = range(block_start, block_start + block_span, stepsize)
            if level > 0:
                s = chrom_array_tree.get_summary(block_start, level)
                if s is not None:
                    results.extend(zip(indexes, map(float, s.sums / s.counts)))
            else:
                v = chrom_array_tree.get_leaf(block_start)
                if v is not None:
                    results.extend(zip(indexes, map(float, v)))

        return results