Exemple #1
0
def main():
    """Build array trees from an input file and write them to an output file.

    The input path is taken from ``sys.argv[1]`` and is parsed through
    ``BedGraphReader``; the output path is taken from ``sys.argv[2]``.
    """
    input_fname = sys.argv[1]
    out_fname = sys.argv[2]

    # Fill array from reader. The context manager guarantees the input
    # handle is closed even if parsing raises.
    with open(input_fname) as in_file:
        reader = BedGraphReader(in_file)
        d = array_tree_dict_from_reader(reader, {}, block_size=BLOCK_SIZE)

    # values() works on both Python 2 and Python 3, unlike the
    # Python 2-only itervalues().
    for array_tree in d.values():
        array_tree.root.build_summary()

    # NOTE(review): mode "w" is kept from the original; if dict_to_file
    # writes bytes, Python 3 would need "wb" -- confirm against the library.
    with open(out_fname, "w") as out_file:
        FileArrayTreeDict.dict_to_file(d, out_file)
    def setUp(self):
        """Build an ArrayTree, serialize it to a temp file, and reopen it.

        Stores the reopened dictionary on ``self.filearraytreedict`` and its
        ``'test'`` entry on ``self.filearraytree``.
        """
        source_tree = ArrayTree(10000, 10)  # max value of 10000, each block has 10 numbers

        # First pass stores i at every index below 5000; the second pass
        # stores the first 3000 again (extra copies to test frequency).
        for upper in (5000, 3000):
            for idx in range(upper):
                source_tree[idx] = idx

        source_tree.set_range(5000, 9001, 100)
        source_tree.root.build_summary()

        handle = tempfile.TemporaryFile()
        FileArrayTreeDict.dict_to_file({'test': source_tree}, handle)
        # Rewind so the reader starts from the beginning of the file.
        handle.seek(0)

        reopened = FileArrayTreeDict(handle)
        self.filearraytreedict = reopened
        self.filearraytree = reopened['test']
 def setUp(self):
     """Create the on-disk array tree fixture used by the tests.

     An ArrayTree is filled, summarized, written to a temporary file and
     read back; the results are kept on ``self.filearraytreedict`` and
     ``self.filearraytree``.
     """
     # max value of 10000, each block has 10 numbers
     fixture_tree = ArrayTree(10000, 10)

     pos = 0
     while pos < 5000:
         fixture_tree[pos] = pos
         pos += 1

     # Insert extra copies to test frequency
     pos = 0
     while pos < 3000:
         fixture_tree[pos] = pos
         pos += 1

     fixture_tree.set_range(5000, 9001, 100)
     fixture_tree.root.build_summary()

     trees_by_name = {'test': fixture_tree}
     tmp = tempfile.TemporaryFile()
     FileArrayTreeDict.dict_to_file( trees_by_name, tmp )
     tmp.seek(0)  # rewind before handing the file to the reader

     self.filearraytreedict = FileArrayTreeDict(tmp)
     self.filearraytree = self.filearraytreedict['test']
def main():
    """Build array trees from wiggle data on stdin and write them to a file.

    ``sys.argv[1]`` names a whitespace-separated sizes file whose first
    column is used as the key and whose second column is parsed as an
    integer size. ``sys.argv[2]`` names the output file.

    Raises:
        ValueError: if a size column is not an integer.
        IndexError: if a non-blank line has fewer than two columns.
    """
    sizes_fname = sys.argv[1]
    out_fname = sys.argv[2]

    sizes = {}
    # Open the sizes file in a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(sizes_fname) as sizes_file:
        for line in sizes_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            sizes[fields[0]] = int(fields[1])

    # Fill array from wiggle
    d = array_tree_dict_from_reader(WiggleReader(sys.stdin), sizes)

    for value in d.values():
        value.root.build_summary()

    with open(out_fname, "w") as f:
        FileArrayTreeDict.dict_to_file(d, f)
    def test_big(self):
        """Round-trip a very large ArrayTree through a file and check a summary.

        After writing and reopening the tree, the level-1 summary starting at
        14000000 must hold 1000 averages that are all equal to 200.
        """
        tree = ArrayTree(2147483647, 1000)  # What we use for tracks
        for i in range(5000):
            tree[i] = i

        # Insert extra copies to test frequency
        for i in range(3000):
            tree[i] = i

        tree.set_range(5000, 9001, 100)
        tree.set_range(14000000, 15000000, 200)
        tree.root.build_summary()

        d = {'test': tree}
        # The temp file is only needed inside this test, so scope it with a
        # context manager to close it even when an assertion fails.
        with tempfile.TemporaryFile() as f:
            FileArrayTreeDict.dict_to_file(d, f)
            f.seek(0)
            at = FileArrayTreeDict(f)['test']

            lvl1 = at.get_summary(14000000, 1)
            # Convert to plain floats while the file is still open, in case
            # the summary data is read lazily.
            avgs = [float(_) for _ in lvl1.sums/lvl1.counts]

        self.assertEqual(len(avgs), 1000)
        self.assertEqual(avgs, [200] * 1000)
 def test_big(self):
     """Round-trip a very large ArrayTree through a file and check a summary.

     After writing and reopening the tree, the level-1 summary starting at
     14000000 must hold 1000 averages that are all equal to 200.
     """
     tree = ArrayTree(2147483647, 1000) # What we use for tracks
     for i in range(5000):
         tree[i] = i

     # Insert extra copies to test frequency
     for i in range(3000):
         tree[i] = i

     tree.set_range(5000, 9001, 100)
     tree.set_range(14000000, 15000000, 200)
     tree.root.build_summary()

     d = {'test': tree}
     # The temp file is only needed inside this test, so scope it with a
     # context manager to close it even when an assertion fails.
     with tempfile.TemporaryFile() as f:
         FileArrayTreeDict.dict_to_file( d, f )
         f.seek(0)
         at = FileArrayTreeDict(f)['test']

         lvl1 = at.get_summary(14000000, 1)
         # Convert to plain floats while the file is still open, in case
         # the summary data is read lazily.
         avgs = [float(_) for _ in lvl1.sums/lvl1.counts]

     self.assertEqual( len(avgs), 1000 )
     self.assertEqual( avgs, [200] * 1000 )
Exemple #7
0
    def get_stats(self, chrom):
        """Return the overall minimum and maximum for one chromosome.

        Opens ``self.dataset.file_name`` as a FileArrayTreeDict and reads the
        summary at the tree's top level (``levels``) starting at position 0.

        Returns:
            A dict with float ``'max'`` and ``'min'`` entries, or the string
            ``"no data"`` when ``chrom`` is not a key in the file.
        """
        # The context manager closes the file on every path, including the
        # early "no data" return, which previously left the handle open.
        with open(self.dataset.file_name) as f:
            d = FileArrayTreeDict(f)
            try:
                chrom_array_tree = d[chrom]
            except KeyError:
                return "no data"

            root_summary = chrom_array_tree.get_summary(0, chrom_array_tree.levels)
            # Reduce to plain floats before the file is closed.
            return {
                'max': float(max(root_summary.maxs)),
                'min': float(min(root_summary.mins))
            }
Exemple #8
0
    def get_data(self, chrom, start, end, **kwargs):
        """Return (position, value) pairs for a region at a given resolution.

        Args:
            chrom: key looked up in the FileArrayTreeDict.
            start: first position of the region (converted with ``int``).
            end: end of the region, exclusive (converted with ``int``).
            **kwargs: must contain ``'resolution'``; it is converted to a
                float, rounded up, and clamped to at least 1.

        Returns:
            A list of ``(index, float_value)`` tuples, or ``None`` when
            ``chrom`` is not present in the file. At level 0 the values are
            the leaf contents; at higher levels they are ``sums / counts``
            taken from the summary for each block.

        Raises:
            KeyError: if ``'resolution'`` is missing from ``kwargs``.
            AssertionError: if the computed level exceeds the tree's levels.
        """
        # The context manager closes the file on every exit path: the
        # missing-chromosome return, a failed assertion, or any other error.
        with open(self.dataset.file_name) as f:
            d = FileArrayTreeDict(f)

            # Get the right chromosome
            try:
                chrom_array_tree = d[chrom]
            except KeyError:
                return None

            block_size = chrom_array_tree.block_size
            start = int(start)
            end = int(end)
            resolution = max(1, ceil(float(kwargs['resolution'])))

            # Level whose step (block_size ** level) does not exceed the
            # requested resolution, never below 0.
            level = int(floor(log(resolution, block_size)))
            level = max(level, 0)
            stepsize = block_size ** level
            # Number of positions covered by one block at this level.
            block_span = stepsize * block_size

            # Is the requested level valid?
            assert 0 <= level <= chrom_array_tree.levels

            results = []
            for block_start in range(start, end, block_span):
                # Return either data point or a summary depending on the level
                indexes = range(block_start, block_start + block_span, stepsize)
                if level > 0:
                    s = chrom_array_tree.get_summary(block_start, level)
                    if s is not None:
                        results.extend(zip(indexes, map(float, s.sums / s.counts)))
                else:
                    v = chrom_array_tree.get_leaf(block_start)
                    if v is not None:
                        results.extend(zip(indexes, map(float, v)))

            return results