def test_bigbed_tiles():
    """Exercise ``hgbb.tiles`` on the bundled bigBed fixture.

    Three groups of checks are run against tile ``x.0.0``:

    * the plain tile and its ``.significant`` variant must each contain
      ``len(chromsizes) * hgbb.MAX_ELEMENTS`` entries;
    * with a ``|max:N`` option, the length of every entry in the tile data
      must not exceed ``N * hgbb.MAX_ELEMENTS``;
    * with ``|min:A|max:B`` options, the length of the tile data must lie
      between ``A`` and ``B * hgbb.MAX_ELEMENTS`` (inclusive).
    """
    bb_path = op.join(
        "data",
        "masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb",
    )

    info = hgbb.tileset_info(bb_path)
    num_chroms = len(info["chromsizes"])

    # The unfiltered tile and the "significant" tile share the same
    # expectation, so both are checked by one loop.
    for tile_id in ("x.0.0", "x.0.0.significant"):
        fetched = hgbb.tiles(bb_path, [tile_id])
        tile_data = fetched[0][1]
        assert len(tile_data) == (num_chroms * hgbb.MAX_ELEMENTS), (
            "Number of chromosomes in bigBed file was {}\nExpected: {}\n".format(
                len(tile_data), (num_chroms * hgbb.MAX_ELEMENTS)
            )
        )

    # Upper bound only.
    for limit in (5, 10, 25, 100):
        fetched = hgbb.tiles(bb_path, ["x.0.0|max:{}".format(limit)])
        for entry in fetched[0][1]:
            assert len(entry) <= (limit * hgbb.MAX_ELEMENTS), (
                "Number of observed elements with max element threshold was {}\nExpected: {}\n".format(
                    len(entry), (limit * hgbb.MAX_ELEMENTS)
                )
            )

    # Lower and upper bound together.
    for lower, upper in ((1, 5), (5, 10)):
        fetched = hgbb.tiles(
            bb_path, ["x.0.0|min:{}|max:{}".format(lower, upper)]
        )
        ranged_data = fetched[0][1]
        observed = len(ranged_data)
        # NOTE(review): the failure message prints `upper` as the expected
        # maximum, while the condition allows up to
        # `upper * hgbb.MAX_ELEMENTS` -- confirm which one is intended.
        assert lower <= observed <= upper * hgbb.MAX_ELEMENTS, (
            "Number of observed elements with min/max threshold was {}\nExpected: {} to {}\nAll data length: {}\nAll data: {}\n".format(
                observed, lower, upper, observed, ranged_data
            )
        )
def generate_tiles(tileset_tile_ids):
    """
    Generate the tiles for the given tile ids.

    All of the tile ids must belong to the same tileset. The tileset's
    ``filetype`` selects which tile generator is called.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A four-tuple ``(tileset, tile_ids, raw, tileset_options)``:

        tileset: tilesets.models.Tileset object
            The tileset that the tiles should be retrieved from
        tile_ids: [str,...]
            A list of tile ids (e.g. xyx.0.0.1) identifying the tiles
            to be retrieved
        raw: str or False
            The value of the GET request parameter `raw`. Only forwarded
            for the 'imtiles' filetype.
        tileset_options: dict or None
            An optional dict containing tileset options, including
            aggregation options. Only forwarded for the 'multivec'
            filetype.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of (tile_id, tile_data) tuples. When the filetype is not
        handled here and is not a key of ``hgti.by_filetype``, every tile
        id is paired with a dict holding an ``'error'`` message instead.
    """
    tileset, tile_ids, raw, tileset_options = tileset_tile_ids
    filetype = tileset.filetype

    if filetype == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if filetype == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if filetype in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if filetype == 'geodb':
        # Same generator as bed2ddb, but with the geo tile getter.
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if filetype == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if filetype == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if filetype == 'bigwig':
        # Resolve the chromsizes first, then read the data file path.
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes
        )

    if filetype == 'bigbed':
        # Resolve the chromsizes first, then read the data file path.
        chromsizes = get_chromsizes(tileset)
        return hgbb.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes
        )

    if filetype == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path,
            tile_ids,
            ctmu.get_single_tile,
            tileset_options,
        )

    if filetype == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    if filetype == 'bam':
        return ctb.tiles(
            tileset.datafile.path,
            tile_ids,
            index_filename=tileset.indexfile.path,
            max_tile_width=hss.MAX_BAM_TILE_WIDTH,
        )

    # Not one of the built-in filetypes: read the data file path, then look
    # for a handler in hgti.by_filetype.
    data_path = tileset.datafile.path
    if filetype in hgti.by_filetype:
        return hgti.by_filetype[filetype](data_path).tiles(tile_ids)

    # No handler at all: report the problem once per requested tile, with a
    # separate dict for each tile id.
    message = 'Unknown tileset filetype: {}'.format(filetype)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]