def test_bedfile_to_multivec():
    """Convert a multi-value BED file to multivec and check the output tiles.

    Runs the ``bedfile_to_multivec`` command on the sample BED file and then
    reads the generated file back through ``ctv.tileset_info`` and
    ``ctv.get_single_tile``.
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "sample.multival.bed")
    chromsizes_file = op.join(
        testdir, "sample_data", "sample.multival.chrom.sizes"
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        out_file = op.join(tmp_dir, "out.multivec")

        # All arguments are strings, exactly as they would arrive from a
        # real command line.
        result = runner.invoke(
            ccc.bedfile_to_multivec,
            [
                input_file,
                "--output-file",
                out_file,
                "--assembly",
                "hg38",
                "--num-rows",
                "3",
                "--chromsizes-filename",
                chromsizes_file,
                "--starting-resolution",
                "1000",
            ],
        )
        # Fail right here, with the command's own output, instead of letting
        # a failed conversion surface later as a confusing tile error.
        assert result.exit_code == 0, result.output

        tsinfo = ctv.tileset_info(out_file)
        assert "resolutions" in tsinfo
        assert tsinfo["max_pos"][0] == 18000

        tile = ctv.get_single_tile(out_file, (0, 0))

        # input_file:
        # chr1    0       1000    1.0     2.0     3.0
        # chr1    1000    2000
        # chr2    5000    6000    20.0    30.0    40.0
        #
        # input chromsizes
        # chr1    10000
        # chr2    8000

        # One row per value column.
        assert len(tile) == 3

        # first row, first chrom, first value
        assert tile[0][0] == 1.0
        # Index 15 is where the chr2 entry is expected: chr2 follows the
        # 10000 bp of chr1 and the entry starts at 5000, in 1000 bp bins.
        assert tile[0][15] == 20.0

        assert tile[1][0] == 2.0

        assert tile[2][0] == 3.0
        assert tile[2][15] == 40.0
def test_multivec():
    """Check hgmu's tileset_info, get_single_tile and tiles on a sample mv5.

    Expected values are read directly from the HDF5 file with h5py and
    compared against what the hgmu functions report.
    """
    filename = op.join("test/sample_data", "sample_gwas.multires.mv5")

    # Collect the reference values straight from the file.
    with h5py.File(filename, "r") as mv5:
        expected_tile_size = mv5["info"].attrs["tile-size"]
        resolution_names = list(mv5["resolutions"].keys())
        first_resolution = resolution_names[0]
        chrom_names = mv5[f"resolutions/{first_resolution}/chroms/name"][:]
        first_chrom_values = mv5[f"resolutions/{first_resolution}/values"][
            chrom_names[0]
        ]
        expected_num_rows = first_chrom_values.shape[1]
        expected_length = sum(mv5["chroms/length"])

    # info
    info = hgmu.tileset_info(filename)
    assert info["shape"] == [expected_tile_size, expected_num_rows]
    assert info["tile_size"] == expected_tile_size
    assert info["max_pos"] == expected_length
    assert set(info["resolutions"]) == {int(name) for name in resolution_names}

    # get_single_tile
    first_tile = hgmu.get_single_tile(filename, [0, 0])
    assert list(test_shape for test_shape in first_tile.shape)[::-1] == info["shape"]

    # A zoom level one past the last available resolution must be rejected.
    with pytest.raises(IndexError):
        hgmu.get_single_tile(filename, [len(resolution_names), 0])

    # tiles
    requested_ids = []
    for level in range(len(resolution_names)):
        requested_ids.append(f"test_uuid.{level}.0.1231.123")

    for tile_id, tile_value in hgmu.tiles(filename, requested_ids):
        # The second and third dot-separated fields of the id are used as
        # the tile position.
        tile_pos = list(map(int, tile_id.split(".")[1:3]))
        expected_tile = hgmu.get_single_tile(filename, tile_pos).astype(
            tile_value["dtype"]
        )
        encoded = base64.b64encode(expected_tile.ravel()).decode("utf-8")
        assert encoded == tile_value["dense"]
def tileset_info(request):
    """Get information about a tileset

    Tilesets have information critical to their display
    such as the maximum number of dimensions as well as
    their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Problems with an individual tileset (unknown uid, access denied,
    unknown filetype) are reported as an ``"error"`` entry for that
    tileset rather than failing the whole request.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing
            the tileset meta-information, keyed by tileset uid
    """
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    # NOTE(review): neither chromsizes_error nor the chromsizes loaded below
    # are used anywhere further down in this function; confirm whether this
    # block is unfinished work or can be removed.
    chromsizes_error = None

    if "cs" in request.GET:
        # we need to call a different server to get the tiles
        # (membership is tested on the QueryDict itself; testing it on the
        # bound ``getlist`` method raises TypeError)
        if "ci" not in request.GET:
            chromsizes_error = "cs param present without ci"

        # call the request server and get the chromsizes
    elif "ci" in request.GET:
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET["ci"])
            data = tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path)
            )
        except Exception:
            # Best effort: a missing or unreadable chromsizes tileset must
            # not prevent the tileset infos from being returned.
            pass

    for tileset_uuid in tileset_uuids:
        if tileset_uuid == "osm-image":
            # Fixed description that does not depend on a database record,
            # so it is handled before the lookup.
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if (
            tileset_object.requiresAuthentication
            and not request.user.is_authenticated
        ):
            # dataset is only available to logged-in users
            tileset_infos[tileset_uuid] = {
                "error": "This request required authentication"
            }
            continue

        if tileset_object.requiresAuthentication and (
            ("accessibleTilesets" not in request.session)
            or (tileset_uuid not in request.session["accessibleTilesets"])
        ):
            # dataset is not accessible for this user
            tileset_infos[tileset_uuid] = {
                "error": "You don't have access to this tileset"
            }
            continue

        datafile_path = None
        if tileset_object.filetype != "elastic_search":
            # Every branch except elastic_search reads from the local file.
            datafile_path = tileset_object.datafile.path

        if tileset_object.filetype in ("hitile", "hibed"):
            # Build the response entry while the file is open and close the
            # HDF5 handle afterwards instead of leaking it.
            with h5py.File(datafile_path, "r") as hdf_file:
                hitile_info = hdft.get_tileset_info(hdf_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(hitile_info["min_pos"])],
                    "max_pos": [int(hitile_info["max_pos"])],
                    # smallest power of two covering the data extent
                    "max_width": 2
                    ** math.ceil(
                        math.log(
                            hitile_info["max_pos"] - hitile_info["min_pos"]
                        )
                        / math.log(2)
                    ),
                    "tile_size": int(hitile_info["tile_size"]),
                    "max_zoom": int(hitile_info["max_zoom"]),
                }
        elif tileset_object.filetype in ("bigwig", "bigbed"):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(datafile_path, chromsizes)
            if "chromsizes" in tsinfo:
                # Convert the sizes to plain ints before they are put into
                # the JSON response.
                tsinfo["chromsizes"] = [
                    (c, int(s)) for c, s in tsinfo["chromsizes"]
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif tileset_object.filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(datafile_path)
        elif tileset_object.filetype == "elastic_search":
            # NOTE(review): ``urllib.urlopen`` only exists on Python 2 (on
            # Python 3 it is ``urllib.request.urlopen``); confirm whether
            # this branch is still in use before porting it.
            response = urllib.urlopen(tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif tileset_object.filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(datafile_path)
        elif tileset_object.filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(datafile_path)
        elif tileset_object.filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(datafile_path)
        elif tileset_object.filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(datafile_path)
        elif tileset_object.filetype in ("2dannodb", "imtiles"):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(datafile_path)
        elif tileset_object.filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(datafile_path)
        elif tileset_object.filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(datafile_path)
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + tileset_object.filetype
            }

        # Database-level metadata is attached to every entry that reaches
        # this point, including the unknown-filetype error entry.
        tileset_infos[tileset_uuid]["name"] = tileset_object.name
        tileset_infos[tileset_uuid]["datatype"] = tileset_object.datatype
        tileset_infos[tileset_uuid]["coordSystem"] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def tileset_info(request):
    """Get information about a tileset

    Tilesets have information critical to their display
    such as the maximum number of dimensions as well as
    their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Problems with an individual tileset (unknown uid, private tileset,
    unknown filetype) are reported as an ``'error'`` entry for that
    tileset rather than failing the whole request.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing
            the tileset meta-information, keyed by tileset uid
    """
    queryset = tm.Tileset.objects.all()
    tileset_uuids = request.GET.getlist("d")
    tileset_infos = {}

    # NOTE(review): neither chromsizes_error nor the chromsizes loaded below
    # are used anywhere further down in this function; confirm whether this
    # block is unfinished work or can be removed.
    chromsizes_error = None

    if 'cs' in request.GET:
        # we need to call a different server to get the tiles
        # (membership is tested on the QueryDict itself; testing it on the
        # bound ``getlist`` method raises TypeError)
        if 'ci' not in request.GET:
            chromsizes_error = 'cs param present without ci'

        # call the request server and get the chromsizes
    elif 'ci' in request.GET:
        try:
            chromsizes = tm.Tileset.objects.get(uuid=request.GET['ci'])
            data = tcs.chromsizes_array_to_series(
                tcs.get_tsv_chromsizes(chromsizes.datafile.path))
        except Exception:
            # Best effort: a missing or unreadable chromsizes tileset must
            # not prevent the tileset infos from being returned.
            pass

    for tileset_uuid in tileset_uuids:
        if tileset_uuid == 'osm-image':
            # Fixed description that does not depend on a database record,
            # so it is handled before the lookup.
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        filetype = tileset_object.filetype

        if filetype in ('hitile', 'hibed'):
            # Build the response entry while the file is open and close the
            # HDF5 handle afterwards instead of leaking it.
            with h5py.File(tileset_object.datafile.path, 'r') as hdf_file:
                hitile_info = hdft.get_tileset_info(hdf_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(hitile_info['min_pos'])],
                    "max_pos": [int(hitile_info['max_pos'])],
                    # smallest power of two covering the data extent
                    "max_width": 2 ** math.ceil(
                        math.log(
                            hitile_info['max_pos'] - hitile_info['min_pos']
                        ) / math.log(2)
                    ),
                    "tile_size": int(hitile_info['tile_size']),
                    "max_zoom": int(hitile_info['max_zoom'])
                }
        elif filetype in ('bigwig', 'bigbed'):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path,
                chromsizes
            )
            if 'chromsizes' in tsinfo:
                # Convert the sizes to plain ints before they are put into
                # the JSON response.
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path)
        elif filetype == "elastic_search":
            # NOTE(review): ``urllib.urlopen`` only exists on Python 2 (on
            # Python 3 it is ``urllib.request.urlopen``); confirm whether
            # this branch is still in use before porting it.
            response = urllib.urlopen(
                tileset_object.datafile + "/tileset_info")
            tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ('2dannodb', 'imtiles'):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = \
                hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + tileset_object.filetype
            }

        # Database-level metadata is attached to every entry that reaches
        # this point, including the unknown-filetype error entry.
        tileset_infos[tileset_uuid]['name'] = tileset_object.name
        tileset_infos[tileset_uuid]['datatype'] = tileset_object.datatype
        tileset_infos[tileset_uuid]['coordSystem'] = tileset_object.coordSystem
        tileset_infos[tileset_uuid]['coordSystem2'] = \
            tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
def test_multivec():
    # Calls hgmu.tileset_info on a multires mv5 file under 'data'. Within the
    # visible lines the returned value is only bound to ``tsinfo``; nothing
    # is asserted about it here.
    filename = op.join('data', 'all.KL.bed.multires.mv5')
    tsinfo = hgmu.tileset_info(filename)