Exemple #1
0
def test_bedfile_to_multivec():
    """Convert a multi-value BED file to multivec and check the result.

    Runs the ``bedfile_to_multivec`` CLI command on the sample data into a
    temporary directory, then reads the output back through
    ``ctv.tileset_info`` and ``ctv.get_single_tile`` and compares it with
    the values in the input file.
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "sample.multival.bed")
    chromsizes_file = op.join(testdir, "sample_data",
                              "sample.multival.chrom.sizes")

    with tempfile.TemporaryDirectory() as tmp_dir:
        out_file = op.join(tmp_dir, "out.multivec")

        result = runner.invoke(
            ccc.bedfile_to_multivec,
            [
                input_file,
                "--output-file",
                out_file,
                "--assembly",
                "hg38",
                "--num-rows",
                3,
                "--chromsizes-filename",
                chromsizes_file,
                "--starting-resolution",
                "1000",
            ],
        )

        # Fail right here, with the command's output and exception attached,
        # instead of letting a failed conversion surface later as an
        # unrelated error while reading the (missing) output file.
        assert result.exit_code == 0, (result.output, result.exception)

        tsinfo = ctv.tileset_info(out_file)

        assert "resolutions" in tsinfo
        # 18000 is the sum of the two chromosome sizes listed below.
        assert tsinfo["max_pos"][0] == 18000
        tile = ctv.get_single_tile(out_file, (0, 0))

        # input_file:
        # chr1    0   1000    1.0 2.0 3.0
        # chr1    1000    2000
        # chr2    5000    6000    20.0    30.0    40.0
        #
        # # input chromsizes
        # chr1  10000
        # chr2    8000

        # One row per value column of the input (--num-rows 3).
        assert len(tile) == 3

        # Index 0 is the first chr1 interval. With 1000 bp bins chr1 fills
        # indices 0-9, so the chr2 interval 5000-6000 lands at 10 + 5 = 15.
        assert tile[0][0] == 1.0
        assert tile[0][15] == 20.0

        assert tile[1][0] == 2.0
        assert tile[2][0] == 3.0
        assert tile[2][15] == 40.0
Exemple #2
0
def test_multivec():
    """Check ``hgmu`` tileset info and tile retrieval for a multivec file.

    Expected values are read directly from the HDF5 file with ``h5py`` and
    compared with what ``hgmu.tileset_info``, ``hgmu.get_single_tile`` and
    ``hgmu.tiles`` report for the same file.
    """
    filename = op.join("test/sample_data", "sample_gwas.multires.mv5")

    # Collect the reference values straight from the file.
    with h5py.File(filename, "r") as h5:
        tile_size = h5["info"].attrs["tile-size"]
        resolutions = list(h5["resolutions"].keys())
        first_reso = resolutions[0]
        chroms = h5[f"resolutions/{first_reso}/chroms/name"][:]
        values_group = h5[f"resolutions/{first_reso}/values"]
        num_rows = values_group[chroms[0]].shape[1]
        total_length = sum(h5["chroms/length"])

    # tileset_info must agree with the raw file contents.
    info = hgmu.tileset_info(filename)
    assert info["shape"] == [tile_size, num_rows]
    assert info["tile_size"] == tile_size
    assert info["max_pos"] == total_length
    assert set(info["resolutions"]) == {int(r) for r in resolutions}

    # get_single_tile: the tile's shape is the reverse of info["shape"],
    # and a zoom level one past the last resolution is rejected.
    test_tile = hgmu.get_single_tile(filename, [0, 0])
    assert list(reversed(test_tile.shape)) == info["shape"]
    with pytest.raises(IndexError):
        hgmu.get_single_tile(filename, [len(resolutions), 0])

    # tiles: request position 0 at every zoom level and check that each
    # encoded payload matches the corresponding single tile.
    tids = [
        f"test_uuid.{level}.0.1231.123" for level, _ in enumerate(resolutions)
    ]
    for tile_id, tile_value in hgmu.tiles(filename, tids):
        # Fields 1 and 2 of the dotted id are the tile position.
        tile_pos = list(map(int, tile_id.split(".")[1:3]))
        expected = hgmu.get_single_tile(filename, tile_pos)
        expected = expected.astype(tile_value["dtype"])
        encoded = base64.b64encode(expected.ravel()).decode("utf-8")
        assert encoded == tile_value["dense"]
def tileset_info(request):
    """Get information about a tileset

    Tilesets have information critical to their display
    such as the maximum number of dimensions as well as
    their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Every uuid passed in the 'd' parameter gets one entry in the
    response. An entry is either the tileset's meta-information or a
    dict with a single "error" key when the tileset does not exist or
    the requesting user may not access it. Tilesets with an unknown
    filetype get an "error" entry that still carries the name, datatype
    and coordinate-system fields.

    The 'cs' and 'ci' query parameters are accepted but ignored: the
    chromsizes they refer to never influenced the response.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing
            the tileset meta-information
    """
    # Imported locally: ``urllib.urlopen`` does not exist on Python 3.
    from urllib.request import urlopen

    queryset = tm.Tileset.objects.all()
    tileset_infos = {}

    for tileset_uuid in request.GET.getlist("d"):
        # Served from fixed values; checked first so that no database
        # lookup is made for it.
        if tileset_uuid == "osm-image":
            tileset_infos[tileset_uuid] = {
                "min_x": 0,
                "max_height": 134217728,
                "min_y": 0,
                "max_y": 134217728,
                "max_zoom": 19,
                "tile_size": 256,
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                "error": "No such tileset with uid: {}".format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {"error": "Forbidden"}
            continue

        if tileset_object.requiresAuthentication:
            if not request.user.is_authenticated:
                tileset_infos[tileset_uuid] = {
                    "error": "This request required authentication"
                }
                continue

            # A missing "accessibleTilesets" session entry means no access.
            if tileset_uuid not in request.session.get("accessibleTilesets", ()):
                # dataset is not accessible for this user
                tileset_infos[tileset_uuid] = {
                    "error": "You don't have access to this tileset"
                }
                continue

        filetype = tileset_object.filetype

        if filetype in ("hitile", "hibed"):
            # Build the entry while the file is still open, then close it;
            # previously the handle was never closed.
            with h5py.File(tileset_object.datafile.path, "r") as h5_file:
                tileset_info = hdft.get_tileset_info(h5_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(tileset_info["min_pos"])],
                    "max_pos": [int(tileset_info["max_pos"])],
                    # Smallest power of two covering the data extent.
                    "max_width": 2
                    ** math.ceil(
                        math.log(tileset_info["max_pos"] - tileset_info["min_pos"])
                        / math.log(2)
                    ),
                    "tile_size": int(tileset_info["tile_size"]),
                    "max_zoom": int(tileset_info["max_zoom"]),
                }
        elif filetype in ("bigwig", "bigbed"):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(tileset_object.datafile.path, chromsizes)
            if "chromsizes" in tsinfo:
                # Coerce sizes to plain ints (presumably so that they are
                # JSON serialisable -- confirm against hgbi).
                tsinfo["chromsizes"] = [(c, int(s)) for c, s in tsinfo["chromsizes"]]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == "multivec":
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "elastic_search":
            # The info is fetched from a remote service whose base URL is
            # built from the datafile value.
            with urlopen(tileset_object.datafile + "/tileset_info") as response:
                tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == "beddb":
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bed2ddb":
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "cooler":
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "time-interval-json":
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ("2dannodb", "imtiles"):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "geodb":
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "bam":
            tileset_infos[tileset_uuid] = ctb.tileset_info(tileset_object.datafile.path)
            tileset_infos[tileset_uuid]["max_tile_width"] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                "error": "Unknown filetype " + tileset_object.filetype
            }

        # Fields shared by every tileset that passed the access checks.
        entry = tileset_infos[tileset_uuid]
        entry["name"] = tileset_object.name
        entry["datatype"] = tileset_object.datatype
        entry["coordSystem"] = tileset_object.coordSystem
        entry["coordSystem2"] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
Exemple #4
0
def tileset_info(request):
    ''' Get information about a tileset

    Tilesets have information critical to their display
    such as the maximum number of dimensions as well as
    their width. This needs to be relayed to the client
    in order for it to know which tiles to request.

    Every uuid passed in the 'd' parameter gets one entry in the
    response. An entry is either the tileset's meta-information or a
    dict with a single 'error' key when the tileset does not exist or
    is private to another user. Tilesets with an unknown filetype get
    an 'error' entry that still carries the name, datatype and
    coordinate-system fields.

    The 'cs' and 'ci' query parameters are accepted but ignored: the
    chromsizes they refer to never influenced the response.

    Args:
        request (django.http.HTTPRequest): The request object
            containing tileset_ids in the 'd' parameter.
    Return:
        django.http.JsonResponse: A JSON object containing
            the tileset meta-information
    '''
    # Imported locally: ``urllib.urlopen`` does not exist on Python 3.
    from urllib.request import urlopen

    queryset = tm.Tileset.objects.all()
    tileset_infos = {}

    for tileset_uuid in request.GET.getlist("d"):
        # Served from fixed values; checked first so that no database
        # lookup is made for it.
        if tileset_uuid == 'osm-image':
            tileset_infos[tileset_uuid] = {
                'min_x': 0,
                'max_height': 134217728,
                'min_y': 0,
                'max_y': 134217728,
                'max_zoom': 19,
                'tile_size': 256
            }
            continue

        tileset_object = queryset.filter(uuid=tileset_uuid).first()

        if tileset_object is None:
            tileset_infos[tileset_uuid] = {
                'error': 'No such tileset with uid: {}'.format(tileset_uuid)
            }
            continue

        if tileset_object.private and request.user != tileset_object.owner:
            # dataset is not public
            tileset_infos[tileset_uuid] = {'error': "Forbidden"}
            continue

        filetype = tileset_object.filetype

        if filetype in ('hitile', 'hibed'):
            # Build the entry while the file is still open, then close it;
            # previously the handle was never closed.
            with h5py.File(tileset_object.datafile.path, 'r') as h5_file:
                tileset_info = hdft.get_tileset_info(h5_file)
                tileset_infos[tileset_uuid] = {
                    "min_pos": [int(tileset_info['min_pos'])],
                    "max_pos": [int(tileset_info['max_pos'])],
                    # Smallest power of two covering the data extent.
                    "max_width": 2 ** math.ceil(
                        math.log(
                            tileset_info['max_pos'] - tileset_info['min_pos']
                        ) / math.log(2)
                    ),
                    "tile_size": int(tileset_info['tile_size']),
                    "max_zoom": int(tileset_info['max_zoom'])
                }
        elif filetype in ('bigwig', 'bigbed'):
            chromsizes = tgt.get_chromsizes(tileset_object)
            tsinfo = hgbi.tileset_info(
                tileset_object.datafile.path,
                chromsizes
            )
            if 'chromsizes' in tsinfo:
                # Coerce sizes to plain ints (presumably so that they are
                # JSON serialisable -- confirm against hgbi).
                tsinfo['chromsizes'] = [
                    (c, int(s)) for c, s in tsinfo['chromsizes']
                ]
            tileset_infos[tileset_uuid] = tsinfo
        elif filetype == 'multivec':
            tileset_infos[tileset_uuid] = hgmu.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == "elastic_search":
            # The info is fetched from a remote service whose base URL is
            # built from the datafile value.
            with urlopen(
                tileset_object.datafile + "/tileset_info"
            ) as response:
                tileset_infos[tileset_uuid] = json.loads(response.read())
        elif filetype == 'beddb':
            tileset_infos[tileset_uuid] = cdt.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bed2ddb':
            tileset_infos[tileset_uuid] = cdt.get_2d_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'cooler':
            tileset_infos[tileset_uuid] = hgco.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'time-interval-json':
            tileset_infos[tileset_uuid] = hgti.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype in ('2dannodb', 'imtiles'):
            tileset_infos[tileset_uuid] = hgim.get_tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'geodb':
            tileset_infos[tileset_uuid] = hggo.tileset_info(
                tileset_object.datafile.path
            )
        elif filetype == 'bam':
            tileset_infos[tileset_uuid] = ctb.tileset_info(
                tileset_object.datafile.path
            )
            tileset_infos[tileset_uuid]['max_tile_width'] = hss.MAX_BAM_TILE_WIDTH
        else:
            # Unknown filetype
            tileset_infos[tileset_uuid] = {
                'error': 'Unknown filetype ' + tileset_object.filetype
            }

        # Fields shared by every tileset that passed the access check.
        entry = tileset_infos[tileset_uuid]
        entry['name'] = tileset_object.name
        entry['datatype'] = tileset_object.datatype
        entry['coordSystem'] = tileset_object.coordSystem
        entry['coordSystem2'] = tileset_object.coordSystem2

    return JsonResponse(tileset_infos)
Exemple #5
0
def test_multivec():
    filename = op.join('data', 'all.KL.bed.multires.mv5')

    tsinfo = hgmu.tileset_info(filename)