Esempio n. 1
0
def test_bigbed_tiles():
    """Check the sizes of bigBed tiles returned by ``hgbb.tiles``.

    Four kinds of request are made against the same bigBed file
    (looked up relative to the current working directory under ``data/``):

    * the plain tile ``x.0.0`` and the ``x.0.0.significant`` tile, whose
      data length must equal ``num_chroms * hgbb.MAX_ELEMENTS``;
    * ``x.0.0|max:N`` tiles, where every entry of the tile data must have
      a length of at most ``N * hgbb.MAX_ELEMENTS``;
    * ``x.0.0|min:A|max:B`` tiles, whose data length must lie between
      ``A`` and ``B * hgbb.MAX_ELEMENTS`` inclusive.
    """
    filename = op.join(
        "data",
        "masterlist_DHSs_733samples_WM20180608_all_mean_signal_colorsMax.bed.bb"
    )

    tileset_info = hgbb.tileset_info(filename)
    num_chroms = len(tileset_info["chromsizes"])
    expected_count = num_chroms * hgbb.MAX_ELEMENTS

    # The plain and the "significant" tile are held to the same size check.
    for tile_id in ("x.0.0", "x.0.0.significant"):
        # hgbb.tiles returns a list of (tile_id, tile_data) pairs; one tile
        # was requested, so take the data of the first pair.
        tile_data = hgbb.tiles(filename, [tile_id])[0][1]
        assert expected_count == len(tile_data), (
            "Number of chromosomes in bigBed file was {}\nExpected: {}\n".format(
                len(tile_data), expected_count))

    for max_elems in (5, 10, 25, 100):
        limit = max_elems * hgbb.MAX_ELEMENTS
        tile_data = hgbb.tiles(
            filename, ["x.0.0|max:{}".format(max_elems)])[0][1]
        # Unlike the other checks, this one bounds the length of each
        # individual entry rather than the number of entries.
        for entry in tile_data:
            assert len(entry) <= limit, (
                "Number of observed elements with max element threshold was {}\nExpected: {}\n".format(
                    len(entry), limit))

    for min_element, max_element in ((1, 5), (5, 10)):
        upper_bound = max_element * hgbb.MAX_ELEMENTS
        tile_data = hgbb.tiles(
            filename,
            ["x.0.0|min:{}|max:{}".format(min_element, max_element)])[0][1]
        observed = len(tile_data)
        # Report the bound that is actually enforced (max_element scaled by
        # MAX_ELEMENTS), so a failure message matches the failed condition.
        assert min_element <= observed <= upper_bound, (
            "Number of observed elements with min/max threshold was {}\nExpected: {} to {}\nAll data length: {}\nAll data: {}\n".format(
                observed, min_element, upper_bound, observed, tile_data))
def generate_tiles(tileset_tile_ids):
    '''
    Generate the tiles for the given tile_ids of one tileset.

    All of the tile_ids must come from the same tileset. The tileset's
    filetype selects which handler produces the tiles.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A four-tuple ``(tileset, tile_ids, raw, tileset_options)``:

        tileset: tilesets.models.Tileset object
            The tileset that the tile ids should be retrieved from
        tile_ids: [str,...]
            A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
            to be retrieved
        raw: str or False
            The value of the GET request parameter `raw`. Only forwarded
            for the 'imtiles' filetype.
        tileset_options: dict or None
            An optional dict containing tileset options, including
            aggregation options. Only forwarded for the 'multivec'
            filetype.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples. For a filetype that has no
        handler, every tile_id is paired with a dict holding an 'error'
        message instead of tile data.
    '''
    tileset, tile_ids, raw, tileset_options = tileset_tile_ids
    kind = tileset.filetype

    if kind == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if kind == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if kind in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if kind == 'geodb':
        # Same generator as bed2ddb, but with the geo tile getter.
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if kind == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if kind == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if kind in ('bigwig', 'bigbed'):
        # Both formats take the same arguments; only the reader differs.
        # Chromsizes are resolved before the data file path is touched.
        chromsizes = get_chromsizes(tileset)
        reader = hgbi if kind == 'bigwig' else hgbb
        return reader.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if kind == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path,
            tile_ids,
            ctmu.get_single_tile,
            tileset_options,
        )

    if kind == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    if kind == 'bam':
        return ctb.tiles(
            tileset.datafile.path,
            tile_ids,
            index_filename=tileset.indexfile.path,
            max_tile_width=hss.MAX_BAM_TILE_WIDTH,
        )

    # No built-in handler: try the filetype registry, else report an error
    # for each requested tile.
    datafile_path = tileset.datafile.path
    if kind in hgti.by_filetype:
        return hgti.by_filetype[kind](datafile_path).tiles(tile_ids)

    message = 'Unknown tileset filetype: {}'.format(kind)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]