Ejemplo n.º 1
0
def test_bigwig_tiles():
    """Check that the bigWig aggregation modes agree with each other.

    Tile ``x.0.0`` of the sample bigWig file is requested once per
    aggregation suffix, and the results are compared:

    * the default tile and the explicit ``mean`` tile report the same
      ``max_value``;
    * the ``max_value`` entries satisfy min < mean < max, and the max also
      exceeds mean + std;
    * the ``minMax`` payload interleaves the min and max arrays;
    * the ``whisker`` payload interleaves min, max, mean and std.
    """
    bigwig_path = op.join(
        'data',
        'wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig',
    )

    def fetch(tile_id):
        # A single tile id is requested per call; keep the payload stored
        # at index 1 of the first returned entry.
        return hgbi.tiles(bigwig_path, [tile_id])[0][1]

    def decode(payload):
        # 'dense' is base64 text; the payload names the dtype used to
        # reinterpret the decoded bytes.
        raw_bytes = base64.b64decode(payload['dense'])
        return np.frombuffer(raw_bytes, dtype=payload['dtype'])

    default_payload = fetch('x.0.0')
    mean_payload = fetch('x.0.0.mean')
    min_payload = fetch('x.0.0.min')
    max_payload = fetch('x.0.0.max')
    std_payload = fetch('x.0.0.std')

    default_peak = default_payload['max_value']
    highest_peak = max_payload['max_value']

    assert default_peak == mean_payload['max_value']
    assert default_peak > min_payload['max_value']
    assert highest_peak > default_peak
    assert highest_peak > default_peak + std_payload['max_value']

    min_max_payload = fetch('x.0.0.minMax')
    whisker_payload = fetch('x.0.0.whisker')

    mean_arr = decode(default_payload)
    min_arr = decode(min_payload)
    max_arr = decode(max_payload)
    std_arr = decode(std_payload)
    min_max_arr = decode(min_max_payload)
    whisker_arr = decode(whisker_payload)

    # minMax: two values per bin, laid out as (min, max).
    assert min_max_arr.shape[0] == 2 * mean_arr.shape[0]
    for offset, expected in enumerate((min_arr, max_arr)):
        assert arr_eq_nan(min_max_arr[offset::2], expected)

    # whisker: four values per bin, laid out as (min, max, mean, std).
    assert whisker_arr.shape[0] == 4 * mean_arr.shape[0]
    for offset, expected in enumerate((min_arr, max_arr, mean_arr, std_arr)):
        assert arr_eq_nan(whisker_arr[offset::4], expected)
Ejemplo n.º 2
0
    def tiles(cls, tileset_dict, tids):
        """Fetch tiles from the backend that matches the tileset's filetype.

        Parameters
        ----------
        tileset_dict: mapping
            Tileset description. ``filetype`` is read with ``.get`` and
            defaults to an empty string; ``datafile`` is required for every
            recognised filetype, and ``index_filename`` additionally for
            ``bam``.
        tids: list
            Tile ids, handed unchanged to the selected backend.

        Returns
        -------
        The value returned by the selected backend's ``tiles`` function, or
        a dict with a single ``error`` key when the filetype is not one of
        ``bam``, ``cooler`` or ``bigwig``.
        """
        filetype = tileset_dict.get('filetype', "")
        if filetype == "bam":
            # Use key access like the other branches: attribute access
            # (tileset_dict.datafile) raises AttributeError on a plain dict.
            data = bam_tiles.tiles(
                tileset_dict['datafile'], tids,
                index_filename=tileset_dict['index_filename']
            )
        elif filetype == "cooler":
            data = cooler_tiles.tiles(tileset_dict['datafile'], tids)
        elif filetype == "bigwig":
            data = bigwig_tiles.tiles(tileset_dict['datafile'], tids)
        else:
            data = {'error': f"Unknown tileset filetype: {filetype}"}

        return data
def generate_tiles(tileset_tile_ids):
    '''
    Produce the requested tiles for a single tileset.

    The tileset's ``filetype`` decides which handler receives the request.
    All tile ids are expected to belong to the same tileset.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A three-tuple ``(tileset, tile_ids, raw)`` where

        tileset
            is the object whose ``filetype`` selects the handler; most
            handlers are given ``tileset.datafile.path``, a few receive
            the tileset itself;
        tile_ids: [str,...]
            lists the ids of the tiles to retrieve (e.g. xyx.0.0.1);
        raw
            is forwarded to the ``imtiles`` handler only.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        The selected handler's result. For an unrecognised filetype, one
        ``(tile_id, {'error': ...})`` pair per requested tile id.
    '''
    tileset, tile_ids, raw = tileset_tile_ids
    kind = tileset.filetype

    if kind == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if kind == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if kind in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if kind == 'geodb':
        # Same generator as bed2ddb, but with the geo tile getter.
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if kind == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if kind == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if kind == 'bigwig':
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if kind == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path, tile_ids, ctmu.get_single_tile)

    if kind == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    # No handler matched: report the problem once per requested tile.
    message = 'Unknown tileset filetype: {}'.format(tileset.filetype)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]
Ejemplo n.º 4
0
def test_bigwig_tiles():
    """Smoke test: fetching tile ``x.0.0`` from the sample bigWig must not raise."""
    sample_path = op.join(
        'data',
        'wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig',
    )

    # The result is deliberately discarded; only successful completion of
    # the call is being checked.
    hgbi.tiles(sample_path, ['x.0.0'])
Ejemplo n.º 5
0
def test_bigwig_tiles():
    """Cross-check the bigWig aggregation modes against one another.

    Tile ``x.0.0`` of the sample bigWig file is fetched once for each
    aggregation suffix. The test then verifies that:

    * the default tile and the explicit ``mean`` tile report the same
      ``max_value``;
    * the ``max_value`` entries satisfy min < mean < max, and the max also
      exceeds mean + std;
    * the ``minMax`` payload interleaves the min and max arrays;
    * the ``whisker`` payload interleaves min, max, mean and std.

    Array comparisons use ``np.allclose(..., equal_nan=True)`` so NaNs in
    matching positions compare equal.
    """
    filename = op.join(
        "data",
        "wgEncodeCaltechRnaSeqHuvecR1x75dTh1014IlnaPlusSignalRep2.bigWig")

    def decode(tile):
        # Each result is indexed as [0][1] to reach the payload of the single
        # requested tile; its "dense" field is base64 text that is
        # reinterpreted with the dtype named by the payload.
        payload = tile[0][1]
        return np.frombuffer(base64.b64decode(payload["dense"]),
                             dtype=payload["dtype"])

    mean_tile = hgbi.tiles(filename, ["x.0.0"])
    mean_mean_tile = hgbi.tiles(filename, ["x.0.0.mean"])
    min_tile = hgbi.tiles(filename, ["x.0.0.min"])
    max_tile = hgbi.tiles(filename, ["x.0.0.max"])
    std_tile = hgbi.tiles(filename, ["x.0.0.std"])

    assert mean_tile[0][1]["max_value"] == mean_mean_tile[0][1]["max_value"]
    assert mean_tile[0][1]["max_value"] > min_tile[0][1]["max_value"]
    assert max_tile[0][1]["max_value"] > mean_tile[0][1]["max_value"]
    assert (max_tile[0][1]["max_value"] >
            mean_tile[0][1]["max_value"] + std_tile[0][1]["max_value"])

    min_max_tile = hgbi.tiles(filename, ["x.0.0.minMax"])
    whisker_tile = hgbi.tiles(filename, ["x.0.0.whisker"])

    mean_val = decode(mean_tile)
    min_val = decode(min_tile)
    max_val = decode(max_tile)
    std_val = decode(std_tile)
    min_max_val = decode(min_max_tile)
    whisker_val = decode(whisker_tile)

    # minMax: two values per bin, laid out as (min, max).
    assert min_max_val.shape[0] == 2 * mean_val.shape[0]
    assert np.allclose(min_max_val[::2], min_val, equal_nan=True)
    assert np.allclose(min_max_val[1::2], max_val, equal_nan=True)

    # whisker: four values per bin, laid out as (min, max, mean, std).
    assert whisker_val.shape[0] == 4 * mean_val.shape[0]
    assert np.allclose(whisker_val[::4], min_val, equal_nan=True)
    assert np.allclose(whisker_val[1::4], max_val, equal_nan=True)
    assert np.allclose(whisker_val[2::4], mean_val, equal_nan=True)
    assert np.allclose(whisker_val[3::4], std_val, equal_nan=True)
Ejemplo n.º 6
0
def generate_tiles(tileset_tile_ids):
    '''
    Produce the requested tiles for a single tileset.

    The tileset's ``filetype`` decides which handler receives the request.
    All tile ids are expected to belong to the same tileset.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A four-tuple ``(tileset, tile_ids, raw, tileset_options)`` where

        tileset: tilesets.models.Tileset object
            is the tileset the tiles are retrieved from;
        tile_ids: [str,...]
            lists the ids of the tiles to retrieve (e.g. xyx.0.0.1);
        raw: str or False
            is the value of the GET request parameter `raw`; it is
            forwarded to the ``imtiles`` handler only;
        tileset_options: dict or None
            holds optional tileset options, including aggregation options;
            it is forwarded to the ``multivec`` handler only.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        The selected handler's result. For a filetype with no handler, one
        ``(tile_id, {'error': ...})`` pair per requested tile id.
    '''
    tileset, tile_ids, raw, tileset_options = tileset_tile_ids
    kind = tileset.filetype

    if kind == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if kind == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if kind in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if kind == 'geodb':
        # Same generator as bed2ddb, but with the geo tile getter.
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if kind == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if kind == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if kind in ('bigwig', 'bigbed'):
        # Both formats take the same arguments; only the module differs.
        chromsizes = get_chromsizes(tileset)
        tile_module = hgbi if kind == 'bigwig' else hgbb
        return tile_module.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if kind == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path,
            tile_ids,
            ctmu.get_single_tile,
            tileset_options,
        )

    if kind == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    if kind == 'bam':
        return ctb.tiles(
            tileset.datafile.path,
            tile_ids,
            index_filename=tileset.indexfile.path,
            max_tile_width=hss.MAX_BAM_TILE_WIDTH,
        )

    # No built-in handler matched: consult the filetype registry before
    # giving up.
    filetype = tileset.filetype
    filepath = tileset.datafile.path

    if filetype in hgti.by_filetype:
        handler = hgti.by_filetype[filetype](filepath)
        return handler.tiles(tile_ids)

    message = 'Unknown tileset filetype: {}'.format(tileset.filetype)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]
Ejemplo n.º 7
0
def bigwig(filepath, chromsizes=None, uuid=None):
    """Build a ``Tileset`` whose callbacks read from a bigWig file.

    Parameters
    ----------
    filepath
        Location of the bigWig file, passed to the ``hgbi`` functions.
    chromsizes
        Optional chromosome sizes forwarded to both ``hgbi.tileset_info``
        and ``hgbi.tiles``; defaults to ``None``.
    uuid
        Optional identifier handed to ``Tileset`` unchanged.

    Returns
    -------
    Tileset
        A tileset whose ``tileset_info`` and ``tiles`` callables defer all
        file access until they are invoked.
    """
    def load_info():
        return hgbi.tileset_info(filepath, chromsizes)

    def load_tiles(tids):
        return hgbi.tiles(filepath, tids, chromsizes=chromsizes)

    return Tileset(tileset_info=load_info, tiles=load_tiles, uuid=uuid)