Beispiel #1
0
def test_random_importance():
    """Check tile consistency when aggregating with random importance.

    Runs the ``bedfile`` command with ``--importance-column random`` and
    verifies that every ``xStart`` found in tile ``x.5.15`` is also found in
    tile ``x.6.30`` or tile ``x.6.31`` (the next zoom level).
    """
    import os  # local import so the cleanup below is self-contained

    input_file = op.join(testdir, "sample_data",
                         "25435_PM15-000877_SM-7QK6O.seg")

    # Only the path is needed: close the handle right away so the command
    # can rewrite the file, and remove the file ourselves in ``finally``.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "2",
                "--importance-column",
                "random",
                "--assembly",
                "b37",
                "--has-header",
                "--output-file",
                f.name,
            ],
        )
        # Surface CLI failures directly, with the captured output, instead
        # of failing later on an unreadable output file.
        assert result.exit_code == 0, result.output

        tsinfo = ctb.tileset_info(f.name)
        assert "version" in tsinfo
        assert len(tsinfo["header"]) > 0

        # Every entry of the zoom-5 tile must show up in one of the two
        # zoom-6 tiles.
        coarse_starts = {
            rect["xStart"] for rect in ctb.tiles(f.name, ["x.5.15"])[0][1]
        }

        fine_starts = set()
        for tile_id in ("x.6.30", "x.6.31"):
            fine_starts.update(
                rect["xStart"] for rect in ctb.tiles(f.name, [tile_id])[0][1]
            )

        missing = coarse_starts - fine_starts
        assert not missing, "xStart values missing at zoom 6: {}".format(
            sorted(missing))
    finally:
        # The original leaked the temporary file; always clean it up.
        if op.exists(f.name):
            os.remove(f.name)
Beispiel #2
0
def test_chromosome_limit():
    """Check that ``--chromosome chr14`` restricts the output to chr14.

    Runs the ``bedfile`` command limited to ``chr14`` and verifies that the
    first field of every row in tile ``x.0.0`` is ``"chr14"``.
    """
    input_file = op.join(testdir, "sample_data",
                         "geneAnnotationsExonsUnions.short.bed")

    # Only the path is needed: close the handle right away and remove the
    # file in ``finally`` so a failing assertion does not leak it.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "60",
                "--importance-column",
                "5",
                "--assembly",
                "hg19",
                "--chromosome",
                "chr14",
                "--output-file",
                f.name,
            ],
        )
        # Report a failed CLI run with its captured output.
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(f.name, ["x.0.0"])[0][1]

        # An empty tile would make the per-row check below pass vacuously.
        assert rows

        for row in rows:
            assert row["fields"][0] == "chr14"
    finally:
        if op.exists(f.name):
            os.remove(f.name)
Beispiel #3
0
def test_gene_annotations():
    """Check the tiles generated from the mm10 exon-union annotations.

    Runs the ``bedfile`` command on ``exon_unions_mm10.bed`` with a tab
    delimiter and verifies that tile ``x.0.0`` holds exactly two rows and
    that the first row of tiles ``x.11.113`` and ``x.11.112`` carries
    ``"Lrp1b"`` in field index 3.
    """
    import os  # local import so the cleanup below is self-contained

    input_file = op.join(testdir, "sample_data", "exon_unions_mm10.bed")

    # Only the path is needed: close the handle right away so the command
    # can rewrite the file, and remove the file ourselves in ``finally``.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "20",
                "--importance-column",
                "5",
                "--delimiter",
                "\t",
                "--assembly",
                "mm10",
                "--output-file",
                f.name,
            ],
        )
        # Report a failed CLI run with its captured output.
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(f.name, ["x.0.0"])[0][1]
        assert len(rows) == 2

        for tile_id in ("x.11.113", "x.11.112"):
            rows = ctb.tiles(f.name, [tile_id])[0][1]
            assert rows[0]["fields"][3] == "Lrp1b"
    finally:
        # The original leaked the temporary file; always clean it up.
        if op.exists(f.name):
            os.remove(f.name)
Beispiel #4
0
def test_no_chromosome_limit():
    """Check that output is not restricted when no chromosome is given.

    Runs the ``bedfile`` command without ``--chromosome`` and verifies, for
    tile ``x.0.0``, that every row whose first field is not ``"chr1"`` has
    ``xStart > 200000000`` and that at least one row is not on ``"chr14"``.
    """
    input_file = op.join(testdir, "sample_data",
                         "geneAnnotationsExonsUnions.short.bed")

    # Only the path is needed: close the handle right away and remove the
    # file in ``finally`` so a failing assertion does not leak it.
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "60",
                "--importance-column",
                "5",
                "--assembly",
                "hg19",
                "--output-file",
                f.name,
            ],
        )
        # Report a failed CLI run with its captured output.
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(f.name, ["x.0.0"])[0][1]

        # The original loop ended with an unconditional ``break`` and so
        # only ever looked at the first row; validate every row instead.
        for row in rows:
            if row["fields"][0] != "chr1":
                assert row["xStart"] > 200000000

        # make sure there's a chromosome other than chr14 in the output
        assert any(row["fields"][0] != "chr14" for row in rows)
    finally:
        if op.exists(f.name):
            os.remove(f.name)
def generate_tiles(tileset_tile_ids):
    '''
    Produce the tiles for a batch of tile ids belonging to one tileset.

    The tileset's ``filetype`` selects the handler that is called; the
    handler's return value is passed back unchanged.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A three-tuple ``(tileset, tile_ids, raw)``.
    tileset: object
        The tileset the tile ids belong to. Its ``filetype`` attribute is
        always read; most handlers also read ``datafile.path``.
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    raw:
        Forwarded to the handler for 'imtiles' tilesets; unused otherwise.

    Returns
    -------
    tile_list:
        Whatever the selected handler returns. For an unrecognised filetype,
        a list of ``(tile_id, {'error': message})`` tuples, one per tile id.
    '''
    tileset, tile_ids, raw = tileset_tile_ids
    filetype = tileset.filetype

    if filetype == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if filetype == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if filetype in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if filetype == 'geodb':
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if filetype == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if filetype == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if filetype == 'bigwig':
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if filetype == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path, tile_ids, ctmu.get_single_tile)

    if filetype == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    # No handler matched: report the problem once per requested tile.
    message = 'Unknown tileset filetype: {}'.format(filetype)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]
Beispiel #6
0
def test_get_tiles():
    """Smoke test: requesting one tile from the sample db must not raise."""
    sample_db = "test/sample_data/gene_annotations.short.db"
    tile_ids = ["x.18.169283"]

    # The returned tiles are not inspected; the call only has to complete.
    ctb.tiles(sample_db, tile_ids)
def generate_tiles(tileset_tile_ids):
    '''
    Produce the tiles for a batch of tile ids belonging to one tileset.

    The tileset's ``filetype`` selects the handler that is called; the
    handler's return value is passed back unchanged. Filetypes without a
    dedicated branch are looked up in ``hgti.by_filetype`` before an error
    entry is produced.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A four-tuple ``(tileset, tile_ids, raw, tileset_options)``.
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
        to be retrieved
    raw: str or False
        The value of the GET request parameter `raw`. Only forwarded to
        the 'imtiles' handler.
    tileset_options: dict or None
        An optional dict containing tileset options, including aggregation
        options. Only forwarded to the 'multivec' handler.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        Whatever the selected handler returns. For an unrecognised filetype,
        a list of ``(tile_id, {'error': message})`` tuples, one per tile id.
    '''
    tileset, tile_ids, raw, tileset_options = tileset_tile_ids
    filetype = tileset.filetype

    if filetype == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if filetype == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if filetype in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if filetype == 'geodb':
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if filetype == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if filetype == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if filetype == 'bigwig':
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if filetype == 'bigbed':
        chromsizes = get_chromsizes(tileset)
        return hgbb.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes)

    if filetype == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path,
            tile_ids,
            ctmu.get_single_tile,
            tileset_options,
        )

    if filetype == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    if filetype == 'bam':
        return ctb.tiles(
            tileset.datafile.path,
            tile_ids,
            index_filename=tileset.indexfile.path,
            max_tile_width=hss.MAX_BAM_TILE_WIDTH,
        )

    # No dedicated branch matched. The data file path is resolved before
    # the registry lookup, exactly as in the fallback order used so far.
    filepath = tileset.datafile.path

    if filetype in hgti.by_filetype:
        return hgti.by_filetype[filetype](filepath).tiles(tile_ids)

    # Still nothing: report the problem once per requested tile.
    message = 'Unknown tileset filetype: {}'.format(filetype)
    return [(tile_id, {'error': message}) for tile_id in tile_ids]
Beispiel #8
0
def test_get_tiles():
    """Smoke test: requesting two tiles from the multires file must not raise."""
    tile_ids = ["x.1.0", "x.1.1"]
    source_path = op.join("data", "corrected.geneListwithStrand.bed.multires")

    # The returned tiles are not inspected; the call only has to complete.
    hgbe.tiles(source_path, tile_ids)
Beispiel #9
0
def test_name_in_tile():
    """Check that the first entry of the first returned tile has a "name" key."""
    beddb_path = op.join("data", "geneAnnotationsExonUnions.1000.bed.v3.beddb")
    requested = ["x.1.0", "x.1.1"]

    result = hgbe.tiles(beddb_path, requested)

    # result[0] is the first (tile_id, entries) pair; inspect its first entry.
    first_tile = result[0]
    entries = first_tile[1]
    assert "name" in entries[0]
Beispiel #10
0
def test_get_tiles():
    """Smoke test: request tiles ``x.1.0`` and ``x.1.1`` from the multires file."""
    filename = op.join('data', 'corrected.geneListwithStrand.bed.multires')

    # NOTE(review): `ret` is not inspected in the visible part of this test;
    # confirm whether assertions on it are missing.
    ret = hgbe.tiles(filename, ['x.1.0', 'x.1.1'])