def test_gene_annotations():
    """Aggregate the mm10 exon-union BED file and spot-check its tiles.

    Runs the ``bedfile`` command with a tab delimiter, then checks that
    the zoom-0 tile holds two entries and that the first entry of tiles
    112 and 113 at zoom level 11 has ``'Lrp1b'`` in field index 3.
    """
    import os  # local import: only needed for temp-file cleanup

    input_file = op.join(testdir, 'sample_data', 'exon_unions_mm10.bed')

    # Only the path is needed; close the handle immediately so the command
    # under test is the sole writer of the file.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        output_file = f.name

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '20', '--importance-column', '5',
             '--delimiter', '\t',
             '--assembly', 'mm10', '--output-file', output_file])

        # Surface CLI failures directly instead of through a confusing
        # error from reading an incomplete output file.
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)
        assert len(rows[0]) == 2

        for position in (113, 112):
            rows = cdt.get_tiles(output_file, 11, position)
            assert rows[position][0]['fields'][3] == 'Lrp1b'
    finally:
        # delete=False means the temp file is ours to remove.
        if os.path.exists(output_file):
            os.remove(output_file)
Exemple #2
0
def test_clodius_aggregate_bedfile():
    """Aggregate the short hg19 gene-annotation BED file twice.

    The first pass supplies an explicit tab delimiter and expects 14
    fields in the first entry of zoom-6 tile 3; the second pass omits the
    delimiter option and expects 17 fields for the same entry.
    """
    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.hg19.short.bed')
    output_file = '/tmp/geneAnnotationsExonsUnions.hg19.short.bed'

    # Command-line arguments are strings; the option values are parsed by
    # the command itself.
    base_args = [
        input_file, '--max-per-tile', '20', '--importance-column', '5',
        '--assembly', 'hg19', '--output-file', output_file,
    ]

    # Pass 1: explicit tab delimiter.
    runner = clt.CliRunner()
    result = runner.invoke(cca.bedfile, base_args + ['--delimiter', '\t'])

    # The assertion message carries the diagnostics the old debug prints
    # used to dump unconditionally.
    assert result.exit_code == 0, 'bedfile failed: %r\n%s' % (
        result.exception, result.output)

    results = cdt.get_tiles(output_file, 6, 3, num_tiles=1)
    assert len(results[3][0]['fields']) == 14

    # Pass 2: default delimiter, same output path.
    runner = clt.CliRunner()
    result = runner.invoke(cca.bedfile, base_args)

    assert result.exit_code == 0, 'bedfile failed: %r\n%s' % (
        result.exception, result.output)

    results = cdt.get_tiles(output_file, 6, 3, num_tiles=3)
    assert len(results[3][0]['fields']) == 17
Exemple #3
0
def test_clodius_aggregate_bedfile():
    """Check the field count produced with and without ``--delimiter``.

    The short hg19 gene-annotation BED file is aggregated twice into the
    same output path. Each pass must exit with code 0, and the first
    entry of zoom-6 tile 3 must have the expected number of fields.
    """
    input_file = op.join(testdir, "sample_data",
                         "geneAnnotationsExonsUnions.hg19.short.bed")
    output_file = "/tmp/geneAnnotationsExonsUnions.hg19.short.bed"

    # Arguments shared by both passes (CLI arguments are strings).
    common_args = [
        input_file,
        "--max-per-tile",
        "20",
        "--importance-column",
        "5",
        "--assembly",
        "hg19",
        "--output-file",
        output_file,
    ]

    # (extra CLI args, num_tiles to fetch, expected number of fields)
    cases = [
        (["--delimiter", "\t"], 1, 14),
        ([], 3, 17),
    ]

    for extra_args, num_tiles, expected_fields in cases:
        runner = clt.CliRunner()
        result = runner.invoke(cca.bedfile, common_args + extra_args)

        # Include the captured output so a failure explains itself.
        assert result.exit_code == 0, result.output

        results = cdt.get_tiles(output_file, 6, 3, num_tiles=num_tiles)
        assert len(results[3][0]["fields"]) == expected_fields
Exemple #4
0
def test_random_importance():
    """Aggregate a .seg file with random importance and look up one entry.

    After running ``bedfile`` with ``--importance-column random``, the
    test fetches a few tiles as smoke checks and then asserts that an
    entry whose ``xEnd`` is 2195875458 is present in tile 32 at zoom 6.
    """
    import os  # local import: only needed for temp-file cleanup

    input_file = op.join(testdir, 'sample_data',
                         '25435_PM15-000877_SM-7QK6O.seg')

    # Only the path is needed; close the handle before the command writes.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        output_file = f.name

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2', '--importance-column', 'random',
             '--assembly', 'b37', '--has-header',
             '--output-file', output_file])

        # Replaces the unconditional debug prints: diagnostics are shown
        # only when the command actually fails.
        assert result.exit_code == 0, 'bedfile failed: %r\n%s' % (
            result.exception, result.output)

        # Smoke checks: these only need to succeed; their results were
        # never asserted on.
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        # NOTE(review): the original also scanned tile 31 for the target,
        # but reset its flag before scanning tile 32, so that scan never
        # influenced the assertion. Tile 31 is still fetched; only tile 32
        # is asserted on. Confirm whether "31 or 32" was intended.
        cdt.get_tiles(output_file, 6, 31)

        target_x_end = 2195875458
        found = any(
            rect['xEnd'] == target_x_end
            for row in cdt.get_tiles(output_file, 6, 32).values()
            for rect in row
        )
        assert found
    finally:
        # delete=False means the temp file is ours to remove.
        if os.path.exists(output_file):
            os.remove(output_file)
Exemple #5
0
def test_get_tiles():
    """Fetch one zoom-18 tile from the sample gene-annotation database.

    Verifies that the tile contains at least one entry and that the first
    entry has an integer-convertible ``xStart`` and a ``fields`` member.
    """
    filename = 'test/sample_data/gene_annotations.short.db'
    tile_position = 169283

    tiles = cdt.get_tiles(filename, 18, tile_position)[tile_position]

    # These used to be implicit checks hidden behind unused locals; state
    # them as assertions so the intent is visible.
    assert len(tiles) > 0
    first_entry = tiles[0]

    int(first_entry['xStart'])  # raises if xStart is missing or non-numeric
    assert 'fields' in first_entry
def test_random_importance():
    """Check tile consistency across zoom levels with random importance.

    After aggregating a .seg file with ``--importance-column random``,
    every entry of tile 15 at zoom level 5 (identified by its ``xStart``)
    must also appear in tile 30 or tile 31 at zoom level 6.
    """
    import os  # local import: only needed for temp-file cleanup

    input_file = op.join(testdir, 'sample_data',
                         '25435_PM15-000877_SM-7QK6O.seg')

    # Only the path is needed; close the handle before the command writes.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        output_file = f.name

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '2', '--importance-column', 'random',
             '--assembly', 'b37', '--has-header',
             '--output-file', output_file])

        # Fail early, with the captured output, if the command failed.
        assert result.exit_code == 0, result.output

        # Smoke checks: these calls only need to succeed.
        # TODO: Make assertions about the results
        cdt.get_tileset_info(output_file)
        cdt.get_tiles(output_file, 0, 0)
        cdt.get_tiles(output_file, 1, 0)
        cdt.get_tiles(output_file, 1, 1)

        # Start positions of everything in the zoom-5 tile ...
        coarse_starts = {
            rect['xStart']
            for row in cdt.get_tiles(output_file, 5, 15).values()
            for rect in row
        }

        # ... and of everything in the two zoom-6 tiles checked against it.
        fine_starts = set()
        for position in (30, 31):
            for row in cdt.get_tiles(output_file, 6, position).values():
                fine_starts.update(rect['xStart'] for rect in row)

        missing = coarse_starts - fine_starts
        assert not missing, (
            'entries missing at zoom 6: %s' % sorted(missing))
    finally:
        # delete=False means the temp file is ours to remove.
        if os.path.exists(output_file):
            os.remove(output_file)
Exemple #7
0
def generate_beddb_tiles(tileset, tile_ids):
    '''
    Generate tiles from a beddb file.

    Tile ids are grouped by zoom level and split into runs of adjacent
    tiles so that each run is fetched with a single ``cdt.get_tiles``
    call.

    Parameters
    ----------
    tileset: tilesets.models.Tileset object
        The tileset that the tile ids should be retrieved from
    tile_ids: [str,...]
        A list of tile_ids of the form ``<tileset_id>.<zoom>.<x>``
        (e.g. xyz.0.1) identifying the tiles to be retrieved

    Returns
    -------
    generated_tiles: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples, where each tile_id is
        rebuilt as ``<tileset_id>.<zoom>.<position>`` from the positions
        returned by ``cdt.get_tiles``
    '''
    tile_ids_by_zoom = bin_tiles_by_zoom(tile_ids).values()
    partitioned_tile_ids = list(it.chain.from_iterable(
        partition_by_adjacent_tiles(t, dimension=1)
        for t in tile_ids_by_zoom))

    generated_tiles = []

    for tile_group in partitioned_tile_ids:
        # Guard before indexing: the original check came after
        # tile_group[0] had already been used, so it could never trigger.
        if len(tile_group) == 0:
            continue

        # Every id in a group shares tileset and zoom level, so the first
        # id is representative.
        first_id_parts = tile_group[0].split('.')
        zoom_level = int(first_id_parts[1])
        tileset_id = first_id_parts[0]

        # Third component of each id is the tile's x position.
        x_positions = [int(t.split('.')[2]) for t in tile_group]
        minx = min(x_positions)
        maxx = max(x_positions)

        # Fetch the whole contiguous run [minx, maxx] in one call.
        tile_data_by_position = cdt.get_tiles(
            get_cached_datapath(tileset.datafile.url),
            zoom_level,
            minx,
            maxx - minx + 1
        )
        generated_tiles.extend(
            ('.'.join((tileset_id, str(zoom_level), str(position))),
             tile_data)
            for (position, tile_data) in tile_data_by_position.items())

    return generated_tiles
Exemple #8
0
def test_chromosome_limit():
    """Check that ``--chromosome chr14`` restricts the output to chr14.

    Every entry in the zoom-0 tile must have ``'chr14'`` as its first
    field.
    """
    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.short.bed')

    # Only the path is needed; close the handle before the command writes.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        output_file = f.name

    try:
        runner = clt.CliRunner()
        result = runner.invoke(cca.bedfile, [
            input_file, '--max-per-tile', '60', '--importance-column', '5',
            '--assembly', 'hg19', '--chromosome', 'chr14',
            '--output-file', output_file
        ])

        # Fail early, with the captured output, if the command failed.
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)[0]

        # NOTE(review): this passes vacuously if the tile is empty;
        # consider also asserting that rows is non-empty.
        for row in rows:
            assert row['fields'][0] == 'chr14'
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(output_file):
            os.remove(output_file)
def test_no_chromosome_limit():
    """Check that, without ``--chromosome``, output is not limited to chr14.

    Every zoom-0 entry that is not on chr1 must start beyond position
    200000000, and at least one entry must be on a chromosome other than
    chr14.
    """
    input_file = op.join(testdir, 'sample_data',
                         'geneAnnotationsExonsUnions.short.bed')

    # Only the path is needed; close the handle before the command writes.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        output_file = f.name

    try:
        runner = clt.CliRunner()
        result = runner.invoke(
            cca.bedfile,
            [input_file,
             '--max-per-tile', '60', '--importance-column', '5',
             '--assembly', 'hg19',
             '--output-file', output_file])

        # Fail early, with the captured output, if the command failed.
        assert result.exit_code == 0, result.output

        rows = cdt.get_tiles(output_file, 0, 0)[0]

        # The original loop ended with an unconditional `break`, so only
        # the first row was ever inspected. Check every row instead.
        for row in rows:
            if row['fields'][0] != 'chr1':
                assert row['xStart'] > 200000000

        # make sure there's chromosome other than 14 in the output
        assert any(row['fields'][0] != 'chr14' for row in rows)
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(output_file):
            os.remove(output_file)
def test_get_tiles():
    """Smoke test: tile 169283 at zoom 18 can be read from the sample db.

    Passes as long as the lookup raises no exception and the returned
    mapping contains the requested tile position.
    """
    tile_position = 169283
    db_path = 'test/sample_data/gene_annotations.short.db'

    tiles_by_position = cdt.get_tiles(db_path, 18, tile_position)

    # Indexing raises if the requested position is absent.
    _ = tiles_by_position[tile_position]