def test_random_importance():
    """Aggregate with random importance and check tile containment.

    Every entry returned for tile ``x.5.15`` must also be returned by one of
    the two zoom-6 tiles ``x.6.30`` / ``x.6.31``, i.e. nothing that is shown
    at the coarser zoom level may disappear when zooming in.
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "25435_PM15-000877_SM-7QK6O.seg")

    # A temporary directory (rather than a never-closed, never-deleted
    # NamedTemporaryFile) guarantees cleanup even when an assertion fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = op.join(tmpdir, "random_importance.beddb")

        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "2",
                "--importance-column",
                "random",
                "--assembly",
                "b37",
                "--has-header",
                "--output-file",
                output_file,
            ],
        )
        # Surface CLI failures directly instead of as a confusing error from
        # reading a missing/empty output file further down.
        assert result.exit_code == 0, result.output

        tsinfo = ctb.tileset_info(output_file)
        assert "version" in tsinfo
        assert len(tsinfo["header"]) > 0

        parent_starts = {
            rect["xStart"] for rect in ctb.tiles(output_file, ["x.5.15"])[0][1]
        }

        child_starts = set()
        for tile_id in ("x.6.30", "x.6.31"):
            child_starts.update(
                rect["xStart"] for rect in ctb.tiles(output_file, [tile_id])[0][1]
            )

        missing = parent_starts - child_starts
        assert not missing, "entries missing from zoom-6 tiles: {}".format(
            sorted(missing)
        )
def test_chromosome_limit():
    """With ``--chromosome chr14`` only chr14 entries may end up in the output."""
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "geneAnnotationsExonsUnions.short.bed")

    # The temporary directory is removed even if an assertion below fails;
    # previously the output file leaked whenever the test did not reach its
    # final os.remove() call.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = op.join(tmpdir, "chromosome_limit.beddb")

        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "60",
                "--importance-column",
                "5",
                "--assembly",
                "hg19",
                "--chromosome",
                "chr14",
                "--output-file",
                output_file,
            ],
        )
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(output_file, ["x.0.0"])[0][1]
        for row in rows:
            assert row["fields"][0] == "chr14"
def test_gene_annotations():
    """Aggregate a tab-delimited mm10 exon-union file and spot-check tiles."""
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "exon_unions_mm10.bed")

    # Use a temporary directory so the generated database is always removed;
    # the previous NamedTemporaryFile(delete=False) was never cleaned up.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = op.join(tmpdir, "exon_unions_mm10.beddb")

        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "20",
                "--importance-column",
                "5",
                "--delimiter",
                "\t",
                "--assembly",
                "mm10",
                "--output-file",
                output_file,
            ],
        )
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(output_file, ["x.0.0"])[0][1]
        assert len(rows) == 2

        # The first entry of both zoom-11 tiles is expected to be Lrp1b.
        for tile_id in ("x.11.113", "x.11.112"):
            rows = ctb.tiles(output_file, [tile_id])[0][1]
            assert rows[0]["fields"][3] == "Lrp1b"
def test_no_chromosome_limit():
    """Without ``--chromosome`` the output must span more than one chromosome.

    Two properties of the top-level tile ``x.0.0`` are checked:

    * every entry that is not on chr1 starts beyond position 200,000,000,
      i.e. its ``xStart`` is offset past chr1;
    * at least one entry lies on a chromosome other than chr14.
    """
    runner = clt.CliRunner()
    input_file = op.join(testdir, "sample_data", "geneAnnotationsExonsUnions.short.bed")

    # The temporary directory is removed even if an assertion below fails.
    with tempfile.TemporaryDirectory() as tmpdir:
        output_file = op.join(tmpdir, "no_chromosome_limit.beddb")

        result = runner.invoke(
            cca.bedfile,
            [
                input_file,
                "--max-per-tile",
                "60",
                "--importance-column",
                "5",
                "--assembly",
                "hg19",
                "--output-file",
                output_file,
            ],
        )
        assert result.exit_code == 0, result.output

        rows = ctb.tiles(output_file, ["x.0.0"])[0][1]

        # Check every row; the earlier loop stopped after an early `break`
        # and therefore left most rows unverified.
        for row in rows:
            if row["fields"][0] != "chr1":
                assert row["xStart"] > 200000000

        # make sure there's a chromosome other than 14 in the output
        assert any(row["fields"][0] != "chr14" for row in rows)
def generate_tiles(tileset_tile_ids):
    '''
    Produce the tiles requested for one tileset.

    All tile ids must belong to the same tileset; the handler that renders
    them is selected from the tileset's ``filetype``.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A three-tuple ``(tileset, tile_ids, raw)``:

        tileset: tilesets.models.Tileset object
            The tileset that the tile ids should be retrieved from
        tile_ids: [str,...]
            A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
            to be retrieved
        raw:
            Passed through unchanged to the ``imtiles`` handler; unused
            by every other filetype.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples. For an unrecognised filetype
        every tile id is paired with an ``{'error': ...}`` dict.
    '''
    tileset, tile_ids, raw = tileset_tile_ids
    filetype = tileset.filetype

    if filetype == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if filetype == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if filetype in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if filetype == 'geodb':
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if filetype == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if filetype == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if filetype == 'bigwig':
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes
        )

    if filetype == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path, tile_ids, ctmu.get_single_tile
        )

    if filetype == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    # No handler matched: report the problem once per requested tile.
    unknown = []
    for tile_id in tile_ids:
        message = 'Unknown tileset filetype: {}'.format(tileset.filetype)
        unknown.append((tile_id, {'error': message}))
    return unknown
def test_get_tiles():
    """Smoke test: requesting one tile from the sample database must not raise."""
    requested_tile_ids = ["x.18.169283"]
    sample_db = "test/sample_data/gene_annotations.short.db"

    ctb.tiles(sample_db, requested_tile_ids)
def generate_tiles(tileset_tile_ids):
    '''
    Produce the tiles requested for one tileset.

    All tile ids must belong to the same tileset; the handler that renders
    them is selected from the tileset's ``filetype``.

    Parameters
    ----------
    tileset_tile_ids: tuple
        A four-tuple ``(tileset, tile_ids, raw, tileset_options)``:

        tileset: tilesets.models.Tileset object
            The tileset that the tile ids should be retrieved from
        tile_ids: [str,...]
            A list of tile_ids (e.g. xyx.0.0.1) identifying the tiles
            to be retrieved
        raw: str or False
            The value of the GET request parameter `raw`. Only forwarded
            to the ``imtiles`` handler.
        tileset_options: dict or None
            An optional dict containing tileset options, including
            aggregation options. Only forwarded to the ``multivec`` handler.

    Returns
    -------
    tile_list: [(tile_id, tile_data),...]
        A list of tile_id, tile_data tuples. For an unrecognised filetype
        every tile id is paired with an ``{'error': ...}`` dict.
    '''
    tileset, tile_ids, raw, tileset_options = tileset_tile_ids
    filetype = tileset.filetype

    if filetype == 'hitile':
        return generate_hitile_tiles(tileset, tile_ids)

    if filetype == 'beddb':
        return hgbe.tiles(tileset.datafile.path, tile_ids)

    if filetype in ('bed2ddb', '2dannodb'):
        return generate_bed2ddb_tiles(tileset, tile_ids)

    if filetype == 'geodb':
        return generate_bed2ddb_tiles(tileset, tile_ids, hggo.get_tiles)

    if filetype == 'hibed':
        return generate_hibed_tiles(tileset, tile_ids)

    if filetype == 'cooler':
        return hgco.generate_tiles(tileset.datafile.path, tile_ids)

    if filetype == 'bigwig':
        chromsizes = get_chromsizes(tileset)
        return hgbi.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes
        )

    if filetype == 'bigbed':
        chromsizes = get_chromsizes(tileset)
        return hgbb.tiles(
            tileset.datafile.path, tile_ids, chromsizes=chromsizes
        )

    if filetype == 'multivec':
        return generate_1d_tiles(
            tileset.datafile.path,
            tile_ids,
            ctmu.get_single_tile,
            tileset_options,
        )

    if filetype == 'imtiles':
        return hgim.get_tiles(tileset.datafile.path, tile_ids, raw)

    if filetype == 'bam':
        return ctb.tiles(
            tileset.datafile.path,
            tile_ids,
            index_filename=tileset.indexfile.path,
            max_tile_width=hss.MAX_BAM_TILE_WIDTH,
        )

    # None of the built-in handlers matched: try the filetype registry
    # before giving up.
    filepath = tileset.datafile.path
    if filetype in hgti.by_filetype:
        return hgti.by_filetype[filetype](filepath).tiles(tile_ids)

    # Still unknown: report the problem once per requested tile.
    unknown = []
    for tile_id in tile_ids:
        message = 'Unknown tileset filetype: {}'.format(tileset.filetype)
        unknown.append((tile_id, {'error': message}))
    return unknown
def test_get_tiles():
    """Smoke test: fetching two tiles from the multires sample file must not raise."""
    requested_tile_ids = ["x.1.0", "x.1.1"]
    multires_path = op.join("data", "corrected.geneListwithStrand.bed.multires")

    hgbe.tiles(multires_path, requested_tile_ids)
def test_name_in_tile():
    """The first entry of the first returned tile must carry a ``name`` key."""
    beddb_path = op.join("data", "geneAnnotationsExonUnions.1000.bed.v3.beddb")
    fetched = hgbe.tiles(beddb_path, ["x.1.0", "x.1.1"])

    # fetched[0] is the first (tile_id, tile_data) pair; inspect its data.
    first_tile_data = fetched[0][1]
    first_entry = first_tile_data[0]
    assert "name" in first_entry
def test_get_tiles():
    """Smoke test: fetching two tiles from the multires sample file must not raise."""
    filename = op.join('data', 'corrected.geneListwithStrand.bed.multires')

    # The return value is deliberately not bound: nothing is asserted on it,
    # the test only verifies that the call completes.
    hgbe.tiles(filename, ['x.1.0', 'x.1.1'])