def test_cupy():
    """Check that the cupy runtime optimizer produces a fused chunk node.

    An element-wise expression over two GPU tensors is tiled with
    ``fuse_enabled=False``; after ``CupyRuntimeOptimizer.optimize()`` runs,
    at least one node in the chunk graph must carry an op whose class is
    named ``TensorCpFuseChunk``.
    """
    lhs = mt.ones((100, 50), chunk_size=50, gpu=True)
    rhs = mt.ones(50, chunk_size=50, gpu=True)
    numerator = lhs - rhs
    denominator = mt.sqrt(rhs * (1 - rhs) * len(rhs))
    expr = numerator / denominator

    tileable_graph = TileableGraph([expr.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    builder = ChunkGraphBuilder(
        tileable_graph, fuse_enabled=False, tile_context=tile_context
    )
    chunk_graph = next(builder.build())

    CupyRuntimeOptimizer(chunk_graph).optimize()

    # Collect the op class names of all nodes left in the optimized graph.
    op_class_names = {node.op.__class__.__name__ for node in chunk_graph}
    assert 'TensorCpFuseChunk' in op_class_names
def test_k_means_init_large_n_clusters():
    """Check chunk sizes when ``k-means||`` init is used with many clusters.

    Centroids are initialised via ``_init_centroids`` for a 1,000,000 x 64
    random tensor with 2000 clusters.  After tiling, every chunk whose
    ``nbytes`` is known (i.e. not NaN) must not exceed twice
    ``options.chunk_store_limit``.
    """
    max_chunk_bytes = options.chunk_store_limit * 2
    cluster_count = 2000
    samples = mt.random.rand(1000_000, 64, chunk_size=250_000)
    centroids = _init_centroids(samples, cluster_count, init='k-means||')

    tileable_graph = next(
        TileableGraphBuilder(TileableGraph([centroids])).build()
    )
    chunk_graph = next(ChunkGraphBuilder(tileable_graph).build())

    for chunk in chunk_graph:
        size = chunk.nbytes
        if np.isnan(size):
            # Chunks with an unknown size are not checked.
            continue
        assert size <= max_chunk_bytes
def test_read_csv_head(gen_data1):
    """Check the optimization result for ``read_csv(...).head(5)``.

    ``gen_data1`` is a fixture yielding a ``(dataframe, temp directory)``
    pair.  After ``optimize`` runs on the chunk graph, the read chunk has no
    optimization record, while the head chunk is replaced by a chunk whose
    op has ``nrows == 5``; the graph then holds a single node which is also
    one of its results.
    """
    frame, workdir = gen_data1
    csv_path = os.path.join(workdir, 'test.csv')
    frame.to_csv(csv_path)

    source_df = md.read_csv(csv_path)
    head_df = source_df.head(5)

    tileable_graph = TileableGraph([head_df.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    chunk_graph = next(
        ChunkGraphBuilder(
            tileable_graph, fuse_enabled=False, tile_context=tile_context
        ).build()
    )
    read_chunk = tile_context[source_df.data].chunks[0].data
    head_chunk = tile_context[head_df.data].chunks[0].data

    records = optimize(chunk_graph)

    assert records.get_optimization_result(read_chunk) is None
    optimized_head = records.get_optimization_result(head_chunk)
    assert optimized_head.op.nrows == 5
    assert len(chunk_graph) == 1
    assert optimized_head in chunk_graph.results
def test_groupby_read_csv(gen_data1):
    """Check the optimization result for a column selection on ``read_csv``.

    ``gen_data1`` is a fixture yielding a ``(dataframe, temp directory)``
    pair.  Selecting columns ``['a', 'b']`` from the read dataframe must
    yield an optimized chunk whose op has ``usecols == ['a', 'b']``, while
    the read chunk itself gets no optimization record and the original
    chunk wiring is left untouched.
    """
    frame, workdir = gen_data1
    csv_path = os.path.join(workdir, 'test.csv')
    frame.to_csv(csv_path)

    source_df = md.read_csv(csv_path)
    selected_df = source_df[['a', 'b']]

    tileable_graph = TileableGraph([selected_df.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    chunk_graph = next(
        ChunkGraphBuilder(
            tileable_graph, fuse_enabled=False, tile_context=tile_context
        ).build()
    )
    read_chunk = tile_context[source_df.data].chunks[0].data
    selected_chunk = tile_context[selected_df.data].chunks[0].data

    records = optimize(chunk_graph)

    optimized_read = records.get_optimization_result(read_chunk)
    assert optimized_read is None

    optimized_selected = records.get_optimization_result(selected_chunk)
    assert optimized_selected is not None
    assert optimized_selected.op.usecols == ['a', 'b']

    # original tileable should not be modified
    assert selected_chunk.inputs[0] is read_chunk
async def test_execute_with_cancel(actor_pool, cancel_phase):
    """Cancel a running subtask and check that the band can run work again.

    ``actor_pool`` is a fixture unpacked into
    ``(pool, session_id, meta_api, storage_api, execution_ref)``.
    When ``cancel_phase`` is ``'prepare'``, ``'quota'`` or ``'slot'``, the
    matching actor is told to delay via ``set_delay_fetch_time(100)`` before
    the subtask starts; any other value leaves no actor delayed.

    A subtask wrapping a function that sleeps for 100 seconds is started,
    cancelled one second later, and must raise ``asyncio.CancelledError``
    with the timed section finishing in under 6 seconds.  A second, short
    subtask is then executed on the same band.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool
    band_name = 'numa-0'
    address = pool.external_address

    # config for different phases
    uid_factories = {
        'prepare': StorageManagerActor.default_uid,
        'quota': lambda: QuotaActor.gen_uid(band_name),
        'slot': lambda: BandSlotManagerActor.gen_uid(band_name),
    }
    ref_to_delay = None
    uid_factory = uid_factories.get(cancel_phase)
    if uid_factory is not None:
        ref_to_delay = await mo.actor_ref(uid_factory(), address=address)
    if ref_to_delay:
        await ref_to_delay.set_delay_fetch_time(100)

    def delay_fun(delay, _inp1):
        time.sleep(delay)
        return delay

    fetch_chunk = TensorFetch(
        key='input1', source_key='input1', dtype=np.dtype(int)
    ).new_chunk([])
    remote_op = RemoteFunction(
        function=delay_fun,
        function_args=[100, fetch_chunk],
        function_kwargs={},
        n_output=1,
    )
    remote_chunk = remote_op.new_chunk([fetch_chunk])

    # Register and store the input data so the subtask can fetch it.
    payload = np.random.rand(10, 10)
    await meta_api.set_chunk_meta(
        fetch_chunk,
        memory_size=payload.nbytes,
        store_size=payload.nbytes,
        bands=[(address, band_name)],
    )
    await storage_api.put(fetch_chunk.key, payload)

    long_graph = ChunkGraph([remote_chunk])
    long_graph.add_node(fetch_chunk)
    long_graph.add_node(remote_chunk)
    long_graph.add_edge(fetch_chunk, remote_chunk)

    long_subtask = Subtask(
        f'test_task_{uuid.uuid4()}',
        session_id=session_id,
        chunk_graph=long_graph,
    )
    running = asyncio.create_task(
        execution_ref.run_subtask(long_subtask, band_name, address)
    )
    # Give the subtask a moment to start before cancelling it.
    await asyncio.sleep(1)

    # The timed section covers the cancel request and the wait for the
    # cancelled task to finish.
    with Timer() as timer:
        await execution_ref.cancel_subtask(
            long_subtask.subtask_id, kill_timeout=1
        )
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(running, timeout=30)
    assert timer.duration < 6

    # check for different phases
    if ref_to_delay is not None:
        assert await ref_to_delay.get_is_cancelled()
        await ref_to_delay.set_delay_fetch_time(0)

    # test if slot is restored
    remote_tileable = mr.spawn(delay_fun, args=(0.5, None))
    tileable_graph = TileableGraph([remote_tileable.data])
    next(TileableGraphBuilder(tileable_graph).build())

    short_graph = next(
        ChunkGraphBuilder(tileable_graph, fuse_enabled=False).build()
    )
    short_subtask = Subtask(
        f'test_task2_{uuid.uuid4()}',
        session_id=session_id,
        chunk_graph=short_graph,
    )
    await asyncio.wait_for(
        execution_ref.run_subtask(short_subtask, band_name, address),
        timeout=30,
    )