# Imports assumed by these tests; module paths follow the numpywren source
# layout and may need adjusting to your checkout. `cholesky`, `matmul`,
# `f1_if`, and `run_program_in_pywren` are lambdapack algorithm/helper
# definitions from the accompanying test modules.
import concurrent.futures as fs
import json
import multiprocessing as mp
import os
import time

import boto3
import numpy as np

import lithops
import pywren
import numpywren as npw
from numpywren import binops, compiler, frontend, job_runner, matrix_utils, uops
from numpywren import lambdapack as lp
from numpywren.matrix import BigMatrix, BigSymmetricMatrix
from numpywren.matrix_init import shard_matrix


def test_if_run(self):
    X = np.random.randn(64)
    shard_sizes = (int(X.shape[0] / 8),)
    X_sharded = BigMatrix("if_test", shape=X.shape, shard_sizes=shard_sizes,
                          write_header=True)
    O_sharded = BigMatrix("if_test_output", shape=X.shape,
                          shard_sizes=shard_sizes, write_header=True)
    X_sharded.free()
    shard_matrix(X_sharded, X)
    f = frontend.lpcompile(f1_if)
    p = f(X_sharded, O_sharded, X_sharded.num_blocks(0))
    num_cores = 1
    executor = fs.ProcessPoolExecutor(num_cores)
    config = npw.config.default()
    p_ex = lp.LambdaPackProgram(p, config=config)
    p_ex.start()
    all_futures = []
    for i in range(num_cores):
        all_futures.append(executor.submit(
            job_runner.lambdapack_run, p_ex, pipeline_width=1,
            idle_timeout=5, timeout=60))
    p_ex.wait()
    time.sleep(5)
    p_ex.free()
    # f1_if writes 1*X into even-indexed output blocks and 2*X into odd ones.
    for i in range(X_sharded.num_blocks(0)):
        Ob = O_sharded.get_block(i)
        Xb = X_sharded.get_block(i)
        if (i % 2) == 0:
            assert np.allclose(Ob, 1 * Xb)
        else:
            assert np.allclose(Ob, 2 * Xb)

def test_multiple_shard_matrix_multiply(self):
    fexec = lithops.FunctionExecutor(runtime='jsampe/numpy-lithops:04',
                                     log_level='DEBUG')
    X = np.random.randn(16, 16)
    X_shard_sizes = tuple(map(int, np.array(X.shape) / 2))
    X_sharded = BigMatrix("gemm_test_1", shape=X.shape,
                          shard_sizes=X_shard_sizes, storage=fexec.storage)
    Y = np.random.randn(16, 16)
    Y_shard_sizes = tuple(map(int, np.array(Y.shape) / 2))
    Y_sharded = BigMatrix("gemm_test_2", shape=Y.shape,
                          shard_sizes=Y_shard_sizes, storage=fexec.storage)
    shard_matrix(X_sharded, X)
    shard_matrix(Y_sharded, Y)
    XY_sharded = binops.gemm(fexec, X_sharded, Y_sharded, X_sharded.bucket, 1)
    XY_sharded_local = XY_sharded.numpy()
    XY = X.dot(Y)
    X_sharded.free()
    Y_sharded.free()
    XY_sharded.free()
    assert np.all(np.isclose(XY, XY_sharded_local))
    os.system("rm -rf /dev/shm/*")

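# For reference: what binops.gemm computes, written as a plain-NumPy blocked
# matrix multiply. This is a sketch for readers, not the numpywren
# implementation (which runs each block product as a serverless task against
# object storage); it assumes b evenly divides the matrix dimensions.
def blocked_gemm_reference(X, Y, b):
    """Multiply X (m x k) by Y (k x n) one (b x b) block product at a time."""
    m, k = X.shape
    n = Y.shape[1]
    C = np.zeros((m, n))
    for i in range(0, m, b):
        for j in range(0, n, b):
            for kk in range(0, k, b):
                C[i:i + b, j:j + b] += X[i:i + b, kk:kk + b].dot(
                    Y[kk:kk + b, j:j + b])
    return C

# e.g. blocked_gemm_reference(X, Y, 8) agrees with X.dot(Y) up to round-off.
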
def test_cholesky():
    X = np.random.randn(64, 64)
    A = X.dot(X.T) + np.eye(X.shape[0])
    shard_size = 16
    # (use a 4 x 4 matrix with shard_size = 4 for quicker local debugging)
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    executor = fs.ProcessPoolExecutor(1)
    print("starting program")
    program.start()
    future = executor.submit(job_runner.lambdapack_run, program,
                             timeout=60 * 10, idle_timeout=6)
    program.wait()
    program.free()
    L_sharded = meta["outputs"][0]
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert np.allclose(L_npw, L)
    print("great success!")

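# For reference: the right-looking blocked Cholesky recurrence that the
# generated lambdapack program executes, written as plain NumPy. A sketch for
# readers only; numpywren runs each block factor/solve/update as a separate
# serverless task.
def blocked_cholesky_reference(A, b):
    """Return lower-triangular L with L @ L.T == A, working in b x b blocks."""
    A = A.copy()
    n = A.shape[0]
    L = np.zeros_like(A)
    for k in range(0, n, b):
        # Factor the diagonal block, then solve for the panel below it.
        L[k:k + b, k:k + b] = np.linalg.cholesky(A[k:k + b, k:k + b])
        L[k + b:, k:k + b] = np.linalg.solve(
            L[k:k + b, k:k + b], A[k + b:, k:k + b].T).T
        # Trailing-matrix update (the SYRK/GEMM steps of the algorithm).
        A[k + b:, k + b:] -= L[k + b:, k:k + b].dot(L[k + b:, k:k + b].T)
    return L
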
def test_cholesky_lambda():
    X = np.random.randn(128, 128)
    A = X.dot(X.T) + np.eye(X.shape[0])
    shard_size = 128
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("job_runner_test", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    print("starting program")
    program.start()
    pwex = pywren.default_executor()
    futures = pwex.map(
        lambda x: job_runner.lambdapack_run(
            program, timeout=60, idle_timeout=6), range(16))
    pywren.wait(futures)
    print("RESULTS")
    print([f.result() for f in futures])
    # Launch a second wave of workers in case the first wave timed out
    # before the program drained.
    futures = pwex.map(
        lambda x: job_runner.lambdapack_run(
            program, timeout=60, idle_timeout=6), range(16))
    program.wait()
    # program.free()
    L_sharded = meta["outputs"][0]
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert np.allclose(L_npw, L)
    print("great success!")

def test_cholesky_multiprocess():
    X = np.random.randn(128, 128)
    A = X.dot(X.T) + 1e9 * np.eye(X.shape[0])
    shard_size = 8
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("job_runner_test", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    executor = fs.ProcessPoolExecutor(8)
    print("starting program")
    program.start()
    futures = []
    for i in range(8):
        future = executor.submit(job_runner.lambdapack_run, program,
                                 timeout=25)
        futures.append(future)
    print("Waiting for futures")
    fs.wait(futures)
    [f.result() for f in futures]
    # Resubmit a second round of workers to finish any work left over
    # after the 25-second timeouts; this test only exercises the runner,
    # it does not verify the factorization.
    futures = []
    for i in range(8):
        future = executor.submit(job_runner.lambdapack_run, program,
                                 timeout=25)
        futures.append(future)
    print("Waiting for futures... again")
    fs.wait(futures)
    [f.result() for f in futures]
    print("great success!")
    return 0

def test_matmul(self):
    size = 4
    shard_size = 2
    np.random.seed(0)
    A = np.random.randn(size, size)
    B = np.random.randn(size, size)
    C = np.dot(A, B)
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("matmul_test_A", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    B_sharded = BigMatrix("matmul_test_B", shape=B.shape,
                          shard_sizes=shard_sizes, write_header=True)
    B_sharded.free()
    shard_matrix(B_sharded, B)
    # Scratch space used by the compiled matmul for intermediate block
    # products of the reduction.
    Temp = BigMatrix("matmul_test_Temp",
                     shape=[A.shape[0], B.shape[1], B.shape[0], 100],
                     shard_sizes=[A_sharded.shard_sizes[0],
                                  B_sharded.shard_sizes[1], 1, 1],
                     write_header=True)
    C_sharded = BigMatrix("matmul_test_C", shape=C.shape,
                          shard_sizes=shard_sizes, write_header=True)
    b_fac = 2
    config = npw.config.default()
    compiled_matmul = frontend.lpcompile(matmul)
    program = compiled_matmul(A_sharded, B_sharded, A_sharded.num_blocks(0),
                              A_sharded.num_blocks(1), B_sharded.num_blocks(1),
                              b_fac, Temp, C_sharded)
    program_executable = lp.LambdaPackProgram(program, config=config)
    program_executable.start()
    job_runner.lambdapack_run(program_executable, pipeline_width=1,
                              idle_timeout=5, timeout=60)
    executor = fs.ThreadPoolExecutor(1)
    all_futures = [executor.submit(job_runner.lambdapack_run,
                                   program_executable, pipeline_width=1,
                                   idle_timeout=5, timeout=60)]
    program_executable.wait()
    program_executable.free()
    C_remote = C_sharded.numpy()
    assert np.allclose(C, C_remote)

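# The 4-D Temp matrix above suggests the compiled matmul combines the partial
# block products with a tree reduction of branching factor b_fac rather than
# a serial sum. A plain-Python sketch of that reduction pattern, illustrative
# only (not the lambdapack internals):
def tree_reduce(parts, b_fac):
    """Sum a list of arrays in groups of b_fac until one array remains."""
    assert b_fac >= 2
    while len(parts) > 1:
        parts = [sum(parts[i:i + b_fac]) for i in range(0, len(parts), b_fac)]
    return parts[0]

# e.g. tree_reduce([A1, A2, A3, A4], 2) sums pairs, then the pair of pairs;
# with serverless workers each level of the tree can run in parallel.
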
def test_multiple_shard_transpose_matrix(self):
    X = np.random.randn(128, 128)
    shard_sizes = tuple(map(int, np.array(X.shape) / 2))
    X_sharded = BigMatrix("test_1", shape=X.shape, shard_sizes=shard_sizes)
    shard_matrix(X_sharded, X)
    X_sharded_local = X_sharded.T.numpy()
    X_sharded.free()
    assert np.all(X.T == X_sharded_local)

def test_single_multiaxis(self):
    X = np.random.randn(8, 8, 8, 8)
    X_sharded = BigMatrix("multiaxis", shape=X.shape, shard_sizes=X.shape)
    print("BLOCK_IDXS", X_sharded.block_idxs)
    shard_matrix(X_sharded, X)
    print("BLOCK_IDXS_EXIST", X_sharded.block_idxs_exist)
    X_sharded_local = X_sharded.numpy()
    X_sharded.free()
    assert np.all(X_sharded_local == X)

def test_single_shard_matrix_multiply(self):
    X = np.random.randn(16, 16)
    X_sharded = BigMatrix("gemm_test_0", shape=X.shape, shard_sizes=X.shape)
    shard_matrix(X_sharded, X)
    pwex = pywren.lambda_executor()
    XXT_sharded = binops.gemm(pwex, X_sharded, X_sharded.T,
                              X_sharded.bucket, 1)
    XXT_sharded_local = XXT_sharded.numpy()
    XXT = X.dot(X.T)
    X_sharded.free()
    XXT_sharded.free()
    assert np.all(np.isclose(XXT, XXT_sharded_local))
    os.system("rm -rf /dev/shm/*")

def test_multiple_shard_cholesky(self):
    np.random.seed(1)
    size = 128
    shard_size = 64
    print("Generating X")
    executor = fs.ProcessPoolExecutor(mp.cpu_count())
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + np.eye(X.shape[0])
    y = np.random.randn(size)
    pwex = pywren.default_executor()
    print("sharding A")
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigSymmetricMatrix("cholesky_test_A", shape=A.shape,
                                   shard_sizes=shard_sizes)
    y_sharded = BigMatrix("cholesky_test_y", shape=y.shape,
                          shard_sizes=shard_sizes[:1])
    # Free any leftover blocks from previous runs, then recreate the handles.
    A_sharded.free()
    y_sharded.free()
    A_sharded = BigSymmetricMatrix("cholesky_test_A", shape=A.shape,
                                   shard_sizes=shard_sizes)
    y_sharded = BigMatrix("cholesky_test_y", shape=y.shape,
                          shard_sizes=shard_sizes[:1])
    t = time.time()
    shard_matrix(A_sharded, A, executor=executor)
    e = time.time()
    print("A_sharded time", e - t)
    t = time.time()
    shard_matrix(y_sharded, y, executor=executor)
    e = time.time()
    print("y_sharded time", e - t)
    print("Computing LL^{T}")
    L = np.linalg.cholesky(A)
    print(L)
    L_sharded = uops.chol(pwex, A_sharded)
    L_sharded_local = L_sharded.numpy()
    print(L_sharded_local)
    print(L)
    print("L_{infty} difference ", np.max(np.abs(L_sharded_local - L)))
    assert np.allclose(L, L_sharded_local)
    os.system("rm -rf /dev/shm/*")

def test_if_static(self):
    X = np.random.randn(64, 64)
    shard_sizes = (int(X.shape[0] / 8), X.shape[1])
    X_sharded = BigMatrix("if_test", shape=X.shape, shard_sizes=shard_sizes,
                          write_header=True)
    O_sharded = BigMatrix("if_test_output", shape=X.shape,
                          shard_sizes=shard_sizes, write_header=True)
    X_sharded.free()
    shard_matrix(X_sharded, X)
    f = frontend.lpcompile(f1_if)
    p = f(X_sharded, O_sharded, X_sharded.num_blocks(0))
    assert p.starters == p.find_terminators()
    # Even iterations should resolve statically to the first expression
    # (the if branch), odd iterations to the second (the else branch).
    for s, var_values in p.starters:
        if var_values['i'] % 2 == 0:
            assert s == 0
        else:
            assert s == 1

def test_cholesky_lambda():
    X = np.random.randn(64, 64)
    A = X.dot(X.T) + np.eye(X.shape[0])
    shard_size = 16
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    futures = run_program_in_pywren(program)
    program.start()
    program.wait()
    program.free()
    L_sharded = meta["outputs"][0]
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert np.allclose(L_npw, L)
    print("great success!")

def test_sharded_matrix_row_put_big(self):
    s = 2
    X = np.arange(0, 2048 * 2048 * s).reshape(2048, 2048 * s)
    X_sharded = BigMatrix("row_put_test", shape=X.shape,
                          shard_sizes=[2048, 2048])
    t = time.time()
    matrix_utils.put_row(X_sharded, X, 0)
    e = time.time()
    print(X.shape)
    # 8 bytes per element.
    print("Effective GB/s", (2048 * 2048 * s * 8) / (1e9 * (e - t)))
    print("Upload Time", e - t)
    t = time.time()
    row_0 = matrix_utils.get_row(X_sharded, 0)
    e = time.time()
    X_sharded.free()
    os.system("rm -rf /dev/shm/*")
    assert np.all(X == row_0)

def test_sharded_multiaxis(self):
    X = np.random.randn(8, 8, 8, 8)
    shard_sizes = tuple(map(int, np.array(X.shape) / 2))
    X_sharded = BigMatrix("multiaxis_2", shape=X.shape,
                          shard_sizes=shard_sizes)
    shard_matrix(X_sharded, X)
    print("BLOCK_IDXS", X_sharded.block_idxs)
    X_sharded_local = X_sharded.numpy()
    print(X_sharded.free())
    assert np.all(X_sharded_local == X)

def test_single_shard_gemv(self):
    X = np.random.randn(16, 16)
    Y = np.random.randn(16)
    X_sharded = BigMatrix("gemv_test_0", shape=X.shape, shard_sizes=X.shape)
    Y_sharded = BigMatrix("gemv_test_2", shape=Y.shape, shard_sizes=Y.shape)
    shard_matrix(X_sharded, X)
    # Upload Y as well so gemv reads real data.
    shard_matrix(Y_sharded, Y)
    pwex = pywren.default_executor()
    XY_sharded = binops.gemv(pwex, X_sharded, Y_sharded, X_sharded.bucket, 1)
    XY_sharded_local = XY_sharded.numpy()
    XY = X.dot(Y)
    print(XY)
    print(XY_sharded_local)
    X_sharded.free()
    XY_sharded.free()
    assert np.all(np.isclose(XY, XY_sharded_local))

def test_multiple_shard_matrix_multiply_symmetric_2(self):
    X = np.random.randn(16, 16)
    shard_sizes = [8, 16]
    X_sharded = BigMatrix("gemm_test_1", shape=X.shape,
                          shard_sizes=shard_sizes)
    shard_matrix(X_sharded, X)
    pwex = pywren.lambda_executor()
    XTX_sharded = binops.gemm(pwex, X_sharded.T, X_sharded,
                              X_sharded.bucket, 1, local=True)
    XTX_sharded_local = XTX_sharded.numpy()
    XTX = X.T.dot(X)
    X_sharded.free()
    XTX_sharded.free()
    assert np.all(np.isclose(XTX, XTX_sharded_local))
    os.system("rm -rf /dev/shm/*")

def test_single_shard_matrix_multiply(self):
    fexec = lithops.FunctionExecutor(runtime='jsampe/numpy-lithops:04',
                                     log_level='DEBUG')
    X = np.random.randn(16, 16)
    X_sharded = BigMatrix("gemm_test_0", shape=X.shape, shard_sizes=X.shape,
                          storage=fexec.storage)
    shard_matrix(X_sharded, X)
    XX_sharded = binops.gemm(fexec, X_sharded, X_sharded.T,
                             X_sharded.bucket, 1)
    XX_sharded_local = XX_sharded.numpy()
    XX = X.dot(X.T)
    X_sharded.free()
    XX_sharded.free()
    assert np.all(np.isclose(XX, XX_sharded_local))
    os.system("rm -rf /dev/shm/*")

def test_multiple_shard_matrix_multiply(self):
    X = np.random.randn(16, 16)
    Y = np.random.randn(16, 16)
    shard_sizes = tuple(map(int, np.array(X.shape) / 2))
    X_sharded = BigMatrix("gemm_test_1", shape=X.shape,
                          shard_sizes=shard_sizes)
    Y_sharded = BigMatrix("gemm_test_2", shape=Y.shape,
                          shard_sizes=shard_sizes)
    shard_matrix(X_sharded, X)
    shard_matrix(Y_sharded, Y)
    pwex = pywren.lambda_executor()
    XY_sharded = binops.gemm(pwex, X_sharded, Y_sharded, X_sharded.bucket, 1)
    XY_sharded_local = XY_sharded.numpy()
    XY = X.dot(Y)
    X_sharded.free()
    Y_sharded.free()
    XY_sharded.free()
    assert np.all(np.isclose(XY, XY_sharded_local))
    os.system("rm -rf /dev/shm/*")

def test_multiple_shard_matrix_gemv(self):
    X = np.random.randn(16, 16)
    Y = np.random.randn(16, 1)
    shard_sizes_0 = tuple(map(int, np.array(X.shape) / 2))
    shard_sizes_1 = (Y.shape[0], 1)
    X_sharded = BigMatrix("gemv_test_1", shape=X.shape,
                          shard_sizes=shard_sizes_0)
    Y_sharded = BigMatrix("gemv_test_2", shape=Y.shape,
                          shard_sizes=shard_sizes_1)
    shard_matrix(X_sharded, X)
    shard_matrix(Y_sharded, Y)
    pwex = pywren.default_executor()
    XY_sharded = binops.gemv(pwex, X_sharded, Y_sharded, X_sharded.bucket, 1)
    XY_sharded_local = XY_sharded.numpy()
    XY = X.dot(Y)
    X_sharded.free()
    Y_sharded.free()
    XY_sharded.free()
    assert np.all(np.isclose(XY, XY_sharded_local))

def test_cholesky_timeouts():
    X = np.random.randn(64, 64)
    A = X.dot(X.T) + np.eye(X.shape[0])
    shard_size = 8
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("job_runner_test", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    executor = fs.ProcessPoolExecutor(1)
    print("starting program")
    program.start()
    future = executor.submit(job_runner.lambdapack_run, program,
                             timeout=10, idle_timeout=6)
    # Give the runner time to hit its 10-second timeout and shut down.
    time.sleep(15)
    print("checking runner count")
    # After the worker times out, no runners should be registered as up.
    assert int(program.get_up()) == 0
    program.free()
    print("great success!")

def test_cholesky_multi_repeats():
    ''' Insert repeated instructions into the PC queue and check that the
        duplicates do not cause double increments. '''
    print("RUNNING MULTI")
    np.random.seed(1)
    size = 256
    shard_size = 30
    repeats = 15
    total_repeats = 150
    np.random.seed(2)
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + size * np.eye(X.shape[0])
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A_{0}".format(int(time.time())),
                          shape=A.shape, shard_sizes=shard_sizes,
                          write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    program, meta = cholesky(A_sharded)
    states = compiler.walk_program(program.program.remote_calls)
    L_sharded = meta["outputs"][0]
    L_sharded.free()
    print("PROGRAM HASH", program.hash)
    cores = 1
    program.start()
    jobs = []
    for c in range(cores):
        p = mp.Process(target=job_runner.lambdapack_run, args=(program,),
                       kwargs={'timeout': 3600, 'pipeline_width': 3})
        jobs.append(p)
        p.start()
    np.random.seed(0)
    while program.program_status() == lp.PS.RUNNING:
        sqs = boto3.resource('sqs', region_name=program.control_plane.region)
        time.sleep(0.5)
        waiting = 0
        running = 0
        for i, queue_url in enumerate(program.queue_urls):
            client = boto3.client('sqs')
            print("Priority {0}".format(i))
            attrs = client.get_queue_attributes(
                QueueUrl=queue_url,
                AttributeNames=['ApproximateNumberOfMessages',
                                'ApproximateNumberOfMessagesNotVisible'
                                ])['Attributes']
            print(attrs)
            waiting += int(attrs["ApproximateNumberOfMessages"])
            running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
        print("SQS QUEUE STATUS Waiting {0}, Running {1}".format(
            waiting, running))
        # Maliciously re-enqueue instructions that were already issued; a
        # correct runner must treat the duplicates as no-ops.
        for i in range(repeats):
            p = program.get_progress()
            if p is None:
                continue
            p = int(p)
            pc = int(np.random.choice(min(p, len(states)), 1))
            node = states[pc]
            queue = sqs.Queue(program.queue_urls[0])
            total_repeats -= 1
            if total_repeats > 0:
                print("Maliciously enqueueing node ", pc, node, total_repeats)
                queue.send_message(MessageBody=json.dumps(node))
            time.sleep(1)
    # for p in jobs:
    #     p.join()
    program.wait()
    program.free()
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert np.allclose(L_npw, L)

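# A minimal sketch of the invariant the repeat tests exercise: dependency
# edges must be counted at most once, so delivering the same SQS message
# twice (SQS is at-least-once) cannot double-increment a child's ready
# counter. Names here are illustrative, not the lambdapack internals.
class EdgeCounter:
    def __init__(self, indegrees):
        self.indegrees = indegrees           # node -> number of parents
        self.done_edges = set()              # (parent, child) pairs seen
        self.counts = {n: 0 for n in indegrees}

    def mark_edge(self, parent, child):
        """Record parent -> child completion; duplicates are no-ops."""
        if (parent, child) in self.done_edges:
            return False                     # repeated message: ignore
        self.done_edges.add((parent, child))
        self.counts[child] += 1
        return self.counts[child] == self.indegrees[child]  # child ready?

# e.g. with indegrees {"b": 1}, calling mark_edge("a", "b") twice readies
# "b" exactly once, which is what the repeat tests assert end to end.
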
def test_cholesky_multi_repeats(self):
    ''' Insert repeated instructions into the PC queue and check that the
        duplicates do not cause double increments. '''
    print("RUNNING MULTI")
    np.random.seed(1)
    size = 256
    shard_size = 30
    repeats = 15
    total_repeats = 150
    np.random.seed(2)
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + size * np.eye(X.shape[0])
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A_{0}".format(int(time.time())),
                          shape=A.shape, shard_sizes=shard_sizes,
                          write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    instructions, trailing, L_sharded = compiler._chol(A_sharded)
    all_nodes = instructions.unroll_program()
    L_sharded.free()
    pwex = pywren.default_executor()
    executor = pywren.lambda_executor
    config = npw.config.default()
    pywren_config = pwex.config
    program = lp.LambdaPackProgram(instructions, executor=executor,
                                   pywren_config=pywren_config,
                                   config=config, eager=True)
    print("PROGRAM HASH", program.hash)
    cores = 1
    program.start()
    jobs = []
    for c in range(cores):
        p = mp.Process(target=job_runner.lambdapack_run, args=(program,),
                       kwargs={'timeout': 3600, 'pipeline_width': 5})
        jobs.append(p)
        p.start()
    np.random.seed(0)
    try:
        while program.program_status() == lp.PS.RUNNING:
            sqs = boto3.resource('sqs',
                                 region_name=program.control_plane.region)
            time.sleep(0.5)
            waiting = 0
            running = 0
            for i, queue_url in enumerate(program.queue_urls):
                client = boto3.client('sqs')
                print("Priority {0}".format(i))
                attrs = client.get_queue_attributes(
                    QueueUrl=queue_url,
                    AttributeNames=['ApproximateNumberOfMessages',
                                    'ApproximateNumberOfMessagesNotVisible'
                                    ])['Attributes']
                print(attrs)
                waiting += int(attrs["ApproximateNumberOfMessages"])
                running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
            print("SQS QUEUE STATUS Waiting {0}, Running {1}".format(
                waiting, running))
            for i in range(repeats):
                p = program.get_progress()
                if p is None:
                    continue
                p = int(p)
                pc = int(np.random.choice(min(p, len(all_nodes)), 1))
                node = all_nodes[pc]
                queue = sqs.Queue(program.queue_urls[0])
                total_repeats -= 1
                if total_repeats > 0:
                    print("Maliciously enqueueing node ", pc, node,
                          total_repeats)
                    queue.send_message(MessageBody=json.dumps(node))
                time.sleep(1)
        # for p in jobs:
        #     p.join()
    except Exception:
        pass
    print("Program status")
    print(program.program_status())
    # Sanity-check the dependency bookkeeping: despite the duplicate
    # messages, each node's edge sum should equal its indegree.
    for node in all_nodes:
        edge_sum = lp.get(program.control_plane.client,
                          program._node_edge_sum_key(*node))
        if edge_sum is None:
            edge_sum = 0
        edge_sum = int(edge_sum)
        parents = program.program.get_parents(*node)
        indegree = len(parents)
        node_status = program.get_node_status(*node)
        redis_str = "Node: {0}, Edge Sum: {1}, Indegree: {2}, Node Status {3}".format(
            node, edge_sum, indegree, node_status)
        if edge_sum != indegree:
            print(redis_str)
            for p in parents:
                p_status = program.get_node_status(*p)
                edge_key = program._edge_key(p[0], p[1], node[0], node[1])
                edge_value = lp.get(program.control_plane.client, edge_key)
                child_str = ("Parent Node: {0}, Parent Status: {1}, "
                             "Edge Key: {2}").format(p, p_status, edge_value)
                print(child_str)
        # assert(edge_sum == indegree)
    program.free()
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    assert np.allclose(L_npw, L)

def test_cholesky_multi_failures(self):
    ''' Repeatedly kill running workers mid-program and check that the
        program still completes with a correct factorization. '''
    print("RUNNING MULTI")
    np.random.seed(1)
    size = 256
    shard_size = 64
    failures = 4
    np.random.seed(1)
    print("Generating X")
    X = np.random.randn(size, 128)
    print("Generating A")
    A = X.dot(X.T) + size * np.eye(X.shape[0])
    shard_sizes = (shard_size, shard_size)
    A_sharded = BigMatrix("cholesky_test_A", shape=A.shape,
                          shard_sizes=shard_sizes, write_header=True)
    A_sharded.free()
    shard_matrix(A_sharded, A)
    instructions, trailing, L_sharded = compiler._chol(A_sharded)
    pwex = pywren.default_executor()
    executor = pywren.lambda_executor
    pywren_config = pwex.config
    config = npw.config.default()
    program = lp.LambdaPackProgram(instructions, executor=executor,
                                   pywren_config=pywren_config,
                                   config=config, eager=False)
    cores = 16
    program.start()
    jobs = []
    for c in range(cores):
        p = mp.Process(target=job_runner.lambdapack_run, args=(program,),
                       kwargs={'timeout': 3600, 'pipeline_width': 4})
        jobs.append(p)
        p.start()
    np.random.seed(0)
    while program.program_status() == lp.PS.RUNNING:
        sqs = boto3.resource('sqs', region_name=program.control_plane.region)
        waiting = 0
        running = 0
        for i, queue_url in enumerate(program.queue_urls):
            client = boto3.client('sqs')
            print("Priority {0}".format(i))
            attrs = client.get_queue_attributes(
                QueueUrl=queue_url,
                AttributeNames=['ApproximateNumberOfMessages',
                                'ApproximateNumberOfMessagesNotVisible'
                                ])['Attributes']
            print(attrs)
            waiting += int(attrs["ApproximateNumberOfMessages"])
            running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
        print("SQS QUEUE STATUS Waiting {0}, Running {1}".format(
            waiting, running))
        time.sleep(10)
        if np.random.random() > 0.65:
            for i in range(failures):
                core = int(np.random.choice(cores, 1)[0])
                print("Maliciously killing a job!")
                jobs[core].terminate()
                # Replace the killed worker so the program can still finish.
                p = mp.Process(target=job_runner.lambdapack_run,
                               args=(program,),
                               kwargs={'timeout': 3600, 'pipeline_width': 4})
                p.start()
                jobs[core] = p
    for p in jobs:
        p.join()
    print("Program status")
    print(program.program_status())
    program.free()
    L_npw = L_sharded.numpy()
    L = np.linalg.cholesky(A)
    print(L_npw)
    print(L)
    print("MAX ", np.max(np.abs(L - L_npw)))
    assert np.allclose(L_npw, L)