def main(args: argparse.Namespace) -> None:
    """Run both parts of one day's solution on an input file and print them.

    The file named by ``args.file`` is read and split into lines, the module
    ``day{args.day}`` is imported, and its ``solve_part_one`` and
    ``solve_part_two`` callables are each wrapped with ``timed`` and called
    with the list of lines. Each result is printed together with its
    ``elapsed_time`` multiplied by 1000 and suffixed with ``ms``.

    Args:
        args: Parsed command-line arguments; ``file`` and ``day`` are read.

    Raises:
        SystemExit: With status 1 when ``args.file`` does not exist.
    """
    # Imported locally so this function does not depend on the ``exit``
    # helper that ``site`` injects for interactive sessions; that helper is
    # missing under ``python -S`` and in frozen/embedded interpreters.
    import sys

    try:
        with open(args.file) as f:
            contents = f.read().splitlines()
    except FileNotFoundError:
        print(f"File {args.file} does not exist, exiting...")
        sys.exit(1)

    day_module = importlib.import_module(f"day{args.day}")

    part_one_sol: TimedValue = timed(day_module.solve_part_one)(
        contents)  # type: ignore
    part_two_sol: TimedValue = timed(day_module.solve_part_two)(
        contents)  # type: ignore

    print(
        f'Part 1: {part_one_sol.value} ({part_one_sol.elapsed_time*1000:.3f}ms)'
    )
    print(
        f'Part 2: {part_two_sol.value} ({part_two_sol.elapsed_time*1000:.3f}ms)'
    )
from multiprocessing.dummy import Pool as ThreadPool from common import timed NUM_WORKERS = 8 # four cores def fib(n): """Calculate the n-th element of Fibonacci.""" a, b = 1, 1 while n > 1: a, b = b, a + b n -= 1 return a timed(fib, 5000) ELEMENTS = [i * 1000 for i in range(1, NUM_WORKERS + 1)] def fibs(): """Calculate elements of Fibonacci in series.""" return [fib(n) for n in ELEMENTS] def fibp(pool): """Calculate elements of Fibonacci in parallel.""" return list(pool.map(fib, ELEMENTS)) with ThreadPool(NUM_WORKERS) as pool:
import socket
from multiprocessing.dummy import Pool as ThreadPool

from common import timed


def get_random(n):
    """Ask the server on ``("::1", 8080)`` for a value and return it as an int.

    Sends ``n`` followed by a newline, reads up to 4096 bytes of reply and
    parses the stripped text as a decimal integer.

    Raises:
        OSError: If connecting, sending or receiving fails.
        ValueError: If the reply is not a valid integer.
    """
    # The context manager closes the socket on every path. This function is
    # called thousands of times from the thread pools below, so leaving each
    # connection open would exhaust file descriptors.
    with socket.create_connection(("::1", 8080)) as sock:
        sock.sendall(f"{n}\n".encode("utf-8"))
        # NOTE(review): a single recv() is not guaranteed to return the whole
        # reply; this assumes the server's answer arrives in one segment.
        resp = sock.recv(4096).decode("utf-8").strip()
    return int(resp)


print(f"got {get_random(20)}")
timed(get_random, 20)


def many_random(pool):
    """Issue 1024 ``get_random(6)`` calls via ``pool.map``; results are discarded."""
    inputs = [6] * 1024
    pool.map(get_random, inputs)


# Same workload at increasing thread-pool sizes.
with ThreadPool(32) as pool:
    timed(many_random, pool)

with ThreadPool(64) as pool:
    timed(many_random, pool)

with ThreadPool(128) as pool:
    timed(many_random, pool)
if __name__ == '__main__':
    # Script entry point: read alignments from the three files named in
    # FLAGS.parallel_corpus and attach a rule to every incoming edge of each
    # alignment's phrase decomposition forest.
    argv = common.parse_flags()

    # FLAGS.parallel_corpus supplies three paths, used in this order as the
    # first, second and third argument of reader_pharaoh below.
    # NOTE(review): presumably source text, target text and alignment file --
    # confirm against the flag definition.
    ffilename = FLAGS.parallel_corpus[0]
    efilename = FLAGS.parallel_corpus[1]
    afilename = FLAGS.parallel_corpus[2]
    # NOTE(review): these handles are never closed in the visible code.
    ffile = open(ffilename)
    efile = open(efilename)
    afile = open(afilename)
    alignments = alignment.Alignment.reader_pharaoh(ffile, efile, afile)
    hgs = []  # collects one forest per processed alignment
    rule_dumper = RuleDumper()  # not used in the visible part of the script
    # enumerate(..., 1) numbers the alignments starting from 1.
    for i, a in enumerate(timed(select(alignments)), 1):
        a.write_visual(logger.file)
        # Disabled debugging aid: restrict the run to alignment number 8.
        #if i != 8:
        #    continue
        #logger.writeln('--- %s ---' % i)
        #a.write_visual(logger.file)
        # `a` is rebound to the alignment object returned with the forest.
        hg, a = phrase_decomposition_forest(a)
        hgs.append(hg)
        for node in hg.topo_order():
            for edge in node.incoming:
                # The rule is built from the head node's four span fields,
                # the same four fields of every tail node, and the
                # alignment's word lists.
                edge.rule = make_rule(
                    [edge.head.fi, edge.head.fj, edge.head.ei, edge.head.ej],
                    [[x.fi, x.fj, x.ei, x.ej] for x in edge.tail],
                    a.fwords, a.ewords)
        #hg.show()
if flat_slice[j] < flat_slice[j + 1]: flat_slice[j], flat_slice[j + 1] = flat_slice[j + 1], flat_slice[j] if slice.size % 2 == 0: upper = int(slice.size / 2) lower = upper - 1 out[x, y, z] = (flat_slice[upper] + flat_slice[lower]) / 2 else: out[x, y, z] = flat_slice[slice.size // 2] im_array = np.array(Image.open("lab02img.bmp")).astype(np.uint8) with allocated_gpu() as gpu: threadsperblock = ( math.floor(math.sqrt(gpu.MAX_THREADS_PER_BLOCK / (2 * im_array.shape[2]))), math.floor(math.sqrt(gpu.MAX_THREADS_PER_BLOCK / (2 * im_array.shape[2]))), im_array.shape[2] ) blockspergrid = ( math.ceil(im_array.shape[0] / threadsperblock[0]), math.ceil(im_array.shape[1] / threadsperblock[1]) ) result = np.zeros(shape=(im_array.shape[0], im_array.shape[1], im_array.shape[2])) with cuda.profiling(), timed("on GPU"): calculate_gpu[blockspergrid, threadsperblock](im_array, result) Image.fromarray(result.astype(np.uint8), 'RGB').save('lab02result.bmp')
def calculate_gpu(A):
    """Replace A[i] with 4*A[i]**2 / (4*A[i]**2 - 1), one element per thread.

    Invoked below with launch syntax ``calculate_gpu[blocks, threads](array)``.
    NOTE(review): no CUDA JIT decorator is visible here -- confirm one is
    applied on the line above this definition.
    """
    i = cuda.grid(1)  # this thread's index within the 1-D launch grid
    # The block count is rounded up, so threads past the end must do nothing.
    if i < A.size:
        tmp = 4 * A[i] ** 2
        A[i] = tmp / (tmp - 1)


def calculate_cpu(A):
    """Apply the same in-place update as calculate_gpu with a plain loop."""
    for i in range(A.size):
        tmp = 4 * A[i] ** 2
        A[i] = tmp / (tmp - 1)


with allocated_gpu() as gpu:
    threadsperblock = gpu.MAX_THREADS_PER_BLOCK
    # Ceiling division: enough blocks to cover all N elements.
    blockspergrid = (N + (threadsperblock - 1)) // threadsperblock

    # Inputs are 1.0 .. N as float32. For these inputs the product of the
    # transformed values is the N-term Wallis product, which tends to pi/2.
    an_array = np.arange(N, dtype=np.float32) + 1
    with cuda.profiling(), timed("on GPU"):
        calculate_gpu[blockspergrid, threadsperblock](an_array)
    print(np.prod(an_array))

    # Fresh input for the CPU run, since both functions overwrite their argument.
    an_array = np.arange(N, dtype=np.float32) + 1
    with timed("on CPU"):
        calculate_cpu(an_array)
    with timed("to multiply"):
        print(np.prod(an_array))
A[i] = A[i] * A[i] * A[i] + A[i] * A[i] + A[i] def calculate_cpu(A): for i in range(A.size): A[i] = A[i] * A[i] * A[i] + A[i] * A[i] + A[i] with allocated_gpu() as gpu: threadsperblock = gpu.MAX_THREADS_PER_BLOCK blockspergrid = (N + (threadsperblock - 1)) // threadsperblock an_array = np.arange(N, dtype=np.float32) + 1 print(an_array[:10]) with timed("on GPU"): calculate_gpu[blockspergrid, threadsperblock](an_array) print(an_array[:10]) an_array = np.arange(N, dtype=np.float32) + 1 with timed("on CPU"): calculate_cpu(an_array) print(an_array[:10]) an_array = np.arange(N, dtype=np.float32) + 1 with cuda.profiling(): calculate_gpu[blockspergrid, threadsperblock](an_array)
import asyncio

from common import timed


async def get_random(n):
    """Ask the server on ``("::1", 8080)`` for a value and return it as an int.

    Writes ``n`` followed by a newline, waits for one reply line and parses
    the stripped text as a decimal integer.

    Raises:
        OSError: If connecting, writing or reading fails.
        ValueError: If the reply line is not a valid integer.
    """
    r, w = await asyncio.open_connection("::1", 8080)
    # try/finally so the writer is closed even when the request or the
    # parsing raises; otherwise a failing request leaks its connection.
    try:
        w.write(f"{n}\n".encode("utf-8"))
        await w.drain()
        resp = await r.readline()
        return int(resp.decode("utf-8").strip())
    finally:
        w.close()


timed(get_random, 20)


async def async_random(x):
    """Run ``x`` concurrent ``get_random(6)`` requests; results are discarded."""
    await asyncio.gather(*(get_random(6) for _ in range(x)))


# Same workload at increasing levels of concurrency.
timed(async_random, 128)
timed(async_random, 256)
timed(async_random, 512)
timed(async_random, 1024)
# timed(async_random, 2048)
# timed(async_random, 4096)