def gpu_dag_transfers(dag):
    """ Edges/jobs for moving data from gpu version dag to cpu version

    For every theano.Variable among ``inputs_of(dag)`` a ``GpuFromHost`` job
    is created; it is keyed by a clone of the first element returned by
    ``cpu_to_gpu_var`` (presumably the GPU counterpart of the variable) and
    takes the CPU variable as its only argument.

    For every theano.Variable among ``outputs_of(dag)`` a ``HostFromGpu`` job
    is created; it is keyed by the CPU variable itself and takes such a clone
    as its only argument.

    Illustrative example (not an executable doctest)::

        dag = {c: {'fn': dot, 'args': (a, b)}}
        gpu_dag_transfers(dag)
        {c:     {'fn': HostFromGpu(), 'args': (gpu_c,)},
         gpu_a: {'fn': GpuFromHost(), 'args': (a, )},
         gpu_b: {'fn': GpuFromHost(), 'args': (b, )}}
    """
    # Host -> GPU transfers, one per dag input.
    uploads = {}
    for host_var in inputs_of(dag):
        if isinstance(host_var, theano.Variable):
            device_var = cpu_to_gpu_var(host_var)[0].clone()
            uploads[device_var] = {'fn': GpuFromHost(), 'args': (host_var,)}

    # GPU -> host transfers, one per dag output.
    downloads = {}
    for host_var in outputs_of(dag):
        if isinstance(host_var, theano.Variable):
            device_var = cpu_to_gpu_var(host_var)[0].clone()
            downloads[host_var] = {'fn': HostFromGpu(), 'args': (device_var,)}

    return merge(uploads, downloads)
def gpu_dag(dag):
    """ The GPU version of a CPU dag - including gpu communication

    inputs:
        dag - dict mapping {output: {'fn': op, 'args': inputs}}; jobs whose
              'fn' satisfies ``isrecv``/``issend`` are communication jobs
    outputs:
        A single dag made of
          * every non-communication job translated through ``gpu_job``
          * a ``GpuFromHost`` job for each recv job in ``dag``
          * a ``HostFromGpu`` job for each send job in ``dag``
    """
    # Only the communication-free dag is used; the other two values returned
    # by non_comm_dag are not needed here.
    nc_dag, _sent, _recvd = non_comm_dag(dag)

    # Each received variable becomes a host -> GPU transfer.
    recvs = {cpu_to_gpu_var(var)[0].clone(): {'fn': GpuFromHost(),
                                              'args': (var,)}
             for var, it in dag.items() if isrecv(it['fn'])}

    # Each sent variable (first argument of the send job) becomes a
    # GPU -> host transfer.
    sends = {it['args'][0]: {'fn': HostFromGpu(),
                             'args': (cpu_to_gpu_var(it['args'][0])[0].clone(),)}
             for it in dag.values() if issend(it['fn'])}

    def gpu_item(item):
        # Tuple parameters (``def f((k, v))``) are a SyntaxError on Python 3,
        # so the (key, value) pair is unpacked explicitly.
        k, v = item
        gi, gop, go = gpu_job(v['args'], v['fn'], (k,))
        return (go[0], {'fn': gop, 'args': gi})

    gdag = dict(map(gpu_item, nc_dag.items()))
    return merge(gdag, recvs, sends)
def _comm_dag():
    """ Build a tiny theano dag (y = x + x) and a variant with comm jobs

    Returns ``(dag, comm_dag, inputs, outputs)`` where ``dag``, ``inputs`` and
    ``outputs`` come from ``theano_graph_to_dag`` and ``comm_dag`` is ``dag``
    merged with one ("send", "A") job and one ("recv", "A") job.
    """
    x = theano.tensor.matrix('x')
    y = x + x
    y.name = 'y'

    dag, inputs, outputs = dicdag.theano.theano_graph_to_dag((x,), (y,))

    # x arrives from machine "A"; y is sent back to machine "A".
    recv_job = {x: {'fn': ("recv", "A"), 'args': ()}}
    send_job = {'t_y': {'fn': ("send", "A"), 'args': (y,)}}

    return dag, merge(dag, send_job, recv_job), inputs, outputs
def merge_dags(dags):
    """ Merge dags - remove send/recvs between them

    input:
        dags - dict mapping {machine: dag}
    output:
        Just a single dag. Send and recv jobs whose peer (``fn[1]``) is one
        of the machines in ``dags`` are dropped; all other jobs are kept.
    """
    combined = merge(*dags.values())

    kept = {}
    for key, job in combined.items():
        fn = job['fn']
        # Communication between two of the merged machines is now internal.
        if issend(fn) and fn[1] in dags:
            continue
        if isrecv(fn) and fn[1] in dags:
            continue
        kept[key] = job
    return kept
def merge_gpu_dags(dags, machines):
    """ Fold the dag of every GPU machine into the dag of its host

    inputs:
        dags     - dict mapping {machine: dag}
        machines - dict mapping {machine: {'type': ..., 'host': ...}};
                   'host' is read only for machines whose 'type' is 'gpu'
    outputs:
        dict mapping {machine: dag} holding
          * for each GPU machine present in both ``machines`` and ``dags``,
            an entry keyed by its host with the result of
            ``merge_cpu_gpu_dags``
          * every non-GPU dag that was not replaced by such a merge
    """
    def is_gpu(m):
        return machines[m]['type'] == 'gpu'

    def host(gpu_name):
        return machines[gpu_name]['host']

    # NOTE(review): entries are keyed by host, so if two GPUs share a host
    # only one merged dag survives - confirm this cannot happen.
    merged = {host(g): merge_cpu_gpu_dags(host(g), dags[host(g)], g, dags[g])
              for g in intersection(machines, dags) if is_gpu(g)}

    # Hosts that were merged above must not be overwritten by their old dag.
    untouched = {m: dags[m] for m in dags
                 if not is_gpu(m) and m not in merged}

    return merge(merged, untouched)
def commtime_dict(network, *args, **kwargs):
    """ Estimate communication times within a network

    Currently supported types: 'mpi', 'togpu', 'fromgpu'

    inputs:
        network - dict like {(A, B): {'type': 'mpi'}}
        *args, **kwargs - forwarded unchanged to every estimator
    outputs:
        network - dict like {(A, B): {'type': 'mpi', 'intercept':1, 'slope':2}}
    """
    # Run each registered estimator and combine their partial networks.
    partial_networks = []
    for estimator in commtime_dict_fns:
        partial_networks.append(estimator(network, *args, **kwargs))
    return merge(*partial_networks)
def test_start_end_jobs():
    """ start/end jobs merge cleanly around a one-op theano dag """
    x = theano.tensor.matrix('x')
    y = theano.tensor.dot(x, x)
    y.name = 'y'

    dag, dinputs, doutputs = dicdag.theano.theano_graph_to_dag((x,), (y,))
    [dx] = dinputs
    [dy] = doutputs
    assert dx.name == x.name
    assert dy.name == y.name

    dag2 = merge(start_jobs(dinputs), end_jobs(doutputs), dag)
    assert dy in dag2

    def consumes_only_dy(job):
        # A job whose single argument is the dag output.
        return len(job['args']) == 1 and job['args'][0] == dy

    assert any(consumes_only_dy(job) for job in dag2.values())

    # Smoke check: the conversion must not raise on the merged dag.
    dicdag.unidag.dag_to_unidag(dag2)
def merge_dags(dags):
    """ Merge dags - remove send/recvs between them

    input:
        dags - dict mapping {machine: dag}
    output:
        Just a single dag, without the send/recv jobs that target one of the
        machines named in ``dags``.
    """
    def is_internal(fn):
        # True for a send or recv whose peer machine is part of this merge.
        return ((issend(fn) and fn[1] in dags) or
                (isrecv(fn) and fn[1] in dags))

    everything = merge(*dags.values())
    result = {}
    for name, job in everything.items():
        if not is_internal(job['fn']):
            result[name] = job
    return result
def merge_cpu_gpu_dags(cpu_name, cdag, gpu_name, gdag):
    """ Merge a cpu and gpu dag - convert the gpu dag first

    inputs:
        cpu_name - name of the host machine
        cdag     - dag of the host machine
        gpu_name - name of the gpu machine
        gdag     - dag of the gpu machine (still expressed with cpu jobs)
    outputs:
        A single dag, unified by name against the variables of ``cdag`` and
        of the non-communication part of ``gdag``.
    raises:
        Exception - if ``gdag`` sends to / receives from anyone but
                    ``cpu_name``, or if a non-string input/output of the
                    result has 'gpu' in its name
    """
    # The GPU may only talk to its own host.
    for job in gdag.values():
        fn = job['fn']
        if (issend(fn) or isrecv(fn)) and fn[1] != cpu_name:
            raise Exception("The GPU wants to communicate to someone who isn't the"
                            " host. We haven't yet built this functionality. TODO")

    parts = {cpu_name: cdag,
             gpu_name: gpu_dag(gdag),
             "trans": gpu_dag_transfers(gdag)}
    dag = merge_dags(parts)

    known_vars = tuple(variables(merge(cdag, non_comm_dag(gdag)[0])))
    result = unify_by_name(dag, known_vars)

    # No gpu variable may be left dangling at the boundary of the result.
    boundary = inputs_of(result).union(outputs_of(result))
    for x in boundary:
        if not isinstance(x, str) and 'gpu' in x.name:
            raise Exception("GPU inputs/outputs")
    return result
def test_start_end_jobs():
    """ Merging start_jobs/end_jobs with a dag keeps its output reachable """
    x = theano.tensor.matrix('x')
    y = theano.tensor.dot(x, x)
    y.name = 'y'

    graph = dicdag.theano.theano_graph_to_dag((x, ), (y, ))
    dag, dinputs, doutputs = graph
    dx, = dinputs
    dy, = doutputs
    assert dx.name == x.name
    assert dy.name == y.name

    dag2 = merge(start_jobs(dinputs), end_jobs(doutputs), dag)
    assert dy in dag2

    # Some job must take dy as its one and only argument.
    found = False
    for job in dag2.values():
        if len(job['args']) == 1 and job['args'][0] == dy:
            found = True
            break
    assert found

    # The merged dag must still convert without raising.
    dicdag.unidag.dag_to_unidag(dag2)
def commtime_dict_mpi(network, nbytes=[10, 100, 1000, 10000]):
    """ Estimate communication times for the 'mpi' links of a network

    inputs
        network - dict like {(A, B): {'type': 'mpi'}}
        nbytes  - iterable of byte counts (passed through unmodified by this
                  function)
    outputs
        network - dict like {(A, B): {'type': 'mpi', 'intercept':1, 'slope':2}}
                  with one entry per key returned by ``model_dict_group``
    """
    # TODO: This is incorrect. We're assuming that the network is a clique
    # Compare with ``==``: ``is 'mpi'`` tests object identity, which only
    # works when the string happens to be interned.
    hosts = {host
             for (send, recv) in network
             for host in (send, recv)
             if network[send, recv]['type'] == 'mpi'}

    performance = model_dict_group(comm_times_group(nbytes, hosts))

    # inject new information into network dict
    return {key: merge(network[key], performance[key]) for key in performance}
def merge_gpu_dags(dags, machines):
    """ Merge each GPU machine's dag into the dag of its host machine

    inputs:
        dags     - dict mapping {machine: dag}
        machines - dict mapping {machine: {'type': ..., 'host': ...}};
                   'host' is read only for machines whose 'type' is 'gpu'
    outputs:
        dict mapping {machine: dag}: one ``merge_cpu_gpu_dags`` result per
        GPU found in both ``machines`` and ``dags`` (keyed by the GPU's
        host), plus every non-GPU dag whose machine was not merged.
    """
    def is_gpu(m):
        return machines[m]['type'] == 'gpu'

    def host(gpu_name):
        return machines[gpu_name]['host']

    # NOTE(review): keyed by host - a second GPU on the same host would
    # overwrite the first merged dag; confirm hosts have at most one GPU.
    merged_dags = {
        host(g): merge_cpu_gpu_dags(host(g), dags[host(g)], g, dags[g])
        for g in intersection(machines, dags) if is_gpu(g)
    }

    # Keep the remaining CPU dags; merged hosts already carry their new dag.
    old_dags = {
        m: dags[m]
        for m in dags if not is_gpu(m) and m not in merged_dags
    }

    return merge(merged_dags, old_dags)
def merge_cpu_gpu_dags(cpu_name, cdag, gpu_name, gdag):
    """ Merge a cpu and gpu dag - convert the gpu dag first

    The gpu dag is converted with ``gpu_dag``, combined with the host dag and
    the transfer jobs from ``gpu_dag_transfers`` through ``merge_dags``, and
    finally unified by name.

    Raises Exception when the gpu dag communicates with a machine other than
    ``cpu_name``, or when a non-string input/output of the result has 'gpu'
    in its name.
    """
    def talks_to_stranger(fn):
        # A send/recv whose peer is not the host machine.
        return (issend(fn) or isrecv(fn)) and fn[1] != cpu_name

    def is_gpu_variable(x):
        return not isinstance(x, str) and 'gpu' in x.name

    if any(talks_to_stranger(job['fn']) for job in gdag.values()):
        raise Exception("The GPU wants to communicate to someone who isn't the"
                        " host. We haven't yet built this functionality. TODO")

    dag = merge_dags({
        cpu_name: cdag,
        gpu_name: gpu_dag(gdag),
        "trans": gpu_dag_transfers(gdag),
    })

    cpu_side_dag = merge(cdag, non_comm_dag(gdag)[0])
    result = unify_by_name(dag, tuple(variables(cpu_side_dag)))

    endpoints = inputs_of(result).union(outputs_of(result))
    if any(is_gpu_variable(x) for x in endpoints):
        raise Exception("GPU inputs/outputs")
    return result
def gpu_dag_transfers(dag):
    """ Edges/jobs for moving data from gpu version dag to cpu version

    Builds one ``GpuFromHost`` job per theano.Variable input of ``dag`` and
    one ``HostFromGpu`` job per theano.Variable output of ``dag``, and
    returns them merged into a single dict.

    Illustrative example (not an executable doctest)::

        dag = {c: {'fn': dot, 'args': (a, b)}}
        gpu_dag_transfers(dag)
        {c:     {'fn': HostFromGpu(), 'args': (gpu_c,)},
         gpu_a: {'fn': GpuFromHost(), 'args': (a, )},
         gpu_b: {'fn': GpuFromHost(), 'args': (b, )}}
    """
    def gpu_clone(var):
        # Clone of the first element returned by cpu_to_gpu_var
        # (presumably the GPU counterpart of ``var``).
        return cpu_to_gpu_var(var)[0].clone()

    def theano_vars(candidates):
        return [v for v in candidates if isinstance(v, theano.Variable)]

    recv_inputs = dict(
        (gpu_clone(v), {'fn': GpuFromHost(), 'args': (v,)})
        for v in theano_vars(inputs_of(dag)))

    send_outputs = dict(
        (v, {'fn': HostFromGpu(), 'args': (gpu_clone(v),)})
        for v in theano_vars(outputs_of(dag)))

    return merge(recv_inputs, send_outputs)
def gpu_dag(dag):
    """ The GPU version of a CPU dag - including gpu communication

    inputs:
        dag - dict mapping {output: {'fn': op, 'args': inputs}}, possibly
              containing send/recv jobs (as recognised by issend/isrecv)
    outputs:
        The merge of
          * the non-communication jobs, each converted with ``gpu_job``
          * one ``GpuFromHost`` job per recv job
          * one ``HostFromGpu`` job per send job
    """
    # The second and third values returned by non_comm_dag are not used.
    nc_dag, _sent, _recvd = non_comm_dag(dag)

    # recv job -> transfer of the received variable onto the GPU
    recvs = {cpu_to_gpu_var(var)[0].clone(): {'fn': GpuFromHost(),
                                              'args': (var,)}
             for var, it in dag.items() if isrecv(it['fn'])}

    # send job -> transfer of its first argument back to the host
    sends = {it['args'][0]: {'fn': HostFromGpu(),
                             'args': (cpu_to_gpu_var(it['args'][0])[0].clone(),)}
             for it in dag.values() if issend(it['fn'])}

    # Convert every remaining job. The former nested helper declared a tuple
    # parameter, ``def gpu_item((k, v))``, which Python 3 rejects; a plain
    # loop over the items does the same work.
    gdag = {}
    for out, job in nc_dag.items():
        gi, gop, go = gpu_job(job['args'], job['fn'], (out,))
        gdag[go[0]] = {'fn': gop, 'args': gi}

    return merge(gdag, recvs, sends)
def comptime_dict(i, o, input_shapes, niter, machines, machine_groups=None):
    """ Estimate record average computation times of tasks in a graph

    inputs:
        i, o           - the two ends of the computation, documented
                         upstream as a theano.FunctionGraph describing it
        input_shapes   - a dict {var: (shape)} for each input variable
        niter          - the number of times to run each computation
        machines       - a list of machines on which to run each computation
        machine_groups - an iterable of sets of identical machines
                         - only a representative of each set will be used
    outputs:
        A dict mapping {{set-of-identical-machines}: {apply-node : runtime}}

    See Also:
        make_runtime_function - converts the output of this function into a
                              - callable function
    """
    # Every registered estimator gets the same arguments; results are merged.
    per_estimator = []
    for estimator in comptime_dict_fns:
        per_estimator.append(
            estimator(i, o, input_shapes, niter, machines, machine_groups))
    return merge(*per_estimator)
def test_merge():
    """ merge of two dicts with disjoint keys holds the entries of both """
    first = {1: 2, 3: 4}
    second = {4: 5}
    expected = {1: 2, 3: 4, 4: 5}
    assert merge(first, second) == expected
def commtime_dict_togpu(network, nbytes=[10, 100, 1000, 10000]):
    """ Estimate communication times for the 'togpu' links of a network

    inputs:
        network - dict keyed by (host, gpu) pairs, e.g.
                  {(host, gpu): {'type': 'togpu'}}
        nbytes  - byte counts handed to ``comm_times_togpu``
    outputs:
        dict with one entry per 'togpu' link: the link's original dict
        merged with ``model_from_values(comm_times_togpu(nbytes, host))``
    """
    timed_links = {}
    for host, gpu in network:
        link = network[host, gpu]
        if link['type'] == 'togpu':
            model = model_from_values(comm_times_togpu(nbytes, host))
            timed_links[host, gpu] = merge(link, model)
    return timed_links
def test_merge():
    """ merge combines dicts whose keys do not overlap """
    combined = merge({1: 2, 3: 4}, {4: 5})
    assert combined == {1: 2, 3: 4, 4: 5}
def commtime_dict_fromgpu(network, nbytes=[10, 100, 1000, 10000]):
    """ Estimate communication times for the 'fromgpu' links of a network

    inputs:
        network - dict keyed by (gpu, host) pairs, e.g.
                  {(gpu, host): {'type': 'fromgpu'}}
        nbytes  - byte counts handed to ``comm_times_fromgpu``
    outputs:
        dict with one entry per 'fromgpu' link: the link's original dict
        merged with ``model_from_values(comm_times_fromgpu(nbytes, host))``
    """
    timed_links = {}
    for gpu, host in network:
        link = network[gpu, host]
        if link['type'] == 'fromgpu':
            model = model_from_values(comm_times_fromgpu(nbytes, host))
            timed_links[gpu, host] = merge(link, model)
    return timed_links
from ape.util import merge

# Machine names, grouped; a name containing 'gpu' denotes a GPU device.
# (presumably each group holds interchangeable machines - confirm)
machine_groups = (
    ('ankaa.cs.uchicago.edu', 'mimosa.cs.uchicago.edu'),
    ('baconost.cs.uchicago.edu',),
    ('baconost.cs.uchicago.edu-gpu',),
)

# Every machine whose name does not mention 'gpu' is a cpu machine.
cpu_machines = {name: {'type': 'cpu'}
                for members in machine_groups
                for name in members
                if 'gpu' not in name}

# A gpu machine's host is its own name with the '-gpu' suffix removed.
gpu_machines = {name: {'type': 'gpu', 'host': name.replace('-gpu', '')}
                for members in machine_groups
                for name in members
                if 'gpu' in name}

machines = merge(cpu_machines, gpu_machines)

# Every ordered pair of distinct non-gpu machines is linked through mpi.
mpi_network = {(src, dst): {'type': 'mpi'}
               for src in machines
               for dst in machines
               if src != dst and 'gpu' not in src and 'gpu' not in dst}

# The gpu is reachable only from/to its host.
gpu_network = {
    ('baconost.cs.uchicago.edu', 'baconost.cs.uchicago.edu-gpu'):
        {'type': 'togpu'},
    ('baconost.cs.uchicago.edu-gpu', 'baconost.cs.uchicago.edu'):
        {'type': 'fromgpu'},
}

network = merge(mpi_network, gpu_network)