def create_send_recv_graph():
    ax_a = ng.make_axis(length=10, name='A')
    ax_b = ng.make_axis(length=15, name='B')
    axes = ng.make_axes([ax_a, ax_b])

    with ng.metadata(device=None, device_id=None,
                     transformer=None, host_transformer=None):
        from_node = ng.placeholder(axes)
        to_node = ng.placeholder(axes)
        send_x = SendOp(from_node=from_node)
        recv_x = RecvOp(to_node=to_node, send_node=send_x)

    with ng.metadata(parallel=ax_a, device=None, device_id=None,
                     transformer=None, host_transformer=None):
        x_plus_one = recv_x + 1
        send_x_plus_one = SendOp(from_node=x_plus_one)
        recv_x_plus_one = RecvOp(to_node=to_node, send_node=send_x_plus_one)

    with ng.metadata(device=None, device_id=None,
                     transformer=None, host_transformer=None):
        z = recv_x_plus_one + 2

    return z, recv_x, recv_x_plus_one, send_x, x_plus_one, from_node, send_x_plus_one
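# A minimal sketch of how the helper above can be consumed, mirroring the
# comm_path_exists checks in test_comm_path_exists below (the wrapper function
# name here is hypothetical):
def example_send_recv_graph_check():
    z, recv_x, recv_x_plus_one, send_x, x_plus_one, from_node, send_x_plus_one = \
        create_send_recv_graph()
    # a communication path should exist from the recv node back to its send node,
    # and from any op downstream of the recv node as well
    assert comm_path_exists(recv_x, send_x)
    assert comm_path_exists(x_plus_one, send_x)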
def test_one_dot_bprop_allreduce(config):
    c = config
    pytest.xfail(
        "GPU child transformers generate errors during AssignLayouts graph pass #1651")

    H_axis = ng.make_axis(length=4, name='height')
    W_axis = ng.make_axis(length=6, name='width')
    with ng.metadata(step='input'):
        X = ng.placeholder(axes=[H_axis, W_axis])
        target = ng.constant(1, axes=[W_axis])
    with ng.metadata(device_id=c['device_id'], parallel=W_axis):
        W = ng.variable(axes=[H_axis], initial_value=UniformInit(1, 1))
        dot = ng.dot(W, X)
        L = ng.squared_L2(target - dot, out_axes=())
        grad = ng.deriv(L, W)
        grad.metadata['reduce_func'] = c['func']
        update = (W - grad)

    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        out_comp = hetr.computation([update], X)
        result = out_comp(c['input'])
        np.testing.assert_array_equal(result, c['expected_result'])
def test_scatter_gather_graph(hetr_device):
    # Build the graph
    W = ng.make_axis(length=6, name='width')

    with ng.metadata(device=hetr_device, device_id='0'):
        x = ng.placeholder(())
        z = ng.placeholder(())

    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=W):
        y = ng.placeholder(())

    x_plus_z = x + z  # does not create a recv node
    x_plus_y = x + y  # creates a gather recv node

    # Build the graph metadata
    graph_ops = OrderedSet([x, y, z, x_plus_z, x_plus_y])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = [hetr_device, '0']
    graph_op_metadata[z] = [hetr_device, '0']
    graph_op_metadata[y] = [hetr_device, ('0', '1')]
    graph_op_metadata[x_plus_z] = [hetr_device, '0']
    graph_op_metadata[x_plus_y] = [hetr_device, '0']

    check_device_assign_pass(hetr_device, "0", graph_op_metadata, graph_ops)
    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])
def test_scatter_gather_node_axes(config):
    t = config
    axes = ng.make_axes([ng.make_axis(length) for length in t['axes']])
    parallel_axis = axes[t['parallel_axis']]
    hetr_axes = parallel_axis + (axes - parallel_axis)

    with ng.metadata(device=None, device_id='0',
                     transformer='cpu0', host_transformer=None):
        from_node = ng.placeholder(axes=axes)
        to_node = ng.placeholder(axes=axes)

    with ng.metadata(device=None, device_id=t['device_id'], transformer=None,
                     parallel=parallel_axis, host_transformer=None):
        par_node = ng.placeholder(axes=axes)

    scatter_send_op = ScatterSendOp(from_node=from_node,
                                    to_node=par_node)
    assert hetr_axes == scatter_send_op.axes
    assert t['slices'] == scatter_send_op.slices

    scatter_recv_op = ScatterRecvOp(to_node=par_node,
                                    send_node=scatter_send_op)
    for sct_a, a in zip(scatter_recv_op.axes, hetr_axes):
        assert sct_a.length == a.length

    gather_send_op = GatherSendOp(from_node=scatter_recv_op)
    assert_axes_eq_len(scatter_recv_op.axes, gather_send_op.axes)

    gather_recv_op = GatherRecvOp(from_node=par_node, to_node=to_node,
                                  send_node=gather_send_op)
    assert_axes_eq_len(hetr_axes, gather_recv_op.axes)
    assert t['slices'] == gather_recv_op.slices
def test_gpu_graph(config):
    pytest.xfail("Multi-GPU testing not enabled yet")
    if 'gpu' not in ngt.transformer_choices():
        pytest.skip('GPUTransformer not available!')

    t = config
    with ng.metadata(device='gpu'):
        x = ng.placeholder(axes=t['axes'])

    with ng.metadata(device='gpu', device_id=t['device_id'], parallel=t['parallel_axis']):
        x_plus_one = x + 1

    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    os.environ["HETR_SERVER_GPU_NUM"] = str(len(t['device_id']))

    np_x = np.random.randint(100, size=t['axes'].full_lengths)
    with closing(ngt.make_transformer_factory('hetr')()) as transformer:
        computation = transformer.computation(x_plus_two, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 2)
def test_gpu_send_and_recv(hetr_device):
    pytest.xfail("GitHub issue: #2007, Unknown error - investigation is needed")

    # put x + 1 on the cpu device
    with ng.metadata(device='cpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the gpu device
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2

    # put x + 1 on the gpu device
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the cpu device
    with ng.metadata(device='cpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
def test_gpu_send_and_recv():
    # Skip if the GPU transformer is not available
    if 'gpu' not in transformer_choices():
        pytest.skip("GPUTransformer not available")

    # put x + 1 on the cpu (numpy) device
    with ng.metadata(device='numpy'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the gpu device
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(12), (22), (32)],
                        placeholder=x,
                        ops=OrderedSet([x_plus_two]))

    # put x + 1 on the gpu device
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the cpu (numpy) device
    with ng.metadata(device='numpy'):
        x_plus_two = x_plus_one + 1

    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(12), (22), (32)],
                        placeholder=x,
                        ops=OrderedSet([x_plus_two]))
def test_gpu_send_and_recv():
    # put x + 1 on the cpu device
    with ng.metadata(device='cpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the gpu device
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2

    # put x + 1 on the gpu device
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x + 2 on the cpu device
    with ng.metadata(device='cpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
def __call__(self, in_obj, init_state=None):
    """
    Sets shape based parameters of this layer given an input tuple or int
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer
        init_state (Tensor or list): object that provides initial state

    Returns:
        if sum_out or concat_out - rnn_out (Tensor): output
        otherwise - rnn_out (list of Tensors): list of length 2
    """
    if isinstance(in_obj, collections.Sequence):
        if len(in_obj) != 2:
            raise ValueError("If in_obj is a sequence, it must have length 2")
        if in_obj[0].axes != in_obj[1].axes:
            raise ValueError("If in_obj is a sequence, each element must have the same axes")
        fwd_in = in_obj[0]
        bwd_in = in_obj[1]
    else:
        fwd_in = in_obj
        bwd_in = in_obj

    if isinstance(init_state, collections.Sequence):
        if len(init_state) != 2:
            raise ValueError("If init_state is a sequence, it must have length 2")
        if init_state[0].axes != init_state[1].axes:
            raise ValueError("If init_state is a sequence, "
                             "each element must have the same axes")
        fwd_init = init_state[0]
        bwd_init = init_state[1]
    else:
        fwd_init = init_state
        bwd_init = init_state

    with ng.metadata(direction="fwd"):
        fwd_out = self.fwd_rnn(fwd_in, fwd_init)
    with ng.metadata(direction="bwd"):
        bwd_out = ng.cast_role(self.bwd_rnn(bwd_in, bwd_init), fwd_out.axes)

    if self.sum_out:
        return fwd_out + bwd_out
    elif self.concat_out:
        ax = fwd_out.axes.feature_axes()
        if len(ax) == 1:
            ax = ax[0]
        else:
            raise ValueError(("Multiple hidden axes: {}. "
                              "Unable to concatenate automatically").format(ax))
        return ng.concat_along_axis([fwd_out, bwd_out], ax)
    else:
        return fwd_out, bwd_out
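# A minimal usage sketch for the bidirectional call above (hedged: `bi_rnn` stands for
# an already constructed instance of this layer and `seq_in` / `seq_fwd` / `seq_bwd`
# for tensors with a recurrent axis; all of these names are hypothetical):
#
#     fwd_out, bwd_out = bi_rnn(seq_in)       # sum_out=False, concat_out=False: a pair
#     merged = bi_rnn((seq_fwd, seq_bwd))     # two inputs must share the same axes
#
# With sum_out=True the call returns fwd_out + bwd_out as a single tensor; with
# concat_out=True the two outputs are concatenated along the single hidden feature axis.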
def test_comm_path_exists():
    axes = ng.make_axes([ng.make_axis(length=10, name='A'),
                         ng.make_axis(length=15, name='B')])

    with ng.metadata(device=None, device_id=None,
                     transformer=None, host_transformer=None):
        from_node = ng.placeholder(axes)
        to_node = ng.placeholder(axes)
        send_x = SendOp(from_node=from_node)
        recv_x = RecvOp(to_node=to_node, send_node=send_x)

    with ng.metadata(device=None, device_id=None,
                     transformer=None, host_transformer=None):
        x_plus_one = recv_x + 1

    assert comm_path_exists(recv_x, send_x)
    assert comm_path_exists(x_plus_one, send_x)
def test_allreduce_hint(hetr_device, config):
    if hetr_device == 'gpu':
        if 'gpu' not in ngt.transformer_choices():
            pytest.skip("GPUTransformer not available")

    input = config['input']
    device_id = config['device_id']
    axis_A = ng.make_axis(length=4, name='axis_A')
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)

    with ng.metadata(device=hetr_device,
                     device_id=device_id,
                     parallel=parallel_axis):
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1))
        var_B = ng.variable(axes=[axis_A], initial_value=UniformInit(input, input))
        var_B.metadata['reduce_func'] = 'sum'
        var_B_mean = var_B / len(device_id)
        var_minus = (var_A - var_B_mean)

    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as hetr:
        out_comp = hetr.computation(var_minus)
        result = out_comp()
        np_result = np.full((axis_A.length), config['expected_result'], np.float32)
        np.testing.assert_array_equal(result, np_result)
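# Note on the expected arithmetic in test_allreduce_hint (the concrete `input` and
# `expected_result` values come from a config fixture that is not shown here): each
# worker initializes var_B to `input`; the 'sum' reduce_func hint makes HeTr
# AllReduce-sum var_B across len(device_id) workers, and the explicit division by
# len(device_id) turns that sum back into `input`, so var_minus should evaluate to
# 1 - input on every element of axis_A.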
def scope_ops(name=None, mode=None, subgraph=None, metadata=None):
    """
    All ops created within the context manager will be added to a subgraph

    Arguments:
        name (str): variable scope to use for all created ops
        mode (str): mode (e.g. "inference", "training") to annotate on all created ops
        subgraph (SubGraph): subgraph instance to add ops to. If not provided, one will be created
        metadata (dict): a dictionary of metadata to add to all created ops

    Yields:
        instance of SubGraph
    """
    if subgraph is None:
        subgraph = SubGraph()

    if metadata is None:
        metadata = dict()

    if mode is not None:
        metadata["mode"] = mode

    with name_scope(name=name, reuse_scope=True):
        with ng.Op.all_ops() as ops:
            with ng.metadata(**metadata):
                yield (subgraph)

    subgraph.ops.extend(ops)
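# A minimal usage sketch of scope_ops, assuming it is registered as a context manager
# (e.g. decorated with contextlib.contextmanager, which is not shown above); the
# wrapper function name is hypothetical:
def example_scope_ops_usage():
    with scope_ops(name="affine_block", mode="training") as sg:
        x = ng.placeholder(())
        y = x + 1
    # After the block exits, sg.ops holds the ops created inside it, each of which was
    # created under the metadata {"mode": "training"}.
    return sg, x, y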
def test_hetr_graph_passes():
    # Build the graph
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())

    y = ng.placeholder(())
    x_plus_y = x + y

    # Build the graph metadata
    graph_ops = OrderedSet([x_plus_y, x, y])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = ["numpy", '1']
    graph_op_metadata[y] = ["numpy", '0']
    graph_op_metadata[x_plus_y] = ["numpy", '0']

    transformer_list = ["numpy1", "numpy0"]

    # Run the hetr passes one by one, and verify they did the expected things to the graph
    check_device_assign_pass("numpy", "0", graph_op_metadata, graph_ops)
    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])

    # Check that the hetr pass (ChildTransformerPass) generates the expected transformer list
    obj = ChildTransformerPass([])
    transformer = ngt.make_transformer_factory('hetr')()
    obj.do_pass(graph_ops, transformer)
    transformer.close()
    assert set(transformer_list) == set(obj.transformer_list)
def get_mini_resnet(inputs, dataset, device, device_id, stage_depth=1,
                    batch_norm=False, activation=True, preprocess=False):
    en_bottleneck = False
    num_resnet_mods = 0
    if dataset == 'i1k':
        ax.Y.length = 1000
        if stage_depth > 34:
            en_bottleneck = True
    if dataset == 'cifar10':
        ax.Y.length = 10
        num_resnet_mods = (stage_depth - 2) // 6

    model = BuildResnet(dataset, stage_depth, en_bottleneck, num_resnet_mods,
                        batch_norm=batch_norm)
    with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
        model_out = model(inputs['image'])
    return model_out
def __init__(self, inputs, stage_depth, batch_norm=True, activation=True, preprocess=True):
    nfms = [2 ** (stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess:
        layers = [Preprocess(functor=cifar_mean_subtract)]

    parallel_axis = inputs['image'].axes.batch_axes()
    with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
        layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True))
        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))
        if batch_norm:
            layers.append(BatchNorm())
        if activation:
            layers.append(Activation(Rectlin()))
        layers.append(Pool2D(8, strides=2, op='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm, activation=Softmax()))

    self.layers = layers
def test_gpu_graph(config):
    t = config
    with ng.metadata(device='gpu'):
        x = ng.placeholder(axes=t['axes'])

    with ng.metadata(device='gpu', device_id=t['device_id'], parallel=t['parallel_axis']):
        x_plus_one = x + 1

    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    np_x = np.random.randint(100, size=t['axes'].full_lengths)
    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 2)
def test_distributed_dot_parallel_second_axis(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail("Axes Layout needs to be fixed for GPUs after changes to make "
                     "parallel_axis the least contiguous axis for scatter/gather communication ops")

    H = ng.make_axis(length=6, name='height')
    N = ng.make_axis(length=8, name='batch')
    W1 = ng.make_axis(length=2, name='W1')
    W2 = ng.make_axis(length=4, name='W2')
    x = ng.placeholder(axes=[H, N])
    w2 = ng.placeholder(axes=[W2, W1])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        w1 = ng.placeholder(axes=[W1, H])
        dot1 = ng.dot(w1, x).named("dot1")
    dot2 = ng.dot(w2, dot1).named("dot2")

    np_x = np.random.randint(100, size=[H.length, N.length])
    np_w1 = np.random.randint(100, size=[W1.length, H.length])
    np_w2 = np.random.randint(100, size=[W2.length, W1.length])
    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([dot2, dot1], x, w1, w2)
        res2, res1 = computation(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))

        computation2 = transformer.computation([dot1, dot2], x, w1, w2)
        res1, res2 = computation2(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))
def test_distributed_plus_one(hetr_device, config):
    device_id = config['device_id']
    axes = config['axes']
    parallel_axis = config['parallel_axis']

    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes)
        with ng.metadata(device_id=device_id, parallel=parallel_axis):
            x_plus_one = x + 1

    np_x = np.random.randint(100, size=axes.lengths)
    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(x_plus_one, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 1)
def test_to_and_from_device(hetr_device, config):
    axes = config['axes']
    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes) if axes else ng.placeholder(())
        with ng.metadata(device_id='1'):
            x_plus_one = x + 1
        x_plus_two = x_plus_one * 2

    np_x = np.random.randint(100, size=axes.lengths) if axes else random.random()
    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([x_plus_one, x_plus_two], x)
        res = computation(np_x)
        np.testing.assert_allclose(res[0], np_x + 1.0)
        np.testing.assert_allclose(res[1], (np_x + 1.0) * 2.0)
def test_allreduce_hint_gpu(config):
    pytest.xfail("Multi-GPU testing not enabled yet")
    if 'gpu' not in ngt.transformer_choices():
        pytest.skip("GPUTransformer not available")

    c = config
    os.environ["HETR_SERVER_GPU_NUM"] = str(len(c['device_id']))

    ax_A_length = 32
    ax_B_length = 16

    np_result = [np.full((ax_A_length, ax_B_length), c['expected_result'], np.float32)]
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)
    with ng.metadata(device_id=c['device_id'], parallel=parallel_axis):
        axis_A = ng.make_axis(length=ax_A_length, name='axis_A')
        axis_B = ng.make_axis(length=ax_B_length, name='axis_B')
        var_A = ng.variable(axes=[axis_A],
                            initial_value=UniformInit(1, 1)).named('var_A')
        var_B = ng.variable(initial_value=UniformInit(c['input'], c['input']),
                            axes=[axis_B]).named('var_B')
        var_B.metadata['reduce_func'] = c['func']
        var_minus = (var_A - var_B).named('var_minus')

    with closing(ngt.make_transformer_factory('hetr', device='gpu')()) as hetr:
        out_comp = hetr.computation([var_minus]).named('out_comp')
        result = out_comp()
        np.testing.assert_array_equal(result, np_result)
def test_reduce_vector(hetr_device):
    """
    A whole vector is produced on each worker and should be reduced before
    being returned, but not along its axes since it does not have the
    parallel axis in its axes
    """
    if hetr_device == 'gpu':
        pytest.xfail("broadcast communication ops not yet supported on gpus")

    H = ng.make_axis(length=4, name='height')
    N = ng.make_axis(length=8, name='batch')
    weight = ng.make_axis(length=2, name='weight')
    x = ng.placeholder(axes=[N, H])
    w = ng.placeholder(axes=[H, weight])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        dot = ng.dot(x, w)
        out = ng.sum(dot, N)

    np_x = np.random.randint(100, size=[N.length, H.length])
    np_weight = np.random.randint(100, size=[H.length, weight.length])
    with closing(ngt.make_transformer_factory('hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(out, x, w)
        res = computation(np_x, np_weight)
        # TODO should the reduce infer a sum or mean?
        expected = np.sum(np.dot(np_x, np_weight), 0) / 2.
        np.testing.assert_array_equal(res, expected)
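# Note on the expected value in test_reduce_vector: each of the two workers holds half
# of the batch axis N, computes its local dot product, and sums over its local batch
# rows. Because `out` does not carry the parallel axis, the two partial vectors are
# reduced across workers when returned; the assertion implies that reduction behaves
# like a mean (full batch sum divided by 2), which is exactly what the TODO questions.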
def train_outputs(self, in_obj, init_state=None):
    """
    Sets shape based parameters of this layer given an input tuple or int
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer
        init_state (Tensor): object that provides initial state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state
    else:
        if self.reset_cells:
            self.h_init = ng.constant(
                const=0, axes=self.hidden_state_axes).named('h_init')
        else:
            self.h_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')

    self.W_input = ng.variable(axes=self.w_in_axes,
                               initial_value=self.init).named("W_in")
    self.W_recur = ng.variable(axes=self.w_re_axes,
                               initial_value=self.init_inner).named("W_re")
    self.b = ng.variable(axes=self.hidden_axes, initial_value=0).named("bias")

    h = self.h_init
    h_list = []

    # slice the inputs into time slices
    in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

    # unrolling computations
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            h = self._step(in_s[i], h)
            h_list.append(h)

    if self.return_sequence is True:
        # only when returning a sequence, need to reverse the output
        h_list = h_list[::-1] if self.backward else h_list
        rnn_out = ng.stack(h_list, self.recurrent_axis, pos=self.recurrent_axis_idx)
    else:
        rnn_out = h_list[-1]

    return rnn_out
def train_outputs(self, in_obj):
    """
    Sets shape based parameters of this layer given an input tuple or int
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer

    Returns:
        (Tensor): output
    """
    in_axes = in_obj.axes
    self.time_axis = in_axes.recurrent_axes()[0]

    def get_steps(x, time_axis):
        return [ng.slice_along_axis(x, time_axis, i) for i in range(time_axis.length)]

    if self.axes is not None:
        hidden_axes = self.axes - self.axes.recurrent_axes()
    else:
        hidden_axes = ng.make_axes([ng.make_axis(self.nout).named('Hidden_in')])

    w_in_axes = hidden_axes + [axis - 1 for axis in
                               in_axes.sample_axes() - in_axes.recurrent_axes()]
    w_re_axes = hidden_axes + [axis - 1 for axis in hidden_axes]

    self.W_input = ng.variable(axes=w_in_axes,
                               initial_value=self.init(w_in_axes.lengths)).named("W_in")
    self.W_recur = ng.variable(axes=w_re_axes,
                               initial_value=self.init_inner(w_re_axes.lengths)).named("W_re")
    self.b = ng.variable(axes=hidden_axes, initial_value=0).named("bias")

    h_ff_buf = ng.dot(self.W_input, in_obj).named("W_in_dot_in")
    h_ff_s = get_steps(h_ff_buf, self.time_axis)
    self.h_init = ng.constant(np.zeros(h_ff_s[0].axes.lengths),
                              axes=h_ff_s[0].axes).named('h_init')

    hprev = [self.h_init]

    for i in range(self.time_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            d = ng.dot(self.W_recur, hprev[i]).named("W_rec_dot_h{}".format(i))
            h = self.activation(d + h_ff_s[i] + self.b)
            h.name = "activ{}".format(i)
            hprev.append(h)

    rnn_out = ng.stack(hprev[1:], self.time_axis, pos=1)
    return rnn_out
def test_get_layouts(config):
    test_transformer = ngt.make_transformer_factory('gpu')()

    t = config
    with ng.metadata(parallel=t['parallel_axis']):
        test_ops = [
            GPUCudaScatterSendOp(
                TensorValueOp(ng.placeholder(t['axes']),
                              metadata=dict(device='gpu', device_id='0',
                                            parallel=t['parallel_axis'],
                                            transformer='gpu0',
                                            host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                    parallel=t['parallel_axis'],
                                    transformer=['gpu0', 'gpu1'],
                                    host_transformer=None))
            ),
            GPUCudaScatterRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                    parallel=t['parallel_axis'],
                                    transformer=['gpu0', 'gpu1'],
                                    host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']),
                                  metadata=dict(device='gpu', device_id='0',
                                                parallel=t['parallel_axis'],
                                                transformer='gpu0',
                                                host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                        parallel=t['parallel_axis'],
                                        transformer=['gpu0', 'gpu1'],
                                        host_transformer=None))
                )
            ),
            GPUCudaGatherRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                    parallel=t['parallel_axis'],
                                    transformer=['gpu0', 'gpu1'],
                                    host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id='0',
                                    parallel=t['parallel_axis'],
                                    transformer='gpu0',
                                    host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']),
                                  metadata=dict(device='gpu', device_id='0',
                                                parallel=t['parallel_axis'],
                                                transformer='gpu0',
                                                host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                        parallel=t['parallel_axis'],
                                        transformer=['gpu0', 'gpu1'],
                                        host_transformer=None))
                )
            ),
            GPUCudaGatherSendOp(
                TensorValueOp(ng.placeholder(t['axes']),
                              metadata=dict(device='gpu', device_id='0',
                                            transformer='gpu0',
                                            host_transformer=None,
                                            parallel=t['parallel_axis']))
            ),
            GPUCudaAllReduceOp(
                input_node=TensorValueOp(ng.placeholder(t['axes']),
                                         metadata=dict(device='gpu', device_id='0',
                                                       transformer='gpu0',
                                                       host_transformer=None,
                                                       parallel=t['parallel_axis'])),
                func='sum'
            )
        ]

    test_layouts = []
    for op in test_ops:
        test_layouts.append(test_transformer.get_layouts(op)[0].axes)
    np.testing.assert_array_equal(test_layouts, t['expected_layouts'])
def test_singleton_device_id(hetr_device):
    with ng.metadata(device_id=(['1'])):
        x = ng.placeholder(())

    graph_ops = OrderedSet([x])
    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = [hetr_device, '1']

    check_device_assign_pass(hetr_device, "0", graph_op_metadata, graph_ops)
def test_from_device(transformer_factory):
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())

    x_plus_one = x + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_one, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 1
def test_singleton_device_id(transformer_factory):
    with ng.metadata(device_id=(['1'])):
        x = ng.placeholder(())

    graph_ops = OrderedSet([x])
    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = ["cpu", '1']

    check_device_assign_pass("cpu", "0", graph_op_metadata, graph_ops)
def test_to_and_from_device():
    x = ng.placeholder(())

    with ng.metadata(device_id='1'):
        x_plus_one = x + 1

    x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
def test_computation_return_list(transformer_factory):
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())
        x_plus_one = x + 1
        x_plus_two = x + 2
        x_mul_three = x * 3

    with ExecutorFactory() as ex:
        computation = ex.executor([x_plus_one, x_plus_two, x_mul_three], x)
        for i in [10, 20, 30]:
            assert computation(i) == (i + 1, i + 2, i * 3)
def test_process_leak(transformer_factory):
    baseline = active_children()
    with ng.metadata(device_id=('2')):
        x = ng.constant(2)
    assert len(active_children()) == 0

    with ExecutorFactory() as ex:
        comp = ex.executor(x)
        assert len(active_children()) == 1
        comp()
        assert len(active_children()) == 2

    assert len(active_children()) == len(baseline)