Example #1
def create_send_recv_graph():
    ax_a = ng.make_axis(length=10, name='A')
    ax_b = ng.make_axis(length=15, name='B')
    axes = ng.make_axes([ax_a, ax_b])

    with ng.metadata(device=None,
                     device_id=None,
                     transformer=None,
                     host_transformer=None):
        from_node = ng.placeholder(axes)
        to_node = ng.placeholder(axes)
    send_x = SendOp(from_node=from_node)
    recv_x = RecvOp(to_node=to_node, send_node=send_x)

    with ng.metadata(parallel=ax_a,
                     device=None,
                     device_id=None,
                     transformer=None,
                     host_transformer=None):
        x_plus_one = recv_x + 1

    send_x_plus_one = SendOp(from_node=x_plus_one)
    recv_x_plus_one = RecvOp(to_node=to_node, send_node=send_x_plus_one)

    with ng.metadata(device=None,
                     device_id=None,
                     transformer=None,
                     host_transformer=None):
        z = recv_x_plus_one + 2
    return z, recv_x, recv_x_plus_one, send_x, x_plus_one, from_node, send_x_plus_one
Example #2
def test_one_dot_bprop_allreduce(config):
    c = config

    pytest.xfail(
        "GPU child transformers generate errors during AssignLayouts graph pass #1651"
    )

    H_axis = ng.make_axis(length=4, name='height')
    W_axis = ng.make_axis(length=6, name='width')
    with ng.metadata(step='input'):
        X = ng.placeholder(axes=[H_axis, W_axis])
        target = ng.constant(1, axes=[W_axis])
    with ng.metadata(device_id=c['device_id'], parallel=W_axis):
        W = ng.variable(axes=[H_axis], initial_value=UniformInit(1, 1))
        dot = ng.dot(W, X)
        L = ng.squared_L2(target - dot, out_axes=())
        grad = ng.deriv(L, W)
        grad.metadata['reduce_func'] = c['func']
        update = (W - grad)

    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        out_comp = hetr.computation([update], X)
        result = out_comp(c['input'])

        np.testing.assert_array_equal(result, c['expected_result'])
Example #3
def test_scatter_gather_graph(hetr_device):
    # Build the graph
    W = ng.make_axis(length=6, name='width')

    with ng.metadata(device=hetr_device, device_id='0'):
        x = ng.placeholder(())
        z = ng.placeholder(())

    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=W):
        y = ng.placeholder(())

    x_plus_z = x + z  # Does not create a recv node
    x_plus_y = x + y  # creates a gather recv node

    # Build the graph metadata
    graph_ops = OrderedSet([x, y, z, x_plus_z, x_plus_y])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = [hetr_device, '0']
    graph_op_metadata[z] = [hetr_device, '0']
    graph_op_metadata[y] = [hetr_device, ('0', '1')]
    graph_op_metadata[x_plus_z] = [hetr_device, '0']
    graph_op_metadata[x_plus_y] = [hetr_device, '0']

    check_device_assign_pass(hetr_device, "0", graph_op_metadata, graph_ops)

    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])
Example #4
def test_scatter_gather_node_axes(config):
    t = config
    axes = ng.make_axes([ng.make_axis(length) for length in t['axes']])
    parallel_axis = axes[t['parallel_axis']]
    hetr_axes = parallel_axis + (axes - parallel_axis)
    with ng.metadata(device=None, device_id='0', transformer='cpu0', host_transformer=None):
        from_node = ng.placeholder(axes=axes)
        to_node = ng.placeholder(axes=axes)

    with ng.metadata(device=None, device_id=t['device_id'], transformer=None,
                     parallel=parallel_axis, host_transformer=None):
        par_node = ng.placeholder(axes=axes)

    scatter_send_op = ScatterSendOp(from_node=from_node,
                                    to_node=par_node)
    assert hetr_axes == scatter_send_op.axes
    assert t['slices'] == scatter_send_op.slices

    scatter_recv_op = ScatterRecvOp(to_node=par_node,
                                    send_node=scatter_send_op)

    for sct_a, a in zip(scatter_recv_op.axes, hetr_axes):
        assert sct_a.length == a.length

    gather_send_op = GatherSendOp(from_node=scatter_recv_op)
    assert_axes_eq_len(scatter_recv_op.axes, gather_send_op.axes)

    gather_recv_op = GatherRecvOp(from_node=par_node,
                                  to_node=to_node,
                                  send_node=gather_send_op)
    assert_axes_eq_len(hetr_axes, gather_recv_op.axes)

    assert t['slices'] == gather_recv_op.slices
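
A minimal numpy sketch (not the ngraph communication ops) of the scatter/gather idea that the slices above assert: split a tensor along the parallel axis across two devices, then reassemble it. The array shapes below are illustrative only.

import numpy as np

x = np.arange(24).reshape(4, 6)          # parallel axis is axis 0
num_devices = 2
chunk = x.shape[0] // num_devices

# scatter: each device receives a contiguous slice along the parallel axis
pieces = [x[i * chunk:(i + 1) * chunk] for i in range(num_devices)]

# gather: concatenating the per-device slices restores the original tensor
assert np.array_equal(np.concatenate(pieces, axis=0), x)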
Example #5
def test_gpu_graph(config):
    pytest.xfail("Multi-GPU testing not enabled yet")

    if 'gpu' not in ngt.transformer_choices():
        pytest.skip('GPUTransformer not available!')

    t = config
    with ng.metadata(device='gpu'):
        x = ng.placeholder(axes=t['axes'])

    with ng.metadata(device='gpu',
                     device_id=t['device_id'],
                     parallel=t['parallel_axis']):
        x_plus_one = x + 1

    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    os.environ["HETR_SERVER_GPU_NUM"] = str(len(t['device_id']))

    np_x = np.random.randint(100, size=t['axes'].full_lengths)
    with closing(ngt.make_transformer_factory('hetr')()) as transformer:
        computation = transformer.computation(x_plus_two, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 2)
Example #6
def test_gpu_send_and_recv(hetr_device):
    pytest.xfail(
        "GitHub issue: #2007, Unknown error - investigation is needed")
    # put x+1 on cpu numpy
    with ng.metadata(device='cpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on gpu numpy
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2

    # put x+1 on gpu numpy
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on cpu numpy
    with ng.metadata(device='cpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
Example #7
def test_gpu_send_and_recv():
    # First check whether the GPU transformer is available; if not, skip
    if 'gpu' not in transformer_choices():
        pytest.skip("GPUTransformer not available")

    # put x+1 on cpu numpy
    with ng.metadata(device='numpy'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on gpu numpy
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(12), (22), (32)],
                        placeholder=x,
                        ops=OrderedSet([x_plus_two]))

    # put x+1 on gpu numpy
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on cpu numpy
    with ng.metadata(device='numpy'):
        x_plus_two = x_plus_one + 1

    check_result_values(input_vector=[10, 20, 30],
                        result_expected=[(12), (22), (32)],
                        placeholder=x,
                        ops=OrderedSet([x_plus_two]))
Example #8
def test_gpu_send_and_recv():
    # put x+1 on cpu numpy
    with ng.metadata(device='cpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on gpu numpy
    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2

    # put x+1 on gpu numpy
    with ng.metadata(device='gpu'):
        x = ng.placeholder(())
        x_plus_one = x + 1
    # put x+2 on cpu numpy
    with ng.metadata(device='cpu'):
        x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
Example #9
    def __call__(self, in_obj, init_state=None):
        """
        Sets shape based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (Tensor or list): object that provides initial state

        Returns:
            if sum_out or concat_out - rnn_out (Tensor): output
            otherwise - rnn_out (list of Tensors): list of length 2

        """
        if isinstance(in_obj, collections.Sequence):
            if len(in_obj) != 2:
                raise ValueError("If in_obj is a sequence, it must have length 2")
            if in_obj[0].axes != in_obj[1].axes:
                raise ValueError("If in_obj is a sequence, each element must have the same axes")
            fwd_in = in_obj[0]
            bwd_in = in_obj[1]
        else:
            fwd_in = in_obj
            bwd_in = in_obj

        if isinstance(init_state, collections.Sequence):
            if len(init_state) != 2:
                raise ValueError("If init_state is a sequence, it must have length 2")
            if init_state[0].axes != init_state[1].axes:
                raise ValueError("If init_state is a sequence, " +
                                 "each element must have the same axes")
            fwd_init = init_state[0]
            bwd_init = init_state[1]
        else:
            fwd_init = init_state
            bwd_init = init_state

        with ng.metadata(direction="fwd"):
            fwd_out = self.fwd_rnn(fwd_in, fwd_init)
        with ng.metadata(direction="bwd"):
            bwd_out = ng.cast_role(self.bwd_rnn(bwd_in, bwd_init), fwd_out.axes)

        if self.sum_out:
            return fwd_out + bwd_out
        elif self.concat_out:
            ax = fwd_out.axes.feature_axes()
            if len(ax) == 1:
                ax = ax[0]
            else:
                raise ValueError(("Multiple hidden axes: {}. "
                                  "Unable to concatenate automatically").format(ax))
            return ng.concat_along_axis([fwd_out, bwd_out], ax)
        else:
            return fwd_out, bwd_out
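
A hedged numpy sketch of the three output modes described in the docstring above: elementwise sum of the forward and backward outputs, concatenation along the hidden (feature) axis, or returning both. Shapes are illustrative stand-ins, not the ngraph tensors.

import numpy as np

fwd_out = np.ones((3, 5))                        # (hidden, time), illustrative
bwd_out = np.full((3, 5), 2.0)

summed = fwd_out + bwd_out                       # sum_out=True
concat = np.concatenate([fwd_out, bwd_out], 0)   # concat_out=True, along the hidden axis
pair = (fwd_out, bwd_out)                        # default: both outputs
assert summed.shape == (3, 5) and concat.shape == (6, 5)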
Example #10
def test_comm_path_exists():
    axes = ng.make_axes([ng.make_axis(length=10, name='A'), ng.make_axis(length=15, name='B')])
    with ng.metadata(device=None, device_id=None, transformer=None, host_transformer=None):
        from_node = ng.placeholder(axes)
        to_node = ng.placeholder(axes)
    send_x = SendOp(from_node=from_node)
    recv_x = RecvOp(to_node=to_node, send_node=send_x)

    with ng.metadata(device=None, device_id=None, transformer=None, host_transformer=None):
        x_plus_one = recv_x + 1

    assert comm_path_exists(recv_x, send_x)
    assert comm_path_exists(x_plus_one, send_x)
Example #11
def test_allreduce_hint(hetr_device, config):
    if hetr_device == 'gpu':
        if 'gpu' not in ngt.transformer_choices():
            pytest.skip("GPUTransformer not available")

    input = config['input']
    device_id = config['device_id']
    axis_A = ng.make_axis(length=4, name='axis_A')
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)

    with ng.metadata(device=hetr_device,
                     device_id=device_id,
                     parallel=parallel_axis):
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1))
        var_B = ng.variable(axes=[axis_A],
                            initial_value=UniformInit(input, input))
        var_B.metadata['reduce_func'] = 'sum'
        var_B_mean = var_B / len(device_id)
        var_minus = (var_A - var_B_mean)

    with closing(ngt.make_transformer_factory('hetr',
                                              device=hetr_device)()) as hetr:
        out_comp = hetr.computation(var_minus)
        result = out_comp()
        np_result = np.full((axis_A.length), config['expected_result'],
                            np.float32)
        np.testing.assert_array_equal(result, np_result)
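
A hedged numpy illustration of the arithmetic behind the reduce_func='sum' hint above: an allreduce with 'sum' adds the per-device copies, so dividing by the number of devices (as var_B_mean does) recovers the per-device value. This sketches only the arithmetic, not ngraph's communication ops.

import numpy as np

num_devices = 4
per_device = [np.full(4, 2.0) for _ in range(num_devices)]   # identical copies on each device
allreduce_sum = np.sum(per_device, axis=0)                   # what a 'sum' allreduce produces
assert np.array_equal(allreduce_sum / num_devices, per_device[0])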
Example #12
def scope_ops(name=None, mode=None, subgraph=None, metadata=None):
    """
    All ops created within the context manager will be added to a subgraph

    Arguments:
        name (str): variable scope to use for all created ops
        mode (str): mode (e.g. "inference", "training") to annotate on all created ops
        subgraph (SubGraph): subgraph instance to add ops to. If not provided, one will be created
        metadata (dict): a dictionary of metadata to add to all created ops

    Yields:
        instance of SubGraph
    """
    if subgraph is None:
        subgraph = SubGraph()

    if metadata is None:
        metadata = dict()

    if mode is not None:
        metadata["mode"] = mode

    with name_scope(name=name, reuse_scope=True):
        with ng.Op.all_ops() as ops:
            with ng.metadata(**metadata):
                yield (subgraph)

    subgraph.ops.extend(ops)
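
A self-contained sketch of the same collect-ops-created-in-a-context pattern in plain Python, assuming scope_ops above is wrapped as a context manager (as its yield suggests). SubGraph, name_scope and ng.Op.all_ops are ngraph constructs; the names below are illustrative only.

from contextlib import contextmanager

_created = []                       # stands in for ng.Op.all_ops() tracking

class Thing(object):
    def __init__(self, name):
        self.name = name
        _created.append(self)

@contextmanager
def scope_things(bucket=None):
    if bucket is None:
        bucket = []                 # like creating a SubGraph when none is given
    start = len(_created)
    yield bucket
    bucket.extend(_created[start:]) # like subgraph.ops.extend(ops)

with scope_things() as sg:
    Thing("a")
    Thing("b")
assert [t.name for t in sg] == ["a", "b"]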
Example #13
def test_hetr_graph_passes():

    # Build the graph
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())

    y = ng.placeholder(())
    x_plus_y = x + y

    # Build the graph metadata
    graph_ops = OrderedSet([x_plus_y, x, y])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = ["numpy", '1']
    graph_op_metadata[y] = ["numpy", '0']
    graph_op_metadata[x_plus_y] = ["numpy", '0']

    transformer_list = ["numpy1", "numpy0"]

    # Run the hetr passes one by one, and verify they did the expected things to the graph
    check_device_assign_pass("numpy", "0", graph_op_metadata, graph_ops)
    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])

    # Check whether the hetr pass (ChildTransformerPass) generates the expected transformer list
    obj = ChildTransformerPass([])
    transformer = ngt.make_transformer_factory('hetr')()
    obj.do_pass(graph_ops, transformer)
    transformer.close()
    assert set(transformer_list) == set(obj.transformer_list)
Example #14
def get_mini_resnet(inputs,
                    dataset,
                    device,
                    device_id,
                    stage_depth=1,
                    batch_norm=False,
                    activation=True,
                    preprocess=False):
    en_bottleneck = False
    num_resnet_mods = 0
    if dataset == 'i1k':
        ax.Y.length = 1000
        if stage_depth > 34:
            en_bottleneck = True
    if dataset == 'cifar10':
        ax.Y.length = 10
        num_resnet_mods = (stage_depth - 2) // 6
    model = BuildResnet(dataset,
                        stage_depth,
                        en_bottleneck,
                        num_resnet_mods,
                        batch_norm=batch_norm)
    with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
        model_out = model(inputs['image'])
    return model_out
Example #15
    def __init__(self,
                 inputs,
                 stage_depth,
                 batch_norm=True,
                 activation=True,
                 preprocess=True):
        nfms = [
            2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)
        ]
        strides = [
            1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
        ]
        layers = []
        if preprocess:
            layers.append(Preprocess(functor=cifar_mean_subtract))
        parallel_axis = inputs['image'].axes.batch_axes()
        with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
            layers.append(
                Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
            layers.append(f_module(nfms[0], first=True))

            for nfm, stride in zip(nfms[1:], strides):
                layers.append(f_module(nfm, strides=stride))

        if batch_norm:
            layers.append(BatchNorm())
        if activation:
            layers.append(Activation(Rectlin()))
        layers.append(Pool2D(8, strides=2, op='avg'))
        layers.append(
            Affine(axes=ax.Y,
                   weight_init=KaimingInit(),
                   batch_norm=batch_norm,
                   activation=Softmax()))
        self.layers = layers
Example #16
def test_gpu_graph(config):
    t = config
    with ng.metadata(device='gpu'):
        x = ng.placeholder(axes=t['axes'])

    with ng.metadata(device='gpu', device_id=t['device_id'], parallel=t['parallel_axis']):
        x_plus_one = x + 1

    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    np_x = np.random.randint(100, size=t['axes'].full_lengths)
    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 2)
Example #17
def test_distributed_dot_parallel_second_axis(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail(
            "Axes Layout needs to be fixed for GPUs after changes to make "
            "parallel_axis the least contiguous axis for scatter/gather communication ops"
        )

    H = ng.make_axis(length=6, name='height')
    N = ng.make_axis(length=8, name='batch')
    W1 = ng.make_axis(length=2, name='W1')
    W2 = ng.make_axis(length=4, name='W2')
    x = ng.placeholder(axes=[H, N])
    w2 = ng.placeholder(axes=[W2, W1])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        w1 = ng.placeholder(axes=[W1, H])
        dot1 = ng.dot(w1, x).named("dot1")
    dot2 = ng.dot(w2, dot1).named("dot2")

    np_x = np.random.randint(100, size=[H.length, N.length])
    np_w1 = np.random.randint(100, size=[W1.length, H.length])
    np_w2 = np.random.randint(100, size=[W2.length, W1.length])
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([dot2, dot1], x, w1, w2)
        res2, res1 = computation(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))

        computation2 = transformer.computation([dot1, dot2], x, w1, w2)
        res1, res2 = computation2(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))
Example #18
def test_distributed_plus_one(hetr_device, config):
    device_id = config['device_id']
    axes = config['axes']
    parallel_axis = config['parallel_axis']

    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes)
        with ng.metadata(device_id=device_id, parallel=parallel_axis):
            x_plus_one = x + 1

    np_x = np.random.randint(100, size=axes.lengths)
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(x_plus_one, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 1)
Example #19
def test_to_and_from_device(hetr_device, config):
    axes = config['axes']
    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes) if axes else ng.placeholder(())
        with ng.metadata(device_id='1'):
            x_plus_one = x + 1
        x_plus_two = x_plus_one * 2

    np_x = np.random.randint(100,
                             size=axes.lengths) if axes else random.random()
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([x_plus_one, x_plus_two], x)
        res = computation(np_x)
        np.testing.assert_allclose(res[0], np_x + 1.0)
        np.testing.assert_allclose(res[1], (np_x + 1.0) * 2.0)
Example #20
def test_allreduce_hint_gpu(config):
    pytest.xfail("Multi-GPU testing not enabled yet")

    if 'gpu' not in ngt.transformer_choices():
        pytest.skip("GPUTransformer not available")

    c = config
    os.environ["HETR_SERVER_GPU_NUM"] = str(len(c['device_id']))

    ax_A_length = 32
    ax_B_length = 16

    np_result = [np.full((ax_A_length, ax_B_length), c['expected_result'], np.float32)]
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)
    with ng.metadata(device_id=c['device_id'], parallel=parallel_axis):
        axis_A = ng.make_axis(length=ax_A_length, name='axis_A')
        axis_B = ng.make_axis(length=ax_B_length, name='axis_B')
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1)).named('var_A')
        var_B = ng.variable(initial_value=UniformInit(c['input'], c['input']),
                            axes=[axis_B]).named('var_B')
        var_B.metadata['reduce_func'] = c['func']
        var_minus = (var_A - var_B).named('var_minus')
    with closing(ngt.make_transformer_factory('hetr', device='gpu')()) as hetr:
        out_comp = hetr.computation([var_minus]).named('out_comp')
        result = out_comp()
        np.testing.assert_array_equal(result, np_result)
Example #21
def test_reduce_vector(hetr_device):
    """
    A whole vector is produced on each worker and should be reduced
    before being returned, but not along its axes since it
    does not have the parallel axis in its axes
    """
    if hetr_device == 'gpu':
        pytest.xfail("broadcast communication ops not yet supported on gpus")

    H = ng.make_axis(length=4, name='height')
    N = ng.make_axis(length=8, name='batch')
    weight = ng.make_axis(length=2, name='weight')
    x = ng.placeholder(axes=[N, H])
    w = ng.placeholder(axes=[H, weight])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        dot = ng.dot(x, w)
        out = ng.sum(dot, N)

    np_x = np.random.randint(100, size=[N.length, H.length])
    np_weight = np.random.randint(100, size=[H.length, weight.length])
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(out, x, w)
        res = computation(np_x, np_weight)
        # TODO should the reduce infer a sum or mean?
        expected = np.sum(np.dot(np_x, np_weight), 0) / 2.
        np.testing.assert_array_equal(res, expected)
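
A hedged numpy sketch of the arithmetic behind the expected value above, assuming the batch axis N is split across the two workers, each worker sums its own slice, and the gathered per-worker results are averaged (hence the division by 2 in the test).

import numpy as np

np_x = np.random.randint(100, size=[8, 4])                    # [N, H]
np_w = np.random.randint(100, size=[4, 2])                    # [H, weight]
halves = np.split(np_x, 2, axis=0)                            # scatter along N
partials = [np.sum(np.dot(h, np_w), axis=0) for h in halves]  # per-worker reductions
averaged = np.mean(partials, axis=0)                          # gather averages the workers
np.testing.assert_array_equal(averaged, np.sum(np.dot(np_x, np_w), axis=0) / 2.)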
Example #22
File: layer.py  Project: kkasravi/ngraph
    def train_outputs(self, in_obj, init_state=None):
        """
        Sets shape based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
            in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer
            init_state (Tensor): object that provides initial state

        Returns:
            rnn_out (Tensor): output

        """
        # try to understand the axes from the input

        self.interpret_axes(in_obj, init_state)

        # initialize the hidden states
        if init_state is not None:
            self.h_init = init_state
        else:
            if self.reset_cells:
                self.h_init = ng.constant(
                    const=0, axes=self.hidden_state_axes).named('h_init')
            else:
                self.h_init = ng.variable(
                    initial_value=0,
                    axes=self.hidden_state_axes).named('h_init')

        self.W_input = ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init).named("W_in")
        self.W_recur = ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner).named("W_re")
        self.b = ng.variable(axes=self.hidden_axes,
                             initial_value=0).named("bias")

        h = self.h_init
        h_list = []

        # slice the inputs into time slices
        in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

        # unrolling computations
        for i in range(self.recurrent_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                h = self._step(in_s[i], h)
                h_list.append(h)

        if self.return_sequence is True:
            # only when returning a sequence, need to reverse the output
            h_list = h_list[::-1] if self.backward else h_list
            rnn_out = ng.stack(h_list,
                               self.recurrent_axis,
                               pos=self.recurrent_axis_idx)
        else:
            rnn_out = h_list[-1]

        return rnn_out
Example #23
File: layer.py  Project: wanjinchang/ngraph
    def train_outputs(self, in_obj):
        """
        Sets shape based parameters of this layer given an input tuple or int
        or input layer.

        Arguments:
           in_obj (int, tuple, Layer or Tensor): object that provides shape
                                                 information for layer

        Returns:
           (Tensor): output

        """
        in_axes = in_obj.axes
        self.time_axis = in_axes.recurrent_axes()[0]

        def get_steps(x, time_axis):
            return [
                ng.slice_along_axis(x, time_axis, i)
                for i in range(time_axis.length)
            ]

        if self.axes is not None:
            hidden_axes = self.axes - self.axes.recurrent_axes()
        else:
            hidden_axes = ng.make_axes(
                [ng.make_axis(self.nout).named('Hidden_in')])

        w_in_axes = hidden_axes + [
            axis - 1
            for axis in in_axes.sample_axes() - in_axes.recurrent_axes()
        ]
        w_re_axes = hidden_axes + [axis - 1 for axis in hidden_axes]

        self.W_input = ng.variable(axes=w_in_axes,
                                   initial_value=self.init(
                                       w_in_axes.lengths)).named("W_in")
        self.W_recur = ng.variable(axes=w_re_axes,
                                   initial_value=self.init_inner(
                                       w_re_axes.lengths)).named("W_re")
        self.b = ng.variable(axes=hidden_axes, initial_value=0).named("bias")

        h_ff_buf = ng.dot(self.W_input, in_obj).named("W_in_dot_in")
        h_ff_s = get_steps(h_ff_buf, self.time_axis)
        self.h_init = ng.constant(np.zeros(h_ff_s[0].axes.lengths),
                                  axes=h_ff_s[0].axes).named('h_init')

        hprev = [self.h_init]

        for i in range(self.time_axis.length):
            with ng.metadata(recurrent_step=str(i)):
                d = ng.dot(self.W_recur,
                           hprev[i]).named("W_rec_dot_h{}".format(i))
                h = self.activation(d + h_ff_s[i] + self.b)
                h.name = "activ{}".format(i)
                hprev.append(h)

        rnn_out = ng.stack(hprev[1:], self.time_axis, pos=1)
        return rnn_out
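
A hedged numpy sketch of the recurrence unrolled above, h[i] = activation(W_re . h[i-1] + (W_in . x)[i] + b), stepping over the time axis; tanh stands in for the activation and all shapes and names are illustrative only.

import numpy as np

T, H, D = 4, 3, 2                        # time steps, hidden size, input size
x = np.random.rand(D, T)
W_in = np.random.rand(H, D)
W_re = np.random.rand(H, H)
b = np.zeros((H, 1))

h_ff = np.dot(W_in, x)                   # like h_ff_buf = ng.dot(W_input, in_obj)
h = np.zeros((H, 1))                     # like h_init
steps = []
for i in range(T):                       # the per-step unrolling loop
    h = np.tanh(np.dot(W_re, h) + h_ff[:, i:i + 1] + b)
    steps.append(h)
rnn_out = np.concatenate(steps, axis=1)  # like ng.stack over the time axis
assert rnn_out.shape == (H, T)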
Example #24
def test_get_layouts(config):
    test_transformer = ngt.make_transformer_factory('gpu')()

    t = config
    with ng.metadata(parallel=t['parallel_axis']):
        test_ops = [
            GPUCudaScatterSendOp(
                TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                              device_id='0', parallel=t['parallel_axis'],
                              transformer='gpu0', host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                      parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                      host_transformer=None))
            ),
            GPUCudaScatterRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                      parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                      host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                  device_id='0', parallel=t['parallel_axis'],
                                  transformer='gpu0', host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                          parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                          host_transformer=None))
                )
            ),
            GPUCudaGatherRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                    parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                                    host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id='0', parallel=t['parallel_axis'],
                      transformer='gpu0', host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                  device_id='0', parallel=t['parallel_axis'],
                                  transformer='gpu0', host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                          parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                          host_transformer=None))
                )
            ),
            GPUCudaGatherSendOp(
                TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                              device_id='0', transformer='gpu0',
                              host_transformer=None, parallel=t['parallel_axis']))
            ),
            GPUCudaAllReduceOp(
                input_node=TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                         device_id='0', transformer='gpu0', host_transformer=None,
                                         parallel=t['parallel_axis'])),
                func='sum'
            )
        ]
    test_layouts = []
    for op in test_ops:
        test_layouts.append(test_transformer.get_layouts(op)[0].axes)
    np.testing.assert_array_equal(test_layouts, t['expected_layouts'])
Example #25
def test_singleton_device_id(hetr_device):
    with ng.metadata(device_id=(['1'])):
        x = ng.placeholder(())
    graph_ops = OrderedSet([x])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = [hetr_device, '1']

    check_device_assign_pass(hetr_device, "0", graph_op_metadata, graph_ops)
Example #26
def test_from_device(transformer_factory):
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())
    x_plus_one = x + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_one, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 1
Example #27
def test_singleton_device_id(transformer_factory):
    with ng.metadata(device_id=(['1'])):
        x = ng.placeholder(())
    graph_ops = OrderedSet([x])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = ["cpu", '1']

    check_device_assign_pass("cpu", "0", graph_op_metadata, graph_ops)
Example #28
def test_to_and_from_device():
    x = ng.placeholder(())
    with ng.metadata(device_id='1'):
        x_plus_one = x + 1
    x_plus_two = x_plus_one + 1

    with ExecutorFactory() as ex:
        computation = ex.executor(x_plus_two, x)
        for i in [10, 20, 30]:
            assert computation(i) == i + 2
Example #29
def test_computation_return_list(transformer_factory):
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())
    x_plus_one = x + 1
    x_plus_two = x + 2
    x_mul_three = x * 3

    with ExecutorFactory() as ex:
        computation = ex.executor([x_plus_one, x_plus_two, x_mul_three], x)
        for i in [10, 20, 30]:
            assert computation(i) == (i + 1, i + 2, i * 3)
Example #30
def test_process_leak(transformer_factory):
    baseline = active_children()
    with ng.metadata(device_id=('2')):
        x = ng.constant(2)
    assert len(active_children()) == 0
    with ExecutorFactory() as ex:
        comp = ex.executor(x)
        assert len(active_children()) == 1
        comp()
        assert len(active_children()) == 2
    assert len(active_children()) == len(baseline)