Example #1
def transformer_factory(request):
    factory = ngt.make_transformer_factory(request.param)
    ngt.set_transformer_factory(factory)
    yield factory

    # Reset transformer factory to default
    ngt.set_transformer_factory(ngt.make_transformer_factory("numpy"))
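A minimal sketch of how a fixture like this might be declared in a conftest.py; the pytest decorator, the parametrization over ngt.transformer_choices(), and the import ngraph.transformers as ngt are assumptions, not taken from the example above:

# Hypothetical conftest.py sketch; the parametrization is an assumption
import pytest
import ngraph.transformers as ngt


@pytest.fixture(params=ngt.transformer_choices())
def transformer_factory(request):
    factory = ngt.make_transformer_factory(request.param)
    ngt.set_transformer_factory(factory)
    yield factory

    # Reset transformer factory to default
    ngt.set_transformer_factory(ngt.make_transformer_factory("numpy"))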
Example #2
    def make_and_set_transformer_factory(args):

        flex_args = ('fixed_point', 'flex_verbose', 'collect_flex_data')
        # Default for any flex arg that was not supplied on the command line;
        # easy to confuse with the store_true default set in add_argument
        default = False

        if args.backend == flex_gpu_transformer_name:
            flex_args_dict = dict(
                (a, getattr(args, a, default)) for a in flex_args)
            factory = ngt.make_transformer_factory(args.backend,
                                                   **flex_args_dict)
        else:
            factory = ngt.make_transformer_factory(args.backend)

        ngt.set_transformer_factory(factory)
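For context, a hedged sketch of the argparse registration that could produce the flex args consumed above; the flag names mirror flex_args and the store_true behaviour mentioned in the comment, but the exact parser setup is an assumption:

# Hypothetical flag registration; action='store_true' makes each flag default
# to False, matching the fallback default used above
parser.add_argument('--fixed_point', action='store_true')
parser.add_argument('--flex_verbose', action='store_true')
parser.add_argument('--collect_flex_data', action='store_true')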
Example #3
def test_hetr_graph_passes():

    # Build the graph
    with ng.metadata(device_id='1'):
        x = ng.placeholder(())

    y = ng.placeholder(())
    x_plus_y = x + y

    # Build the graph metadata
    graph_ops = OrderedSet([x_plus_y, x, y])

    graph_op_metadata = {op: list() for op in graph_ops}
    graph_op_metadata[x] = ["numpy", '1']
    graph_op_metadata[y] = ["numpy", '0']
    graph_op_metadata[x_plus_y] = ["numpy", '0']

    transformer_list = ["numpy1", "numpy0"]

    # Run the hetr passes one by one, and verify they did the expected things to the graph
    check_device_assign_pass("numpy", "0", graph_op_metadata, graph_ops)
    check_communication_pass(ops_to_transform=graph_ops,
                             expected_recv_nodes=[x_plus_y])

    # Check that the hetr ChildTransformerPass generates the expected transformer list
    obj = ChildTransformerPass([])
    transformer = ngt.make_transformer_factory('hetr')()
    obj.do_pass(graph_ops, transformer)
    transformer.close()
    assert set(transformer_list) == set(obj.transformer_list)
Example #4
        def run(self):
            with closing(ngt.make_transformer_factory('cpu')()) as t:
                comp = t.computation(self.y)
                self.results_qs.put(comp())

            while not self.exit.is_set():
                time.sleep(0.1)
Example #5
    def time(self, n_iterations, n_skip, computation_name, visualize,
             subgraph_attr=None, preprocess=False):
        """
        This runs _any_ computation repeatedly with data from feed_dict, and times it

        (Nothing model-specific inside, can be reused)
        """
        times = DefaultOrderedDict()
        feed_dict = self.fill_feed_dict(self.train_set, self.inputs, preprocess)
        start = Benchmark.marker.init_mark()
        end = Benchmark.marker.init_mark()
        with closing(ngt.make_transformer_factory(self.transformer,
                                                  device=self.device)()) as transformer:
            if visualize:
                nviz = ngraph.transformers.passes.nviz.VizPass(show_axes=True,
                                                               show_all_metadata=True,
                                                               subgraph_attr=subgraph_attr)
                transformer.register_graph_pass(nviz)
            model_out_computation = transformer.add_computation(self.computation)
            for i in range(n_skip):
                model_out_computation(feed_dict=feed_dict)
            for i in range(n_skip, n_iterations):
                Benchmark.marker.record_mark(start)
                model_out_computation(feed_dict=feed_dict)
                Benchmark.marker.record_mark(end)
                times[computation_name][i] = Benchmark.marker.get_time(start, end)
        return times
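A small follow-up sketch showing one way to aggregate the returned times; it assumes Benchmark.marker.get_time reports one duration per timed iteration:

# Hypothetical aggregation of the values recorded above
iteration_times = list(times[computation_name].values())
mean_time = sum(iteration_times) / len(iteration_times)
print("mean time over {} timed iterations: {}".format(len(iteration_times), mean_time))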
Example #6
def test_distributed_dot_parallel_second_axis(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail(
            "Axes Layout needs to be fixed for GPUs after changes to make\
        parallel_axis the least contiguous axis for scatter/gather communication ops"
        )

    H = ng.make_axis(length=6, name='height')
    N = ng.make_axis(length=8, name='batch')
    W1 = ng.make_axis(length=2, name='W1')
    W2 = ng.make_axis(length=4, name='W2')
    x = ng.placeholder(axes=[H, N])
    w2 = ng.placeholder(axes=[W2, W1])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        w1 = ng.placeholder(axes=[W1, H])
        dot1 = ng.dot(w1, x).named("dot1")
    dot2 = ng.dot(w2, dot1).named("dot2")

    np_x = np.random.randint(100, size=[H.length, N.length])
    np_w1 = np.random.randint(100, size=[W1.length, H.length])
    np_w2 = np.random.randint(100, size=[W2.length, W1.length])
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([dot2, dot1], x, w1, w2)
        res2, res1 = computation(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))

        computation2 = transformer.computation([dot1, dot2], x, w1, w2)
        res1, res2 = computation2(np_x, np_w1, np_w2)
        np.testing.assert_array_equal(res1, np.dot(np_w1, np_x))
        np.testing.assert_array_equal(res2, np.dot(np_w2, np.dot(np_w1, np_x)))
Example #7
def test_one_dot_bprop_allreduce(config):
    c = config

    pytest.xfail(
        "GPU child transformers generate errors during AssignLayouts graph pass #1651"
    )

    H_axis = ng.make_axis(length=4, name='height')
    W_axis = ng.make_axis(length=6, name='width')
    with ng.metadata(step='input'):
        X = ng.placeholder(axes=[H_axis, W_axis])
        target = ng.constant(1, axes=[W_axis])
    with ng.metadata(device_id=c['device_id'], parallel=W_axis):
        W = ng.variable(axes=[H_axis], initial_value=UniformInit(1, 1))
        dot = ng.dot(W, X)
        L = ng.squared_L2(target - dot, out_axes=())
        grad = ng.deriv(L, W)
        grad.metadata['reduce_func'] = c['func']
        update = (W - grad)

    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        out_comp = hetr.computation([update], X)
        result = out_comp(c['input'])

        np.testing.assert_array_equal(result, c['expected_result'])
Example #8
def test_allreduce_hint(hetr_device, config):
    if hetr_device == 'gpu':
        if 'gpu' not in ngt.transformer_choices():
            pytest.skip("GPUTransformer not available")

    input = config['input']
    device_id = config['device_id']
    axis_A = ng.make_axis(length=4, name='axis_A')
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)

    with ng.metadata(device=hetr_device,
                     device_id=device_id,
                     parallel=parallel_axis):
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1))
        var_B = ng.variable(axes=[axis_A],
                            initial_value=UniformInit(input, input))
        var_B.metadata['reduce_func'] = 'sum'
        var_B_mean = var_B / len(device_id)
        var_minus = (var_A - var_B_mean)

    with closing(ngt.make_transformer_factory('hetr',
                                              device=hetr_device)()) as hetr:
        out_comp = hetr.computation(var_minus)
        result = out_comp()
        np_result = np.full((axis_A.length), config['expected_result'],
                            np.float32)
        np.testing.assert_array_equal(result, np_result)
Example #9
def transformer_factory(request):
    def set_and_get_factory(transformer_name):
        factory = ngt.make_transformer_factory(transformer_name)
        ngt.set_transformer_factory(factory)
        return factory

    transformer_name = request.param

    if pytest.config.getoption("--enable_flex"):
        if transformer_name == flex_gpu_transformer_name:
            if flex_gpu_transformer_name in ngt.transformer_choices():
                yield set_and_get_factory(transformer_name)
            else:
                raise ValueError("GPU not found, should not set --enable_flex"
                                 "flag for py.test.")
        else:
            pytest.skip(
                'Skip all other transformers since --enable_flex is set.')
    else:
        if transformer_name == flex_gpu_transformer_name:
            pytest.skip('Skip flex test since --enable_flex is not set.')
        else:
            yield set_and_get_factory(transformer_name)

    # Reset transformer factory to default
    ngt.set_transformer_factory(ngt.make_transformer_factory("numpy"))
Example #10
def test_gpu_graph(config):
    pytest.xfail("Multi-GPU testing not enabled yet")

    if 'gpu' not in ngt.transformer_choices():
        pytest.skip('GPUTransformer not available!')

    t = config
    with ng.metadata(device='gpu'):
        x = ng.placeholder(axes=t['axes'])

    with ng.metadata(device='gpu',
                     device_id=t['device_id'],
                     parallel=t['parallel_axis']):
        x_plus_one = x + 1

    with ng.metadata(device='gpu'):
        x_plus_two = x_plus_one + 1

    os.environ["HETR_SERVER_GPU_NUM"] = str(len(t['device_id']))

    np_x = np.random.randint(100, size=t['axes'].full_lengths)
    with closing(ngt.make_transformer_factory('hetr')()) as transformer:
        computation = transformer.computation(x_plus_two, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 2)
Example #11
def test_reduce_vector(hetr_device):
    """
    A whole vector is produced on each worker and should be reduced across
    workers before being returned, but not along its own axes, since the
    parallel axis is not among them
    """
    if hetr_device == 'gpu':
        pytest.xfail("broadcast communication ops not yet supported on gpus")

    H = ng.make_axis(length=4, name='height')
    N = ng.make_axis(length=8, name='batch')
    weight = ng.make_axis(length=2, name='weight')
    x = ng.placeholder(axes=[N, H])
    w = ng.placeholder(axes=[H, weight])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        dot = ng.dot(x, w)
        out = ng.sum(dot, N)

    np_x = np.random.randint(100, size=[N.length, H.length])
    np_weight = np.random.randint(100, size=[H.length, weight.length])
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(out, x, w)
        res = computation(np_x, np_weight)
        # TODO should the reduce infer a sum or mean?
        expected = np.sum(np.dot(np_x, np_weight), 0) / 2.
        np.testing.assert_array_equal(res, expected)
Example #12
def test_allreduce_hint_gpu(config):
    pytest.xfail("Multi-GPU testing not enabled yet")

    if 'gpu' not in ngt.transformer_choices():
        pytest.skip("GPUTransformer not available")

    c = config
    os.environ["HETR_SERVER_GPU_NUM"] = str(len(c['device_id']))

    ax_A_length = 32
    ax_B_length = 16

    np_result = [np.full((ax_A_length, ax_B_length), c['expected_result'], np.float32)]
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)
    with ng.metadata(device_id=c['device_id'], parallel=parallel_axis):
        axis_A = ng.make_axis(length=ax_A_length, name='axis_A')
        axis_B = ng.make_axis(length=ax_B_length, name='axis_B')
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1)).named('var_A')
        var_B = ng.variable(initial_value=UniformInit(c['input'], c['input']),
                            axes=[axis_B]).named('var_B')
        var_B.metadata['reduce_func'] = c['func']
        var_minus = (var_A - var_B).named('var_minus')
    with closing(ngt.make_transformer_factory('hetr', device='gpu')()) as hetr:
        out_comp = hetr.computation([var_minus]).named('out_comp')
        result = out_comp()
        np.testing.assert_array_equal(result, np_result)
Example #13
def test_get_layouts(config):
    test_transformer = ngt.make_transformer_factory('gpu')()

    t = config
    with ng.metadata(parallel=t['parallel_axis']):
        test_ops = [
            GPUCudaScatterSendOp(
                TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                              device_id='0', parallel=t['parallel_axis'],
                              transformer='gpu0', host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                      parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                      host_transformer=None))
            ),
            GPUCudaScatterRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                      parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                      host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                  device_id='0', parallel=t['parallel_axis'],
                                  transformer='gpu0', host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                          parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                          host_transformer=None))
                )
            ),
            GPUCudaGatherRecvOp(
                ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                                    parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                                    host_transformer=None)),
                ng.Op(metadata=dict(device='gpu', device_id='0', parallel=t['parallel_axis'],
                      transformer='gpu0', host_transformer=None)),
                GPUCudaScatterSendOp(
                    TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                  device_id='0', parallel=t['parallel_axis'],
                                  transformer='gpu0', host_transformer=None)),
                    ng.Op(metadata=dict(device='gpu', device_id=('0', '1'),
                          parallel=t['parallel_axis'], transformer=['gpu0', 'gpu1'],
                          host_transformer=None))
                )
            ),
            GPUCudaGatherSendOp(
                TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                              device_id='0', transformer='gpu0',
                              host_transformer=None, parallel=t['parallel_axis']))
            ),
            GPUCudaAllReduceOp(
                input_node=TensorValueOp(ng.placeholder(t['axes']), metadata=dict(device='gpu',
                                         device_id='0', transformer='gpu0', host_transformer=None,
                                         parallel=t['parallel_axis'])),
                func='sum'
            )
        ]
    test_layouts = []
    for op in test_ops:
        test_layouts.append(test_transformer.get_layouts(op)[0].axes)
    np.testing.assert_array_equal(test_layouts, t['expected_layouts'])
Example #14
        def run(self):
            with closing(
                    ngt.make_transformer_factory(
                        'gpu', device_id=self.device_id)()) as t:
                comp = t.computation(self.y)
                self.queue.put(comp())

            while not self.exit.is_set():
                time.sleep(0.1)
Example #15
    def parse_args(self, gen_be=True):
        args = super(NgraphArgparser, self).parse_args()
        factory = ngt.make_transformer_factory(args.backend)
        ngt.set_transformer_factory(factory)

        # invert no_progress_bar meaning and store in args.progress_bar
        args.progress_bar = not args.no_progress_bar

        return args
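A hedged usage sketch for a parser like this; the import path and description string are assumptions based on typical ngraph frontend usage, while args.backend and args.progress_bar come from the method above:

# Hypothetical usage; the import path and description are assumptions
from ngraph.frontends.neon import NgraphArgparser

parser = NgraphArgparser(description='train a small model')
args = parser.parse_args()  # also selects and sets the transformer backend
print(args.backend, args.progress_bar)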
Example #16
def transformer_factory(request):
    def set_and_get_factory(transformer_name):
        factory = ngt.make_transformer_factory(transformer_name)
        ngt.set_transformer_factory(factory)
        return factory

    name = request.config.getoption("--transformer")

    yield set_and_get_factory(name)

    # Reset transformer factory to default
    ngt.set_transformer_factory(ngt.make_transformer_factory("cpu"))
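Since the fixture reads a --transformer option, here is a hedged sketch of the conftest.py hook that could register it; the default value and help text are assumptions chosen to match the reset above:

# Hypothetical conftest.py hook registering the --transformer option
def pytest_addoption(parser):
    parser.addoption("--transformer", action="store", default="cpu",
                     help="transformer backend to run the tests with")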
Example #17
def test_broadcast_scalar(hetr_device, config):
    if hetr_device == 'gpu':
        pytest.skip('gpu communication broadcast op is not supported.')
    device_id = config['device_id']
    x = ng.placeholder(())
    y = ng.placeholder(())
    with ng.metadata(device_id=device_id, parallel=ax_A):
        x_plus_y = x + y

    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(x_plus_y, x, y)
        res = computation(1, 2)
        np.testing.assert_array_equal(res, 3)
Example #18
def test_multiple_gather_ops(config):
    c = config

    H = ng.make_axis(length=2, name='height')
    W = ng.make_axis(length=4, name='width')
    x = ng.placeholder(axes=[H, W])
    with ng.metadata(device_id=c['device_id'], parallel=W):
        x_plus_one = x + 1
        x_plus_two = x_plus_one + 2
    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        plus = hetr.computation([x_plus_two, x_plus_one], x)
        result_two, result_one = plus(c['input'])

        np.testing.assert_array_equal(result_two, c['result_two'])
        np.testing.assert_array_equal(result_one, c['result_one'])
Example #19
def test_distributed_plus_one(hetr_device, config):
    device_id = config['device_id']
    axes = config['axes']
    parallel_axis = config['parallel_axis']

    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes)
        with ng.metadata(device_id=device_id, parallel=parallel_axis):
            x_plus_one = x + 1

    np_x = np.random.randint(100, size=axes.lengths)
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(x_plus_one, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np_x + 1)
Example #20
def test_allreduce_hint_cpu(config):
    c = config
    parallel_axis = ng.make_axis(name='axis_parallel', length=16)
    with ng.metadata(device_id=c['device_id'], parallel=parallel_axis):
        axis_A = ng.make_axis(length=4, name='axis_A')
        axis_B = ng.make_axis(length=2, name='axis_B')
        var_A = ng.variable(axes=[axis_A], initial_value=UniformInit(1, 1)).named('var_A')
        var_B = ng.variable(initial_value=UniformInit(c['input'], c['input']),
                            axes=[axis_B]).named('var_B')
        var_B.metadata['reduce_func'] = c['func']
        var_minus = (var_A - var_B).named('var_minus')
    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        out_comp = hetr.computation([var_minus]).named('out_comp')
        result = out_comp()

        np.testing.assert_array_equal(result, c['expected_result'])
Example #21
def test_to_and_from_device(hetr_device, config):
    axes = config['axes']
    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes) if axes else ng.placeholder(())
        with ng.metadata(device_id='1'):
            x_plus_one = x + 1
        x_plus_two = x_plus_one * 2

    np_x = np.random.randint(100,
                             size=axes.lengths) if axes else random.random()
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation([x_plus_one, x_plus_two], x)
        res = computation(np_x)
        np.testing.assert_allclose(res[0], np_x + 1.0)
        np.testing.assert_allclose(res[1], (np_x + 1.0) * 2.0)
Example #22
def check_result_values(input_vector,
                        result_expected,
                        placeholder,
                        ops=OrderedSet(),
                        *args):
    """
    This function checks the result values return by the hetr computation object
    against the expected result values
    it also checks if the value returned by the hetr object matches the order in
    the expected result list

    :param: input_vector: list specifying the differnt values to be passed to
            the placeholder
    :param: result_expected: list of tuples specifying the expected result
            values from the hetr computation object
    :param: placeholder: list of placeholder to be passed for hetrcomputation
    :param: ops: list of result handlers to be paased for hetrcomputation

    """
    # Select the transformer
    transformer = ngt.make_transformer_factory('hetr')()

    # Build the hetr computation object
    if isinstance(placeholder, tuple):
        computation = transformer.computation(ops, *placeholder)
    else:
        computation = transformer.computation(ops, placeholder)
    result_obtained = []

    # Collect the result returned for each input value
    for i in input_vector:
        if isinstance(i, tuple):
            result_obtained.append(computation(*i))
        else:
            result_obtained.append(computation(i))

    # if the returned result is a tuple
    if len(result_expected) > 1:
        np.testing.assert_array_equal(result_expected, result_obtained)

    # if the returned result is a scalar
    else:
        assert (np.array(tuple(result_obtained)) == np.array(
            result_expected[0])).all()

    transformer.close()
Example #23
def run_benchmark(model_out_comp, transformer_type, feed_dict, n_skip, n_iter):
    """
    This runs _any_ computation repeatedly with data from feed_dict, and times it

    (Nothing model-specific inside, can be reused)
    """
    times = DefaultOrderedDict()
    with closing(ngt.make_transformer_factory(transformer_type)()) as transformer:
        nviz = ngraph.transformers.passes.nviz.VizPass(show_axes=True, show_all_metadata=False)
        transformer.register_graph_pass(nviz)
        model_out_computation = transformer.add_computation(model_out_comp)
        for i in range(n_skip):
            model_out_computation(feed_dict=feed_dict)
        for i in range(n_iter):
            times[i]['start'] = time.time() * 1000.0
            model_out_computation(feed_dict=feed_dict)
            times[i]['stop'] = time.time() * 1000.0
    return times
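A short post-processing sketch for the dictionary returned above, using the per-iteration start/stop timestamps it records (in milliseconds):

# Hypothetical post-processing of the returned times dict
durations_ms = [times[i]['stop'] - times[i]['start'] for i in sorted(times)]
print("mean iteration time: {:.3f} ms".format(sum(durations_ms) / len(durations_ms)))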
Example #24
def test_comm_broadcast_op(hetr_device):
    if hetr_device == 'gpu':
        pytest.skip('gpu communication broadcast op is not supported.')
    H = ng.make_axis(length=4, name='height')
    N = ng.make_axis(length=8, name='batch')
    weight = ng.make_axis(length=2, name='weight')
    x = ng.placeholder(axes=[N, H])
    # w will be broadcast to all devices
    w = ng.placeholder(axes=[H, weight])
    with ng.metadata(device=hetr_device, device_id=('0', '1'), parallel=N):
        dot = ng.dot(x, w)

    np_x = np.random.randint(100, size=[N.length, H.length])
    np_weight = np.random.randint(100, size=[H.length, weight.length])
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(dot, x, w)
        res = computation(np_x, np_weight)
        np.testing.assert_array_equal(res, np.dot(np_x, np_weight))
Example #25
def check_communication_pass(ops_to_transform, expected_recv_nodes):
    """
    The communication pass should insert send/recv nodes wherever
    the metadata[transformer] differs between nodes.
    This checks that the recv nodes are inserted in the right place, and counts
    that the expected number of send
    nodes are found.

    :param ops_to_transform: list of ops to do the garph traversal
    :param expected_recv_nodes: lits of ops where receive nodes are expected to
           be inserted after the communication pass

    """
    transformer = ngt.make_transformer_factory('hetr')()

    send_nodes = OrderedSet()
    scatter_shared_queues = list()
    gather_shared_queues = list()
    obj = CommunicationPass(send_nodes, scatter_shared_queues,
                            gather_shared_queues)
    obj.do_pass(ops_to_transform, transformer)

    op_list_instance_type = list()
    num_expected_sendnodes = len(expected_recv_nodes)

    # Check that the communication pass inserted the expected number of send nodes
    assert num_expected_sendnodes == len(send_nodes)

    # Verify that Recv nodes are inserted in the right place
    for op in expected_recv_nodes:
        for each_arg in op.args:
            op_list_instance_type.append(type(each_arg))

        assert (ng.op_graph.communication.Recv in op_list_instance_type or
                ng.op_graph.communication.Gather_Recv in op_list_instance_type or
                ng.op_graph.communication.Scatter_Recv in op_list_instance_type)
        del op_list_instance_type[:]

    transformer.close()
Example #26
def test_distributed_dot(hetr_device, config):
    if hetr_device == 'gpu':
        pytest.xfail("Intermittent failure on jenkins for mgpu")
    device_id = config['device_id']
    axes_x = config['axes_x']
    axes_w = config['axes_w']
    parallel_axis = config['parallel_axis']

    np_weight = np.ones(axes_w.lengths)
    with ng.metadata(device=hetr_device):
        x = ng.placeholder(axes=axes_x)
        with ng.metadata(device_id=device_id, parallel=parallel_axis):
            w = ng.variable(axes=axes_w, initial_value=np_weight)
            dot = ng.dot(x, w)

    np_x = np.random.randint(100, size=axes_x.lengths)
    with closing(ngt.make_transformer_factory(
            'hetr', device=hetr_device)()) as transformer:
        computation = transformer.computation(dot, x)
        res = computation(np_x)
        np.testing.assert_array_equal(res, np.dot(np_x, np_weight))
Example #27
def test_multi_computations(hetr_device):
    if hetr_device == 'gpu':
        pytest.xfail("enable after gpu exgraph")
    axes_x = ng.make_axes([ax_A, ax_B])
    x = ng.placeholder(axes=axes_x)
    y = ng.placeholder(())
    with ng.metadata(device_id=('0', '1'), parallel=ax_A):
        f = x**2
        out = y - ng.mean(f, out_axes=())

    np_x = np.random.randint(10, size=axes_x.lengths)
    np_y = np.random.randint(10)
    with closing(ngt.make_transformer_factory('hetr',
                                              device=hetr_device)()) as t:
        comp = t.computation(out, x, y)
        another_comp = t.computation(f, x)

        res_comp = comp(np_x, np_y)
        res_another_comp = another_comp(np_x)
        ref_comp = np_y - np.mean(np_x**2)
        np.testing.assert_array_equal(res_comp, ref_comp)
        np.testing.assert_array_equal(res_another_comp, np_x**2)
Example #28
    def time(self, n_iterations, n_skip, computation_name, feed_dict):
        """
        This runs _any_ computation repeatedly with data from feed_dict, and times it

        (Nothing model-specific inside, can be reused)
        """
        times = DefaultOrderedDict()
        start = Benchmark.marker.init_mark()
        end = Benchmark.marker.init_mark()
        t_args = {}
        if self.transformer == 'hetr':
            t_args['device'] = self.device
        with closing(ngt.make_transformer_factory(self.transformer, **t_args)()) as transformer:
            model_out_computation = transformer.add_computation(self.computation)
            for i in range(n_skip):
                model_out_computation(feed_dict=feed_dict)
            for i in range(n_skip, n_iterations):
                Benchmark.marker.record_mark(start)
                model_out_computation(feed_dict=feed_dict)
                Benchmark.marker.record_mark(end)
                times[computation_name][i] = Benchmark.marker.get_time(start, end)
        return times
Example #29
def check_device_assign_pass(default_device,
                             default_device_id,
                             graph_op_metadata,
                             graph_op=OrderedSet(),
                             *args):
    """
    The Device assign pass should inject the metadata{device_id, device} as
    specified by the user for each op,
    if not specified then the default {device_id:0, device:numpy} should be
    inserted for each op.

    :param: default_device: string, the default device for each op,
            if not specified by user ex: "numpy"
    :param: default_device_id: string, the default device number for each op,
            if not specified by user ex: "0"
    :param: graph_op_metadata: dict, dictionary of list specifying  the expected
            metadata {device_id, device} for each op
    :param: graph_op: list of ops to do the graph traversal

    """
    transformer = ngt.make_transformer_factory('hetr')()

    transformers = set()
    expected_transformers = set()
    obj = DeviceAssignPass(default_device, default_device_id, transformers)

    obj.do_pass(graph_op, transformer)

    for op in graph_op_metadata.keys():
        assert op.metadata['device'] == graph_op_metadata[op][0]
        assert op.metadata['device_id'] == graph_op_metadata[op][1]
        assert op.metadata['transformer'] == graph_op_metadata[op][0] +  \
            str(graph_op_metadata[op][1])

        expected_transformers.add(op.metadata['transformer'])
    assert transformers == expected_transformers

    transformer.close()
Example #30
def test_multiple_gather_ops(hetr_device):
    if hetr_device == 'gpu':
        if 'gpu' not in ngt.transformer_choices():
            pytest.skip("GPUTransformer not available")
        pytest.xfail(
            "Failure due to gather recv tensor being returned in wrong shape, "
            " possible mismatch between op layout and op.tensor layout")

    H = ng.make_axis(length=2, name='height')
    W = ng.make_axis(length=4, name='width')
    x = ng.placeholder(axes=[H, W])
    with ng.metadata(device_id=('0', '1'), parallel=W):
        x_plus_one = x + 1
        x_mul_two = x_plus_one * 2

    input = np.random.randint(100, size=x.axes.lengths)
    with closing(ngt.make_transformer_factory('hetr',
                                              device=hetr_device)()) as hetr:
        plus = hetr.computation([x_mul_two, x_plus_one], x)
        result_mul_two, result_plus_one = plus(input)

        np.testing.assert_array_equal(result_plus_one, input + 1)
        np.testing.assert_array_equal(result_mul_two, (input + 1) * 2)