Example no. 1
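# Trains AlexNet with OneFlow global functions: defines a train job and an eval
# job, loads or initializes a checkpoint, saves a snapshot every 100 iterations,
# and writes the per-iteration loss to ./of_loss/alexnet as a .npy file.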
def main(args):
    flow.config.machine_num(args.num_nodes)
    flow.config.gpu_device_num(args.gpu_num_per_node)
    flow.config.enable_legacy_model_io(True)
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.default_data_type(flow.float)
    func_config.cudnn_conv_force_fwd_algo(0)
    func_config.cudnn_conv_force_bwd_data_algo(1)
    func_config.cudnn_conv_force_bwd_filter_algo(1)
    func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)

    @flow.global_function(type="train", function_config=func_config)
    def alexnet_train_job():
        (labels, images) = _data_load_layer(args, args.train_dir)
        loss = alexnet(args, images, labels)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-05]), momentum=0
        ).minimize(loss)
        return loss

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)

    @flow.global_function(function_config=func_config)
    def alexnet_eval_job():
        with flow.scope.consistent_view():
            (labels, images) = _data_load_layer(args, args.eval_dir)
            return alexnet(args, images, labels, False)

    check_point = flow.train.CheckPoint()
    if not args.model_load_dir:
        check_point.init()
    else:
        check_point.load(args.model_load_dir)
    num_nodes = args.num_nodes
    print(
        "Traning alexnet: num_gpu_per_node = {}, num_nodes = {}.".format(
            args.gpu_num_per_node, num_nodes
        )
    )
    print("{:>12}  {:>12}  {:>12}".format("iter", "loss type", "loss value"))
    loss = []
    for i in range(args.iter_num):
        train_loss = alexnet_train_job().get().mean()
        loss.append(train_loss)
        fmt_str = "{:>12}  {:>12}  {:>12.6f}"
        print(fmt_str.format(i, "train loss:", train_loss))
        if (i + 1) % 100 == 0:
            check_point.save(_MODEL_SAVE_DIR + str(i))
    loss_file = "{}n{}c.npy".format(
        str(num_nodes), str(args.gpu_num_per_node * num_nodes)
    )
    loss_path = "./of_loss/alexnet"
    if not os.path.exists(loss_path):
        os.makedirs(loss_path)
    numpy.save(os.path.join(loss_path, loss_file), loss)
Example no. 2
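    # Checks flow.random.shuffle and generate_random_batch_permutation_indices:
    # the output must differ from the input but match it after sorting along axis 0.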
    def test_shuffle(_):
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["x_shape"] = [(100,), (10, 1000), (10, 10, 2000)]
        arg_dict["data_type"] = ["float32", "double", "int32", "int64"]
        for (device_type, x_shape, data_type) in GenArgList(arg_dict):
            assert device_type in ["gpu", "cpu"]
            assert data_type in ["float32", "double", "int8", "int32", "int64"]
            flow.clear_default_session()
            func_config = flow.FunctionConfig()
            func_config.default_data_type(flow.float)

            @flow.global_function(function_config=func_config)
            def TestJob(
                x: oft.Numpy.Placeholder(
                    x_shape, dtype=type_name_to_flow_type[data_type]
                )
            ):
                with flow.scope.placement(device_type, "0:0"):
                    return flow.random.shuffle(x)

            x = np.random.randn(*x_shape).astype(type_name_to_np_type[data_type])
            ret = TestJob(x).get().numpy()
            assert not np.array_equal(x, ret), x_shape
            x.sort(0)
            ret.sort(0)
            assert np.array_equal(x, ret), x_shape
            assert device_type in ["gpu", "cpu"]
            assert data_type in ["float32", "double", "int8", "int32", "int64"]
            flow.clear_default_session()
            func_config = flow.FunctionConfig()
            func_config.default_data_type(flow.float)

            @flow.global_function(function_config=func_config)
            def TestJob1(
                x: oft.Numpy.Placeholder(
                    x_shape, dtype=type_name_to_flow_type[data_type]
                )
            ):
                with flow.scope.placement(device_type, "0:0"):
                    return flow.random.generate_random_batch_permutation_indices(x)

            x = np.random.randn(*x_shape).astype(type_name_to_np_type[data_type])
            ret = TestJob1(x).get().numpy()
            idx = np.arange(x_shape[0]).astype(np.int32)
            assert not np.array_equal(idx, ret), x_shape
            idx.sort()
            ret.sort()
            assert np.array_equal(idx, ret), x_shape
Example no. 3
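    # Builds a cast-then-scale job (cast x to out_type, multiply by a scalar
    # variable) and checks the result against the NumPy product x * scale.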
    def run_fuse_cast_scale_mlir(test_case,
                                 device=None,
                                 in_type=None,
                                 out_type=None,
                                 shape=None):
        flow.clear_default_session()
        func_config = flow.FunctionConfig()

        @flow.global_function(function_config=func_config)
        def FuseCastScaleJob(x: oft.Numpy.Placeholder(
            shape, dtype=in_type)) -> Tuple[oft.Numpy, oft.Numpy]:
            with flow.scope.placement(device, "0:0-0"):
                scale = flow.get_variable(
                    "scale",
                    shape=(1, ),
                    dtype=out_type,
                    initializer=flow.random_uniform_initializer(),
                    trainable=False,
                )
                loss = flow.cast(x, dtype=out_type) * scale
                return (loss, scale)

        np_in_type = dtype_util.convert_oneflow_dtype_to_numpy_dtype(in_type)
        x = (np.random.rand(*shape) * 10).astype(np_in_type)
        ret = FuseCastScaleJob(x)
        (loss, scale) = ret
        test_case.assertTrue(np.allclose(loss, x * scale))
Example no. 4
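    # Exercises flow.sync_dynamic_resize over device/shape/dtype combinations:
    # resizing x to `size` rows must return x[:size].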
    def test_sync_dynamic_resize(_):
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["gpu", "cpu"]
        arg_dict["x_shape"] = [(100, ), (1000, 10)]
        arg_dict["data_type"] = ["float32", "double", "int32", "int64"]
        arg_dict["size_type"] = ["int32", "int64"]
        for (device_type, x_shape, data_type,
             size_type) in GenArgList(arg_dict):
            flow.clear_default_session()
            func_config = flow.FunctionConfig()
            func_config.default_data_type(flow.float)

            @flow.global_function(function_config=func_config)
            def TestJob(
                x: oft.Numpy.Placeholder(
                    x_shape, dtype=type_name_to_flow_type[data_type]),
                size: oft.Numpy.Placeholder(
                    (1, ), dtype=type_name_to_flow_type[size_type]),
            ):
                with flow.scope.placement(device_type, "0:0"):
                    return flow.sync_dynamic_resize(x, size)

            size = np.random.randint(0, x_shape[0])
            x = np.random.rand(*x_shape).astype(
                type_name_to_np_type[data_type])
            y = (TestJob(
                x,
                np.array([size]).astype(
                    type_name_to_np_type[size_type])).get().numpy_list()[0])
            assert np.array_equal(y, x[:size])
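Example no. 5
    # Multi-node communication test: chains ccrelu ops placed on the GPUs of two
    # machines and checks that the result matches an elementwise ReLU.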
    def test_multi_node_comm_net(test_case):
        func_config = flow.FunctionConfig()
        func_config.default_logical_view(flow.scope.consistent_view())
        func_config.default_data_type(flow.float)
        flow.config.gpu_device_num(1)

        @flow.global_function(function_config=func_config)
        def ReluJob(x: oft.Numpy.Placeholder((10, 2))):
            with flow.scope.placement("gpu", "0:0"):
                out0 = ccrelu(x, "my_op_0_0")
            with flow.scope.placement("gpu", "1:0"):
                out1 = ccrelu(out0, "my_op_1_0")
            with flow.scope.placement("gpu", "0:0"):
                out2 = ccrelu(out1, "my_op_print")
            return out2

        index = [-2, -1, 0, 1, 2]
        data = []
        for i in index:
            data.append(np.ones((10, 2), dtype=np.float32) * i)
        for i in range(5):
            ret = ReluJob(data[i]).get().numpy()
            print(ret)
            if index[i] > 0:
                test_case.assertTrue(
                    np.array_equal(
                        ret,
                        np.ones((10, 2), dtype=np.float32) * index[i]))
            else:
                test_case.assertTrue(
                    np.array_equal(ret, np.zeros((10, 2), dtype=np.float32)))
Example no. 6
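# Reshapes a variable distributed with split(2) across several devices inside a
# train job and fetches both the variable and the reshaped result.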
def distribute_reshape_test(device_type, device_num, input_shape, shape):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    flow.config.gpu_device_num(device_num)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ReshapeJob():
        with flow.scope.placement(device_type, "0:0-{}".format(device_num - 1)):
            x = flow.get_variable(
                "var_x",
                shape=input_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=2, maxval=5),
                trainable=True,
                distribute=flow.distribute.split(2),
            )
            loss = flow.reshape(x, shape)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0
            ).minimize(loss)
            return (x, loss)

    (x, loss) = ReshapeJob().get()
Example no. 7
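# Uses flow.watch_diff to check that the gradient of a variable minimized
# directly by SGD is an all-ones tensor.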
def WatchDiff(test_case, device_type, input_shape, dtype):
    assert device_type in ["gpu", "cpu"]
    assert dtype in ["float32", "double"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def CheckOnes(diff):
        ones = np.ones(input_shape)
        test_case.assertTrue(
            np.allclose(diff.numpy(), ones, rtol=1e-05, atol=1e-05))

    @flow.global_function(type="train", function_config=func_config)
    def TrainJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "in",
                shape=input_shape,
                dtype=type_name_to_flow_type[dtype],
                initializer=flow.random_uniform_initializer(),
                trainable=True,
            )
            flow.watch_diff(x, CheckOnes)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(x)

    TrainJob()
Example no. 8
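# Feeds x1 and x2 (seeded with NaN/Inf values) to flow.multi_count_not_finite
# several times each and compares the count with a NumPy count of non-finite
# elements.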
def _run_multi_count_test(test_case, device_type, x1_shape, x2_shape, dtype,
                          x1_count, x2_count):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def multi_count_not_finite_job(
        x1: oft.Numpy.Placeholder(x1_shape,
                                  dtype=type_name_to_flow_type[dtype]),
        x2: oft.Numpy.Placeholder(x2_shape,
                                  dtype=type_name_to_flow_type[dtype]),
    ):
        x_list = []
        for i in range(x1_count):
            x_list.append(x1)
        for i in range(x2_count):
            x_list.append(x2)
        with flow.scope.placement(device_type, "0:0"):
            return flow.multi_count_not_finite(x_list)

    x1 = np.random.randn(*x1_shape).astype(type_name_to_np_type[dtype])
    x1[0] = np.nan
    x1[3] = np.inf
    x2 = np.random.randn(*x2_shape).astype(type_name_to_np_type[dtype])
    x2[2] = np.inf
    x2[6, 5] = np.nan
    y = multi_count_not_finite_job(x1, x2).get()
    x1_not_finite = x1.size - np.sum(np.isfinite(x1))
    x2_not_finite = x2.size - np.sum(np.isfinite(x2))
    np_y = x1_not_finite * x1_count + x2_not_finite * x2_count
    assert y.numpy() == np_y
Example no. 9
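# Boxing test: re-distributes a blob from split(src_axis) to broadcast for
# several source/destination device counts; every output must equal the input.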
def _test_split_to_broadcast(test_case, src_device_type, dst_device_type,
                             src_axis):
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    def build_s2b(input_blob, src_device_num, dst_device_num):
        with flow.scope.placement(src_device_type,
                                  "0:0-" + str(src_device_num - 1)):
            src = flow.identity(
                input_blob.with_distribute(flow.distribute.split(src_axis)))
        with flow.scope.placement(dst_device_type,
                                  "0:0-" + str(dst_device_num - 1)):
            dst = flow.identity(
                src.with_distribute(flow.distribute.broadcast()))
        return dst

    @flow.global_function(function_config=func_config)
    def split_to_broadcast_job(input_blob: oft.Numpy.Placeholder((96, 96))):
        result_list = []
        for i in (1, 2, 3):
            for j in (1, 2, 3):
                result_list.append(build_s2b(input_blob, i, j))
        return tuple(result_list)

    x = np.random.rand(96, 96).astype(np.float32)
    result_tuple = split_to_broadcast_job(x).get()
    for out in result_tuple:
        test_case.assertTrue(np.array_equal(x, out.numpy()))
Example no. 10
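# Boxing test for multiple logical blobs: changes their distributions between a
# source and a destination placement and checks that the values are unchanged.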
def _test_multi_lbi(test_case, src_device_type, dst_device_type,
                    src_device_num, dst_device_num):
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    @flow.global_function(function_config=func_config)
    def multi_lbi_job(x: oft.Numpy.Placeholder((96, 96, 96))):
        with flow.scope.placement(src_device_type,
                                  "0:0-" + str(src_device_num - 1)):
            src_s0 = flow.identity(x.with_distribute(flow.distribute.split(0)))
            src_s1 = flow.identity(x.with_distribute(flow.distribute.split(1)))
            src_b = flow.identity(x.with_distribute(flow.distribute.split(1)))
            (t0_0, t0_1, t0_2) = flow.identity_n((src_s0, src_s1, src_b))
        with flow.scope.placement(dst_device_type,
                                  "0:0-" + str(dst_device_num - 1)):
            t0_0 = t0_0.with_distribute(flow.distribute.split(1))
            t0_1 = t0_1.with_distribute(flow.distribute.broadcast())
            t0_2 = t0_2.with_distribute(flow.distribute.split(1))
            (t1_0, t1_1, t1_2) = flow.identity_n((t0_0, t0_1, t0_2))
        return (t1_0, t1_1, t1_2)

    x = np.random.uniform(-1e-05, 1e-05, (96, 96, 96)).astype(np.float32)
    ret = multi_lbi_job(x).get()
    r0 = ret[0].numpy()
    r1 = ret[1].numpy()
    r2 = ret[2].numpy()
    test_case.assertTrue(np.array_equal(x, r0))
    test_case.assertTrue(np.array_equal(x, r1))
    test_case.assertTrue(np.array_equal(x, r2))
Example no. 11
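# Partial-sum to broadcast boxing: sums a split input over axis 0, broadcasts
# the result, and compares it with np.sum(x, axis=0).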
def _test_partial_sum_to_broadcast(test_case, src_device_type,
                                   dst_device_type):
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    def build_p2b(input_blob, src_device_num, dst_device_num):
        with flow.scope.placement(src_device_type,
                                  "0:0-" + str(src_device_num - 1)):
            src = flow.identity(
                input_blob.with_distribute(flow.distribute.split(0)))
            src = flow.math.reduce_sum(src, axis=0)
        with flow.scope.placement(dst_device_type,
                                  "0:0-" + str(dst_device_num - 1)):
            dst = flow.identity(
                src.with_distribute(flow.distribute.broadcast()))
        return dst

    @flow.global_function(function_config=func_config)
    def partial_sum_to_broadcast_job(input_blob: oft.Numpy.Placeholder(
        (96, 96, 96))):
        result_list = []
        for i in (2, 3):
            for j in (1, 2, 3):
                result_list.append(build_p2b(input_blob, i, j))
        return tuple(result_list)

    x = np.random.uniform(-1e-05, 1e-05, (96, 96, 96)).astype(np.float32)
    result_tuple = partial_sum_to_broadcast_job(x).get()
    for out in result_tuple:
        test_case.assertTrue(np.allclose(np.sum(x, axis=0), out.numpy()))
Example no. 12
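# Runs a slice_update job on `input` and `update` and compares the result with
# the precomputed `output` array.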
def _test_slice_update(
    test_case,
    input,
    update,
    slice_args,
    output,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
    verbose=False,
):
    input = input.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    update = update.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    output = output.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    flow.clear_default_session()
    func_cfg = flow.FunctionConfig()
    func_cfg.default_data_type(dtype)
    func_cfg.default_placement_scope(flow.scope.placement(device_tag, "0:0"))
    slice_func = _make_slice_update_func(
        slice_args, input.shape, update.shape, dtype, func_cfg
    )
    of_output = slice_func(input, update)
    if verbose:
        print("input:\n{}".format(input))
        print("update:\n{}".format(update))
        print("slice_args:", slice_args)
        print("output:\n{}".format(output))
        print("dtype:", dtype)
        print("device_tag:", device_tag)
        print("of_output:\n{}".format(of_output))
    test_case.assertTrue(np.array_equal(output, of_output))
Example no. 13
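# Runs a dynamic slice job in mirrored view (optionally with a larger static
# shape) and compares each result against the expected outputs.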
def _test_slice_dynamic(
    test_case,
    input,
    slice_args,
    outputs,
    static_shape=None,
    dtype=flow.float32,
    device_tag=DEFAULT_DEVICE_TAG,
):
    input = input.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
    outputs = [
        output.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype))
        for output in outputs
    ]
    if static_shape is None:
        static_shape = input.shape
    flow.clear_default_session()
    func_cfg = flow.FunctionConfig()
    func_cfg.default_data_type(dtype)
    func_cfg.default_placement_scope(flow.scope.placement(device_tag, "0:0"))
    func_cfg.default_logical_view(flow.scope.mirrored_view())
    slice_func = _make_slice_dynamic_func(slice_args, static_shape, dtype, func_cfg)
    of_outputs = slice_func([input])
    for (out, of_out) in zip(outputs, of_outputs):
        test_case.assertTrue(np.array_equal(out, of_out[0]))
Example no. 14
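# Resizes images with flow.image_target_resize and scales the bounding boxes by
# the returned scale; returns the scaled boxes and the new image sizes.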
def _of_target_resize_bbox_scale(images, bbox_list, target_size, max_size):
    image_shape = _get_images_static_shape(images)
    bbox_shape = _get_bbox_static_shape(bbox_list)
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(function_config=func_config)
    def target_resize_bbox_scale_job(
        image_def: oft.ListListNumpy.Placeholder(shape=tuple(image_shape),
                                                 dtype=flow.float),
        bbox_def: oft.ListListNumpy.Placeholder(shape=tuple(bbox_shape),
                                                dtype=flow.float),
    ):
        images_buffer = flow.tensor_list_to_tensor_buffer(image_def)
        (resized_images_buffer, new_size,
         scale) = flow.image_target_resize(images_buffer,
                                           target_size=target_size,
                                           max_size=max_size)
        bbox_buffer = flow.tensor_list_to_tensor_buffer(bbox_def)
        scaled_bbox = flow.object_bbox_scale(bbox_buffer, scale)
        scaled_bbox_list = flow.tensor_buffer_to_tensor_list(
            scaled_bbox, shape=bbox_shape[1:], dtype=flow.float)
        return (scaled_bbox_list, new_size)

    input_image_list = [np.expand_dims(image, axis=0) for image in images]
    input_bbox_list = [np.expand_dims(bbox, axis=0) for bbox in bbox_list]
    (output_bbox_list, output_image_size) = target_resize_bbox_scale_job(
        [input_image_list], [input_bbox_list]).get()
    return (output_bbox_list.numpy_lists()[0],
            output_image_size.numpy_list()[0])
Example no. 15
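# Decodes raw image bytes with flow.image_decode through tensor lists and
# returns the decoded images as NumPy arrays.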
def _of_image_decode(images):
    image_files = [open(im, "rb") for im in images]
    images_bytes = [imf.read() for imf in image_files]
    static_shape = (len(images_bytes), max([len(bys) for bys in images_bytes]))
    for imf in image_files:
        imf.close()
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(function_config=func_config)
    def image_decode_job(images_def: oft.ListListNumpy.Placeholder(
        shape=static_shape, dtype=flow.int8)):
        images_buffer = flow.tensor_list_to_tensor_buffer(images_def)
        decoded_images_buffer = flow.image_decode(images_buffer)
        return flow.tensor_buffer_to_tensor_list(decoded_images_buffer,
                                                 shape=(640, 640, 3),
                                                 dtype=flow.uint8)

    images_np_arr = [
        np.frombuffer(bys, dtype=np.byte).reshape(1, -1)
        for bys in images_bytes
    ]
    decoded_images = image_decode_job([images_np_arr]).get().numpy_lists()
    return decoded_images[0]
Example no. 16
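# Compares flow.expand_dims with tf.expand_dims and checks via watch_diff that
# the gradient with respect to the input is an all-ones tensor.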
def compare_with_tensorflow(device_type, x_shape, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def check_grad(x_diff_blob):
        assert np.array_equal(x_diff_blob.numpy(), np.ones(x_shape))

    @flow.global_function(type="train", function_config=func_config)
    def ExpandDimsJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "var",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.ones_initializer(),
                trainable=True,
            )
            flow.watch_diff(x, check_grad)
            loss = flow.expand_dims(x, axis)
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(loss)
            return loss

    of_out = ExpandDimsJob().get().numpy()
    tf_out = tf.expand_dims(np.ones(x_shape, dtype=np.float32), axis).numpy()
    assert np.array_equal(of_out, tf_out)
Example no. 17
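# Compares flow.ones with np.ones inside a train job that also updates a
# zero-initialized variable.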
def _compare_ones_with_np(input_shape, device_type, machine_ids,
                          device_counts):
    assert device_type in ["cpu", "gpu"]
    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)
    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    np_out_ones = np.ones(shape=input_shape, dtype=np.float32)

    @flow.global_function(type="train", function_config=func_config)
    def oneflow_ones() -> tp.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            v = flow.get_variable(
                shape=np_out_ones.shape,
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
                name="x_var",
            )
        of_ones = flow.ones(shape=input_shape, dtype=flow.float32)
        of_out = of_ones + v
        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.001]),
                               momentum=0).minimize(of_out)
        return of_ones

    of_out_ones = oneflow_ones()
    assert np.allclose(of_out_ones, np_out_ones)
Example no. 18
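# Compares reduce_sum_like with tf.math.reduce_sum, optionally routing both
# sides through a float16 cast.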
def compare_with_tensorflow(device_type,
                            data_type,
                            input_shape,
                            axis,
                            keepdims,
                            rtol=1e-05,
                            atol=1e-05):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def ReduceSumLikeJob(x: oft.Numpy.Placeholder(input_shape)):
        with flow.scope.placement(device_type, "0:0"):
            if data_type == "float16":
                x = flow.cast(x, dtype=flow.float16)
                like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
                y = reduce_sum_like(x, like, axis=axis)
                y = flow.cast(y, dtype=flow.float32)
            else:
                like = flow.math.reduce_sum(x, axis=axis, keepdims=keepdims)
                y = reduce_sum_like(x, like, axis=axis)
            return y

    x = np.random.rand(*input_shape).astype(np.float16).astype(np.float32)
    of_out = ReduceSumLikeJob(x).get()
    tf_out = tf.math.reduce_sum(x, axis=axis, keepdims=keepdims)
    if data_type == "float16":
        tf_out = tf.cast(tf_out, dtype=tf.float16)
        tf_out = tf.cast(tf_out, dtype=tf.float32)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=rtol, atol=atol), (
        of_out.numpy(),
        tf_out.numpy(),
    )
Example no. 19
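# Builds a "predict" job around flow.data.megatron_gpt_mmap_data_loader placed
# on the CPU devices of one or more machines and returns the job function.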
def _make_gpt_data_loader_func(
    data_file_prefix,
    seq_length,
    num_samples,
    batch_size,
    dtype,
    shuffle=None,
    random_seed=None,
    split_sizes=None,
    split_index=None,
    machine_num=1,
    device_num=1,
    nd_sbp=None,
    start_from_saved_progress=False,
):
    assert machine_num > 0
    assert device_num > 0 and device_num <= 4
    parallel_hierarchy = None
    if machine_num == 1:
        device_strs = "0:0-{}".format(device_num - 1)
    elif machine_num > 1:
        device_strs = [
            "{}:0-{}".format(machine_id, device_num - 1)
            for machine_id in range(machine_num)
        ]
        parallel_hierarchy = (machine_num, device_num)
    else:
        raise ValueError("invalid machine_num", machine_num)
    flow.clear_default_session()
    flow.config.cpu_device_num(4)
    flow.config.enable_legacy_model_io(True)
    func_cfg = flow.FunctionConfig()
    func_cfg.default_logical_view(flow.scope.consistent_view())

    @flow.global_function("predict", function_config=func_cfg)
    def gpt_loader_fn() -> flow.typing.Numpy:
        with flow.scope.placement("cpu", device_strs, parallel_hierachy):
            tokens = flow.data.megatron_gpt_mmap_data_loader(
                data_file_prefix=data_file_prefix,
                seq_length=seq_length,
                num_samples=num_samples,
                batch_size=batch_size,
                dtype=dtype,
                shuffle=shuffle,
                random_seed=random_seed,
                split_sizes=split_sizes,
                split_index=split_index,
                nd_sbp=nd_sbp,
                start_from_saved_progress=start_from_saved_progress,
                name="GPTDataLoader",
            )
            if isinstance(nd_sbp, list) and len(nd_sbp) > 1:
                tokens = flow.hierarchical_parallel_cast(tokens,
                                                         nd_sbp=["B", "B"])
        tokens = flow.hierarchical_parallel_cast(tokens, nd_sbp=["B"])
        return tokens

    check_point = flow.train.CheckPoint()
    check_point.init()
    return gpt_loader_fn
Example no. 20
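# Compares flow.nn.softmax (forward and backward) with TensorFlow, using a
# looser tolerance for float16.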
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=func_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-1.0,
                                                            maxval=1.0),
                trainable=True,
            )
            x1 = x
            x = flow.identity(x)
            if data_type == "float16":
                loss = flow.cast(
                    flow.nn.softmax(flow.cast(x, dtype=flow.float16),
                                    axis=axis),
                    dtype=flow.float,
                )
            else:
                loss = flow.nn.softmax(x, axis=axis)
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            total_loss = loss * x1
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(total_loss)
            return loss

    of_out = SoftmaxJob().get()
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    if data_type == "float16":
        tolerance = 0.001
    else:
        tolerance = 1e-05
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
Example no. 21
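# Runs the pre-training job built by BuildPreTrainNet (one hidden layer) for
# num_iters iterations with inplace optimization enabled or disabled and
# returns the mean loss of each iteration.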
def GetSeveralLossesAsNumpy(enable_inplace, num_iters=10):
    flow.config.enable_debug_mode(True)
    flow.config.gpu_device_num(1)
    train_config = flow.FunctionConfig()
    train_config.default_logical_view(flow.scope.consistent_view())
    train_config.enable_inplace(enable_inplace)

    @flow.global_function(type="train", function_config=train_config)
    def PretrainJob():
        loss = BuildPreTrainNet(
            batch_size=FLAGS.batch_size,
            data_part_num=FLAGS.data_part_num,
            seq_length=FLAGS.seq_length,
            max_position_embeddings=FLAGS.max_position_embeddings,
            num_hidden_layers=1,
            num_attention_heads=FLAGS.num_attention_heads,
            hidden_dropout_prob=FLAGS.hidden_dropout_prob,
            attention_probs_dropout_prob=FLAGS.attention_probs_dropout_prob,
            vocab_size=FLAGS.vocab_size,
            type_vocab_size=FLAGS.type_vocab_size,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        )
        CreateOptimizer().minimize(loss)
        return loss

    check_point = flow.train.CheckPoint()
    check_point.load(FLAGS.model_load_dir)
    ret = [PretrainJob().get().mean() for _ in range(num_iters)]
    flow.clear_default_session()
    return np.array(ret)
Example no. 22
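# Writes `value` into a variable with flow.assign in one job, then reads the
# same variable back through flow.nn.relu in a second job.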
def _of_assign_and_relu(value, dtype, device_type, assign=flow.assign):
    flow.clear_default_session()
    if os.getenv("ONEFLOW_TEST_CPU_ONLY") is None:
        flow.config.gpu_device_num(1)
    flow.config.cpu_device_num(1)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(dtype)
    func_config.default_placement_scope(
        flow.scope.placement(device_type, "0:0"))

    @flow.global_function(function_config=func_config)
    def assign_fn(value_def: oft.Numpy.Placeholder(value.shape, dtype=dtype)):
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        assign(var, value_def)

    @flow.global_function(function_config=func_config)
    def relu_fn():
        var = flow.get_variable(
            name="var",
            shape=value.shape,
            dtype=dtype,
            initializer=flow.constant_initializer(0),
        )
        return flow.nn.relu(var)

    assign_fn(value)
    return relu_fn().get().numpy()
Example no. 23
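    # Dynamic reshape on mirrored input split over two GPUs: each per-device
    # batch of shape (10, 10, 10) must come back reshaped to (50, 20).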
    def test_dynamic_reshape(test_case):
        data_shape = (10, 10, 10)
        flow.config.gpu_device_num(2)
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        func_config.default_logical_view(flow.scope.mirrored_view())

        @flow.global_function(type="train", function_config=func_config)
        def DynamicReshapeJob(x: oft.ListNumpy.Placeholder(data_shape)):
            reshape_out1 = flow.reshape(x, (-1, 20))
            my_model = flow.get_variable(
                "my_model",
                shape=(20, 32),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            my_model = flow.cast_to_current_logical_view(my_model)
            mm_out = flow.matmul(reshape_out1, my_model)
            reshape_out2 = flow.reshape(mm_out, (-1, 8, 4))
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [0.0001]),
                               momentum=0).minimize(reshape_out2)
            return reshape_out1

        data = [
            np.random.rand(*data_shape).astype(np.float32) for i in range(2)
        ]
        out = DynamicReshapeJob(data).get().numpy_list()
        for i in range(2):
            test_case.assertTrue(
                np.array_equal(np.reshape(data[i], (50, 20)), out[i]))
Example no. 24
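# Compares flow.math.top_k indices with TensorFlow (tf.math.top_k, or
# tf.argsort when k exceeds the size of the chosen axis).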
def compare_with_tensorflow(device_type, in_shape, axis, k, data_type, sorted):
    assert device_type in ["gpu", "cpu"]
    assert data_type in ["float32", "double", "int8", "int32", "int64"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_data_type(flow.float)

    @flow.global_function(function_config=func_config)
    def TopKJob(input: oft.ListNumpy.Placeholder(
        tuple([dim + 10 for dim in in_shape]),
        dtype=type_name_to_flow_type[data_type],
    )):
        with flow.scope.placement(device_type, "0:0"):
            return flow.math.top_k(input, axis, k, sorted)

    input = (np.random.random(in_shape) * 100).astype(
        type_name_to_np_type[data_type])
    of_out = TopKJob([input]).get().numpy_list()[0]
    if k <= in_shape[axis]:
        perm = get_perm_when_transpose_axis_to_last_dim(len(in_shape), axis)
        x = tf.transpose(input, perm)
        (_, indices) = tf.math.top_k(x, k, sorted)
        tf_out = tf.transpose(indices, get_inversed_perm(perm))
    else:
        tf_out = tf.argsort(input, axis, direction="DESCENDING", stable=True)
    assert np.array_equal(of_out, tf_out.numpy())
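Example no. 25
    # Multi-node communication test with mirrored (dynamic) input: chains ReLU
    # ops placed on the GPUs of two machines and checks the elementwise result.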
    def test_multi_node_comm_net_dynamic(test_case):
        func_config = flow.FunctionConfig()
        func_config.default_logical_view(flow.scope.mirrored_view())
        func_config.default_placement_scope(flow.scope.placement("gpu", "0:0"))
        func_config.default_data_type(flow.float)
        flow.config.machine_num(2)
        flow.config.gpu_device_num(1)

        @flow.global_function(function_config=func_config)
        def ReluJob(x: oft.ListNumpy.Placeholder((10, 2))):
            with flow.scope.placement("gpu", "0:0"):
                out0 = flow.math.relu(x)
            with flow.scope.placement("gpu", "1:0"):
                out1 = flow.math.relu(out0)
            with flow.scope.placement("gpu", "0:0"):
                out2 = flow.math.relu(out1)
            return out2

        index = [-2, -1, 0, 1, 2]
        data = []
        for i in index:
            data.append(np.ones((5, 2), dtype=np.float32) * i)
        for i in range(5):
            ret = ReluJob([data[i]]).get().numpy_list()[0]
            print(ret)
            if index[i] > 0:
                test_case.assertTrue(
                    np.array_equal(
                        ret,
                        np.ones((5, 2), dtype=np.float32) * index[i]))
            else:
                test_case.assertTrue(
                    np.array_equal(ret, np.zeros((5, 2), dtype=np.float32)))
Example no. 26
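    # Verifies that reusing a variable name inside repeated flow.scope.namespace
    # blocks and using the prefixed name "backbone-branch-var" yield the same
    # variable.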
    def test_name_scope(test_case):
        flow.clear_default_session()
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)

        def get_var(var_name):
            return flow.get_variable(
                name=var_name,
                shape=(2, 256, 14, 14),
                dtype=flow.float32,
                initializer=flow.random_uniform_initializer(),
            )

        @flow.global_function(function_config=func_config)
        def test_name_scope_job():
            with flow.scope.namespace("backbone"):
                with flow.scope.namespace("branch"):
                    var1 = get_var("var")
                with flow.scope.namespace("branch"):
                    var2 = get_var("var")
            var3 = get_var("backbone-branch-var")
            return (var1, var2, var3)

        (var1, var2, var3) = test_name_scope_job().get()
        test_case.assertTrue(np.array_equal(var1.numpy(), var2.numpy()))
        test_case.assertTrue(np.array_equal(var1.numpy(), var3.numpy()))
Example no. 27
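# Reshape on a variable with a 2-D SBP signature ["S(0)", "S(1)"] placed on a
# (2, 2) device hierarchy over four GPUs.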
def _test_reshape(test_case):
    flow.clear_default_session()
    flow.config.gpu_device_num(4)
    flow.config.enable_legacy_model_io(True)
    flow.config.enable_model_io_v2(True)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(x: flow.typing.Numpy.Placeholder((4, 6), dtype=flow.float)):
        with flow.scope.placement("gpu", "0:0-3", (2, 2)):
            v = flow.get_variable(
                "x",
                shape=(4, 6),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
                trainable=True,
                nd_sbp=["S(0)", "S(1)"],
            )
            x = flow.hierarchical_parallel_cast(x, nd_sbp=["S(0)", "S(1)"])
            x += v
            loss = flow.reshape(x, (4, 2, 3))
        loss = flow.hierarchical_parallel_cast(loss, nd_sbp=["S(0)"])
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [0.0001]),
                           momentum=0).minimize(loss)
        return loss

    x = np.random.randn(4, 6).astype(np.float32)
    my_loss = FlowJob(x).get()
    test_case.assertTrue(np.allclose(x.flatten(), my_loss.numpy().flatten()))
Example no. 28
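    # Training job exercising the fused batch-norm + add + ReLU pattern:
    # relu(batch_norm(x) + addend), followed by SGD on the resulting loss.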
    def run_job(test_case, device=None, in_type=None, shape=None):
        assert shape is not None
        flow.clear_default_session()
        func_config = flow.FunctionConfig()

        @flow.global_function(type="train", function_config=func_config)
        def FuseBnAddReluJob(x: oft.Numpy.Placeholder(
            shape, dtype=in_type)) -> oft.Numpy:
            addend = flow.constant_like(x, 2)
            with flow.scope.placement(device, "0:0-0"):
                x = (flow.get_variable(
                    "x1",
                    shape=shape,
                    dtype=in_type,
                    initializer=flow.random_uniform_initializer(minval=-10,
                                                                maxval=10),
                    trainable=True,
                ) + x)
                loss = flow.nn.relu(_batch_norm(x, last=False) + addend) + 1
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [0.0001]),
                                   momentum=0).minimize(loss)
                return loss

        np_in_type = dtype_util.convert_oneflow_dtype_to_numpy_dtype(in_type)
        x = (np.random.rand(*shape) * 10).astype(np_in_type)
        FuseBnAddReluJob(x)
Example no. 29
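# Builds a dim_gather train job whose input gradient is checked against
# sample["grad"] via flow.watch_diff.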
def _make_dim_gather_fn(test_case, sample, datashape):
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float32)
    func_config.default_logical_view(flow.scope.mirrored_view())
    func_config.default_placement_scope(flow.scope.placement("gpu", "0:0"))

    def _compare_diff(blob: oft.ListNumpy):
        test_case.assertTrue(np.allclose(sample["grad"], blob[0]))

    @flow.global_function(type="train", function_config=func_config)
    def DynamicDimGatherJob(
        params_def: oft.ListNumpy.Placeholder(datashape, dtype=flow.float32),
        index_def: oft.ListNumpy.Placeholder(datashape, dtype=flow.int32),
    ) -> oft.ListNumpy:
        x_var = flow.get_variable(
            "input",
            shape=(1,),
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )
        x_var = flow.cast_to_current_logical_view(x_var)
        x = x_var + params_def
        y = flow.dim_gather(x, sample["dim"], index_def)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
        ).minimize(y)
        flow.watch_diff(x, _compare_diff)
        return y

    return DynamicDimGatherJob
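
# Returns a train FunctionConfig that enables the indexed-slices optimizer for
# the "wide_embedding" and "deep_embedding" ops.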
def _get_train_conf():
    train_conf = flow.FunctionConfig()
    train_conf.default_data_type(flow.float)
    train_conf.indexed_slices_optimizer_conf(
        dict(include_op_names=dict(
            op_name=['wide_embedding', 'deep_embedding'])))
    return train_conf