Example #1
0
def _of_tensor_scatter_nd_add(
    params,
    indices,
    updates,
    device_type,
    mirrored,
    params_grad_watcher,
    updates_grad_watcher,
):
    """Run ``flow.tensor_scatter_nd_add`` inside a one-step training job.

    ``params`` and ``updates`` are each added to a zero-initialized variable,
    combined with ``flow.tensor_scatter_nd_add`` and the result is minimized
    with SGD. The job is executed once on the given arrays.

    Args:
        params: Array fed to the params placeholder (``flow.float``).
        indices: Array fed to the indices placeholder (``flow.int32``).
        updates: Array fed to the updates placeholder (``flow.float``).
        device_type: Device tag passed to ``flow.scope.placement``.
        mirrored: When true, use the mirrored logical view with ``ListNumpy``
            placeholders; otherwise the consistent view with ``Numpy``
            placeholders.
        params_grad_watcher: Callback registered through ``flow.watch_diff``
            on the params-plus-variable blob.
        updates_grad_watcher: Callback registered through ``flow.watch_diff``
            on the updates-plus-variable blob.

    Returns:
        The job output: ``numpy_list()[0]`` in mirrored mode, ``numpy()``
        otherwise.
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    def build_graph(params_in, indices_in, updates_in):
        # Job body shared by the mirrored and the consistent variants.
        with flow.scope.placement(device_type, "0:0"):
            params_zero = flow.get_variable(
                "params",
                shape=params_in.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            updates_zero = flow.get_variable(
                "updates",
                shape=updates_in.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            # Bring variables and inputs into the logical view of this job.
            params_zero = flow.cast_to_current_logical_view(params_zero)
            params_in = flow.cast_to_current_logical_view(params_in)
            updates_in = flow.cast_to_current_logical_view(updates_in)
            updates_zero = flow.cast_to_current_logical_view(updates_zero)
            params_sum = params_zero + params_in
            updates_sum = updates_zero + updates_in
            result = flow.tensor_scatter_nd_add(params_sum, indices_in,
                                                updates_sum)
            lr_schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
            flow.optimizer.SGD(lr_schedule, momentum=0).minimize(result)

        # Gradients are observed on the summed blobs, not the raw inputs.
        flow.watch_diff(params_sum, params_grad_watcher)
        flow.watch_diff(updates_sum, updates_grad_watcher)
        return result

    if not mirrored:
        func_config.default_logical_view(flow.scope.consistent_view())

        @flow.global_function(type="train", function_config=func_config)
        def tensor_scatter_nd_add_fn(
            params_def: oft.Numpy.Placeholder(params.shape, dtype=flow.float),
            indices_def: oft.Numpy.Placeholder(indices.shape,
                                               dtype=flow.int32),
            updates_def: oft.Numpy.Placeholder(updates.shape,
                                               dtype=flow.float),
        ):
            return build_graph(params_def, indices_def, updates_def)

        fetched = tensor_scatter_nd_add_fn(params, indices, updates).get()
        return fetched.numpy()

    func_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(type="train", function_config=func_config)
    def tensor_scatter_nd_add_fn(
        params_def: oft.ListNumpy.Placeholder(params.shape,
                                              dtype=flow.float),
        indices_def: oft.ListNumpy.Placeholder(indices.shape,
                                               dtype=flow.int32),
        updates_def: oft.ListNumpy.Placeholder(updates.shape,
                                               dtype=flow.float),
    ):
        return build_graph(params_def, indices_def, updates_def)

    # Mirrored jobs take one array per device and return a list of arrays.
    fetched = tensor_scatter_nd_add_fn([params], [indices], [updates]).get()
    return fetched.numpy_list()[0]
def _of_clip_by_value(values,
                      min,
                      max,
                      device_type="gpu",
                      dynamic=False,
                      grad_cb=None):
    """Run ``flow.clip_by_value`` on ``values`` and return the result array.

    Args:
        values: Input array. Its dtype is mapped through
            ``_np_dtype_to_of_dtype`` and its shape defines the placeholder.
        min: Lower bound forwarded to ``flow.clip_by_value``.
        max: Upper bound forwarded to ``flow.clip_by_value``.
        device_type: Device tag passed to ``flow.scope.placement``.
        dynamic: When true, run under the mirrored view with a ``ListNumpy``
            placeholder; otherwise under the consistent view with ``Numpy``.
        grad_cb: Optional callback. When callable, the input is added to a
            zero-initialized variable, the clipped output is registered as a
            loss and ``grad_cb`` is attached with ``flow.watch_diff``.

    Returns:
        ``numpy_list()[0]`` of the job output when ``dynamic`` is true,
        ``numpy()`` otherwise.
    """
    data_type = _np_dtype_to_of_dtype(values.dtype)

    def clip_with_grad(values_blob):
        # Training graph: variable + input -> clip -> loss, gradient watched.
        with flow.scope.placement(device_type, "0:0"):
            summed = flow.get_variable(
                "values",
                shape=values.shape,
                dtype=data_type,
                initializer=flow.constant_initializer(0),
            )
            summed = flow.cast_to_current_logical_view(summed)
            summed = summed + values_blob
            clipped = flow.clip_by_value(summed, min, max)
            flow.losses.add_loss(clipped)

        flow.watch_diff(summed, grad_cb)
        return clipped

    def clip_forward_only(values_blob):
        # Inference graph: clip the placeholder directly.
        with flow.scope.placement(device_type, "0:0"):
            return flow.clip_by_value(values_blob, min, max, name="Clip")

    clip = clip_with_grad if callable(grad_cb) else clip_forward_only

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(data_type)
    # NOTE(review): the training config is keyed on "is not None" while the
    # graph selection above is keyed on callable(); kept as in the original.
    if grad_cb is not None:
        func_config.train.primary_lr(1e-3)
        func_config.train.model_update_conf(dict(naive_conf={}))

    if dynamic:
        func_config.default_logical_view(flow.scope.mirrored_view())

        @flow.global_function(func_config)
        def clip_fn(values_def: oft.ListNumpy.Placeholder(values.shape,
                                                          dtype=data_type)):
            return clip(values_def)

        feed = [values]
    else:
        func_config.default_logical_view(flow.scope.consistent_view())

        @flow.global_function(func_config)
        def clip_fn(values_def: oft.Numpy.Placeholder(values.shape,
                                                      dtype=data_type)):
            return clip(values_def)

        feed = values

    flow.train.CheckPoint().init()
    fetched = clip_fn(feed).get()
    if dynamic:
        return fetched.numpy_list()[0]
    return fetched.numpy()
Example #3
0
def compare_with_tensorflow(
    device_type,
    x_shape,
    filters,
    kernel_size,
    groups,
    data_format="NCHW",
    padding="VALID",
    stride=1,
):
    """Check ``flow.nn.conv2d`` forward and backward against TensorFlow.

    A training job creates the input and the weight as variables, runs the
    convolution and records values and gradients in ``test_global_storage``.
    The same tensors are then pushed through TensorFlow (always in NHWC) and
    the outputs and gradients are compared with ``np.allclose``.

    Args:
        device_type: ``"gpu"`` or ``"cpu"``.
        x_shape: Shape of the input variable.
        filters: Number of output channels.
        kernel_size: Side length of the square kernel.
        groups: Number of convolution groups; must divide ``filters``.
        data_format: ``"NCHW"`` selects the NCHW layout; any other value is
            handled as channels-last.
        padding: Padding argument forwarded to both frameworks.
        stride: Stride used for both spatial dimensions.

    Raises:
        AssertionError: On invalid arguments or when results differ.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    # Permutations that bring OneFlow tensors into TensorFlow's layout
    # (NHWC activations, HWIO weights).
    is_nchw = data_format == "NCHW"
    xy_data_transpose = (0, 2, 3, 1) if is_nchw else (0, 1, 2, 3)
    weight_data_transpose = (2, 3, 1, 0) if is_nchw else (1, 2, 3, 0)

    @flow.global_function(type="train", function_config=func_config)
    def ConvJob():
        with flow.scope.placement(device_type, "0:0"):
            inputs = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            if is_nchw:
                in_per_group = inputs.shape[1] // groups
                weight_shape = (filters, in_per_group, kernel_size,
                                kernel_size)
            else:
                in_per_group = inputs.shape[3] // groups
                weight_shape = (filters, kernel_size, kernel_size,
                                in_per_group)
            kernel = flow.get_variable(
                "conv-weight",
                shape=weight_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            conv_out = flow.nn.conv2d(
                inputs,
                kernel,
                strides=[stride, stride],
                padding=padding,
                data_format=data_format,
                dilations=[1, 1],
                groups=groups,
                name="conv",
            )
            lr_schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
            flow.optimizer.SGD(lr_schedule, momentum=0).minimize(conv_out)

            # Record every value and gradient needed for the comparison.
            flow.watch(inputs, test_global_storage.Setter("x"))
            flow.watch_diff(inputs, test_global_storage.Setter("x_diff"))
            flow.watch(kernel, test_global_storage.Setter("weight"))
            flow.watch_diff(kernel, test_global_storage.Setter("weight_diff"))
            flow.watch(conv_out, test_global_storage.Setter("loss"))
            flow.watch_diff(conv_out, test_global_storage.Setter("loss_diff"))

            return conv_out

    # OneFlow
    flow.train.CheckPoint().init()
    of_out = ConvJob().get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        tf_x = tf.Variable(
            test_global_storage.Get("x").transpose(xy_data_transpose))
        assert groups > 0
        assert filters % groups == 0
        tf_weight = tf.Variable(
            test_global_storage.Get("weight").transpose(
                weight_data_transpose))
        if groups == 1:
            tf_out = tf.nn.conv2d(
                tf_x,
                tf_weight,
                strides=[1, stride, stride, 1],
                padding=padding,
                data_format="NHWC",
            )
        else:
            # NOTE(review): `stride` is not forwarded on this path; confirm
            # that grouped_convolution2D is only exercised with stride 1.
            tf_out = grouped_convolution2D(tf_x,
                                           tf_weight,
                                           padding=padding,
                                           num_groups=groups)

    # Use OneFlow's upstream gradient as TensorFlow's output gradient so both
    # backward passes start from the same values.
    loss_diff = test_global_storage.Get("loss_diff").transpose(
        xy_data_transpose)
    tf_x_diff = tape.gradient(tf_out, tf_x, loss_diff)
    tf_weight_diff = tape.gradient(tf_out, tf_weight, loss_diff)

    of_out_nhwc = of_out.numpy().transpose(xy_data_transpose)
    tf_out_np = tf_out.numpy()
    max_diff = np.max(np.absolute(of_out_nhwc - tf_out_np))
    assert np.allclose(of_out_nhwc, tf_out_np, rtol=1e-5,
                       atol=1e-5), max_diff

    of_x_diff = test_global_storage.Get("x_diff").transpose(xy_data_transpose)
    assert np.allclose(of_x_diff, tf_x_diff.numpy(), rtol=1e-4, atol=1e-4)

    of_weight_diff = test_global_storage.Get("weight_diff").transpose(
        weight_data_transpose)
    assert np.allclose(of_weight_diff,
                       tf_weight_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
Example #4
0
def _test_fused_scale_tril_fw_bw(test_case, device, shape, type_name, diagonal,
                                 fill_value, scale):
    """Check ``flow.math.fused_scale_tril`` forward and backward with NumPy.

    Args:
        test_case: Object providing ``assertTrue``.
        device: Device tag passed to ``flow.scope.placement``.
        shape: Shape of the random integer-valued input.
        type_name: Data type name; ``"float16"`` feeds float32 data and casts
            to half precision inside the job.
        diagonal: Diagonal offset given to the op and to ``np.tril``.
        fill_value: Value the NumPy reference places outside the kept
            triangle.
        scale: Scale factor given to the op and applied in the reference.
    """
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    is_half = type_name == "float16"
    # Half precision is fed as float32 and cast inside the job.
    flow_type = flow.float if is_half else type_name_to_flow_type[type_name]
    np_type = np.float32 if is_half else type_name_to_np_type[type_name]

    @flow.global_function(type="train", function_config=func_config)
    def test_fused_scale_tril_fw_bw_job(
            x: oft.Numpy.Placeholder(shape, dtype=flow_type), ):
        with flow.scope.placement(device, "0:0"):
            zero_var = flow.get_variable(
                name="xv",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            x += flow.cast(zero_var, dtype=flow_type)
            # NOTE(review): fill_value is used by the NumPy reference below
            # but is not passed to the op here; confirm that is intended.
            if is_half:
                half_out = flow.math.fused_scale_tril(
                    flow.cast(x, flow.float16), diagonal, scale=scale)
                out = flow.cast(half_out, flow.float)
            else:
                out = flow.math.fused_scale_tril(x, diagonal, scale=scale)
            lr_schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
            flow.optimizer.SGD(lr_schedule, momentum=0).minimize(out)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(out, test_global_storage.Setter("out"))
            flow.watch_diff(out, test_global_storage.Setter("out_diff"))
            return out

    flow.train.CheckPoint().init()
    x = np.random.randint(low=0, high=100, size=shape)
    test_fused_scale_tril_fw_bw_job(x.astype(np_type)).get()

    # NumPy reference: scaled input inside the triangle, fill_value outside;
    # the gradient is the masked upstream gradient times the scale.
    keep_mask = np.tril(np.ones(shape), diagonal)
    outside = np.full(shape, fill_value).astype(np_type)
    np_out = np.where(keep_mask, test_global_storage.Get("x") * scale, outside)
    np_x_diff = np.tril(test_global_storage.Get("out_diff"), diagonal) * scale

    tolerance = 1e-3 if is_half else 1e-5
    out_matches = np.allclose(np_out,
                              test_global_storage.Get("out"),
                              rtol=tolerance,
                              atol=tolerance)
    test_case.assertTrue(out_matches)
    grad_matches = np.allclose(np_x_diff,
                               test_global_storage.Get("x_diff"),
                               rtol=tolerance,
                               atol=tolerance)
    test_case.assertTrue(grad_matches)
Example #5
0
def test_layer_norm(_):
    """Compare ``flow.layers.layer_norm`` with Keras ``LayerNormalization``.

    Every combination produced by ``GenArgList`` (GPU, float32, and the
    ``trainable``/``center``/``scale``/``epsilon`` switches) is run on a
    random input. The forward output is compared after the job returns and
    the input gradient is compared inside a ``flow.watch_diff`` callback.

    Args:
        _: Unused; accepted so the function matches the caller's signature.

    Raises:
        AssertionError: When shapes, outputs or gradients do not match.
    """
    confs = [{
        "x_shape": (4, 5, 2, 6),
        "begin_norm_axis": -1,
        "begin_params_axis": -1
    }]
    arg_dict = OrderedDict()
    arg_dict["device_type"] = ["gpu"]
    arg_dict["confs"] = confs
    arg_dict["data_type"] = ["float32"]
    arg_dict["trainable"] = [True, False]
    arg_dict["center"] = [True, False]
    arg_dict["scale"] = [True, False]
    arg_dict["epsilon"] = [0.0, 1e-10]

    for case in GenArgList(arg_dict):
        # `conf` (singular) avoids shadowing the `confs` list defined above.
        (device_type, conf, data_type, trainable, center, scale,
         epsilon) = case
        x_shape = conf["x_shape"]
        begin_norm_axis = conf["begin_norm_axis"]
        begin_params_axis = conf["begin_params_axis"]
        flow.clear_default_session()

        # Random inputs
        x = np.random.randn(*x_shape).astype(type_name_to_np_type[data_type])

        # TF results
        with tf.GradientTape(persistent=True) as tape:
            x_tf = tf.Variable(x)
            y_tf = tf.keras.layers.LayerNormalization(
                axis=begin_norm_axis,
                epsilon=epsilon,
                center=center,
                scale=scale,
                beta_initializer="zeros",
                gamma_initializer="ones",
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None,
                trainable=trainable,
            )(x_tf)

        # Gradient of the output with respect to the input for an all-ones
        # upstream gradient.
        dx_tf = tape.gradient(y_tf, x_tf, tf.constant(1.0, shape=y_tf.shape))

        def assert_grad(b):
            # Invoked by OneFlow with the gradient of the watched variable.
            assert np.allclose(dx_tf.numpy(), b.numpy(), rtol=1e-5,
                               atol=1e-5), (
                                   case,
                                   dx_tf.numpy(),
                                   b.numpy(),
                               )

        # 1F results
        dtype = type_name_to_flow_type[data_type]

        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        func_config.train.primary_lr(1e-4)
        func_config.train.model_update_conf(dict(naive_conf={}))

        @flow.global_function(func_config)
        def test_job(x: oft.Numpy.Placeholder(x_shape, dtype=dtype)):
            v = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=dtype,
                initializer=flow.constant_initializer(0),
                trainable=True,
            )
            flow.watch_diff(v, assert_grad)
            x += v
            with flow.scope.placement(device_type, "0:0"):
                # NOTE(review): `epsilon` and `trainable` are given to the
                # Keras layer only and are not forwarded to this call.
                y = flow.layers.layer_norm(
                    x,
                    begin_norm_axis=begin_norm_axis,
                    begin_params_axis=begin_params_axis,
                    center=center,
                    scale=scale,
                )
            flow.losses.add_loss(y)
            return y

        check_point = flow.train.CheckPoint()
        check_point.init()
        y = test_job(x).get()
        assert y.numpy().shape == y_tf.numpy().shape, (
            y.numpy().shape,
            y_tf.numpy().shape,
        )
        diff = y.numpy() - y_tf.numpy()
        max_diff = np.max(np.abs(diff))
        assert np.allclose(y.numpy(), y_tf.numpy(), rtol=1e-5, atol=2e-3), (
            case,
            max_diff,
        )
Example #6
0
def compare_with_not_fused(
    test_case,
    device_type,
    x_shape,
    data_type,
    data_format,
    rate,
    seed,
    fuse_add_to_output,
):
    """Check ``fused_bias_add_dropout`` against ``bias_add`` + ``dropout``.

    One training job evaluates both the unfused path (``flow.nn.bias_add``
    followed by ``flow.nn.dropout``) and ``flow.nn.fused_bias_add_dropout``
    on identical inputs with the same ``rate`` and ``seed``, adds the same
    addend to each, and records outputs and gradients. Outputs and the
    gradients of value, bias and addend must agree between the two paths.

    Args:
        test_case: Object providing ``assertTrue``.
        device_type: ``"gpu"`` or ``"cpu"``.
        x_shape: Shape of the value and addend inputs.
        data_type: Data type name; ``"float16"`` casts to half precision
            inside the job around the compared ops.
        data_format: ``"NCHW"`` or ``"NHWC"``; selects the bias axis.
        rate: Dropout rate.
        seed: Dropout seed shared by both paths.
        fuse_add_to_output: Forwarded to
            ``func_config.enable_fuse_add_to_output``.

    Raises:
        ValueError: If ``data_format`` is neither ``"NCHW"`` nor ``"NHWC"``.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.enable_fuse_add_to_output(fuse_add_to_output)

    # NOTE(review): `dtype` is not used below (the placeholders are declared
    # without a dtype); the lookup is kept so an unknown data_type still
    # fails here with a KeyError.
    if data_type == "float16":
        dtype = flow.float
    else:
        dtype = type_name_to_flow_type[data_type]

    if data_format == "NCHW":
        bias_shape = (x_shape[1], )
    elif data_format == "NHWC":
        bias_shape = (x_shape[len(x_shape) - 1], )
    else:
        # Previously this fell through and surfaced later as a NameError on
        # `bias_shape`.
        raise ValueError(
            "unsupported data_format: {!r}".format(data_format))

    @flow.global_function(type="train", function_config=func_config)
    def FlowJob(
            value: oft.Numpy.Placeholder(x_shape),
            bias: oft.Numpy.Placeholder(bias_shape),
            addend: oft.Numpy.Placeholder(x_shape),
    ):
        with flow.scope.placement(device_type, "0:0"):
            # Each input is added to a zero-initialized variable.
            value += flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            bias += flow.get_variable(
                name="v2",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            addend += flow.get_variable(
                name="v3",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )

            # Separate identity copies give each path its own blobs, so the
            # gradients of the two paths can be watched independently.
            x1 = flow.identity(value)
            x2 = flow.identity(value)

            bias1 = flow.identity(bias)
            bias2 = flow.identity(bias)

            addend1 = flow.identity(addend)
            addend2 = flow.identity(addend)

            flow.watch_diff(x1, test_global_storage.Setter("x1_diff"))
            flow.watch_diff(x2, test_global_storage.Setter("x2_diff"))

            flow.watch_diff(bias1, test_global_storage.Setter("bias1_diff"))
            flow.watch_diff(bias2, test_global_storage.Setter("bias2_diff"))

            flow.watch_diff(addend1,
                            test_global_storage.Setter("addend1_diff"))
            flow.watch_diff(addend2,
                            test_global_storage.Setter("addend2_diff"))

            if data_type == "float16":
                out1 = flow.nn.dropout(
                    flow.nn.bias_add(
                        flow.cast(x1, dtype=flow.float16),
                        flow.cast(bias1, dtype=flow.float16),
                        data_format=data_format,
                    ),
                    rate=rate,
                    seed=seed,
                    name="dropout",
                )
                y1 = flow.cast(
                    out1 + flow.cast(addend1, dtype=flow.float16),
                    dtype=flow.float,
                )
                out2 = flow.nn.fused_bias_add_dropout(
                    flow.cast(x2, dtype=flow.float16),
                    flow.cast(bias2, dtype=flow.float16),
                    data_format=data_format,
                    rate=rate,
                    seed=seed,
                )
                y2 = flow.cast(
                    out2 + flow.cast(addend2, dtype=flow.float16),
                    dtype=flow.float,
                )
            else:
                y1 = (flow.nn.dropout(
                    flow.nn.bias_add(x1, bias1, data_format=data_format),
                    rate=rate,
                    seed=seed,
                    name="dropout",
                ) + addend1)
                y2 = (flow.nn.fused_bias_add_dropout(
                    x2,
                    bias2,
                    data_format=data_format,
                    rate=rate,
                    seed=seed,
                ) + addend2)
            flow.watch(y1, test_global_storage.Setter("y1"))
            flow.watch(y2, test_global_storage.Setter("y2"))
            flow.watch_diff(y1, test_global_storage.Setter("y1_diff"))
            flow.watch_diff(y2, test_global_storage.Setter("y2_diff"))

            loss = y1 + y2
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([],
                                                                     [0.001]),
                           momentum=0).minimize(flow.math.reduce_sum(loss))

        return loss

    x = np.random.uniform(low=0, high=10, size=x_shape).astype(np.float32)
    bias = np.random.uniform(low=0, high=10,
                             size=bias_shape).astype(np.float32)
    add = np.random.uniform(low=0, high=10, size=x_shape).astype(np.float32)
    # The return value is not needed; results are read from the watchers.
    FlowJob(x, bias, add).get()

    tol = 1e-5

    def assert_pair_close(first_key, second_key):
        # Compare two recorded arrays from the unfused and the fused path.
        first = test_global_storage.Get(first_key)
        second = test_global_storage.Get(second_key)
        test_case.assertTrue(
            np.allclose(first, second, rtol=tol, atol=tol, equal_nan=True))

    assert_pair_close("y1", "y2")
    assert_pair_close("x1_diff", "x2_diff")
    assert_pair_close("bias1_diff", "bias2_diff")
    # The original repeated the bias check here; the addend gradients were
    # recorded but never compared.
    assert_pair_close("addend1_diff", "addend2_diff")
Example #7
0
def main(args):
    """Train AlexNet with OneFlow and save the per-iteration losses.

    Defines a training job and an evaluation job, initializes or loads the
    model, runs ``args.iter_num`` training iterations, saves a checkpoint
    every 100 iterations and finally writes all training losses to
    ``./of_loss/alexnet/<nodes>n<gpus>c.npy``.

    Args:
        args: Parsed options. The attributes read here are ``num_nodes``,
            ``gpu_num_per_node``, ``train_dir``, ``eval_dir``,
            ``model_load_dir`` and ``iter_num``; ``args`` is also passed on
            to ``_data_load_layer`` and ``alexnet``.
    """
    flow.config.machine_num(args.num_nodes)
    flow.config.gpu_device_num(args.gpu_num_per_node)

    func_config = flow.FunctionConfig()
    func_config.default_distribute_strategy(flow.scope.consistent_view())
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(0.00001)
    func_config.train.model_update_conf(dict(naive_conf={}))
    func_config.cudnn_conv_force_fwd_algo(0)
    func_config.cudnn_conv_force_bwd_data_algo(1)
    func_config.cudnn_conv_force_bwd_filter_algo(1)

    @flow.global_function(func_config)
    def alexnet_train_job():
        (labels, images) = _data_load_layer(args, args.train_dir)
        loss = alexnet(args, images, labels)
        flow.losses.add_loss(loss)
        return loss

    # The evaluation job gets a fresh config without the training settings.
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(func_config)
    def alexnet_eval_job():
        with flow.scope.consistent_view():
            (labels, images) = _data_load_layer(args, args.eval_dir)
            return alexnet(args, images, labels, False)

    check_point = flow.train.CheckPoint()
    if not args.model_load_dir:
        check_point.init()
    else:
        check_point.load(args.model_load_dir)

    num_nodes = args.num_nodes
    print("Training alexnet: num_gpu_per_node = {}, num_nodes = {}.".format(
        args.gpu_num_per_node, num_nodes))

    print("{:>12}  {:>12}  {:>12}".format("iter", "loss type", "loss value"))
    fmt_str = "{:>12}  {:>12}  {:>12.6f}"
    loss = []
    for i in range(args.iter_num):
        train_loss = alexnet_train_job().get().mean()
        loss.append(train_loss)
        print(fmt_str.format(i, "train loss:", train_loss))

        # alexnet_eval_job is defined above but is not run during training.
        if (i + 1) % 100 == 0:
            check_point.save(_MODEL_SAVE_DIR + str(i))

    # save loss to file
    loss_file = "{}n{}c.npy".format(str(num_nodes),
                                    str(args.gpu_num_per_node * num_nodes))
    loss_path = "./of_loss/alexnet"
    # exist_ok avoids the check-then-create race when several processes start
    # at the same time.
    os.makedirs(loss_path, exist_ok=True)
    numpy.save(os.path.join(loss_path, loss_file), loss)
def compare_with_tensorflow(
    device_type,
    x_shape,
    filters,
    kernel_size,
    groups,
    of_padding="SAME",
    tf_padding="SAME",
    stride=1,
    data_format="NCHW",
):
    """Check mirrored-view ``flow.nn.conv2d`` against ``tf.nn.conv2d``.

    The job runs under the mirrored logical view with a ``ListNumpy``
    placeholder declared as ``(10, 3, 100, 100)`` and is fed a random array
    of shape ``x_shape``. Values and gradients are captured through
    ``global_storage_setter`` and compared with TensorFlow, which always
    runs in NHWC.

    Args:
        device_type: ``"gpu"`` or ``"cpu"``; on CPU the input-gradient
            tolerance is relaxed by a factor of 100.
        x_shape: Shape of the random input fed to the job.
        filters: Number of output channels.
        kernel_size: Side length of the square kernel.
        groups: Number of groups passed to the OneFlow convolution.
        of_padding: Padding argument for ``flow.nn.conv2d``.
        tf_padding: Padding argument for ``tf.nn.conv2d``.
        stride: Stride used for both spatial dimensions.
        data_format: ``"NCHW"`` selects the NCHW layout; any other value is
            handled as channels-last.

    Raises:
        AssertionError: On invalid arguments or when results differ.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    func_config.default_logical_view(flow.scope.mirrored_view())

    # Permutations into TensorFlow's layout (NHWC activations, HWIO weights)
    # and the channel count for the selected layout.
    if data_format == "NCHW":
        xy_data_transpose = (0, 2, 3, 1)
        weight_data_transpose = (2, 3, 1, 0)
        in_channels = x_shape[1]
    else:
        xy_data_transpose = (0, 1, 2, 3)
        weight_data_transpose = (1, 2, 3, 0)
        in_channels = x_shape[3]

    @flow.global_function(type="train", function_config=func_config)
    def DynamicConvJob(x: oft.ListNumpy.Placeholder((10, 3, 100, 100))):
        with flow.scope.placement(device_type, "0:0"):
            x_var = flow.get_variable(
                name="v1",
                shape=(1, ),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            x_var = flow.cast_to_current_logical_view(x_var)
            x += x_var
            if data_format == "NCHW":
                weight_shape = (filters, in_channels // groups, kernel_size,
                                kernel_size)
            else:
                weight_shape = (filters, kernel_size, kernel_size,
                                in_channels // groups)
            weight = flow.get_variable(
                "conv-weight",
                shape=weight_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            weight = flow.cast_to_current_logical_view(weight)
            loss = flow.nn.conv2d(
                x,
                weight,
                strides=[stride, stride],
                padding=of_padding,
                data_format=data_format,
                dilations=[1, 1],
                groups=groups,
            )
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, global_storage_setter("x"))
            flow.watch_diff(x, global_storage_setter("x_diff"))
            flow.watch(weight, global_storage_setter("weight"))
            flow.watch_diff(weight, global_storage_setter("weight_diff"))
            flow.watch(loss, global_storage_setter("loss"))
            flow.watch_diff(loss, global_storage_setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    data = [np.random.rand(*x_shape).astype(np.float32)]
    of_out = DynamicConvJob(data).get().numpy_list()[0]
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(data[0].transpose(xy_data_transpose))
        assert groups > 0
        # Uses the channel axis of the selected layout; the previous check
        # always read x_shape[1], which is not the channel axis for NHWC.
        assert in_channels % groups == 0
        assert filters % groups == 0
        weight = tf.Variable(
            global_storage["weight"].numpy().transpose(weight_data_transpose))

        tf_out = tf.nn.conv2d(
            x,
            weight,
            strides=[1, stride, stride, 1],
            padding=tf_padding,
            data_format="NHWC",
        )

    assert np.allclose(
        of_out.transpose(xy_data_transpose),
        tf_out.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )

    # Use OneFlow's upstream gradient as TensorFlow's output gradient so both
    # backward passes start from the same values.
    loss_diff = global_storage["loss_diff"].numpy_list()[0].transpose(
        xy_data_transpose)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_weight_diff = tape.gradient(tf_out, weight, loss_diff)
    rtol = 1e-4
    atol = 1e-4
    if device_type == "cpu":
        rtol *= 100
        atol *= 100
    assert np.allclose(
        global_storage["x_diff"].numpy_list()[0].transpose(xy_data_transpose),
        tf_x_diff.numpy(),
        rtol=rtol,
        atol=atol,
    ), (global_storage["x_diff"].numpy_list()[0].transpose(xy_data_transpose) -
        tf_x_diff.numpy())
    assert np.allclose(
        global_storage["weight_diff"].numpy().transpose(weight_data_transpose),
        tf_weight_diff.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )
    def train(self, epochs):
        # download data
        train_data = TrainSet(args)
        val_data = TestSet(args)

        # save loss, psnr, ssim
        Loss = []
        Val_psnr = []
        Val_ssim = []

        # config
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.double)
        flow.config.gpu_device_num(self.gpu_num_per_node)
        flow.config.enable_debug_mode(True)
        # train config
        lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [self.lr])

        @flow.global_function(type="predict", function_config=func_config)
        def train_lte(input: tp.Numpy.Placeholder(
            (self.batch_size, 3, 160,
             160))) -> Tuple[tp.Numpy, tp.Numpy, tp.Numpy]:
            x_lv1, x_lv2, x_lv3 = self.LTE(input, trainable=True)
            return x_lv1, x_lv2, x_lv3

        @flow.global_function(type="predict", function_config=func_config)
        def train_searchtransfer(lrsr_lv3_unfold: tp.Numpy.Placeholder(
            (self.batch_size, 2304, 1600)),
                                 refsr_lv3_unfold: tp.Numpy.Placeholder(
                                     (self.batch_size, 2304, 1600)),
                                 ref_lv3_unfold: tp.Numpy.Placeholder(
                                     (self.batch_size, 2304, 1600)),
                                 ref_lv2_unfold: tp.Numpy.Placeholder(
                                     (self.batch_size, 4608, 1600)),
                                 ref_lv1_unfold: tp.Numpy.Placeholder(
                                     (self.batch_size, 9216,
                                      1600))) -> Tuple[tp.Numpy, tp.Numpy,
                                                       tp.Numpy, tp.Numpy]:
            refsr_lv3_unfold = flow.transpose(refsr_lv3_unfold, perm=[0, 2, 1])

            refsr_lv3_unfold = flow.math.l2_normalize(
                refsr_lv3_unfold, axis=2)  # [N, Hr*Wr, C*k*k]
            lrsr_lv3_unfold = flow.math.l2_normalize(lrsr_lv3_unfold,
                                                     axis=1)  # [N, C*k*k, H*W]

            R_lv3 = flow.matmul(refsr_lv3_unfold,
                                lrsr_lv3_unfold)  # [N, Hr*Wr, H*W]
            R_lv3_star = flow.math.reduce_max(R_lv3, axis=1)  # [N, H*W]
            R_lv3_star_arg = flow.math.argmax(R_lv3, axis=1)  # [N, H*W]

            T_lv3_unfold = self.bis(ref_lv3_unfold, R_lv3_star_arg)
            T_lv2_unfold = self.bis(ref_lv2_unfold, R_lv3_star_arg)
            T_lv1_unfold = self.bis(ref_lv1_unfold, R_lv3_star_arg)

            return R_lv3_star, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold

        @flow.global_function(type="train", function_config=func_config)
        def train_mainnet(lr: tp.Numpy.Placeholder(
            (self.batch_size, 3, 40, 40)), S: tp.Numpy.Placeholder(
                (self.batch_size, 1, 40, 40)), T_lv3: tp.Numpy.Placeholder(
                    (self.batch_size, 256, 40, 40)),
                          T_lv2: tp.Numpy.Placeholder(
                              (self.batch_size, 128, 80, 80)),
                          T_lv1: tp.Numpy.Placeholder(
                              (self.batch_size, 64, 160, 160)),
                          hr: tp.Numpy.Placeholder(
                              (self.batch_size, 3, 160, 160))) -> tp.Numpy:
            sr = self.mainnet(lr, S, T_lv3, T_lv2, T_lv1, trainable=True)
            loss = flow.math.reduce_mean(
                flow.math.abs(flow.math.subtract(sr, hr)))
            flow.optimizer.Adam(lr_scheduler, 0.9, 0.999).minimize(loss)
            return loss

        @flow.global_function(type="predict", function_config=func_config)
        def eval_lte(
            input: tp.Numpy.Placeholder((1, 3, 160, 160))
        ) -> Tuple[tp.Numpy, tp.Numpy, tp.Numpy]:
            """Predict job: run ``self.LTE`` (``trainable=False``) on one input.

            Returns the three values produced by ``self.LTE`` in the same
            order (level 1, level 2, level 3).
            """
            lv1_out, lv2_out, lv3_out = self.LTE(input, trainable=False)
            return lv1_out, lv2_out, lv3_out

        @flow.global_function(type="predict", function_config=func_config)
        def eval_searchtransfer(
            lrsr_lv3_unfold: tp.Numpy.Placeholder((1, 2304, 1600)),
            refsr_lv3_unfold: tp.Numpy.Placeholder((1, 2304, 1600)),
            ref_lv3_unfold: tp.Numpy.Placeholder((1, 2304, 1600)),
            ref_lv2_unfold: tp.Numpy.Placeholder((1, 4608, 1600)),
            ref_lv1_unfold: tp.Numpy.Placeholder((1, 9216, 1600)),
        ) -> Tuple[tp.Numpy, tp.Numpy, tp.Numpy, tp.Numpy]:
            """Predict job: relevance search followed by feature transfer.

            The normalised ``refsr`` and ``lrsr`` patches are multiplied to
            get a relevance matrix.  Its maximum and arg-maximum along axis 1
            are taken, and the arg-maximum is handed to ``self.bis`` together
            with each of the three unfolded reference feature maps.

            Returns:
                ``(max relevance, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold)``.
            """
            ref_patches = flow.transpose(refsr_lv3_unfold, perm=[0, 2, 1])

            # [N, Hr*Wr, C*k*k]
            ref_patches = flow.math.l2_normalize(ref_patches, axis=2)
            # [N, C*k*k, H*W]
            lr_patches = flow.math.l2_normalize(lrsr_lv3_unfold, axis=1)

            # [N, Hr*Wr, H*W]
            relevance = flow.matmul(ref_patches, lr_patches)
            best_score = flow.math.reduce_max(relevance, axis=1)  # [N, H*W]
            best_index = flow.math.argmax(relevance, axis=1)  # [N, H*W]

            # Same selection index for every feature level, finest level last.
            picked_lv3, picked_lv2, picked_lv1 = [
                self.bis(unfolded, best_index)
                for unfolded in (ref_lv3_unfold, ref_lv2_unfold,
                                 ref_lv1_unfold)
            ]

            return best_score, picked_lv3, picked_lv2, picked_lv1

        @flow.global_function(type="predict", function_config=func_config)
        def eval_mainnet(
            lr: tp.Numpy.Placeholder((1, 3, 40, 40)),
            S: tp.Numpy.Placeholder((1, 1, 40, 40)),
            T_lv3: tp.Numpy.Placeholder((1, 256, 40, 40)),
            T_lv2: tp.Numpy.Placeholder((1, 128, 80, 80)),
            T_lv1: tp.Numpy.Placeholder((1, 64, 160, 160)),
        ) -> tp.Numpy:
            """Predict job: run ``self.mainnet`` with ``trainable=False``.

            Takes a single sample (leading dimension 1) and returns the
            network output unchanged.
            """
            return self.mainnet(lr, S, T_lv3, T_lv2, T_lv1, trainable=False)

        check_point = flow.train.CheckPoint()
        check_point.load(self.vgg_path)

        batch_num = len(train_data) // self.batch_size
        pre_best, best_psnr = -1, 0
        print("****************** start training *****************")
        for epoch_idx in range(epochs):
            start = time.time()
            train_data.shuffle(epoch_idx)
            print("****************** train  *****************")
            for batch_idx in range(batch_num):
                lr, lr_sr, hr, ref, ref_sr = [], [], [], [], []
                for idx in range(self.batch_size):
                    sample = train_data[batch_idx * self.batch_size + idx]
                    lr.append(sample['LR'][np.newaxis, :])
                    lr_sr.append(sample['LR_sr'][np.newaxis, :])
                    hr.append(sample['HR'][np.newaxis, :])
                    ref.append(sample['Ref'][np.newaxis, :])
                    ref_sr.append(sample['Ref_sr'][np.newaxis, :])
                lr = np.ascontiguousarray(np.concatenate(lr, axis=0))
                lr_sr = np.ascontiguousarray(np.concatenate(lr_sr, axis=0))
                hr = np.ascontiguousarray(np.concatenate(hr, axis=0))
                ref = np.ascontiguousarray(np.concatenate(ref, axis=0))
                ref_sr = np.ascontiguousarray(np.concatenate(ref_sr, axis=0))

                _, _, lrsr_lv3 = train_lte((lr_sr + 1.) / 2.)
                _, _, refsr_lv3 = train_lte((ref_sr + 1.) / 2.)
                ref_lv1, ref_lv2, ref_lv3 = train_lte((ref + 1.) / 2.)

                ### search
                lrsr_lv3_unfold = self.unfold(lrsr_lv3)
                refsr_lv3_unfold = self.unfold(refsr_lv3)

                ### transfer
                ref_lv3_unfold = self.unfold(ref_lv3)
                ref_lv2_unfold = self.unfold(ref_lv2,
                                             kernel_size=6,
                                             padding=2,
                                             stride=2)
                ref_lv1_unfold = self.unfold(ref_lv1,
                                             kernel_size=12,
                                             padding=4,
                                             stride=4)

                R_lv3_star, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold = train_searchtransfer(
                    lrsr_lv3_unfold, refsr_lv3_unfold, ref_lv3_unfold,
                    ref_lv2_unfold, ref_lv1_unfold)

                T_lv3 = self.fold(T_lv3_unfold,
                                  output_size=lrsr_lv3.shape[-2:],
                                  kernel_size=3,
                                  padding=1,
                                  stride=1) / (3. * 3.)
                T_lv2 = self.fold(
                    T_lv2_unfold,
                    output_size=(lrsr_lv3.shape[2] * 2, lrsr_lv3.shape[3] * 2),
                    kernel_size=6,
                    padding=2,
                    stride=2) / (3. * 3.)
                T_lv1 = self.fold(
                    T_lv1_unfold,
                    output_size=(lrsr_lv3.shape[2] * 4, lrsr_lv3.shape[3] * 4),
                    kernel_size=12,
                    padding=4,
                    stride=4) / (3. * 3.)

                S = np.reshape(R_lv3_star, [
                    R_lv3_star.shape[0], 1, lrsr_lv3.shape[2],
                    lrsr_lv3.shape[3]
                ])

                loss = train_mainnet(lr, S, T_lv3, T_lv2, T_lv1, hr)

                if (batch_idx + 1) % self.print_interval == 0:
                    print("{}th epoch, {}th batch, loss:{} ".format(
                        epoch_idx + 1, batch_idx + 1, loss))

                    Loss.append(loss)

            print("Time for epoch {} is {} sec.".format(
                epoch_idx + 1,
                time.time() - start))

            if (epoch_idx + 1) % self.val_every == 0:
                val_psnr, val_ssim = 0., 0.
                val_batch_num = len(val_data)
                for batch_idx in range(val_batch_num):
                    sample = val_data[batch_idx]
                    lr = np.ascontiguousarray(sample['LR'][np.newaxis, :])
                    lr_sr = np.ascontiguousarray(
                        sample['LR_sr'][np.newaxis, :])
                    hr = np.ascontiguousarray(sample['HR'][np.newaxis, :])
                    ref = np.ascontiguousarray(sample['Ref'][np.newaxis, :])
                    ref_sr = np.ascontiguousarray(
                        sample['Ref_sr'][np.newaxis, :])

                    _, _, lrsr_lv3 = eval_lte((lr_sr + 1.) / 2.)
                    _, _, refsr_lv3 = eval_lte((ref_sr + 1.) / 2.)
                    ref_lv1, ref_lv2, ref_lv3 = eval_lte((ref + 1.) / 2.)

                    ### search
                    lrsr_lv3_unfold = self.unfold(lrsr_lv3)
                    refsr_lv3_unfold = self.unfold(refsr_lv3)

                    ### transfer
                    ref_lv3_unfold = self.unfold(ref_lv3)
                    ref_lv2_unfold = self.unfold(ref_lv2,
                                                 kernel_size=6,
                                                 padding=2,
                                                 stride=2)
                    ref_lv1_unfold = self.unfold(ref_lv1,
                                                 kernel_size=12,
                                                 padding=4,
                                                 stride=4)

                    R_lv3_star, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold = eval_searchtransfer(
                        lrsr_lv3_unfold, refsr_lv3_unfold, ref_lv3_unfold,
                        ref_lv2_unfold, ref_lv1_unfold)

                    T_lv3 = self.fold(T_lv3_unfold,
                                      output_size=lrsr_lv3.shape[-2:],
                                      kernel_size=3,
                                      padding=1,
                                      stride=1) / (3. * 3.)
                    T_lv2 = self.fold(T_lv2_unfold,
                                      output_size=(lrsr_lv3.shape[2] * 2,
                                                   lrsr_lv3.shape[3] * 2),
                                      kernel_size=6,
                                      padding=2,
                                      stride=2) / (3. * 3.)
                    T_lv1 = self.fold(T_lv1_unfold,
                                      output_size=(lrsr_lv3.shape[2] * 4,
                                                   lrsr_lv3.shape[3] * 4),
                                      kernel_size=12,
                                      padding=4,
                                      stride=4) / (3. * 3.)

                    S = np.reshape(R_lv3_star, [
                        R_lv3_star.shape[0], 1, lrsr_lv3.shape[2],
                        lrsr_lv3.shape[3]
                    ])

                    sr = eval_mainnet(lr, S, T_lv3, T_lv2, T_lv1)
                    # sr: range [-1, 1]
                    # hr: range [-1, 1]

                    ### prepare data
                    sr = (sr + 1.) * 127.5
                    hr = (hr + 1.) * 127.5

                    sr = np.transpose(np.round(np.squeeze(sr)), (1, 2, 0))
                    hr = np.transpose(np.round(np.squeeze(hr)), (1, 2, 0))

                    ### calculate psnr and ssim
                    val_psnr += self.calc_psnr(sr, hr)
                    val_ssim += self.calc_ssim(sr, hr)

                val_psnr = val_psnr / val_batch_num
                val_ssim = val_ssim / val_batch_num

                Val_psnr.append(val_psnr)
                Val_ssim.append(val_ssim)
                print("****************** evalute  *****************")
                print("{}th epoch, val_psnr:{}, val_ssim:{}.".format(
                    epoch_idx + 1, val_psnr, val_ssim))
                if epoch_idx + 1 > 10 and val_psnr > best_psnr:
                    best_psnr = val_psnr
                    if pre_best != -1:
                        # delete the previous best checkpoint
                        print(
                            "delete the previous best {}th epoch model".format(
                                pre_best))
                        shutil.rmtree(
                            os.path.join(self.checkpoint_path,
                                         "{}th_epoch".format(pre_best)))

                    # save parameters
                    check_point.save(
                        os.path.join(self.checkpoint_path,
                                     "{}th_epoch".format(epoch_idx + 1)))
                    pre_best = epoch_idx + 1
                    print("save the best {}th epoch model at {}.".format(
                        epoch_idx + 1,
                        str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))))

        # save train loss and val
        np.save(os.path.join(self.loss_path, 'loss_{}.npy'.format(epochs)),
                Loss)

        np.save(os.path.join(self.loss_path, 'Val_psnr_{}.npy'.format(epochs)),
                Val_psnr)
        np.save(os.path.join(self.loss_path, 'Val_ssim_{}.npy'.format(epochs)),
                Val_ssim)
        print("*************** Train {} done ***************** ".format(
            self.path))
Example #10
0
def _compare_kldivloss_with_np(
    input_shape,
    target_shape,
    log_target,
    device_type,
    machine_ids,
    device_counts,
):
    """Compare ``flow.nn.KLDivLoss`` with a NumPy reference implementation.

    Random float32 input/target arrays are generated, the loss is computed in
    a OneFlow training job with ``reduction`` set to "none", "mean" and "sum",
    and each result is compared with the NumPy reference.  The gradient that
    reaches the job input is checked against the analytic gradient of the
    mean-reduced loss, which is the loss being minimised.

    Args:
        input_shape: Shape of the random input array.
        target_shape: Shape of the random target array.
        log_target: Indexable whose first element is the flag forwarded to
            ``KLDivLoss`` as ``log_target``.
        device_type: Either "cpu" or "gpu".
        machine_ids: Placement spec for the job's default placement scope.
        device_counts: Number of devices to configure for ``device_type``.

    Raises:
        AssertionError: If ``device_type`` is invalid or any comparison fails.
    """
    input_arr = np.random.random(size=input_shape).astype(np.float32)
    target_arr = np.random.random(size=target_shape).astype(np.float32)

    log_target = log_target[0]
    assert device_type in ["cpu", "gpu"]

    flow.clear_default_session()
    if device_type == "cpu":
        flow.config.cpu_device_num(device_counts)
    else:
        flow.config.gpu_device_num(device_counts)

    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    func_config.default_logical_view(flow.scope.consistent_view())

    def np_kldivloss(np_input, np_target, np_log_target):
        """Return the element-wise, mean and sum reference losses."""
        if np_log_target:
            np_kl_div_loss = np.exp(np_target) * (np_target - np_input)
        else:
            # log(0) gives -inf and 0 * -inf gives nan; those entries are
            # replaced just below, so the floating-point warnings are noise.
            with np.errstate(divide="ignore", invalid="ignore"):
                np_kl_div_out_loss = np_target * (np.log(np_target) -
                                                  np_input)
            np_zeros = np.zeros_like(np_kl_div_out_loss, dtype=np.float32)
            # Keep the loss only where target > 0 and use 0 elsewhere, which
            # removes any `nan` produced above.
            np_kl_div_loss = np.where(np_target > 0, np_kl_div_out_loss,
                                      np_zeros)

        return {
            "np_kldivloss": np_kl_div_loss,
            "np_kldivloss_mean": np.mean(np_kl_div_loss),
            "np_kldivloss_sum": np.sum(np_kl_div_loss),
        }

    np_out_kldivloss_dict = np_kldivloss(input_arr, target_arr, log_target)

    def np_kldivloss_diff(np_input, np_target, np_log_target):
        """Return the reference gradient w.r.t. the input (raw and / N)."""
        elem_cnt = np_input.size
        if np_log_target:
            _np_diff = -np.exp(np_target)
        else:
            _np_diff = -np_target
            # With np_log_target == False the loss is zeroed wherever
            # target <= 0, so the gradient must be zeroed there as well.
            _zero_index = np.where(np_target > 0, 1, 0)
            _np_diff = _np_diff * _zero_index

        return {
            "np_kldivloss_grad": _np_diff,
            "np_kldivloss_grad_mean": _np_diff / elem_cnt,
        }

    np_grad_dict = np_kldivloss_diff(input_arr, target_arr, log_target)

    def assert_prediction_grad(blob: tp.Numpy):
        # Validate the correctness of the gradient.  The job minimises the
        # mean-reduced loss, hence the comparison with the "mean" gradient.
        assert np.allclose(blob,
                           np_grad_dict["np_kldivloss_grad_mean"],
                           atol=1e-4)

    @flow.global_function(
        type="train",
        function_config=func_config,
    )
    def oneflow_kldivloss(
        of_input: tp.Numpy.Placeholder(shape=input_arr.shape),
        of_target: tp.Numpy.Placeholder(shape=target_arr.shape),
    ) -> Dict[str, tp.Numpy]:
        with flow.scope.placement(device_type, "0:0"):
            # Adding a zero-initialised variable leaves the value unchanged
            # but gives the job a trainable parameter to differentiate.
            v = flow.get_variable(
                shape=input_arr.shape,
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
                name="x_var",
            )
            of_input = of_input + v

        flow.watch_diff(of_input, assert_prediction_grad)

        of_kldivloss = flow.nn.KLDivLoss(
            of_input,
            of_target,
            log_target=log_target,
            reduction="none",
            name="kldivloss",
        )
        of_kldivloss_mean = flow.nn.KLDivLoss(
            of_input,
            of_target,
            log_target=log_target,
            reduction="mean",
            name="kldivloss_mean",
        )
        of_kldivloss_sum = flow.nn.KLDivLoss(
            of_input,
            of_target,
            log_target=log_target,
            reduction="sum",
            name="kldivloss_sum",
        )

        with flow.scope.placement(device_type, "0:0"):
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-3]),
                               momentum=0).minimize(of_kldivloss_mean)

        return {
            "of_kldivloss": of_kldivloss,
            "of_kldivloss_mean": of_kldivloss_mean,
            "of_kldivloss_sum": of_kldivloss_sum,
        }

    of_out_kldivloss_dict = oneflow_kldivloss(input_arr, target_arr)

    assert np.allclose(
        of_out_kldivloss_dict["of_kldivloss"],
        np_out_kldivloss_dict["np_kldivloss"],
        atol=1e-5,
    )

    assert np.allclose(
        of_out_kldivloss_dict["of_kldivloss_mean"],
        np_out_kldivloss_dict["np_kldivloss_mean"],
    )
    assert np.allclose(
        of_out_kldivloss_dict["of_kldivloss_sum"],
        np_out_kldivloss_dict["np_kldivloss_sum"],
    )
Example #11
0
def compare_with_tensorflow(test_case, device_type, x_shape, filters,
                            kernel_size, groups):
    """Check ``flow.layers.conv2d`` (NCHW, "valid") against TensorFlow.

    A OneFlow training job runs the convolution on a random variable ``x``
    and records ``x``, the kernel, the output and their gradients through
    ``test_global_storage``.  The same convolution is replayed in TensorFlow
    (NHWC layout) and the forward output, the input gradient and the kernel
    gradient are compared.

    Args:
        test_case: Object providing ``assertTrue`` (e.g. a ``TestCase``).
        device_type: Either "gpu" or "cpu".
        x_shape: NCHW shape of the input variable.
        filters: Number of output channels.
        kernel_size: Side length of the square kernel.
        groups: Number of convolution groups; must divide both
            ``x_shape[1]`` and ``filters``.
    """
    assert device_type in ["gpu", "cpu"]

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ConvJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            loss = flow.layers.conv2d(
                x,
                filters,
                kernel_size=kernel_size,
                strides=[1, 1],
                padding="valid",
                data_format="NCHW",
                dilation_rate=1,
                groups=groups,
                use_bias=False,
                kernel_initializer=flow.random_uniform_initializer(minval=0,
                                                                   maxval=100),
                weight_name="conv2d_weight",
            )
            # Requested under the same name as the layer's `weight_name` so
            # the kernel can be watched below.
            weight_shape = (filters, x.shape[1] // groups, kernel_size,
                            kernel_size)
            weight = flow.get_variable(
                name="conv2d_weight",
                shape=weight_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(weight, test_global_storage.Setter("weight"))
            flow.watch_diff(weight, test_global_storage.Setter("weight_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    of_out = ConvJob().get()

    # TensorFlow (NHWC activations, HWIO kernel)
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x").transpose(0, 2, 3, 1))
        assert groups > 0
        assert x_shape[1] % groups == 0
        assert filters % groups == 0
        # The kernel is built identically for both the plain and the grouped
        # convolution, so it is created once.
        weight = tf.Variable(
            test_global_storage.Get("weight").transpose(2, 3, 1, 0))
        if groups == 1:
            tf_out = tf.nn.conv2d(x,
                                  weight,
                                  strides=[1, 1, 1, 1],
                                  padding="VALID",
                                  data_format="NHWC")
        else:
            tf_out = grouped_convolution2D(x,
                                           weight,
                                           padding="VALID",
                                           num_groups=groups)

    loss_diff = test_global_storage.Get("loss_diff").transpose(0, 2, 3, 1)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_weight_diff = tape.gradient(tf_out, weight, loss_diff)

    fail_template = "\n{} shape (of vs. tf): {} vs. {}\nmax_abs_diff: {}"

    # Forward output
    of_out_np = of_out.numpy().transpose(0, 2, 3, 1)
    tf_out_np = tf_out.numpy()
    max_abs_diff = np.max(np.absolute(of_out_np - tf_out_np))
    fail_info = "\nshape (of vs. tf): {} vs. {}\nmax_abs_diff: {}".format(
        of_out_np.shape, tf_out_np.shape, max_abs_diff)
    test_case.assertTrue(
        np.allclose(of_out_np, tf_out_np, rtol=1e-5, atol=1e-5), fail_info)

    # Gradient w.r.t. the input; the max diff is reported on failure.
    of_x_diff_arr = test_global_storage.Get("x_diff").transpose(0, 2, 3, 1)
    tf_x_diff_arr = tf_x_diff.numpy()
    max_abs_diff = np.max(np.abs(of_x_diff_arr - tf_x_diff_arr))
    test_case.assertTrue(
        np.allclose(of_x_diff_arr, tf_x_diff_arr, rtol=1e-5, atol=1e-4),
        fail_template.format("x_diff", of_x_diff_arr.shape,
                             tf_x_diff_arr.shape, max_abs_diff))

    # Gradient w.r.t. the kernel
    of_weight_diff_arr = test_global_storage.Get("weight_diff").transpose(
        2, 3, 1, 0)
    tf_weight_diff_arr = tf_weight_diff.numpy()
    max_abs_diff = np.max(np.abs(of_weight_diff_arr - tf_weight_diff_arr))
    test_case.assertTrue(
        np.allclose(of_weight_diff_arr,
                    tf_weight_diff_arr,
                    rtol=1e-5,
                    atol=1e-5),
        fail_template.format("weight_diff", of_weight_diff_arr.shape,
                             tf_weight_diff_arr.shape, max_abs_diff))
Example #12
0
def test_ccrelu_2n1c(test_case):
    """Run ``fixed_tensor_def_test`` with a consistent logical view."""
    config = flow.FunctionConfig()
    view = flow.scope.consistent_view()
    config.default_logical_view(view)
    fixed_tensor_def_test(test_case, config)
Example #13
0
def test_mirror_ccrelu(test_case):
    """Run ``mirrored_tensor_def_test`` with a mirrored logical view."""
    config = flow.FunctionConfig()
    view = flow.scope.mirrored_view()
    config.default_logical_view(view)
    mirrored_tensor_def_test(test_case, config)
Example #14
0
def main(args):
    """Train the network built by ``vgg`` and save the loss history.

    Configures OneFlow for ``args.num_nodes`` machines with
    ``args.gpu_num_per_node`` GPUs each, defines a training job and an
    evaluation job, initialises or loads a checkpoint, then runs
    ``args.iter_num`` training iterations.  A checkpoint is written to
    ``_MODEL_SAVE_DIR + str(i)`` whenever ``(i + 1) % 100 == 0`` and the list
    of per-iteration mean losses is saved under ``./of_loss/vgg16``.

    Args:
        args: Parsed options; this function reads ``num_nodes``,
            ``gpu_num_per_node``, ``enable_auto_mixed_precision``,
            ``train_dir``, ``eval_dir``, ``model_load_dir`` and ``iter_num``.
    """
    flow.config.machine_num(args.num_nodes)
    flow.config.gpu_device_num(args.gpu_num_per_node)
    train_config = flow.FunctionConfig()
    train_config.default_logical_view(flow.scope.consistent_view())
    train_config.default_data_type(flow.float)
    train_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)

    @flow.global_function(type="train", function_config=train_config)
    def vgg_train_job():
        (labels, images) = _data_load_layer(args, args.train_dir)
        to_return = vgg(images, labels)
        # The loss is the last element of what `vgg` returns.
        loss = to_return[-1]
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
            [], [0.00001]),
                           momentum=0).minimize(loss)
        return loss

    eval_config = flow.FunctionConfig()
    eval_config.default_logical_view(flow.scope.consistent_view())
    eval_config.default_data_type(flow.float)
    eval_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)

    # NOTE: the evaluation job is defined but never invoked in this function.
    @flow.global_function(function_config=eval_config)
    def vgg_eval_job():
        (labels, images) = _data_load_layer(args, args.eval_dir)
        return vgg(images, labels, False)

    check_point = flow.train.CheckPoint()
    if not args.model_load_dir:
        check_point.init()
    else:
        check_point.load(args.model_load_dir)

    num_nodes = args.num_nodes
    print("Traning vgg16: num_gpu_per_node = {}, num_nodes = {}.".format(
        args.gpu_num_per_node, num_nodes))

    print("{:>12}  {:>12}  {:>12}".format("iter", "loss type", "loss value"))
    fmt_str = "{:>12}  {:>12}  {:>12.6f}"
    loss = []
    for i in range(args.iter_num):
        train_loss = vgg_train_job().get().mean()
        loss.append(train_loss)

        print(fmt_str.format(i, "train loss:", train_loss))

        if (i + 1) % 100 == 0:
            check_point.save(_MODEL_SAVE_DIR + str(i))

    # save loss to file
    loss_file = "{}n{}c.npy".format(str(num_nodes),
                                    str(args.gpu_num_per_node * num_nodes))
    loss_path = "./of_loss/vgg16"
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(loss_path, exist_ok=True)
    numpy.save(os.path.join(loss_path, loss_file), loss)
Example #15
0
def compare_with_tensorflow(device_type,
                            x_shape,
                            filters,
                            kernel_size,
                            groups,
                            padding="VALID",
                            stride=1):
    """Check ``flow.nn.conv2d`` (NCHW) against a TensorFlow convolution.

    A OneFlow job convolves a random variable ``x`` with a random kernel and
    records both tensors, the output and their gradients through
    ``test_global_storage``.  The computation is replayed in TensorFlow with
    NHWC data and the forward output, the input gradient and the kernel
    gradient are compared with ``np.allclose``.

    NOTE(review): in the grouped branch ``stride`` is not forwarded to
    ``grouped_convolution2D`` -- confirm that helper's default matches.

    Args:
        device_type: Either "gpu" or "cpu".
        x_shape: NCHW shape of the input variable.
        filters: Number of output channels.
        kernel_size: Side length of the square kernel.
        groups: Number of convolution groups; must divide both
            ``x_shape[1]`` and ``filters``.
        padding: Padding mode handed to both frameworks.
        stride: Stride applied along both spatial dimensions.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    job_config = flow.FunctionConfig()
    job_config.default_data_type(flow.float)
    job_config.train.primary_lr(1e-4)
    job_config.train.model_update_conf(dict(naive_conf={}))

    @flow.global_function(job_config)
    def ConvJob():
        with flow.scope.placement(device_type, "0:0"):
            x_var = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            in_channels_per_group = int(x_var.shape[1] / groups)
            kernel_var = flow.get_variable(
                "conv-weight",
                shape=(filters, in_channels_per_group, kernel_size,
                       kernel_size),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            conv_out = flow.nn.conv2d(
                x_var,
                kernel_var,
                strides=[stride, stride],
                padding=padding,
                data_format="NCHW",
                dilations=[1, 1],
                groups=groups,
            )
            flow.losses.add_loss(conv_out)

            # Record values and gradients for the comparison further down.
            flow.watch(x_var, test_global_storage.Setter("x"))
            flow.watch_diff(x_var, test_global_storage.Setter("x_diff"))
            flow.watch(kernel_var, test_global_storage.Setter("weight"))
            flow.watch_diff(kernel_var,
                            test_global_storage.Setter("weight_diff"))
            flow.watch(conv_out, test_global_storage.Setter("loss"))
            flow.watch_diff(conv_out, test_global_storage.Setter("loss_diff"))

            return conv_out

    # OneFlow
    saver = flow.train.CheckPoint()
    saver.init()
    of_out = ConvJob().get()

    # TensorFlow: activations go NCHW -> NHWC, the kernel OIHW -> HWIO.
    with tf.GradientTape(persistent=True) as tape:
        tf_x = tf.Variable(test_global_storage.Get("x").transpose(0, 2, 3, 1))
        assert groups > 0
        assert x_shape[1] % groups == 0
        assert filters % groups == 0
        tf_weight = tf.Variable(
            test_global_storage.Get("weight").transpose(2, 3, 1, 0))
        if groups != 1:
            tf_out = grouped_convolution2D(tf_x,
                                           tf_weight,
                                           padding=padding,
                                           num_groups=groups)
        else:
            tf_out = tf.nn.conv2d(
                tf_x,
                tf_weight,
                strides=[1, stride, stride, 1],
                padding=padding,
                data_format="NHWC",
            )

    upstream_grad = test_global_storage.Get("loss_diff").transpose(0, 2, 3, 1)
    tf_x_diff = tape.gradient(tf_out, tf_x, upstream_grad)
    tf_weight_diff = tape.gradient(tf_out, tf_weight, upstream_grad)

    # Forward output; the largest deviation is the assertion message.
    of_out_nhwc = of_out.numpy().transpose(0, 2, 3, 1)
    tf_out_np = tf_out.numpy()
    max_diff = np.max(np.absolute(of_out_nhwc - tf_out_np))
    assert np.allclose(of_out_nhwc, tf_out_np, rtol=1e-5,
                       atol=1e-5), max_diff

    # Gradient w.r.t. the input
    of_x_diff = test_global_storage.Get("x_diff").transpose(0, 2, 3, 1)
    assert np.allclose(of_x_diff, tf_x_diff.numpy(), rtol=1e-4, atol=1e-4)

    # Gradient w.r.t. the kernel
    of_weight_diff = test_global_storage.Get("weight_diff").transpose(
        2, 3, 1, 0)
    assert np.allclose(of_weight_diff,
                       tf_weight_diff.numpy(),
                       rtol=1e-5,
                       atol=1e-5)
    def test(self, model_path):
        """Evaluate a saved model on ``TestSet(args)`` and print PSNR/SSIM.

        Defines three predict-mode OneFlow jobs (feature extraction, search
        and transfer, main network), loads the checkpoint at ``model_path``
        and processes the samples one at a time.  The averages of
        ``self.calc_psnr`` and ``self.calc_ssim`` over all samples are
        printed; nothing is returned.

        Args:
            model_path: Checkpoint location handed to ``CheckPoint.load``.

        NOTE(review): ``args`` is neither a parameter nor a local of this
        method; it is presumably a module-level global -- confirm.
        NOTE(review): an empty dataset makes the final averaging divide by
        zero (``ZeroDivisionError``).
        """
        # download data
        val_data = TestSet(args)

        # config
        func_config = flow.FunctionConfig()
        # Jobs defined below default to double precision.
        func_config.default_data_type(flow.double)
        flow.config.gpu_device_num(self.gpu_num_per_node)
        flow.config.enable_debug_mode(True)

        # Feature extractor job: one 3x160x160 image in, three values from
        # self.LTE out.
        @flow.global_function(type="predict", function_config=func_config)
        def eval_lte(input: tp.Numpy.Placeholder(
            (1, 3, 160, 160))) -> Tuple[tp.Numpy, tp.Numpy, tp.Numpy]:
            x_lv1, x_lv2, x_lv3 = self.LTE(input, trainable=False)
            return x_lv1, x_lv2, x_lv3

        # Search/transfer job: builds the relevance matrix between the
        # normalised refsr and lrsr patches, then uses the arg-max along
        # axis 1 to select from each unfolded reference feature via self.bis.
        @flow.global_function(type="predict", function_config=func_config)
        def eval_searchtransfer(lrsr_lv3_unfold: tp.Numpy.Placeholder(
            (1, 2304, 1600)), refsr_lv3_unfold: tp.Numpy.Placeholder(
                (1, 2304, 1600)), ref_lv3_unfold: tp.Numpy.Placeholder(
                    (1, 2304, 1600)), ref_lv2_unfold: tp.Numpy.Placeholder(
                        (1, 4608, 1600)), ref_lv1_unfold: tp.Numpy.Placeholder(
                            (1, 9216, 1600))) -> Tuple[tp.Numpy, tp.Numpy,
                                                       tp.Numpy, tp.Numpy]:
            refsr_lv3_unfold = flow.transpose(refsr_lv3_unfold, perm=[0, 2, 1])

            refsr_lv3_unfold = flow.math.l2_normalize(
                refsr_lv3_unfold, axis=2)  # [N, Hr*Wr, C*k*k]
            lrsr_lv3_unfold = flow.math.l2_normalize(lrsr_lv3_unfold,
                                                     axis=1)  # [N, C*k*k, H*W]

            R_lv3 = flow.matmul(refsr_lv3_unfold,
                                lrsr_lv3_unfold)  # [N, Hr*Wr, H*W]
            R_lv3_star = flow.math.reduce_max(R_lv3, axis=1)  # [N, H*W]
            R_lv3_star_arg = flow.math.argmax(R_lv3, axis=1)  # [N, H*W]

            T_lv3_unfold = self.bis(ref_lv3_unfold, R_lv3_star_arg)
            T_lv2_unfold = self.bis(ref_lv2_unfold, R_lv3_star_arg)
            T_lv1_unfold = self.bis(ref_lv1_unfold, R_lv3_star_arg)

            return R_lv3_star, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold

        # Main network job: runs self.mainnet without training.
        @flow.global_function(type="predict", function_config=func_config)
        def eval_mainnet(lr: tp.Numpy.Placeholder(
            (1, 3, 40, 40)), S: tp.Numpy.Placeholder(
                (1, 1, 40, 40)), T_lv3: tp.Numpy.Placeholder((1, 256, 40, 40)),
                         T_lv2: tp.Numpy.Placeholder(
                             (1, 128, 80, 80)), T_lv1: tp.Numpy.Placeholder(
                                 (1, 64, 160, 160))) -> tp.Numpy:
            sr = self.mainnet(lr, S, T_lv3, T_lv2, T_lv1, trainable=False)
            return sr

        # The checkpoint is loaded only after all jobs have been defined.
        check_point = flow.train.CheckPoint()
        check_point.load(model_path)

        val_psnr, val_ssim = 0., 0.
        val_batch_num = len(val_data)
        for batch_idx in range(val_batch_num):
            sample = val_data[batch_idx]
            # Add a leading axis of size 1 so each sample forms a batch that
            # matches the fixed placeholder shapes above.
            lr = np.ascontiguousarray(sample['LR'][np.newaxis, :])
            lr_sr = np.ascontiguousarray(sample['LR_sr'][np.newaxis, :])
            hr = np.ascontiguousarray(sample['HR'][np.newaxis, :])
            ref = np.ascontiguousarray(sample['Ref'][np.newaxis, :])
            ref_sr = np.ascontiguousarray(sample['Ref_sr'][np.newaxis, :])

            # Inputs are rescaled with (x + 1) / 2 before feature extraction
            # (maps [-1, 1] to [0, 1], per the range comments further down).
            _, _, lrsr_lv3 = eval_lte((lr_sr + 1.) / 2.)
            _, _, refsr_lv3 = eval_lte((ref_sr + 1.) / 2.)
            ref_lv1, ref_lv2, ref_lv3 = eval_lte((ref + 1.) / 2.)

            ### search
            lrsr_lv3_unfold = self.unfold(lrsr_lv3)
            refsr_lv3_unfold = self.unfold(refsr_lv3)

            ### transfer
            # Kernel, padding and stride double from level 3 to level 2 and
            # again from level 2 to level 1.
            ref_lv3_unfold = self.unfold(ref_lv3)
            ref_lv2_unfold = self.unfold(ref_lv2,
                                         kernel_size=6,
                                         padding=2,
                                         stride=2)
            ref_lv1_unfold = self.unfold(ref_lv1,
                                         kernel_size=12,
                                         padding=4,
                                         stride=4)

            R_lv3_star, T_lv3_unfold, T_lv2_unfold, T_lv1_unfold = eval_searchtransfer(
                lrsr_lv3_unfold, refsr_lv3_unfold, ref_lv3_unfold,
                ref_lv2_unfold, ref_lv1_unfold)

            # Fold the selected patches back into feature maps and divide by
            # 3 * 3 (presumably the number of overlapping patches covering
            # each position -- confirm against self.fold).
            T_lv3 = self.fold(T_lv3_unfold,
                              output_size=lrsr_lv3.shape[-2:],
                              kernel_size=3,
                              padding=1,
                              stride=1) / (3. * 3.)
            T_lv2 = self.fold(
                T_lv2_unfold,
                output_size=(lrsr_lv3.shape[2] * 2, lrsr_lv3.shape[3] * 2),
                kernel_size=6,
                padding=2,
                stride=2) / (3. * 3.)
            T_lv1 = self.fold(
                T_lv1_unfold,
                output_size=(lrsr_lv3.shape[2] * 4, lrsr_lv3.shape[3] * 4),
                kernel_size=12,
                padding=4,
                stride=4) / (3. * 3.)

            # Reshape the per-position maximum relevance into a
            # single-channel map with the spatial size of lrsr_lv3.
            S = np.reshape(
                R_lv3_star,
                [R_lv3_star.shape[0], 1, lrsr_lv3.shape[2], lrsr_lv3.shape[3]])

            sr = eval_mainnet(lr, S, T_lv3, T_lv2, T_lv1)
            # sr: range [-1, 1]
            # hr: range [-1, 1]

            ### prepare data
            # Rescale from [-1, 1] to [0, 255].
            sr = (sr + 1.) * 127.5
            hr = (hr + 1.) * 127.5

            # Drop the batch axis, round, and move channels last.
            sr = np.transpose(np.round(np.squeeze(sr)), (1, 2, 0))
            hr = np.transpose(np.round(np.squeeze(hr)), (1, 2, 0))

            ### calculate psnr and ssim
            val_psnr += self.calc_psnr(sr, hr)
            val_ssim += self.calc_ssim(sr, hr)

        # Average the accumulated metrics over all samples.
        val_psnr = val_psnr / val_batch_num
        val_ssim = val_ssim / val_batch_num

        print("****************** evalute  *****************")
        print("val_psnr:{}, val_ssim:{}.".format(val_psnr, val_ssim))
Example #17
0
def compare_with_tensorflow(device_type, x_shape, data_type, axis):
    """Check OneFlow's softmax forward/backward against TensorFlow.

    A OneFlow training job builds a trainable variable ``x`` of ``x_shape``,
    applies ``flow.nn.softmax`` along ``axis`` and minimizes the result with
    SGD.  The watched values of ``x`` and of the loss gradient are then fed to
    ``tf.nn.softmax`` and the outputs and input gradients are compared.

    Args:
        device_type: "gpu" or "cpu"; anything else fails the assertion.
        x_shape: shape of the variable that is fed to softmax.
        data_type: dtype name.  "float16" keeps the variable in ``flow.float``
            and casts around the softmax; other names are looked up in
            ``type_name_to_flow_type``.
        axis: axis passed to both softmax implementations.

    Raises:
        AssertionError: on an unsupported ``device_type`` or when outputs or
            gradients differ by more than the tolerance (1e-3 for float16,
            1e-5 otherwise).
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    job_config = flow.FunctionConfig()

    is_half = data_type == "float16"
    var_dtype = flow.float if is_half else type_name_to_flow_type[data_type]

    @flow.global_function(type="train", function_config=job_config)
    def SoftmaxJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=var_dtype,
                initializer=flow.random_uniform_initializer(minval=-0.1,
                                                            maxval=0.1),
                trainable=True,
            )
            if is_half:
                # Run the op itself in half precision, then cast back so the
                # optimizer and the watchers see float values.
                half_input = flow.cast(x, dtype=flow.float16)
                half_output = flow.nn.softmax(half_input, axis=axis)
                loss = flow.cast(half_output, dtype=flow.float)
            else:
                loss = flow.nn.softmax(x, axis=axis)

            schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
            flow.optimizer.SGD(schedule, momentum=0).minimize(loss)

            # Record value and gradient of both the input and the output.
            for blob, key in ((x, "x"), (loss, "loss")):
                flow.watch(blob, test_global_storage.Setter(key))
                flow.watch_diff(blob,
                                test_global_storage.Setter(key + "_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = SoftmaxJob().get()

    # TensorFlow, replayed on the exact input OneFlow used
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.softmax(x, axis=axis)

    upstream_grad = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, upstream_grad)

    tolerance = 1e-3 if is_half else 1e-5
    closeness = dict(rtol=tolerance, atol=tolerance)
    assert np.allclose(of_out.numpy(), tf_out.numpy(), **closeness)
    assert np.allclose(test_global_storage.Get("x_diff"), tf_x_diff.numpy(),
                       **closeness)
Example #18
0
def summary_demo():
    """Exercise the ``flow.summary`` writers end to end in a temporary log dir.

    Defines one global function per summary kind (scalar, histogram, raw
    protobuf, image, plus writer creation and flush), then feeds them text and
    hparams protobufs, scalars, histograms, projector data, images and the
    structure graph before flushing the writer.  Nothing is returned; the
    temporary directory is removed when the ``with`` block exits.
    """

    def _as_int8(text):
        """Return the UTF-8 bytes of ``text`` as a writable int8 array."""
        # np.fromstring(text, dtype=np.int8) relied on NumPy's deprecated
        # binary mode.  frombuffer yields the same values; copy() keeps the
        # result writable, as the array returned by fromstring was.
        return np.frombuffer(text.encode("utf-8"), dtype=np.int8).copy()

    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.mirrored_view())
    with tempfile.TemporaryDirectory() as logdir:

        @flow.global_function(function_config=func_config)
        def CreateWriter():
            flow.summary.create_summary_writer(logdir)

        @flow.global_function(function_config=func_config)
        def ScalarJob(
            value: flow.typing.ListNumpy.Placeholder((1, ), dtype=flow.float),
            step: flow.typing.ListNumpy.Placeholder((1, ), dtype=flow.int64),
            tag: flow.typing.ListNumpy.Placeholder((1000, ), dtype=flow.int8),
        ):
            flow.summary.scalar(value, step, tag)

        @flow.global_function(function_config=func_config)
        def HistogramJob(
                value: flow.typing.ListNumpy.Placeholder((200, 200, 200),
                                                         dtype=flow.float),
                step: flow.typing.ListNumpy.Placeholder((1, ),
                                                        dtype=flow.int64),
                tag: flow.typing.ListNumpy.Placeholder((9, ), dtype=flow.int8),
        ):
            flow.summary.histogram(value, step, tag)

        @flow.global_function(function_config=func_config)
        def PbJob(
            value: flow.typing.ListNumpy.Placeholder((1500, ),
                                                     dtype=flow.int8),
            step: flow.typing.ListNumpy.Placeholder((1, ), dtype=flow.int64),
        ):
            flow.summary.pb(value, step=step)

        @flow.global_function(function_config=func_config)
        def ImageJob(
                value: flow.typing.ListNumpy.Placeholder(shape=(100, 2000,
                                                                2000, 4),
                                                         dtype=flow.uint8),
                step: flow.typing.ListNumpy.Placeholder((1, ),
                                                        dtype=flow.int64),
                tag: flow.typing.ListNumpy.Placeholder((10, ),
                                                       dtype=flow.int8),
        ):
            flow.summary.image(value, step=step, tag=tag)

        @flow.global_function(function_config=func_config)
        def FlushJob():
            flow.summary.flush_summary_writer()

        CreateWriter()
        projector = flow.summary.Projector(logdir)
        projector.create_embedding_projector()
        projector.create_exception_projector()

        hparams = {
            flow.summary.HParam("learning_rate",
                                flow.summary.RealRange(1e-2, 1e-1)):
            0.02,
            flow.summary.HParam("dense_layers", flow.summary.IntegerRange(
                2, 7)):
            5,
            flow.summary.HParam("optimizer",
                                flow.summary.ValueSet(["adam", "sgd"])):
            "adam",
            flow.summary.HParam("accuracy", flow.summary.RealRange(1e-2, 1e-1)):
            0.001,
            flow.summary.HParam("magic", flow.summary.ValueSet([False, True])):
            True,
            flow.summary.Metric("loss", float):
            0.02,
            "dropout":
            0.6,
        }

        # Text and hparams summaries travel as serialized protobuf text.
        for i in range(200):
            step = np.array([i], dtype=np.int64)

            t = ["vgg16", "resnet50", "mask-rcnn", "yolov3"]
            pb = flow.summary.text(t)
            PbJob([_as_int8(str(pb))], [step])

            pb2 = flow.summary.hparams(hparams)
            PbJob([_as_int8(str(pb2))], [step])

        # Scalars: the logged value equals the step index.
        scalar_tag = _as_int8("scalar")
        for idx in range(100):
            value = np.array([idx], dtype=np.float32)
            step = np.array([idx], dtype=np.int64)
            ScalarJob([value], [step], [scalar_tag])

        # Histograms over fresh random data at every step.
        histogram_tag = _as_int8("histogram")
        for idx in range(20):
            value = np.random.rand(100, 100, 100).astype(np.float32)
            step = np.array([idx], dtype=np.int64)
            HistogramJob([value], [step], [histogram_tag])

        # NOTE(review): the projector calls below receive ``value`` (the last
        # (100, 100, 100) histogram array) while the 10-sample ``value_`` that
        # matches ``label`` and ``x`` is never used.  This looks like a typo
        # for ``value_`` -- confirm before changing, behavior is kept as is.
        value_ = np.random.rand(10, 10, 10).astype(np.float32)
        label = (np.random.rand(10) * 10).astype(np.int64)
        x = (np.random.rand(10, 10, 10) * 255).astype(np.uint8)
        sample_name = "sample"
        sample_type = "image"
        step = 1
        tag_exception = "exception_projector"
        tag_embedding = "embedding_projector"
        for _ in range(20):
            projector.exception_projector(
                value=value,
                tag=tag_exception,
                step=step,
                sample_name=sample_name,
                sample_type=sample_type,
                x=x,
            )
            projector.embedding_projector(
                value=value,
                label=label,
                tag=tag_embedding,
                step=step,
                sample_name=sample_name,
                sample_type=sample_type,
                x=x,
            )

        # Two gray images expanded to three channels plus one solid-color
        # image, stacked into a single batch.
        images = [
            cv2.cvtColor(np.ones([512, 512], np.uint8),
                         cv2.COLOR_BGR2RGB).astype(np.uint8),
            cv2.cvtColor(np.ones([512, 512], np.uint8),
                         cv2.COLOR_BGR2RGB).astype(np.uint8),
        ]

        images = np.array(images, dtype=np.uint8)
        imageRed = np.ones([512, 512, 3]).astype(np.uint8)
        Red = np.array([0, 255, 255], dtype=np.uint8)
        imageNew = np.multiply(imageRed, Red)
        imageNew = np.expand_dims(imageNew, axis=0)
        images = np.concatenate((images, imageNew), axis=0)
        step = np.array([1], dtype=np.int64)
        tag = _as_int8("image")
        for _ in range(20):
            ImageJob([images], [step], [tag])

        graph = flow.summary.Graph(logdir)
        graph.write_structure_graph()

        # Pause around the flush; presumably this lets the asynchronous
        # summary jobs drain before the directory is deleted -- TODO confirm.
        time.sleep(1)
        FlushJob()
        time.sleep(1)
Example #19
0
    def train(self, epochs):
        """Train the generator/discriminator pair and record loss histories.

        Loads the low/high resolution train and validation arrays from
        ``self.data_dir``, defines three OneFlow jobs (generator inference,
        generator training, discriminator training), loads the VGG weights
        from ``self.vgg_path`` and then alternates discriminator and generator
        updates batch by batch.  After every epoch the validation set is
        evaluated (L2 error, SSIM, PSNR), the last validation batch is saved
        as images, and, once past epoch 50, a checkpoint is written whenever
        the validation PSNR improves (the previous best checkpoint directory
        is deleted).  All collected loss and metric histories are finally
        saved as ``.npy`` files under ``self.loss_path``.

        Args:
            epochs: number of passes over the training data; also embedded in
                the names of the saved history files.

        Raises:
            AssertionError: if the loaded training HR or validation LR array
                does not have the hard-coded expected shape.
        """
        # Build the paths of the train/val image arrays stored as .npy files.
        train_hr_data_path = os.path.join(
            self.data_dir, "{}_{}hr_imgs.npy".format("train", self.hr_size))
        train_lr_data_path = os.path.join(
            self.data_dir, "{}_{}lr_imgs.npy".format("train", self.lr_size))
        val_hr_data_path = os.path.join(
            self.data_dir, "{}_{}hr_imgs.npy".format("val", self.hr_size))
        val_lr_data_path = os.path.join(
            self.data_dir, "{}_{}lr_imgs.npy".format("val", self.lr_size))

        train_hr_data = np.load(train_hr_data_path)
        train_lr_data = np.load(train_lr_data_path)
        val_hr_data = np.load(val_hr_data_path)
        val_lr_data = np.load(val_lr_data_path)

        # Sanity-check the dataset layout: sample counts are hard-coded, the
        # remaining dims are (3, size, size).
        assert train_hr_data.shape == (
            16700, 3, self.hr_size,
            self.hr_size), "The shape of train_hr_data is {}".format(
                train_hr_data.shape)
        assert val_lr_data.shape == (
            425, 3, self.lr_size,
            self.lr_size), "The shape of val_lr_data is {}".format(
                val_lr_data.shape)

        # Histories: the training losses get one entry per logged batch, the
        # validation metrics one entry per epoch.
        G_l2_loss = []
        G_gan_loss = []
        G_perceptual_loss = []
        G_tv_loss = []
        G_total_loss = []
        D_total_loss = []
        Val_l2_error = []
        Val_ssim = []
        Val_psnr = []

        # config
        func_config = flow.FunctionConfig()
        func_config.default_data_type(flow.float)
        flow.config.gpu_device_num(self.gpu_num_per_node)
        # train config: a constant learning rate shared by both optimizers
        lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [self.lr])

        # Inference-only job used for validation.
        @flow.global_function(type="predict", function_config=func_config)
        def eval_generator(input: tp.Numpy.Placeholder(
            (self.batch_size, 3, self.lr_size, self.lr_size))) -> tp.Numpy:
            g_out = self.Generator(input, trainable=False)
            return g_out

        # Generator update: the discriminator is built with trainable=False
        # here, so only generator variables are marked trainable in this job.
        @flow.global_function(type="train", function_config=func_config)
        def train_generator(input: tp.Numpy.Placeholder(
            (self.batch_size, 3, self.lr_size,
             self.lr_size)), target: tp.Numpy.Placeholder(
                 (self.batch_size, 3, self.hr_size,
                  self.hr_size))) -> Tuple[tp.Numpy, tp.Numpy, tp.Numpy,
                                           tp.Numpy, tp.Numpy, tp.Numpy]:
            g_out = self.Generator(input, trainable=True)
            g_logits = self.Discriminator(g_out, trainable=False)
            # Adversarial Loss
            g_gan_loss = 0.001 * flow.math.reduce_mean(1 - g_logits)
            # Image Loss
            g_l2_loss = self.mseloss(g_out, target)
            # TV Loss
            g_tv_loss = self.total_variance_loss(g_out, weight=2e-8)

            # Perceptual loss: MSE between the VGG features of the generated
            # and the real image; the second VGG call passes reuse=True.
            def perceptual_loss(fake, real, weight=1.0):
                fake_feature = self.vgg16bn(fake, trainable=False)
                real_feature = self.vgg16bn(real, trainable=False, reuse=True)

                return self.mseloss(fake_feature, real_feature, weight=weight)

            g_perceptual_loss = perceptual_loss(g_out, target, weight=0.006)

            g_total_loss = g_l2_loss + g_gan_loss + g_perceptual_loss + g_tv_loss

            flow.optimizer.Adam(lr_scheduler, beta1=0.5,
                                beta2=0.999).minimize(g_total_loss)

            return g_l2_loss, g_gan_loss, g_perceptual_loss, g_tv_loss, g_total_loss, g_out

        # Discriminator update: the generator is built with trainable=False
        # and the discriminator is applied to both fake and real images (the
        # second call passes reuse=True).
        @flow.global_function(type="train", function_config=func_config)
        def train_discriminator(input: tp.Numpy.Placeholder(
            (self.batch_size, 3, self.lr_size, self.lr_size)),
                                target: tp.Numpy.Placeholder(
                                    (self.batch_size, 3, self.hr_size,
                                     self.hr_size))) -> tp.Numpy:
            g_out = self.Generator(input, trainable=False)
            g_logits = self.Discriminator(g_out, trainable=True)
            d_logits = self.Discriminator(target, trainable=True, reuse=True)

            d_loss = 1 - flow.math.reduce_mean(d_logits - g_logits)

            flow.optimizer.Adam(lr_scheduler, beta1=0.5,
                                beta2=0.999).minimize(d_loss)

            return d_loss

        # load trained weight of vgg16bn and initialize automatically GAN model
        flow.load_variables(flow.checkpoint.get(self.vgg_path))

        # trained weights of vgg need to be changed, because vgg is used twice like Discriminator. Please use weights in of_vgg16bn_reuse path to load vgg for perceptual loss.
        # flow.checkpoint.save("vgg_checkpoint")

        # Samples beyond the last full batch are dropped.
        batch_num = len(train_hr_data) // self.batch_size
        # pre_best: epoch number of the currently stored best checkpoint
        # (-1 while none exists); best_psnr: its validation PSNR.
        pre_best, best_psnr = -1, 0
        print("****************** start training *****************")
        for epoch_idx in range(epochs):
            start = time.time()
            print("****************** train  *****************")
            for batch_idx in range(batch_num):
                # Slice the current batch as float32 in C order.
                inputs = train_lr_data[batch_idx *
                                       self.batch_size:(batch_idx + 1) *
                                       self.batch_size].astype(np.float32,
                                                               order="C")
                target = train_hr_data[batch_idx *
                                       self.batch_size:(batch_idx + 1) *
                                       self.batch_size].astype(np.float32,
                                                               order="C")
                # One discriminator step followed by one generator step.
                d_loss = train_discriminator(inputs, target)
                g_l2_loss, g_gan_loss, g_perceptual_loss, g_tv_loss, g_total_loss, g_out = train_generator(
                    inputs, target)

                # Reduce every returned loss array to a scalar.
                d_loss = d_loss.mean()
                g_l2_loss = g_l2_loss.mean()
                g_gan_loss = g_gan_loss.mean()
                g_perceptual_loss = g_perceptual_loss.mean()
                g_tv_loss = g_tv_loss.mean()
                g_total_loss = g_total_loss.mean()

                # Losses are printed and recorded only every print_interval
                # batches, not for every batch.
                if (batch_idx + 1) % self.print_interval == 0:
                    print(
                        "{}th epoch, {}th batch, g_l2_loss:{}, g_gan_loss:{}, g_perceptual_loss:{}, g_tv_loss:{}, gloss:{}, dloss:{} "
                        .format(epoch_idx + 1, batch_idx + 1, g_l2_loss,
                                g_gan_loss, g_perceptual_loss, g_tv_loss,
                                g_total_loss, d_loss))

                    G_l2_loss.append(g_l2_loss)
                    G_gan_loss.append(g_gan_loss)
                    G_perceptual_loss.append(g_perceptual_loss)
                    G_tv_loss.append(g_tv_loss)
                    G_total_loss.append(g_total_loss)
                    D_total_loss.append(d_loss)

            print("Time for epoch {} is {} sec.".format(
                epoch_idx + 1,
                time.time() - start))

            # "% 1" is always 0, so validation runs after every epoch.
            if (epoch_idx + 1) % 1 == 0:
                # save train images
                # self.save_images(g_out, inputs, target, epoch_idx, name="train")

                # save val images, trainable = False
                # and calculate MSE, SSIMs, SSIM, PSNR
                val_l2_error, val_ssim, val_psnr = 0, 0, 0
                # NOTE(review): with zero validation batches the averages
                # below divide by zero and val_g_out is never bound.
                val_batch_num = len(val_hr_data) // self.batch_size
                for val_batch_idx in range(val_batch_num):
                    val_inputs = val_lr_data[val_batch_idx *
                                             self.batch_size:(val_batch_idx +
                                                              1) *
                                             self.batch_size].astype(
                                                 np.float32, order="C")
                    val_target = val_hr_data[val_batch_idx *
                                             self.batch_size:(val_batch_idx +
                                                              1) *
                                             self.batch_size].astype(
                                                 np.float32, order="C")
                    val_g_out = eval_generator(val_inputs)

                    val_l2_error += (np.square(val_g_out - val_target).mean())
                    # The metric helpers are fed with axes reordered from
                    # (0, 1, 2, 3) to (0, 2, 3, 1).
                    val_ssim += self.ssim(val_target.transpose(0, 2, 3, 1),
                                          val_g_out.transpose(0, 2, 3, 1))
                    # val_ssims += (pytorch_ssim.ssim(val_g_out, val_target, oneflow=True).item())
                    val_psnr += self.psnr(val_target.transpose(0, 2, 3, 1),
                                          val_g_out.transpose(0, 2, 3, 1))

                # save val images (only the last validation batch)
                self.save_images(val_g_out,
                                 val_inputs,
                                 val_target,
                                 epoch_idx,
                                 name="val")

                # Average the accumulated metrics over the validation batches.
                val_l2_error = val_l2_error / val_batch_num
                val_ssim = val_ssim / val_batch_num
                val_psnr = val_psnr / val_batch_num
                # val_psnr = 10 * np.log10(1 / val_l2_error)

                Val_l2_error.append(val_l2_error)
                Val_ssim.append(val_ssim)
                Val_psnr.append(val_psnr)
                print("****************** evalute  *****************")
                # batch_idx here is the index left over from the training
                # loop above, i.e. the last training batch of this epoch.
                print(
                    "{}th epoch, {}th batch, val_l2_error:{}, val_ssim:{}, val_psnr:{}."
                    .format(epoch_idx + 1, batch_idx + 1, val_l2_error,
                            val_ssim, val_psnr))
                # Checkpoints are only considered after epoch 50 and only when
                # the validation PSNR beats the best value seen so far.
                if epoch_idx + 1 > 50 and val_psnr > best_psnr:
                    best_psnr = val_psnr
                    if pre_best != -1:
                        # delete the previous best checkpoint
                        print(
                            "delete the previous best {}th epoch model".format(
                                pre_best))
                        shutil.rmtree(
                            os.path.join(self.checkpoint_path,
                                         "{}th_epoch".format(pre_best)))

                    # save parameters
                    flow.checkpoint.save(
                        os.path.join(self.checkpoint_path,
                                     "{}th_epoch".format(epoch_idx + 1)))
                    pre_best = epoch_idx + 1
                    print("save the best {}th epoch model at {}.".format(
                        epoch_idx + 1,
                        str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))))

        # save train loss and val error to plot
        np.save(
            os.path.join(self.loss_path, 'G_l2_loss_{}.npy'.format(epochs)),
            G_l2_loss)
        np.save(
            os.path.join(self.loss_path, 'G_gan_loss_{}.npy'.format(epochs)),
            G_gan_loss)
        np.save(
            os.path.join(self.loss_path,
                         'G_perceptual_loss_{}.npy'.format(epochs)),
            G_perceptual_loss)
        np.save(
            os.path.join(self.loss_path, 'G_tv_loss_{}.npy'.format(epochs)),
            G_tv_loss)
        np.save(
            os.path.join(self.loss_path, 'G_total_loss_{}.npy'.format(epochs)),
            G_total_loss)
        np.save(
            os.path.join(self.loss_path, 'D_total_loss_{}.npy'.format(epochs)),
            D_total_loss)

        np.save(
            os.path.join(self.loss_path, 'Val_l2_error_{}.npy'.format(epochs)),
            Val_l2_error)
        np.save(os.path.join(self.loss_path, 'Val_ssim_{}.npy'.format(epochs)),
                Val_ssim)
        np.save(os.path.join(self.loss_path, 'Val_psnr_{}.npy'.format(epochs)),
                Val_psnr)
        print("*************** Train {} done ***************** ".format(
            self.path))
def compare_with_tensorflow(device_type, data_type, x_shape, case):
    """Check OneFlow's tensor-by-scalar-tensor arithmetic against TensorFlow.

    A OneFlow training job combines a trainable variable ``x`` of ``x_shape``
    with a trainable one-element variable ``y`` using the operation selected
    by ``case`` and minimizes the result with SGD.  The watched values are
    replayed through the matching TensorFlow operator and the outputs and
    both gradients are compared with ``rtol = atol = 1e-5``.

    Args:
        device_type: "gpu" or "cpu".
        data_type: not referenced by this helper; both variables are always
            created as ``flow.float``.
        x_shape: shape of the variable ``x``.
        case: one of "add", "sub", "mul", "div".

    Raises:
        AssertionError: on an unsupported ``device_type`` or ``case``, or when
            OneFlow and TensorFlow disagree beyond the tolerance.
    """
    import operator

    assert device_type in ["gpu", "cpu"]
    # Reject unknown operations before any graph is built; previously they
    # fell through the if/elif chain and surfaced as an unbound ``loss``
    # while the job was being traced.
    assert case in ["add", "sub", "mul", "div"], case

    # case -> (OneFlow op, equivalent Python operator applied to TF variables)
    flow_op, tf_op = {
        "add": (flow.math.add, operator.add),
        "sub": (flow.math.subtract, operator.sub),
        "mul": (flow.math.multiply, operator.mul),
        "div": (flow.math.divide, operator.truediv),
    }[case]

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    @flow.global_function(type="train", function_config=func_config)
    def ScalarAddByTensorJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0, maxval=100),
                trainable=True,
            )
            y = flow.get_variable(
                "y",
                shape=(1,),
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0, maxval=100),
                trainable=True,
            )
            loss = flow_op(x, y)
            flow.optimizer.SGD(
                flow.optimizer.PiecewiseConstantScheduler([], [1e-4]), momentum=0
            ).minimize(loss)

            # Capture values and gradients for the TensorFlow replay below.
            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch(y, test_global_storage.Setter("y"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch_diff(y, test_global_storage.Setter("y_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ScalarAddByTensorJob().get()
    # TensorFlow, on the exact operands OneFlow used
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        y = tf.Variable(test_global_storage.Get("y"))
        tf_out = tf_op(x, y)
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_y_diff = tape.gradient(tf_out, y, loss_diff)

    assert np.allclose(of_out.numpy(), tf_out.numpy(), rtol=1e-5, atol=1e-5)
    assert np.allclose(
        test_global_storage.Get("x_diff"), tf_x_diff.numpy(), rtol=1e-5, atol=1e-5
    )
    assert np.allclose(
        test_global_storage.Get("y_diff"), tf_y_diff.numpy(), rtol=1e-5, atol=1e-5
    )
Example #21
0
    def test_layer_norm(_):
        """Compare ``flow.nn.layer_norm`` with Keras ``LayerNormalization``.

        Sweeps device, dtype, ``trainable``, ``center``, ``scale``,
        ``epsilon`` and ``fuse_add_to_output`` through ``GenArgList``.  For
        every combination a TensorFlow reference (output plus gradients of
        ``z = layer_norm(x) + x``) is computed first; a OneFlow training job
        then evaluates the same expression, and watcher callbacks compare the
        input, gamma and beta gradients while the job runs.  Finally the
        forward output is compared, and for float16 with a trainable scale the
        stored gamma gradient is checked against a NumPy estimate.

        The single positional parameter takes the place of ``self`` and is
        not used.
        """
        confs = [
            {
                "x_shape": (40, 64),
                "begin_norm_axis": -1,
                "begin_params_axis": -1
            },
        ]
        arg_dict = OrderedDict()
        arg_dict["device_type"] = ["cpu", "gpu"]
        arg_dict["confs"] = confs
        arg_dict["data_type"] = ["float32", "float16"]
        arg_dict["trainable"] = [True, False]
        arg_dict["center"] = [True, False]
        arg_dict["scale"] = [True, False]
        arg_dict["epsilon"] = [1e-5, 1e-10]
        arg_dict["fuse_add_to_output"] = [True, False]

        for case in GenArgList(arg_dict):
            (
                device_type,
                confs,
                data_type,
                trainable,
                center,
                scale,
                epsilon,
                fuse_add_to_output,
            ) = case
            # Combinations that are not exercised: float16 on cpu and
            # fuse_add_to_output on cpu.
            if device_type == "cpu" and data_type == "float16":
                continue
            if device_type == "cpu" and fuse_add_to_output == True:
                continue
            x_shape = confs["x_shape"]
            begin_norm_axis = confs["begin_norm_axis"]
            begin_params_axis = confs["begin_params_axis"]
            flow.clear_default_session()
            assert (begin_norm_axis == begin_params_axis
                    ), "tf doesn't support a dedicated begin_params_axis"
            # Random inputs; for float16 the data is rounded to half precision
            # but stored as float32.
            if data_type == "float16":
                x = (np.random.uniform(low=-1, high=1, size=x_shape).astype(
                    np.float16).astype(np.float32))
            else:
                x = np.random.uniform(low=-1, high=1, size=x_shape).astype(
                    type_name_to_np_type[data_type])

            # NOTE(review): dim is never read in this block.
            dim = len(x.shape) - 2

            # TF results
            with tf.GradientTape(persistent=True) as tape:
                x_tf = tf.Variable(x)
                if data_type == "float16":
                    x_tf = tf.cast(x_tf, dtype=tf.float16)
                    # NOTE(review): the Keras float type is switched to
                    # float16 here and not restored anywhere in this block.
                    tf.keras.backend.set_floatx("float16")
                layer = tf.keras.layers.LayerNormalization(
                    axis=begin_norm_axis,
                    epsilon=epsilon,
                    center=center,
                    scale=scale,
                    beta_initializer="zeros",
                    gamma_initializer="ones",
                    beta_regularizer=None,
                    gamma_regularizer=None,
                    beta_constraint=None,
                    gamma_constraint=None,
                    trainable=trainable,
                )
                y_tf = layer(x_tf)
                # Residual add, mirrored by ``z = y + x`` in the OneFlow job.
                z_tf = y_tf + x_tf
            # Gradient of z w.r.t. the input, seeded with an all-ones
            # upstream gradient.
            if data_type == "float16":
                dx_tf = tape.gradient(
                    z_tf, x_tf,
                    tf.constant(1.0, shape=z_tf.shape, dtype=tf.float16))
            else:
                dx_tf = tape.gradient(z_tf, x_tf,
                                      tf.constant(1.0, shape=z_tf.shape))
            grad = tape.gradient(z_tf, layer.trainable_variables)
            # Pick the parameter gradients out of ``grad``; with both scale
            # and center enabled, index 0 is taken as gamma and index 1 as
            # beta.  The names stay unbound for disabled parameters.
            if trainable:
                if scale and center:
                    tf_gamma_diff = grad[0]
                    tf_beta_diff = grad[1]
                elif scale and not center:
                    tf_gamma_diff = grad[0]
                elif not scale and center:
                    tf_beta_diff = grad[0]
                else:
                    pass
            else:
                pass

            # Watcher callbacks: each receives a OneFlow gradient blob ``b``
            # and compares it with the TensorFlow reference captured above.
            def assert_grad(b):
                diff = dx_tf.numpy() - b.numpy()
                max_diff = np.max(np.abs(diff))
                if data_type == "float16":
                    tolerance = 3e-3
                else:
                    tolerance = 1e-5
                assert np.allclose(dx_tf.numpy(),
                                   b.numpy(),
                                   rtol=tolerance,
                                   atol=tolerance), (
                                       case,
                                       max_diff,
                                   )

            def assert_grad_gamma(b):
                diff = tf_gamma_diff.numpy() - b.numpy()
                max_diff = np.max(np.abs(diff))
                assert np.allclose(tf_gamma_diff.numpy(),
                                   b.numpy(),
                                   rtol=1e-4,
                                   atol=1e-4), (
                                       case,
                                       max_diff,
                                   )

            def assert_grad_beta(b):
                diff = tf_beta_diff.numpy() - b.numpy()
                max_diff = np.max(np.abs(diff))
                assert np.allclose(tf_beta_diff.numpy(),
                                   b.numpy(),
                                   rtol=1e-5,
                                   atol=1e-5), (
                                       case,
                                       max_diff,
                                   )

            # OneFlow results; for float16 the job input stays float32 and is
            # cast to half inside the job.
            if data_type == "float16":
                dtype = flow.float
            else:
                dtype = type_name_to_flow_type[data_type]

            func_config = flow.FunctionConfig()
            func_config.default_data_type(flow.float)
            func_config.enable_fuse_add_to_output(fuse_add_to_output)

            @flow.global_function(type="train", function_config=func_config)
            def test_job(x: oft.Numpy.Placeholder(x_shape, dtype=dtype)):
                # A zero-initialized trainable variable is added to the input
                # and its gradient is watched in place of the input's.
                v = flow.get_variable(
                    "x",
                    shape=x_shape,
                    dtype=dtype,
                    initializer=flow.constant_initializer(0),
                    trainable=True,
                )
                flow.watch_diff(v, assert_grad)
                x += v
                if data_type == "float16":
                    x = flow.cast(x, dtype=flow.float16)
                with flow.scope.placement(device_type, "0:0"):
                    param_shape = x.shape[begin_params_axis:]
                    gamma = None
                    beta = None
                    if center:
                        with flow.scope.namespace("LayerNorm"):
                            beta = flow.get_variable(
                                name="beta",
                                shape=param_shape,
                                dtype=flow.float,
                                initializer=flow.constant_initializer(0.0),
                                trainable=trainable,
                                model_name="beta",
                                reuse=False,
                            )
                            # The beta gradient is only watched when the
                            # parameters are trainable.
                            if trainable:
                                flow.watch_diff(beta, assert_grad_beta)
                            if data_type == "float16":
                                beta = flow.cast(beta, dtype=flow.float16)

                    if scale:
                        with flow.scope.namespace("LayerNorm"):
                            gamma = flow.get_variable(
                                name="gamma",
                                shape=param_shape,
                                dtype=flow.float,
                                initializer=flow.constant_initializer(1.0),
                                trainable=trainable,
                                model_name="gamma",
                                reuse=False,
                            )
                            if trainable:
                                # In float16 the gamma gradient is stored and
                                # checked after the job with a looser
                                # tolerance instead of against TensorFlow.
                                if data_type == "float16":
                                    flow.watch_diff(
                                        gamma,
                                        test_global_storage.Setter(
                                            "gamma_diff"))
                                else:
                                    flow.watch_diff(gamma, assert_grad_gamma)
                            if data_type == "float16":
                                gamma = flow.cast(gamma, dtype=flow.float16)
                    x = flow.identity(x)

                    y = flow.nn.layer_norm(
                        x,
                        gamma=gamma,
                        beta=beta,
                        begin_norm_axis=begin_norm_axis,
                        begin_params_axis=begin_params_axis,
                        epsilon=epsilon,
                    )
                    z = y + x
                if data_type == "float16":
                    y = flow.cast(y, dtype=flow.float)
                    z = flow.cast(z, dtype=flow.float)

                # z (with the residual add) is minimized, while y (without
                # it) is what the job returns.
                flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                    [], [1e-4]),
                                   momentum=0).minimize(z)
                return y

            y = test_job(x).get()

            # Forward check: same shape, then values with rtol=1e-5 and
            # atol=2e-3.
            assert y.numpy().shape == y_tf.numpy().shape, (
                y.numpy().shape,
                y_tf.numpy().shape,
            )
            diff = y.numpy() - y_tf.numpy()
            max_diff = np.max(np.abs(diff))
            assert np.allclose(y.numpy(), y_tf.numpy(), rtol=1e-5,
                               atol=2e-3), (
                                   case,
                                   max_diff,
                               )
            # float16 gamma gradient: compared with sum(dy * y) over axis 0
            # for an all-ones dy.  Presumably valid because gamma starts at 1
            # and beta at 0, so y equals the normalized input -- TODO confirm.
            if data_type == "float16" and trainable and scale:
                np_dy = np.ones(x.shape).astype(np.float32)
                np_gamma_diff = np.sum(np_dy * y.numpy().astype(np.float32),
                                       axis=0).astype(np.float16)
                max_diff = np.max(
                    np.abs(np_gamma_diff - test_global_storage.Get(
                        "gamma_diff").astype(np.float16)))
                assert np.allclose(
                    np_gamma_diff,
                    test_global_storage.Get("gamma_diff").astype(np.float16),
                    rtol=5e-2,
                    atol=5e-2,
                ), (
                    case,
                    max_diff,
                )
Example #22
0
def compare_with_tensorflow(device_type, params_case, dilations, data_format):
    """Compare OneFlow ``conv2d_transpose`` with TensorFlow's implementation.

    A OneFlow job runs the transposed convolution on randomly initialised
    ``x`` and ``weight`` variables. The captured values are then replayed
    through ``tf.nn.conv2d_transpose`` (always called with NHWC layout) and
    the forward output and both gradients are compared.

    Args:
        device_type: Placement device; only ``"gpu"`` is accepted.
        params_case: Tuple ``(input_shape, output_shape, padding, strides,
            kernel_size)``.
        dilations: Forwarded to ``flow.nn.conv2d_transpose`` only; the
            TensorFlow call does not receive it.
        data_format: ``"NCHW"`` or ``"NHWC"``.

    Raises:
        AssertionError: On an unsupported argument or a numerical mismatch.
    """
    input_shape, output_shape, padding, strides, kernel_size = params_case
    assert data_format in ["NCHW", "NHWC"]
    channels_first = data_format == "NCHW"
    channel_axis = 1 if channels_first else 3
    out_channels = output_shape[channel_axis]
    in_channels = input_shape[channel_axis]
    assert device_type in ["gpu"]

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.train.primary_lr(1e-4)
    func_config.train.model_update_conf(dict(naive_conf={}))

    # The weight layout follows the activation layout.
    if channels_first:
        weight_shape = (in_channels, out_channels, kernel_size, kernel_size)
    else:
        weight_shape = (in_channels, kernel_size, kernel_size, out_channels)

    def _uniform_variable(name, shape):
        # Trainable float variable drawn uniformly from [-10, 10].
        return flow.get_variable(
            name,
            shape=shape,
            dtype=flow.float,
            initializer=flow.random_uniform_initializer(minval=-10, maxval=10),
            trainable=True,
        )

    @flow.global_function(func_config)
    def DeconvJob():
        with flow.scope.placement(device_type, "0:0"):
            x = _uniform_variable("x", input_shape)
            weight = _uniform_variable("weight", weight_shape)
            loss = flow.nn.conv2d_transpose(
                x,
                weight,
                strides=strides,
                output_shape=output_shape,
                dilations=dilations,
                padding=padding,
                data_format=data_format,
            )
            flow.losses.add_loss(loss)

            # Capture every blob and its gradient for the TensorFlow replay.
            for blob, key in ((x, "x"), (weight, "weight"), (loss, "loss")):
                flow.watch(blob, test_global_storage.Setter(key))
                flow.watch_diff(blob,
                                test_global_storage.Setter(key + "_diff"))

            return loss

    def _tf_forward(x_np, w_np, tf_output_shape):
        # Run the NHWC TensorFlow reference and keep the tape for gradients.
        with tf.GradientTape(persistent=True) as tape:
            tf_x = tf.Variable(x_np)
            tf_w = tf.Variable(w_np)
            tf_result = tf.nn.conv2d_transpose(
                tf_x,
                tf_w,
                output_shape=tf_output_shape,
                strides=[1, strides, strides, 1],
                padding=padding,
                data_format="NHWC",
            )
        return tape, tf_x, tf_w, tf_result

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = DeconvJob().get()

    # Tensorflow
    if not channels_first:
        tape, tf_x, tf_w, tf_out = _tf_forward(
            test_global_storage.Get("x"),
            test_global_storage.Get("weight").transpose(1, 2, 3, 0),
            output_shape,
        )
        upstream_grad = test_global_storage.Get("loss_diff")
        tf_x_diff = tape.gradient(tf_out, tf_x, upstream_grad)
        tf_weight_diff = tape.gradient(tf_out, tf_w, upstream_grad)

        assert np.allclose(
            of_out.numpy(), tf_out.numpy(), rtol=1e-02, atol=1e-02
        ), (of_out.numpy() - tf_out.numpy())
        assert np.allclose(
            test_global_storage.Get("x_diff"),
            tf_x_diff.numpy(),
            rtol=1e-02,
            atol=1e-02,
        )
        assert np.allclose(
            test_global_storage.Get("weight_diff").transpose(1, 2, 3, 0),
            tf_weight_diff.numpy(),
            rtol=1e-2,
            atol=1e-2,
        )
        return

    # NCHW data has to be permuted to NHWC before it reaches TensorFlow.
    to_nhwc = (0, 2, 3, 1)
    tape, tf_x, tf_w, tf_out = _tf_forward(
        test_global_storage.Get("x").transpose(to_nhwc),
        test_global_storage.Get("weight").transpose(2, 3, 1, 0),
        tuple(output_shape[axis] for axis in to_nhwc),
    )
    upstream_grad = test_global_storage.Get("loss_diff").transpose(to_nhwc)
    tf_x_diff = tape.gradient(tf_out, tf_x, upstream_grad)
    tf_weight_diff = tape.gradient(tf_out, tf_w, upstream_grad)

    assert np.allclose(
        of_out.numpy().transpose(to_nhwc),
        tf_out.numpy(),
        rtol=1e-02,
        atol=1e-02,
    )
    assert np.allclose(
        test_global_storage.Get("x_diff").transpose(to_nhwc),
        tf_x_diff.numpy(),
        rtol=1e-4,
        atol=1e-4,
    )
    assert np.allclose(
        test_global_storage.Get("weight_diff").transpose(2, 3, 1, 0),
        tf_weight_diff.numpy(),
        rtol=1e-4,
        atol=1e-4,
    )
Example #23
0
def _compare_mish_with_np(input_shape, device_type, machine_ids,
                          device_counts):
    """Validate ``flow.math.mish`` and its input gradient against NumPy.

    A random float32 input is pushed through a OneFlow training job. The
    forward result is compared with ``x * tanh(log1p(exp(x)))`` and the
    gradient observed on the job input is compared with the analytic
    derivative of that expression.

    Args:
        input_shape: Shape of the random input array.
        device_type: ``"cpu"`` or ``"gpu"``.
        machine_ids: Placement string for the default placement scope.
        device_counts: Number of devices to configure for ``device_type``.

    Raises:
        AssertionError: If ``device_type`` is unsupported or a comparison
            fails.
    """
    input_1 = np.random.random(size=input_shape).astype(np.float32)

    assert device_type in ["cpu", "gpu"]

    flow.clear_default_session()
    configure_device_num = (flow.config.cpu_device_num
                            if device_type == "cpu" else
                            flow.config.gpu_device_num)
    configure_device_num(device_counts)

    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))

    # NumPy reference: mish(x) = x * tanh(softplus(x)).
    exp_x = np.exp(input_1)
    tanh_softplus = np.tanh(np.log1p(exp_x))
    np_out_mish = input_1 * tanh_softplus

    # d mish / dx = tanh(sp) + x * (1 - tanh(sp)^2) * sigmoid(x).
    _np_grad = tanh_softplus + input_1 * (1 - tanh_softplus**2) * (
        exp_x / (1 + exp_x))

    def _check_input_grad(blob: tp.Numpy):
        assert np.allclose(blob, _np_grad)

    @flow.global_function(type="train", function_config=func_config)
    def oneflow_mish(
            of_input_1: tp.Numpy.Placeholder(shape=input_1.shape),
    ) -> tp.Numpy:
        with flow.scope.placement(device_type, "0:0"):
            # Adding a zero-initialised variable gives the optimizer
            # something to train, so a gradient reaches the input.
            zero_var = flow.get_variable(
                name="x_var",
                shape=input_1.shape,
                dtype=flow.float32,
                initializer=flow.zeros_initializer(),
            )
            x_var = of_input_1 + zero_var

        flow.watch_diff(x_var, _check_input_grad)

        of_mish_out = flow.math.mish(x_var)

        with flow.scope.placement(device_type, "0:0"):
            schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
            flow.optimizer.SGD(schedule, momentum=0).minimize(of_mish_out)

        return of_mish_out

    of_out_mish = oneflow_mish(input_1)

    assert np.allclose(of_out_mish, np_out_mish)
Example #24
0
def _compare_triplet_margin_loss_with_np(
    anchor_shape,
    pos_shape,
    neg_shape,
    eps,
    margin,
    p,
    swap,
    device_type,
    machine_ids,
    device_counts,
):
    """Validate ``flow.nn.TripletMarginLoss`` against a NumPy reference.

    Random float32 anchor/positive/negative arrays are fed to a OneFlow
    training job that evaluates the loss with ``"none"``, ``"mean"`` and
    ``"sum"`` reductions. All three outputs, and the anchor gradient of the
    mean-reduced loss, are compared with NumPy computations.

    Args:
        anchor_shape: Shape of the anchor array.
        pos_shape: Shape of the positive array.
        neg_shape: Shape of the negative array.
        eps: Offset added to differences in the NumPy forward reference.
        margin: Triplet margin.
        p: Norm degree.
        swap: Whether the positive/negative distance swap is applied.
        device_type: ``"cpu"`` or ``"gpu"``.
        machine_ids: Placement string for the default placement scope.
        device_counts: Number of devices to configure for ``device_type``.

    Raises:
        AssertionError: If ``device_type`` is unsupported or a comparison
            fails.
    """
    anchor = np.random.random(size=anchor_shape).astype(np.float32)
    pos = np.random.random(size=pos_shape).astype(np.float32)
    neg = np.random.random(size=neg_shape).astype(np.float32)

    assert device_type in ["cpu", "gpu"]

    flow.clear_default_session()
    configure_device_num = (flow.config.cpu_device_num
                            if device_type == "cpu" else
                            flow.config.gpu_device_num)
    configure_device_num(device_counts)

    func_config = flow.FunctionConfig()
    func_config.default_placement_scope(
        flow.scope.placement(device_type, machine_ids))
    func_config.default_logical_view(flow.scope.consistent_view())

    def np_triplet_margin_loss(np_anchor, np_pos, np_neg, eps, np_margin, np_p,
                               swap):
        def _p_distance(lhs, rhs):
            # p-norm of (lhs - rhs + eps) along the last axis.
            powered = np.power(np.abs((lhs - rhs + eps)), np_p)
            return np.power(np.sum(powered, axis=-1), 1.0 / np_p)

        dist_pos = _p_distance(np_anchor, np_pos)
        dist_neg = _p_distance(np_anchor, np_neg)

        if swap:
            dist_neg = np.minimum(dist_neg, _p_distance(np_pos, np_neg))

        per_sample = np.maximum((np_margin + dist_pos - dist_neg), 0)

        return {
            "np_triplet_margin_loss": per_sample,
            "np_triplet_margin_loss_mean": np.mean(per_sample),
            "np_triplet_margin_loss_sum": np.sum(per_sample),
        }

    np_out_tripletloss_dict = np_triplet_margin_loss(anchor, pos, neg, eps,
                                                     margin, p, swap)

    def np_triplet_loss_diff(anchor, pos, neg, margin, p):
        def _p_distance(lhs, rhs):
            powered = np.power(np.abs((lhs - rhs + 1e-6)), p)
            return np.power(np.sum(powered, axis=-1), 1.0 / p)

        def _distance_grad(x1, x2, p, eps=1e-6):
            delta = x1 - x2
            # +1 where delta > 0, -1 where delta < 0 and 0 where delta == 0
            # (the second term cancels the -1 assigned to exact zeros).
            sign = np.where(delta > 0, 1, -1) + np.where(delta == 0, 1, 0)

            abs_val = np.abs(delta + eps)
            norm_sum = np.sum(np.power(abs_val, p), axis=1, keepdims=True)

            # eps keeps the base away from zero before the negative power.
            outer = np.power(norm_sum + eps, 1.0 / p - 1)

            grad = np.multiply(outer, np.power(abs_val, p - 1))
            grad *= sign
            # Divide by the first dimension, matching the mean reduction.
            return grad / x1.shape[0]

        hinge_input = _p_distance(anchor, pos) - _p_distance(anchor,
                                                             neg) + margin
        # max(x, 0) contributes no gradient where its argument is negative.
        inactive = np.where(hinge_input < -1e-6)

        total_grad = _distance_grad(anchor, pos, p) - _distance_grad(
            anchor, neg, p)

        for rows in inactive:
            total_grad[rows] = 0

        return {
            "np_triplet_loss_grad_mean": total_grad,
        }

    np_grad_dict = np_triplet_loss_diff(anchor, pos, neg, margin, p)

    def _check_anchor_grad(blob: tp.Numpy):
        # Evaluate the gradient
        assert np.allclose(blob,
                           np_grad_dict["np_triplet_loss_grad_mean"],
                           rtol=1e-3)

    @flow.global_function(type="train", function_config=func_config)
    def oneflow_marginloss(
        of_anchor: tp.Numpy.Placeholder(shape=anchor.shape),
        of_pos: tp.Numpy.Placeholder(shape=pos.shape),
        of_neg: tp.Numpy.Placeholder(shape=neg.shape),
    ) -> Dict[str, tp.Numpy]:
        with flow.scope.placement(device_type, "0:0"):
            zero_var = flow.get_variable(
                name="x_var",
                shape=anchor.shape,
                dtype=flow.float32,
                initializer=flow.constant_initializer(0),
            )
            x_anchor = of_anchor + zero_var

        flow.watch_diff(x_anchor, _check_anchor_grad)

        def _loss(reduction, op_name):
            return flow.nn.TripletMarginLoss(
                x_anchor,
                of_pos,
                of_neg,
                margin=margin,
                p=p,
                swap=swap,
                reduction=reduction,
                name=op_name,
            )

        triplet_marginloss = _loss("none", "of_tripletmarginloss")
        triplet_marginloss_mean = _loss("mean", "of_tripletmarginloss_mean")
        triplet_marginloss_sum = _loss("sum", "of_tripletmarginloss_sum")

        with flow.scope.placement(device_type, "0:0"):
            schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
            flow.optimizer.SGD(schedule,
                               momentum=0).minimize(triplet_marginloss_mean)

        return {
            "of_triplet_margin_loss": triplet_marginloss,
            "of_triplet_margin_loss_mean": triplet_marginloss_mean,
            "of_triplet_margin_loss_sum": triplet_marginloss_sum,
        }

    of_out_tripletloss_dict = oneflow_marginloss(anchor, pos, neg)

    for suffix in ("", "_mean", "_sum"):
        assert np.allclose(
            of_out_tripletloss_dict["of_triplet_margin_loss" + suffix],
            np_out_tripletloss_dict["np_triplet_margin_loss" + suffix],
        )
def compare_with_tensorflow(device_type, data_type, shape):
    """Compare OneFlow ``sigmoid_cross_entropy_with_logits`` with TensorFlow.

    A OneFlow training job evaluates the loss on a randomly initialised
    logits variable and externally supplied labels. The captured logits are
    replayed through ``tf.nn.sigmoid_cross_entropy_with_logits`` and both the
    loss values and the logits gradient are compared.

    Args:
        device_type: ``"gpu"`` or ``"cpu"``.
        data_type: Key into ``type_name_to_flow_type`` and
            ``type_name_to_np_type``.
        shape: Shape shared by the labels and the logits.

    Raises:
        AssertionError: If ``device_type`` is unsupported or the results
            differ by more than the 1e-5 tolerance.
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()

    dtype = type_name_to_flow_type[data_type]

    def np_sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @flow.global_function(type="train", function_config=func_config)
    def SigmoidCrossEntropyWithLogitsJob(labels: oft.Numpy.Placeholder(
        shape, dtype)):
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=shape,
                dtype=dtype,
                initializer=flow.random_uniform_initializer(minval=-10,
                                                            maxval=10),
                trainable=True,
            )
            loss = flow.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                             logits=x)

            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))
            return loss

    # fake labels: sigmoid of random integers drawn from [0, 10)
    labels = np_sigmoid(np.random.randint(0, 10, size=shape)).astype(
        type_name_to_np_type[data_type])

    # OneFlow
    of_out = SigmoidCrossEntropyWithLogitsJob(labels).get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(test_global_storage.Get("x"))
        tf_out = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                         logits=x)

    # Take the gradient outside the tape context: calling it inside a
    # persistent tape records the gradient ops on the tape as well.
    loss_diff = test_global_storage.Get("loss_diff")
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)

    tolerance = 1e-5
    assert np.allclose(of_out.numpy(),
                       tf_out.numpy(),
                       rtol=tolerance,
                       atol=tolerance)
    assert np.allclose(
        test_global_storage.Get("x_diff"),
        tf_x_diff.numpy(),
        rtol=tolerance,
        atol=tolerance,
    )
    flow.clear_default_session()
Example #26
0
def make_matmul_func(
    a_shape,
    b_shape,
    trans_a,
    trans_b,
    alpha,
    dtype,
    device_type,
    test_add_to_output,
    fuse_add_to_output,
    tf32,
):
    """Build a OneFlow training job that evaluates ``flow.matmul``.

    The returned job creates trainable variables ``a`` and ``b``, multiplies
    them, and optionally adds a third trainable variable ``c`` to the
    product. Gradients of the operands and of the result are stored in
    ``test_global_storage``.

    Args:
        a_shape: Shape of the left operand variable.
        b_shape: Shape of the right operand variable.
        trans_a: Passed to ``flow.matmul`` as the transpose flag for ``a``.
        trans_b: Passed to ``flow.matmul`` as the transpose flag for ``b``.
        alpha: Passed to ``flow.matmul`` as its fifth positional argument.
        dtype: When it is ``flow.float16`` the operands are wrapped in
            ``flow.amp_white_identity``.
        device_type: ``"gpu"`` or ``"cpu"``.
        test_add_to_output: Whether ``c`` is added to the matmul result.
        fuse_add_to_output: Forwarded to
            ``func_config.enable_fuse_add_to_output``.
        tf32: Forwarded to ``flow.config.enable_tensor_float_32_compute``.

    Returns:
        The job; calling it yields ``(a, b, add_to, c)`` as NumPy arrays.
    """
    assert device_type in ["gpu", "cpu"]

    flow.clear_default_session()
    flow.config.enable_tensor_float_32_compute(tf32)
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.enable_fuse_add_to_output(fuse_add_to_output)
    func_config.default_placement_scope(flow.scope.placement(device_type, "0:0"))

    wants_half = dtype is flow.float16

    def _trainable(name, shape, low, high):
        # Trainable float32 variable initialised uniformly in [low, high].
        return flow.get_variable(
            name,
            shape=shape,
            dtype=flow.float32,
            initializer=flow.random_uniform_initializer(minval=low, maxval=high),
            trainable=True,
        )

    def _maybe_amp(blob):
        # Wrap the blob in amp_white_identity only for float16 runs.
        return flow.amp_white_identity(blob) if wants_half else blob

    @flow.global_function(type="train", function_config=func_config)
    def matmul_job() -> typing.Tuple[
        flow.typing.Numpy, flow.typing.Numpy, flow.typing.Numpy, flow.typing.Numpy
    ]:
        a_var = _trainable("a", a_shape, 0, 1)
        b_var = _trainable("b", b_shape, 0, 1)

        flow.watch_diff(a_var, test_global_storage.Setter("a_diff"))
        flow.watch_diff(b_var, test_global_storage.Setter("b_diff"))

        lhs = _maybe_amp(a_var)
        rhs = _maybe_amp(b_var)
        c = flow.matmul(lhs, rhs, trans_a, trans_b, alpha)

        # The addend is always created, even when it is not added below.
        add_to = _trainable("c", c.shape, -1, 1)
        if test_add_to_output:
            flow.watch_diff(add_to, test_global_storage.Setter("add_to_diff"))
            add_to = _maybe_amp(add_to)
            c = c + add_to

        flow.watch_diff(c, test_global_storage.Setter("c_diff"))
        get_optimizer().minimize(c)
        return a_var, b_var, add_to, c

    return matmul_job
Example #27
0
def compare_with_tensorflow(
    device_type,
    x_shape,
    filters,
    kernel_size,
    groups,
    of_padding="SAME",
    tf_padding="SAME",
    stride_d=1,
    stride_h=1,
    stride_w=1,
    data_format="NCDHW",
    dilation_d=1,
    dilation_h=1,
    dilation_w=1,
):
    """Compare OneFlow ``conv3d`` (forward and backward) with TensorFlow.

    A OneFlow training job convolves a randomly initialised input with a
    randomly initialised weight. The captured tensors are then replayed
    through ``tf.nn.conv3d`` (always called with ``"NDHWC"`` layout) and the
    output, the input gradient and the weight gradient are compared.

    Args:
        device_type: ``"gpu"`` or ``"cpu"``.
        x_shape: Shape of the input variable, laid out per ``data_format``.
        filters: Number of output channels.
        kernel_size: Kernel extent used for all three spatial dimensions.
        groups: Number of convolution groups; must be positive and divide
            both the input channel count and ``filters``.
        of_padding: Padding passed to ``flow.nn.conv3d``.
        tf_padding: Padding passed to ``tf.nn.conv3d``.
        stride_d: Stride along depth.
        stride_h: Stride along height.
        stride_w: Stride along width.
        data_format: ``"NCDHW"`` selects channels-first; any other value is
            handled as channels-last.
        dilation_d: Dilation along depth.
        dilation_h: Dilation along height.
        dilation_w: Dilation along width.

    Raises:
        AssertionError: On invalid arguments or a numerical mismatch.
    """
    assert device_type in ["gpu", "cpu"]

    channels_first = data_format == "NCDHW"
    # The channel axis depends on the layout: 1 for NCDHW, 4 otherwise.
    in_channels = x_shape[1] if channels_first else x_shape[4]
    # Validate before building the job, where ``groups`` is used as a divisor.
    assert groups > 0, groups
    assert in_channels % groups == 0, (in_channels, groups)
    assert filters % groups == 0, (filters, groups)

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)

    func_config.default_logical_view(flow.scope.consistent_view())
    func_config.cudnn_conv_heuristic_search_algo(False)
    if channels_first:
        xy_data_transpose = (0, 2, 3, 4, 1)
        weight_data_transpose = (2, 3, 4, 1, 0)
    else:
        xy_data_transpose = (0, 1, 2, 3, 4)
        weight_data_transpose = (1, 2, 3, 4, 0)

    @flow.global_function(type="train", function_config=func_config)
    def ConvJob():
        with flow.scope.placement(device_type, "0:0"):
            x = flow.get_variable(
                "x",
                shape=x_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
                trainable=True,
            )
            if channels_first:
                weight_shape = (
                    filters,
                    in_channels // groups,
                    kernel_size,
                    kernel_size,
                    kernel_size,
                )
            else:
                weight_shape = (
                    filters,
                    kernel_size,
                    kernel_size,
                    kernel_size,
                    in_channels // groups,
                )
            weight = flow.get_variable(
                "conv-weight",
                shape=weight_shape,
                dtype=flow.float,
                initializer=flow.random_uniform_initializer(minval=0,
                                                            maxval=100),
            )
            loss = flow.nn.conv3d(
                x,
                weight,
                strides=[stride_d, stride_h, stride_w],
                padding=of_padding,
                data_format=data_format,
                dilations=[dilation_d, dilation_h, dilation_w],
                groups=groups,
            )
            flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
                [], [1e-4]),
                               momentum=0).minimize(loss)

            flow.watch(x, test_global_storage.Setter("x"))
            flow.watch_diff(x, test_global_storage.Setter("x_diff"))
            flow.watch(weight, test_global_storage.Setter("weight"))
            flow.watch_diff(weight, test_global_storage.Setter("weight_diff"))
            flow.watch(loss, test_global_storage.Setter("loss"))
            flow.watch_diff(loss, test_global_storage.Setter("loss_diff"))

            return loss

    # OneFlow
    check_point = flow.train.CheckPoint()
    check_point.init()
    of_out = ConvJob().get()
    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        x = tf.Variable(
            test_global_storage.Get("x").transpose(xy_data_transpose))
        weight = tf.Variable(
            test_global_storage.Get("weight").transpose(weight_data_transpose))

        tf_out = tf.nn.conv3d(
            x,
            weight,
            strides=[1, stride_d, stride_h, stride_w, 1],
            padding=tf_padding,
            data_format="NDHWC",
            dilations=[1, dilation_d, dilation_h, dilation_w, 1],
        )
    loss_diff = test_global_storage.Get("loss_diff").transpose(
        xy_data_transpose)
    tf_x_diff = tape.gradient(tf_out, x, loss_diff)
    tf_weight_diff = tape.gradient(tf_out, weight, loss_diff)
    assert np.allclose(
        of_out.numpy().transpose(xy_data_transpose),
        tf_out.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )
    assert np.allclose(
        test_global_storage.Get("x_diff").transpose(xy_data_transpose),
        tf_x_diff.numpy(),
        rtol=1e-4,
        atol=1e-4,
    )
    assert np.allclose(
        test_global_storage.Get("weight_diff").transpose(
            weight_data_transpose),
        tf_weight_diff.numpy(),
        rtol=1e-5,
        atol=1e-5,
    )
Example #28
0
def compare_with_tensorflow(
    device_type,
    a_shape,
    b_shape,
    transpose_a,
    transpose_b,
    data_type,
    fuse_add_to_output,
    enable_tf32,
    alpha,
):
    """Compare OneFlow ``matmul`` plus an additive term with TensorFlow.

    A OneFlow training job computes ``matmul(a, b) + c`` on randomly
    initialised trainable variables. The captured values are replayed
    through ``tf.matmul`` (scaled by ``alpha`` and offset by ``c``) and the
    output and the gradients of ``a``, ``b`` and ``c`` are compared.

    Args:
        device_type: ``"gpu"`` or ``"cpu"``.
        a_shape: Shape of the left operand.
        b_shape: Shape of the right operand.
        transpose_a: Transpose flag for ``a``.
        transpose_b: Transpose flag for ``b``.
        data_type: Type name. ``"float16"`` keeps float variables and casts
            them to float16 around the computation.
        fuse_add_to_output: Forwarded to
            ``func_config.enable_fuse_add_to_output``.
        enable_tf32: Forwarded to
            ``flow.config.enable_tensor_float_32_compute``.
        alpha: Passed to ``flow.matmul`` and used to scale the TensorFlow
            product.

    Raises:
        AssertionError: If ``device_type`` is unsupported or results differ
            by more than the tolerance (2e-3 for float16, 1e-3 otherwise).
    """
    assert device_type in ["gpu", "cpu"]
    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.enable_fuse_add_to_output(fuse_add_to_output)
    flow.config.enable_tensor_float_32_compute(enable_tf32)

    half_precision = data_type == "float16"
    # float16 runs keep float variables and cast around the computation.
    dtype = flow.float if half_precision else type_name_to_flow_type[data_type]

    def _trainable(name, shape, low, high):
        return flow.get_variable(
            name,
            shape=shape,
            dtype=dtype,
            initializer=flow.random_uniform_initializer(minval=low, maxval=high),
            trainable=True,
        )

    @flow.global_function(type="train", function_config=func_config)
    def MatmulJob():
        with flow.scope.placement(device_type, "0:0"):
            a = _trainable("a", a_shape, 0, 1)
            b = _trainable("b", b_shape, 0, 1)

            if half_precision:
                out = flow.matmul(
                    flow.cast(a, dtype=flow.float16),
                    flow.cast(b, dtype=flow.float16),
                    transpose_a,
                    transpose_b,
                    alpha,
                )
            else:
                out = flow.matmul(a, b, transpose_a, transpose_b, alpha)

            # ``c`` takes its shape from the product, so it is created next.
            c = _trainable("c", out.shape, -1, 1)

            if half_precision:
                loss = flow.cast(
                    out + flow.cast(c, dtype=flow.float16), dtype=flow.float
                )
            else:
                loss = out + c

            schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-4])
            flow.optimizer.SGD(schedule, momentum=0).minimize(loss)

            for blob, key in ((a, "a"), (b, "b"), (c, "c"), (loss, "loss")):
                flow.watch(blob, test_global_storage.Setter(key))
                flow.watch_diff(blob,
                                test_global_storage.Setter(key + "_diff"))

            return loss

    # OneFlow
    of_out = MatmulJob().get()

    # TensorFlow
    with tf.GradientTape(persistent=True) as tape:
        tf_a = tf.Variable(test_global_storage.Get("a"))
        tf_b = tf.Variable(test_global_storage.Get("b"))
        tf_c = tf.Variable(test_global_storage.Get("c"))
        if half_precision:
            # Gradients below are taken with respect to these cast tensors.
            tf_a = tf.cast(tf_a, tf.float16)
            tf_b = tf.cast(tf_b, tf.float16)
            tf_c = tf.cast(tf_c, tf.float16)
        tf_out = tf.matmul(tf_a, tf_b, transpose_a, transpose_b) * alpha + tf_c
        if half_precision:
            tf_out = tf.cast(tf_out, tf.float32)

    loss_diff = test_global_storage.Get("loss_diff")
    tf_grads = (
        ("a_diff", tape.gradient(tf_out, tf_a, loss_diff)),
        ("b_diff", tape.gradient(tf_out, tf_b, loss_diff)),
        ("c_diff", tape.gradient(tf_out, tf_c, loss_diff)),
    )

    tolerance = 2e-3 if half_precision else 1e-3
    assert np.allclose(
        of_out.numpy(), tf_out.numpy(), rtol=tolerance, atol=tolerance
    ), np.max(np.abs(of_out.numpy() - tf_out.numpy()))
    for key, tf_grad in tf_grads:
        assert np.allclose(
            test_global_storage.Get(key),
            tf_grad.numpy(),
            rtol=tolerance,
            atol=tolerance,
        )
Example #29
0
    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
import numpy as np
import oneflow as flow
import oneflow.typing as oft


# Module-level function configuration used by the job defined in the test
# below: mirrored logical view with float as the default data type.
func_config = flow.FunctionConfig()
func_config.default_logical_view(flow.scope.mirrored_view())
func_config.default_data_type(flow.float)


@flow.unittest.skip_unless_1n1d()
class TestUnpackPack(flow.unittest.TestCase):
    """Test case for a job that chains ``flow.unpack`` and ``flow.pack``."""

    def test_unpack_pack(test_case):
        """Define a job that unpacks its (3, 4) input by 3 and packs it back.

        ``test_case`` takes the place of the conventional ``self``. The test
        returns immediately when eager execution is enabled.
        """
        if flow.eager_execution_enabled():
            return

        @flow.global_function(function_config=func_config)
        def UnpackPackJob(a: oft.Numpy.Placeholder((3, 4))):
            return flow.pack(flow.unpack(a, 3), 3)

        # NOTE(review): ``x`` is created but never passed to ``UnpackPackJob``
        # in the visible code; the rest of this test appears to be missing.
        x = np.random.rand(3, 4).astype(np.float32)
Example #30
0
def _compare_scatter_nd_update_with_tf(
    test_case,
    device_type,
    params_shape,
    indices_shape,
    updates_shape,
    allow_duplicate_index=False,
    verbose=False,
):
    """Compare ``flow.tensor_scatter_nd_update`` with TensorFlow.

    TensorFlow computes the reference output and the gradients with respect
    to both the params and the updates. A OneFlow training job then performs
    the same scatter update; its gradients are checked through
    ``flow.watch_diff`` callbacks and its output is checked at the end.

    Args:
        test_case: Object providing ``assertTrue``.
        device_type: Device used for the OneFlow placement scope.
        params_shape: Shape forwarded to ``_random_inputs`` for the params.
        indices_shape: Shape forwarded to ``_random_inputs`` for the indices.
        updates_shape: Shape forwarded to ``_random_inputs`` for the updates.
        allow_duplicate_index: Forwarded to ``_random_inputs``.
        verbose: When exactly ``True``, inputs and both outputs are printed.
    """
    params, updates, indices = _random_inputs(params_shape, indices_shape,
                                              updates_shape,
                                              allow_duplicate_index)

    tf_params_const = tf.constant(params)
    tf_updates_const = tf.constant(updates)
    tf_indices_const = tf.constant(indices)

    # Gradient with respect to the params (updates held constant).
    with tf.GradientTape() as params_tape:
        tf_params_var = tf.Variable(params)
        z1 = tf.tensor_scatter_nd_update(tf_params_var, tf_indices_const,
                                         tf_updates_const)
    dz_dx = params_tape.gradient(z1, tf_params_var)

    # Gradient with respect to the updates (params held constant).
    with tf.GradientTape() as updates_tape:
        tf_updates_var = tf.Variable(updates)
        z2 = tf.tensor_scatter_nd_update(tf_params_const, tf_indices_const,
                                         tf_updates_var)
    dz_dy = updates_tape.gradient(z2, tf_updates_var)

    test_case.assertTrue(np.allclose(z1.numpy(), z2.numpy()))

    def _grad_checker(expected):
        # Build a watch_diff callback asserting equality with ``expected``.
        def _check(observed_grad):
            test_case.assertTrue(
                np.allclose(expected.numpy(), observed_grad.numpy()))

        return _check

    flow.clear_default_session()
    func_config = flow.FunctionConfig()
    func_config.default_data_type(flow.float)
    func_config.default_logical_view(flow.scope.consistent_view())

    def _zero_variable(name, shape):
        return flow.get_variable(
            name,
            shape=shape,
            dtype=flow.float32,
            initializer=flow.constant_initializer(0),
        )

    @flow.global_function(type="train", function_config=func_config)
    def scatter_nd_update_grad_fn(
            x_def: oft.Numpy.Placeholder(params.shape, dtype=flow.float),
            indices_def: oft.Numpy.Placeholder(indices.shape,
                                               dtype=flow.int32),
            y_def: oft.Numpy.Placeholder(updates.shape, dtype=flow.float),
    ):
        with flow.scope.placement(device_type, "0:0"):
            # Zero variables are added to the placeholders so gradients can
            # be observed on the sums.
            x = _zero_variable("params", params.shape)
            y = _zero_variable("updates", updates.shape)
            x = x + x_def
            y = y + y_def
            z = flow.tensor_scatter_nd_update(x, indices_def, y)
            schedule = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
            flow.optimizer.SGD(schedule, momentum=0).minimize(z)

        flow.watch_diff(x, _grad_checker(dz_dx))
        flow.watch_diff(y, _grad_checker(dz_dy))
        return z

    of_z = scatter_nd_update_grad_fn(params, indices, updates).get()

    if verbose is True:
        report = (
            ("device_type:", device_type),
            ("x:", params),
            ("y:", updates),
            ("indices:", indices),
            ("tf_z:", z1.numpy()),
            ("of_z:", of_z.numpy()),
        )
        for label, value in report:
            print(label, value)

    test_case.assertTrue(np.allclose(z1.numpy(), of_z.numpy()))