Example 1
def dense(input,
          units,
          name,
          use_bias=False,
          trainable=True,
          reuse=False,
          const_init=False):
    name_ = name if not reuse else name + "_reuse"

    in_shape = input.shape
    in_num_axes = len(in_shape)
    assert in_num_axes >= 2

    inputs = flow.reshape(input,
                          (-1, in_shape[-1])) if in_num_axes > 2 else input

    weight = flow.get_variable(
        name="{}-weight".format(name),
        shape=(units, inputs.shape[1]),
        dtype=inputs.dtype,
        initializer=flow.random_normal_initializer(
            stddev=0.02) if not const_init else get_const_initializer(),
        trainable=trainable,
        reuse=reuse,
        model_name="weight",
    )

    out = flow.matmul(
        a=inputs,
        b=weight,
        transpose_b=True,
        name=name_ + "matmul",
    )

    if use_bias:
        bias = flow.get_variable(
            name="{}-bias".format(name),
            shape=(units, ),
            dtype=inputs.dtype,
            initializer=flow.random_normal_initializer()
            if not const_init else get_const_initializer(),
            trainable=trainable,
            reuse=reuse,
            model_name="bias",
        )
        out = flow.nn.bias_add(out, bias, name=name_ + "_bias_add")

    out = flow.reshape(out, in_shape[:-1] +
                       (units, )) if in_num_axes > 2 else out
    return out
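A minimal usage sketch for the helper above (hypothetical, not from the original repo): it assumes the single-client API and supplies a stand-in for the get_const_initializer helper the snippet references but does not define.

# Hypothetical driver for dense(); names and shapes here are illustrative only.
import numpy as np
import oneflow.compatible.single_client as flow
import oneflow.compatible.single_client.typing as tp

def get_const_initializer():
    # Stand-in for the undefined helper; a small constant fill is one plausible choice.
    return flow.constant_initializer(0.002)

@flow.global_function()
def dense_job(x: tp.Numpy.Placeholder((8, 32))) -> tp.Numpy:
    # 32 -> 16 projection with bias; the weight is drawn from N(0, 0.02).
    return dense(x, units=16, name="fc1", use_bias=True)

check_point = flow.train.CheckPoint()
check_point.init()
out = dense_job(np.random.randn(8, 32).astype(np.float32))
print(out.shape)  # (8, 16)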
Example 2
    def __init__(self, batch_size, seq_length, hidden_size, vocab_size):
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        args = get_args()
        self.embedding_dropout_rate = args.hidden_dropout
        self.use_fp16 = args.fp16

        self.wpe_initializer = flow.random_normal_initializer(
            stddev=args.init_method_std)
        self.wte_initializer = flow.random_normal_initializer(
            stddev=args.init_method_std)
Example 3
def get_linear_params(
    name,
    input_size,
    output_size,
    dtype,
    weight_initializer=flow.random_normal_initializer(stddev=0.02),
    bias_initializer=flow.constant_initializer(0.0),
    weight_parallel_dist=None,
    bias_parallel_dist=None,
):
    with flow.scope.namespace(name):
        weight = flow.get_variable(
            name="weight",
            shape=(input_size, output_size),
            dtype=dtype,
            initializer=weight_initializer,
            nd_sbp=weight_parallel_dist,
        )
        bias = flow.get_variable(
            name="bias",
            shape=(output_size, ),
            dtype=dtype,
            initializer=bias_initializer,
            nd_sbp=bias_parallel_dist,
        )

    return weight, bias
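A hedged sketch of how the returned pair might be consumed (the wrapper function below is an assumption, not part of the source; the parallel-distribution arguments are left at their defaults):

# Hypothetical consumer of get_linear_params().
def linear(x, name, input_size, output_size):
    weight, bias = get_linear_params(name, input_size, output_size, x.dtype)
    # weight is (input_size, output_size), so no transpose is needed here;
    # bias_add is assumed to use its default data format for a rank-2 input.
    return flow.nn.bias_add(flow.matmul(x, weight), bias)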
Example 4
def _AddClassficationLoss(input_blob,
                          label_blob,
                          hidden_size,
                          label_num,
                          initializer_range,
                          scope_name='classification'):
    with flow.scope.namespace(scope_name):
        output_weight_blob = flow.get_variable(
            name="output_weights",
            shape=[label_num, hidden_size],
            dtype=input_blob.dtype,
            # initializer=bert_util.CreateInitializer(initializer_range),
            initializer=flow.random_normal_initializer(
                mean=0.0, stddev=initializer_range, seed=None, dtype=None))
        output_bias_blob = flow.get_variable(
            name="output_bias",
            shape=[label_num],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(0.0),
        )
        logit_blob = flow.matmul(input_blob,
                                 output_weight_blob,
                                 transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob)
        pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logit_blob, labels=label_blob)
        loss = pre_example_loss
        return loss, pre_example_loss, logit_blob
Example 5
def broadcast_to_compatible_with_fn(
    x_def: oft.Numpy.Placeholder(x.shape, dtype=flow.float)
):
    x_var = flow.get_variable(
        "x_var",
        shape=x.shape,
        dtype=flow.float,
        initializer=flow.constant_initializer(0),
        trainable=True,
    )
    compatible_var = [
        flow.get_variable(
            "compatible_var_{}".format(i),
            shape=cp_shape,
            dtype=flow.float,
            initializer=flow.random_normal_initializer(),
            trainable=False,
        )
        for (i, cp_shape) in enumerate(compatible_shape)
    ]
    x_var = x_var + x_def
    y = flow.broadcast_to_compatible_with(x_var, compatible_var)
    flow.optimizer.SGD(
        flow.optimizer.PiecewiseConstantScheduler([], [0.001]), momentum=0
    ).minimize(y)
    flow.watch_diff(x_var, dx_watcher)
    return y
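This snippet is the inner job of a test and closes over x, compatible_shape, and dx_watcher from its enclosing scope. A plausible reconstruction of that scope (all values invented for illustration):

# Hypothetical enclosing-scope setup for the test function above.
import numpy as np

x = np.random.randn(4, 1).astype(np.float32)   # the input the placeholder mirrors
compatible_shape = [(4, 3), (1, 3)]            # hypothetical broadcast targets

def dx_watcher(dx):
    # Receives the gradient flowing back into x_var via flow.watch_diff.
    print("dx:", dx)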
Example 6
def get_variable(name):
    return flow.get_variable(
        name=name,
        shape=(10, 80, 40, 20),
        dtype=dtype,
        initializer=flow.random_normal_initializer(mean=10, stddev=1),
        distribute=flow.distribute.split(0),
    )
Example 7
def model() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(10, 801, 820, 4),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.get_variable(
            name="y",
            shape=(10, 801, 820, 4),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        return flow.math.reduce_mean(x + y)
Example 8
def model() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(4, 5),
            dtype=flow.float32,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
        )
        w = flow.get_variable(
            name="w",
            shape=(5, 6),
            dtype=flow.float32,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.matmul(x, w)
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [0.01]), momentum=0.9
        ).minimize(y)
        return y
Example 9
def add() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.get_variable(
            name="y",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.constant_initializer(5, dtype=dtype),
        )
        z = flow.get_variable(
            name="z",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.random_normal_initializer(),
        )
        return flow.math.add_n([x, y, z])
Example 10
def deconv2d(
    input,
    filters,
    size,
    name,
    strides=2,
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=False,
):
    name_ = name if not reuse else name + "_reuse"
    # weight: (in_channels, out_channels, height, width) for NCHW conv2d_transpose
    weight_shape = (input.shape[1], filters, size, size)
    output_shape = (
        input.shape[0],
        filters,  # out_channels of the transposed conv (was input.shape[1], a bug unless filters == in_channels)
        input.shape[2] * strides,
        input.shape[3] * strides,
    )

    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(
            stddev=0.02) if not const_init else get_const_initializer(),
        trainable=trainable,
    )

    output = flow.nn.conv2d_transpose(
        input,
        weight,
        strides=[strides, strides],
        output_shape=output_shape,
        padding="SAME",
        data_format="NCHW",
        name=name_,
    )

    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters, ),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
        )

        output = flow.nn.bias_add(output, bias, "NCHW")
    return output
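For the shape arithmetic above: with padding="SAME" and stride s, conv2d_transpose scales each spatial dimension by s, which is exactly what output_shape encodes. A hypothetical call, with the shapes spelled out in comments (names are illustrative only):

# Hypothetical: upsample an NCHW feature map x of shape (1, 64, 7, 7).
# weight: (in_channels=64, out_channels=128, 5, 5); stride 2 doubles H and W.
y = deconv2d(x, filters=128, size=5, name="deconv1", strides=2)
# y: (1, 128, 14, 14)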
Example 11
    def __init__(self, batch_size, seq_length, hidden_size):
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.hidden_size = hidden_size

        args = get_args()
        self.multihead_attention_fusion = args.multihead_attention_fusion
        self.num_layers = args.num_layers
        self.layers = []
        for i in range(self.num_layers):
            self.layers.append(
                TransformerLayer(
                    f"h{i}",
                    i + 1,
                    batch_size,
                    seq_length,
                    hidden_size,
                    initializer=flow.random_normal_initializer(
                        stddev=args.init_method_std),
                    output_layer_initializer=flow.random_normal_initializer(
                        stddev=(args.init_method_std /
                                math.sqrt(2.0 * self.num_layers))),
                ))
Example 12
def _get_initializer(model_name):
    if model_name == "weight":
        return flow.variance_scaling_initializer(2.0,
                                                 mode="fan_out",
                                                 distribution="random_normal",
                                                 data_format="NCHW")
    elif model_name == "bias":
        return flow.zeros_initializer()
    elif model_name == "gamma":
        return flow.ones_initializer()
    elif model_name == "beta":
        return flow.zeros_initializer()
    elif model_name == "dense_weight":
        return flow.random_normal_initializer(0, 0.01)
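A hypothetical use of this dispatcher, picking an initializer from the parameter's role (the variable name and shape are invented for illustration):

# Hypothetical: a batch-norm scale parameter initialized to ones via the dispatcher.
gamma = flow.get_variable(
    name="bn1-gamma",
    shape=(64,),
    dtype=flow.float32,
    initializer=_get_initializer("gamma"),
)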
Example 13
def broadcast_to_compatible_with_fn(
    x_def: oft.ListNumpy.Placeholder(shape=x_shape, dtype=flow.float)
):
    compatible_var = [
        flow.get_variable(
            "compatible_var_{}".format(i),
            shape=cp_shape,
            dtype=flow.float,
            initializer=flow.random_normal_initializer(),
            trainable=False,
        )
        for (i, cp_shape) in enumerate(compatible_shape)
    ]
    return flow.broadcast_to_compatible_with(x_def, compatible_var)
Example 14
def conv2d(
    input,
    filters,
    size,
    name,
    strides=2,
    padding="same",
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=True,
):
    name_ = name if not reuse else name + "_reuse"

    # weight: (out_channels, in_channels, k_h, k_w) for NCHW
    # (it would be (out_channels, k_h, k_w, in_channels) for NHWC)
    weight_shape = (filters, input.shape[1], size, size)
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(
            stddev=0.02) if not const_init else get_const_initializer(),
        trainable=trainable,
        reuse=reuse,
    )

    output = flow.nn.compat_conv2d(
        input,
        weight,
        strides=[strides, strides],
        padding=padding,
        data_format="NCHW",
        name=name_,
    )

    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters, ),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
            reuse=reuse,
        )

        output = flow.nn.bias_add(output, bias, "NCHW")
    return output
Example 15
 def _get_kernel_initializer():
     return flow.random_normal_initializer(stddev=0.01)
Example 16
def kaiming_initializer(
    shape: Sequence[int],
    distribution: str = "random_normal",
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    negative_slope: float = 0.0,
    data_format: str = "NCHW",
):
    """Initialize weight according to the method described in `Delving deep into
    rectifiers: Surpassing human-level performance on ImageNet classification`
    - He, K. et al. (2015), using a normal or uniform distribution.

    When distribution is "random_normal"

    The equation is: 

    .. math:: 

        W \\sim N(0, \\sqrt{\\frac{{2}}{{n}}})

    When distribution is "random_uniform"

    The equation is: 

    .. math:: 

        W \\sim U(-\\sqrt{\\frac{{6}}{{n}}}, \\sqrt{\\frac{{6}}{{n}}})
    
    If mode is "fan_in", the "n" is the number of input units in the weight Blob. 

    If mode is "fan_out", the "n" is the number of output units in the weight Blob. 

    if mode is "fan_avg", the "n" is the average of the number of input and output units in the weight Blob

    Args:
        shape (Sequence[int]): Blob shape.
        distribution (str, optional): 'random_normal' or 'random_uniform'. Defaults to "random_normal".
        mode (str, optional): 'fan_in', 'fan_out' or 'fan_avg'. Defaults to "fan_in".
        nonlinearity (str, optional): None, 'tanh', 'sigmoid', 'relu' or 'leaky_relu'. Defaults to "leaky_relu".
        negative_slope (float, optional): The negative slope of leaky_relu. Defaults to 0.0.
        data_format (str, optional):  'NCHW', 'NHWC'. Defaults to "NCHW".

    Raises:
        NotImplementedError: Only support normal and uniform distribution

    Returns:
        An initializer configuration: flow.random_normal_initializer or flow.random_uniform_initializer, depending on the distribution argument.

    For example: 

    Example 1: 

    .. code-block:: python 

        import oneflow.compatible.single_client as flow
        import oneflow.compatible.single_client.typing as tp


        def watch_handler(y: tp.Numpy):
            print("out", y)


        @flow.global_function()
        def kaiming_Job() -> None:
            init = flow.kaiming_initializer(shape=(3, 3), 
                                            mode="fan_avg", 
                                            nonlinearity="relu")
            blob = flow.get_variable(
                "blob-weight",
                shape=(3, 3),
                initializer=init,
                trainable=True
            )
            flow.watch(blob, watch_handler)


        checkpoint = flow.train.CheckPoint()
        checkpoint.init()
        kaiming_Job()

        # out [[ 0.54521346  0.32585594  1.3474437 ]
        #      [ 0.30729076 -0.19158769  0.2709008 ]
        #      [-0.95830524 -0.05093324  0.28178614]]

    Example 2: 

    .. code-block:: python 
    
        import oneflow.compatible.single_client as flow
        import numpy as np
        import oneflow.compatible.single_client.typing as tp


        @flow.global_function()
        def conv2d_kaiming_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
        ) -> tp.Numpy:
            initializer = flow.kaiming_initializer(shape=(1, 256, 32, 32))
            conv2d = flow.layers.conv2d(
                x,
                filters=128,
                kernel_size=3,
                strides=1,
                padding='SAME',
                kernel_initializer=initializer, 
                name="Conv2d"
            )
            return conv2d


        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
        out = conv2d_kaiming_Job(x)

        # out.shape (1, 128, 32, 32)

    """
    assert isinstance(shape, (tuple, flow.Size))
    assert len(shape) >= 2
    elem_cnt = functools.reduce(lambda a, b: a * b, shape, 1)
    assert elem_cnt > 0
    assert distribution in ["random_normal", "random_uniform"]
    assert mode in ["fan_in", "fan_out", "fan_avg"]
    assert nonlinearity in [None, "tanh", "sigmoid", "relu", "leaky_relu"]
    assert data_format in ["NCHW", "NHWC"]
    fan = _CalcFan(shape, mode, _get_data_format(data_format))
    gain = CalcGain(nonlinearity, negative_slope)
    std = gain / math.sqrt(fan)
    if distribution == "random_normal":
        return flow.random_normal_initializer(0.0, std)
    elif distribution == "random_uniform":
        bound = math.sqrt(3.0) * std
        return flow.random_uniform_initializer(-bound, bound)
    else:
        raise NotImplementedError(
            "Only support normal and uniform distribution")