Ejemplo n.º 1
0
    def __init__(
        self,
        layer_idx,
        normalized_shape,
        eps=1e-5,
    ):
        """Build broadcast scale (gamma) and shift (beta) parameters of
        `normalized_shape`, placed on the stage given by `layer_idx`."""
        super().__init__()
        self.normalized_shape = normalized_shape
        self.epsilon = eps

        # Both parameters share the same placement and fully-broadcast sbp.
        placement = dist.get_layer_placement(layer_idx)
        nd_sbp = dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])

        # Shift parameter, initialized to zero.
        self.beta = flow.nn.Parameter(
            flow.empty(normalized_shape, dtype=flow.float32,
                       placement=placement, sbp=nd_sbp))
        flow.nn.init.zeros_(self.beta)

        # Scale parameter, initialized to one.
        self.gamma = flow.nn.Parameter(
            flow.empty(normalized_shape, dtype=flow.float32,
                       placement=placement, sbp=nd_sbp))
        flow.nn.init.ones_(self.gamma)
Ejemplo n.º 2
0
    def __init__(self,
                 layer_idx,
                 input_size,
                 output_size,
                 init_method,
                 need_gelu=False):
        """Linear layer of shape (input_size, output_size) with its weight
        sharded along the output dimension, placed on stage `layer_idx`."""
        super().__init__()
        self.need_gelu = need_gelu
        self.bias_gelu_fusion = get_args().bias_gelu_fusion

        placement = dist.get_layer_placement(layer_idx)

        # col parallel linear weight sbp: [B, S(1)] -- output dim sharded.
        self.weight = flow.nn.Parameter(
            flow.empty(
                (input_size, output_size),
                dtype=flow.float32,
                placement=placement,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(1)]),
            ))
        init_method(self.weight)

        # col parallel linear bias sbp: [B, S(0)] -- sharded to match the
        # weight's output split.
        self.bias = flow.nn.Parameter(
            flow.empty(
                (output_size,),
                dtype=flow.float32,
                placement=placement,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)]),
            ))
        flow.nn.init.zeros_(self.bias)
Ejemplo n.º 3
0
    def __init__(self, seq_length, hidden_size, vocab_size):
        """Token and position embedding tables, both placed on pipeline stage 0."""
        super().__init__()
        self.seq_length = seq_length
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size

        args = get_args()
        self.dropout = flow.nn.Dropout(p=args.hidden_dropout)
        self.enable_amp = args.fp16

        first_stage = dist.get_layer_placement(0)

        # Word token embedding (vocab_size, hidden_size); sbp [B, S(0)]
        # shards the vocabulary dimension across ranks.
        self.wte = flow.nn.Parameter(
            flow.empty(
                (self.vocab_size, self.hidden_size),
                dtype=flow.float32,
                placement=first_stage,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)]),
            ))

        # Word position embedding (seq_len, hidden_size); sbp [B, B]
        # replicates the table on every rank.
        self.wpe = flow.nn.Parameter(
            flow.empty(
                (self.seq_length, self.hidden_size),
                dtype=flow.float32,
                placement=first_stage,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
            ))

        flow.nn.init.normal_(self.wte, std=args.init_method_std)
        flow.nn.init.normal_(self.wpe, std=args.init_method_std)
Ejemplo n.º 4
0
    def test_consistent_tensor_2d_sbp_init(test_case):
        """Initialize two 2D-sbp parameters on a 2x2 cuda placement."""
        vocab_sz = 10
        hidden_sz = 4
        seq_len = 6

        grid = flow.placement("cuda", {0: [0, 1, 2, 3]}, (2, 2))

        # Token-table-like parameter: broadcast on the first mesh axis,
        # split(0) on the second.
        wte = flow.nn.Parameter(
            flow.empty(
                (vocab_sz, hidden_sz),
                dtype=flow.float32,
                placement=grid,
                sbp=[flow.sbp.broadcast, flow.sbp.split(0)],
            ))

        # Position-table-like parameter: fully replicated.
        wpe = flow.nn.Parameter(
            flow.empty(
                (seq_len, hidden_sz),
                dtype=flow.float32,
                placement=grid,
                sbp=[flow.sbp.broadcast, flow.sbp.broadcast],
            ))

        flow.nn.init.normal_(wte, std=0.02)
        flow.nn.init.normal_(wpe, std=0.02)
Ejemplo n.º 5
0
        def func(t):
            """Build an uninitialized eager tensor matching t's shape, dtype
            and (consistent or local) device layout."""
            # Force eager construction even when called under lazy mode.
            with oneflow._oneflow_internal.lazy_mode.guard(False):
                if t.is_consistent:
                    return oneflow.empty(
                        t.shape, dtype=t.dtype, placement=t.placement, sbp=t.sbp,
                    )
                return oneflow.empty(t.shape, dtype=t.dtype, device=t.device)
Ejemplo n.º 6
0
def drop_connect(x, drop_ratio):
    """Zero out whole samples of `x` in place with probability `drop_ratio`,
    rescaling survivors by 1/keep so the expected value is preserved.

    NOTE(review): assumes x is 4D (N, C, H, W) so the (N,1,1,1) mask
    broadcasts per-sample -- confirm with callers. Mutates and returns x.
    """
    keep_prob = 1.0 - drop_ratio
    noise = oneflow.empty([x.shape[0], 1, 1, 1], dtype=x.dtype, device=x.device)
    noise.bernoulli_(keep_prob)
    x.div_(keep_prob)
    x.mul_(noise)
    return x
Ejemplo n.º 7
0
    def test_to_placement(test_case):
        """Broadcast a tensor from rank 0, then move it cpu -> 2-rank cpu ->
        2-rank cuda through graph modules, checking placement/sbp/values."""
        rank = flow.env.get_rank()

        # Only rank 0 holds real data; rank 1 contributes an empty placeholder.
        if rank == 0:
            local_x = flow.ones((2, 3), dtype=flow.float32)
        elif rank == 1:
            local_x = flow.empty(tuple())
        else:
            raise ValueError

        c_x = local_x.to_consistent(placement=flow.placement("cpu", {0: [0]}),
                                    sbp=flow.sbp.broadcast)

        cpu_pair = flow.placement("cpu", {0: [0, 1]})
        y1 = MyGraph(ToPlacementModule(cpu_pair))(c_x)

        test_case.assertTrue(y1.placement == cpu_pair)
        test_case.assertTrue(y1.sbp[0] == flow.sbp.broadcast)
        test_case.assertTrue(y1.to_local().numpy().mean() == 1.0)

        cuda_pair = flow.placement("cuda", {0: [0, 1]})
        y2 = MyGraph(ToPlacementModule(cuda_pair))(y1)

        test_case.assertTrue(y2.placement == cuda_pair)
        test_case.assertTrue(y2.sbp[0] == flow.sbp.broadcast)
        test_case.assertTrue(y2.to_local().numpy().mean() == 1.0)
Ejemplo n.º 8
0
def gather(tensor, gather_list=None, dst=0):
    """
    Gathers a list of tensors in a single process.

    Args:
        tensor (Tensor): Input tensor.
        gather_list (list[Tensor], optional): List of appropriately-sized
            tensors to use for gathered data (default is None, must be specified
            on the destination rank)
        dst (int, optional): Destination rank (default is 0)

    """
    # NOTE(review): `dst` is accepted but never read below -- every rank ends
    # up with the gathered tensors; confirm whether a dst-only gather was
    # intended.
    assert isinstance(tensor, flow._oneflow_internal.Tensor)
    assert tensor.is_local
    shape = tensor.shape
    dtype = tensor.dtype
    # Add a length-1 leading axis so each rank's contribution occupies one
    # slot along dim 0 in the consistent view built below.
    tensor = tensor.expand(*([1] + list(shape)))
    device_type = tensor.device.type
    placement = flow.env.all_device_placement(device_type)
    # split(0) interprets each rank's local tensor as one slice along dim 0;
    # converting to broadcast then materializes the full stacked tensor on
    # every rank.
    tensor = tensor.to_consistent(placement=placement,
                                  sbp=flow.sbp.split(0)).to_consistent(
                                      placement=placement,
                                      sbp=flow.sbp.broadcast)

    if gather_list is None:
        # Placeholder entries only -- each element is rebound in the loop
        # below, so these empties are never read.
        gather_list = [
            flow.empty(shape, dtype=dtype)
            for _ in range(flow.env.get_world_size())
        ]

    assert gather_list is not None
    assert isinstance(gather_list, list)
    assert len(gather_list) == flow.env.get_world_size()
    # Copy one local slice per rank back into the output list.
    for i in range(tensor.shape[0]):
        gather_list[i] = tensor[i].to_local()
Ejemplo n.º 9
0
 def __init__(
     self, num_parameters: int = 1, init: float = 0.25, device=None, dtype=None
 ) -> None:
     """Learnable per-channel slope parameter, every entry filled with `init`."""
     super().__init__()
     self.num_parameters = num_parameters
     slope = flow.empty(num_parameters, dtype=dtype, device=device)
     self.weight = flow.nn.Parameter(slope.fill_(init))
Ejemplo n.º 10
0
    def __init__(self, embedding_size, num_classes, cfg, partial_fc=False, bias=False):
        """FC7 classification head with a (num_classes, embedding_size) weight
        and partial-fc sampling sizes derived from cfg."""
        super(FC7, self).__init__()
        self.weight = flow.nn.Parameter(flow.empty(num_classes, embedding_size))
        flow.nn.init.normal_(self.weight, mean=0, std=0.01)

        self.partial_fc = partial_fc

        # NOTE(review): the weight uses the `num_classes` argument while the
        # sampling math below uses `cfg.num_classes` -- confirm the two always
        # agree. `bias` is accepted but unused in this constructor.
        size = flow.env.get_world_size()
        # Classes handled per rank, rounded up so every class is covered.
        num_local = (cfg.num_classes + size - 1) // size
        self.num_sample = int(num_local * cfg.sample_rate)
        self.total_num_sample = self.num_sample * size
Ejemplo n.º 11
0
def _test_local_empty(test_case, shape, dtype, device, requires_grad):
    """Check that flow.empty yields a local tensor with the requested
    shape, dtype, device and (for float32) requires_grad flag."""
    # Gradients are only requested for float32 here; other dtypes get False.
    want_grad = requires_grad if dtype == flow.float32 else False
    x = flow.empty(shape, dtype=dtype, device=flow.device(device),
                   requires_grad=want_grad)
    test_case.assertFalse(x.is_global)
    test_case.assertEqual(x.shape, flow.Size(shape))
    test_case.assertEqual(x.dtype, dtype)
    test_case.assertEqual(x.device, flow.device(device))
    if dtype == flow.float32:
        test_case.assertEqual(x.requires_grad, requires_grad)
Ejemplo n.º 12
0
        def build_real_output(fake_eager_out):
            """Build an eager placeholder whose shape comes from the compiled
            job proto, preserving the fake output's dtype and device layout."""
            lbn = out2name[fake_eager_out] + "/out"
            assert lbn in self._full_job_proto.helper.lbn2logical_blob_desc
            blob_conf = self._full_job_proto.helper.lbn2logical_blob_desc[lbn]

            real_shape = tuple(blob_conf.shape.dim)
            dtype = fake_eager_out.dtype

            # Construct eagerly even though we may be inside a lazy graph.
            with oneflow._oneflow_internal.lazy_mode.guard(False):
                if fake_eager_out.is_global:
                    return oneflow.empty(
                        real_shape,
                        dtype=dtype,
                        placement=fake_eager_out.placement,
                        sbp=fake_eager_out.sbp,
                    )
                return oneflow.empty(real_shape, dtype=dtype,
                                     device=fake_eager_out.device)
Ejemplo n.º 13
0
    def __init__(
        self,
        layer_idx,
        input_size,
        output_size,
        init_method,
        dropout_rate,
    ):
        """Parallel linear layer whose weight is sharded along the input
        dimension, with optional fused bias+dropout."""
        super().__init__()
        self.dropout_rate = dropout_rate

        self.bias_dropout_fusion = get_args().bias_dropout_fusion
        # A standalone dropout module is only needed when fusion is off.
        if not self.bias_dropout_fusion:
            self.dropout = flow.nn.Dropout(p=dropout_rate)

        placement = dist.get_layer_placement(layer_idx)

        # weight sbp: [B, S(0)] -- first (input) dimension sharded.
        self.weight = flow.nn.Parameter(
            flow.empty(
                (input_size, output_size),
                dtype=flow.float32,
                placement=placement,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.split(0)]),
            ))
        init_method(self.weight)

        # bias sbp: [B, B] -- replicated on every rank.
        self.bias = flow.nn.Parameter(
            flow.empty(
                (output_size,),
                dtype=flow.float32,
                placement=placement,
                sbp=dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
            ))
        flow.nn.init.zeros_(self.bias)
Ejemplo n.º 14
0
def _test_consistent_empty(test_case, shape, dtype, placement, sbp, requires_grad):
    """Check that flow.empty yields a consistent tensor with the requested
    shape, dtype, placement, sbp and (for float32) requires_grad flag."""
    placement = flow.placement(placement, {0: [0]})
    # Gradients are only requested for float32 here; other dtypes get False.
    want_grad = requires_grad if dtype == flow.float32 else False
    x = flow.empty(shape, dtype=dtype, placement=placement, sbp=sbp,
                   requires_grad=want_grad)
    test_case.assertTrue(x.is_consistent)
    test_case.assertEqual(x.shape, flow.Size(shape))
    test_case.assertEqual(x.dtype, dtype)
    test_case.assertEqual(x.placement, placement)
    test_case.assertEqual(x.sbp[0], sbp)
    if dtype == flow.float32:
        test_case.assertEqual(x.requires_grad, requires_grad)
Ejemplo n.º 15
0
    def test_save_and_load(self):
        """Compile a graph, save it, reload the serialized job from its MLIR
        file, and check the op lists match op-by-op."""
        placement_arg = {
            "placement": flow.placement("cuda", ranks=[0]),
            "sbp": flow.sbp.broadcast,
        }
        graph = InferGraph(placement_arg)
        image_placeholder = flow.empty(
            (1, 3, 224, 224),
            dtype=flow.float32,
            placement=flow.placement("cpu", ranks=[0]),
            sbp=flow.sbp.broadcast,
        )
        graph._compile(image_placeholder)

        saved_path = os.path.join("saved_model", graph.name)
        os.makedirs(saved_path, exist_ok=True)
        flow.save(graph, saved_path)

        saved_ir_path = os.path.join(saved_path, "model.mlir")
        serialized_job = oneflow._oneflow_internal.nn.graph.LoadSerializedJobFromIR(
            saved_ir_path)
        job = job_pb.Job()
        job.ParseFromString(serialized_job)

        # Align both op lists by name before comparing.
        loaded_ops = sorted(job.net.op, key=lambda op: op.name)
        original_ops = sorted(graph._forward_job_proto.net.op,
                              key=lambda op: op.name)

        for (loaded_op, original_op) in zip(loaded_ops, original_ops):
            # TODO: convert loc in MLIR
            original_op.ClearField("loc")
            self.assertTrue(loaded_op == original_op,
                            {"op": loaded_op, "op_": original_op})
def _test_reshape_like_impl(test_case, pair, placement, in_sbp, like_sbp):
    """Check flow._C.reshape_like against numpy.reshape for one
    (shape, to_shape) pair under the given placement/sbp combination."""
    src_shape, dst_shape = pair

    src_np = np.random.rand(*src_shape)
    expected = src_np.reshape(dst_shape)

    cpu_bcast = flow.env.all_device_placement("cpu")
    # Route both tensors through a cpu broadcast view before applying the
    # target placement/sbp.
    y = flow.tensor(src_np).to_global(cpu_bcast, flow.sbp.broadcast)
    y = y.to_global(placement=placement, sbp=in_sbp)
    like = flow.empty(dst_shape).to_global(cpu_bcast, flow.sbp.broadcast)
    like = like.to_global(placement=placement, sbp=like_sbp)

    z = flow._C.reshape_like(y, like)
    bcast_all = [flow.sbp.broadcast for _ in range(len(placement.ranks.shape))]
    local_z = z.to_global(placement, sbp=bcast_all).to_local()
    if flow.env.get_rank() == 0:
        test_case.assertTrue(np.array_equal(expected, local_z.numpy()))
    def test_save_and_load(self):
        """Compile a graph, save it, and parse the serialized job back from
        the saved MLIR file."""
        placement_arg = {
            "placement": flow.placement("cuda", ranks=[0]),
            "sbp": flow.sbp.broadcast,
        }
        graph = InferGraph(placement_arg)
        image_placeholder = flow.empty(
            (1, 3, 224, 224),
            dtype=flow.float32,
            placement=flow.placement("cpu", ranks=[0]),
            sbp=flow.sbp.broadcast,
        )
        graph._compile(image_placeholder)

        saved_path = os.path.join("saved_model", graph.name)
        os.makedirs(saved_path, exist_ok=True)
        flow.save(graph, saved_path)

        saved_ir_path = os.path.join(saved_path, "model.mlir")
        serialized_job = oneflow._oneflow_internal.nn.graph.LoadSerializedJobFromIR(
            saved_ir_path)
        job = job_pb.Job()
        job.ParseFromString(serialized_job)
Ejemplo n.º 18
0
def load_bin_cv(path, image_size):
    """Load a pickled verification set into two tensors (original and
    horizontally flipped images) plus the same-identity label list."""
    # NOTE(security): pickle.load can execute arbitrary code from a malicious
    # file -- only load .bin files from trusted sources.
    bins, issame_list = pickle.load(open(path, "rb"), encoding="bytes")
    data_list = []
    # One tensor per flip variant: index 0 = original, index 1 = flipped.
    for flip in [0, 1]:
        data = flow.empty(
            len(issame_list) * 2, 3, image_size[0], image_size[1])
        data_list.append(data)
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        # cv2 decodes to BGR; [:, :, ::-1] reverses the channel axis to RGB.
        img_ori = cv.imdecode(_bin, cv.IMREAD_COLOR)[:, :, ::-1]

        for flip in [0, 1]:
            img = img_ori.copy()
            if flip == 1:
                img = cv.flip(img, 1)
            # HWC -> CHW, then scale: 0.00784313725 ~= 1/127.5, mapping
            # pixel values to roughly [-1, 1].
            img = np.array(img).transpose((2, 0, 1))
            img = (img - 127.5) * 0.00784313725
            data_list[flip][i] = flow.tensor(img, dtype=flow.float)

        if i % 1000 == 0:
            logging.info("loading bin:%d", i)
    logging.info(data_list[0].shape)
    return data_list, issame_list
Ejemplo n.º 19
0
 def build(self):
     """Graph build step: emit an uninitialized consistent tensor using the
     enclosing scope's `shape`, `placement` and `sbp`."""
     return flow.empty(*shape, placement=placement, sbp=sbp)
Ejemplo n.º 20
0
def pad_packed_sequence(
    sequence: PackedSequence,
    batch_first: bool = False,
    padding_value: float = 0.0,
    total_length: Optional[int] = None,
) -> Tuple[Tensor, Tensor]:
    """The interface is consistent with PyTorch.
    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.utils.rnn.pad_packed_sequence.html.

    Pads a packed batch of variable length sequences.

    It is an inverse operation to :func:`pack_padded_sequence`.

    The returned Tensor's data will be of size ``T x B x *``, where `T` is the length
    of the longest sequence and `B` is the batch size. If ``batch_first`` is True,
    the data will be transposed into ``B x T x *`` format.

    .. note::
        :attr:`total_length` is useful to implement the
        ``pack sequence -> recurrent network -> unpack sequence`` pattern in a
        :class:`~oneflow.nn.Module` wrapped in :class:`~oneflow.nn.DataParallel`.
        See :ref:`this FAQ section <pack-rnn-unpack-with-data-parallelism>` for
        details.

    Args:
        sequence (PackedSequence): batch to pad
        batch_first (bool, optional): if ``True``, the output will be in ``B x T x *``
            format.
        padding_value (float, optional): values for padded elements.
        total_length (int, optional): if not ``None``, the output will be padded to
            have length :attr:`total_length`. This method will throw :class:`ValueError`
            if :attr:`total_length` is less than the max sequence length in
            :attr:`sequence`.

    Returns:
        Tuple of Tensor containing the padded sequence, and a Tensor
        containing the list of lengths of each sequence in the batch.
        Batch elements will be re-ordered as they were ordered originally when
        the batch was passed to ``pack_padded_sequence`` or ``pack_sequence``.

    For example:

    .. code-block:: python

        >>> from oneflow.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
        >>> import oneflow as flow

        >>> seq = flow.tensor([[4,5,6], [1,2,0], [3,0,0]])
        >>> lens = [3, 2, 1]
        >>> packed = pack_padded_sequence(seq, lens, batch_first=True, enforce_sorted=True)
        >>> packed.data
        tensor([4, 1, 3, 5, 2, 6], dtype=oneflow.int64)
        >>> packed.batch_sizes
        tensor([3, 2, 1], dtype=oneflow.int64)
        >>> seq_unpacked, lens_unpacked = pad_packed_sequence(packed, batch_first=True)
        >>> seq_unpacked
        tensor([[4, 5, 6],
                [1, 2, 0],
                [3, 0, 0]], dtype=oneflow.int64)
        >>> lens_unpacked
        tensor([3., 2., 1.], dtype=oneflow.float32)


    """
    max_seq_length = sequence.batch_sizes.shape[0]
    if total_length is not None:
        if total_length < max_seq_length:
            raise ValueError(
                "Expected total_length to be at least the length "
                "of the longest sequence in input, but got "
                "total_length={} and max sequence length being {}".format(
                    total_length, max_seq_length))
    else:
        total_length = max_seq_length

    batch_sizes_t = sequence.batch_sizes.contiguous()
    assert (
        len(batch_sizes_t.shape) == 1 and batch_sizes_t.device.type == "cpu"
        and batch_sizes_t.dtype == flow.int64
    ), f"'sequence.batch_sizes' should be a 1D CPU int64 tensor, but got {len(batch_sizes_t.shape)} D {batch_sizes_t.device.type} {batch_sizes_t.dtype} tensor"

    batch_sizes = batch_sizes_t.numpy()
    max_batch_size = int(batch_sizes[0])
    max_real_seq_length = batch_sizes_t.shape[0]
    max_seq_length = max_real_seq_length
    if total_length > 0:
        assert (
            total_length >= max_seq_length
        ), f"Expected total_length to be at least the length of the longest sequence in input, but got total_length={total_length} and max sequence length being {max_seq_length}"
        max_seq_length = total_length

    # Output shape == [max_seq_length, max_batch_size, *sequence.data.size()[1:]]
    output_size = []
    output_size.append(max_seq_length)
    output_size.append(max_batch_size)
    output_size = output_size + list(sequence.data.shape[1:])
    padded_output = flow.full(
        output_size,
        padding_value,
        dtype=sequence.data.dtype,
        device=sequence.data.device,
        requires_grad=sequence.data.requires_grad,
    )

    # This will be modified at every iteration, but we reserve memory for it
    # now. Copy so the in-place edits below do not alias output_size.
    tmp_view_size = list(output_size)  # == [-1, -1, *sequence.data.size()[1:]]
    lengths = flow.empty(max_batch_size)
    data_offset = 0
    prev_batch_size = max_batch_size
    prev_i = 0
    lengths_idx = max_batch_size - 1
    # Walk the batch_sizes staircase; whenever the batch size drops, copy the
    # finished run of timesteps into the padded output and record lengths for
    # the sequences that just ended.
    for i in range(max_real_seq_length + 1):
        batch_size = batch_sizes[i] if i != max_real_seq_length else 0
        if batch_size != prev_batch_size:
            l = prev_batch_size * (i - prev_i)
            tmp_view_size[0] = i - prev_i
            tmp_view_size[1] = prev_batch_size
            padded_output[
                prev_i:i,
                0:prev_batch_size] = sequence.data[data_offset:data_offset +
                                                   l].view(tmp_view_size)
            data_offset += l
            prev_i = i

        dec = prev_batch_size - batch_size
        if dec > 0:
            # `dec` sequences end at timestep i; fill their lengths from the
            # back of the (sorted, descending-length) batch.
            for j in range(dec):
                lengths[lengths_idx] = i
                lengths_idx = lengths_idx - 1
        prev_batch_size = batch_size

    if batch_first:
        # BUG FIX: this was previously a tuple, so .append raised
        # AttributeError whenever sequence.data had trailing feature dims
        # (padded_output.ndim > 2). A list supports the appends.
        permute_dims = [1, 0]
        for i in range(2, padded_output.ndim):
            permute_dims.append(i)
        padded_output = padded_output.permute(permute_dims)

    # Undo the sort applied by pack_padded_sequence/pack_sequence, if any.
    unsorted_indices = sequence.unsorted_indices
    if unsorted_indices is not None:
        batch_dim = 0 if batch_first else 1
        return (
            padded_output.index_select(batch_dim, unsorted_indices),
            lengths[unsorted_indices],
        )
    return padded_output, lengths
Ejemplo n.º 21
0
def _test_consistent_empty(test_case, shape, placement, sbp):
    """Check flow.empty with placement/sbp yields a tensor of `shape` carrying
    exactly that placement and sbp."""
    tensor = flow.empty(*shape, placement=placement, sbp=sbp)

    test_case.assertEqual(tensor.shape, flow.Size(shape))
    test_case.assertEqual(tensor.sbp, sbp)
    test_case.assertEqual(tensor.placement, placement)