def test_static_graph(self):
    dtype = 'float64'
    positive_2_int32 = fluid.layers.fill_constant([1], "int32", 3)
    positive_2_int64 = fluid.layers.fill_constant([1], "int64", 3)
    shape_tensor_int32 = fluid.data(
        name="shape_tensor_int32", shape=[2], dtype="int32")
    shape_tensor_int64 = fluid.data(
        name="shape_tensor_int64", shape=[2], dtype="int64")

    out_1 = paddle.empty(shape=[200, 3], dtype=dtype)
    out_2 = paddle.empty(shape=shape_tensor_int32, dtype=dtype)
    out_3 = paddle.empty(shape=shape_tensor_int64, dtype=dtype)
    out_4 = paddle.empty(shape=[200, positive_2_int32], dtype=dtype)
    out_5 = paddle.empty(shape=[200, positive_2_int64], dtype=dtype)

    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    res_1, res_2, res_3, res_4, res_5 = exe.run(
        fluid.default_main_program(),
        feed={
            "shape_tensor_int32": np.array([200, 3]).astype("int32"),
            "shape_tensor_int64": np.array([200, 3]).astype("int64"),
        },
        fetch_list=[out_1, out_2, out_3, out_4, out_5])

    self.__check_out__(res_1, dtype)
    self.__check_out__(res_2, dtype)
    self.__check_out__(res_3, dtype)
    self.__check_out__(res_4, dtype)
    self.__check_out__(res_5, dtype)
def test_async_read_only_1dim(self):
    src = paddle.rand([40], dtype="float32").pin_memory()
    dst = paddle.empty([40], dtype="float32")
    buffer_ = paddle.empty([20]).pin_memory()
    with cuda.stream_guard(self.stream):
        core.async_read(src, dst, self.index, buffer_, self.empty, self.empty)
    array1 = paddle.gather(src, self.index)
    array2 = dst[:len(self.index)]
    self.assertTrue(np.allclose(array1.numpy(), array2.numpy()))
def func_setUp(self):
    self.empty = paddle.to_tensor(
        np.array([], dtype="int64"), place=paddle.CPUPlace())
    data = np.random.randn(100, 50, 50).astype("float32")
    self.src = paddle.to_tensor(data, place=paddle.CUDAPinnedPlace())
    self.dst = paddle.empty(shape=[100, 50, 50], dtype="float32")
    self.index = paddle.to_tensor(
        np.array([1, 3, 5, 7, 9], dtype="int64")).cpu()
    self.buffer = paddle.empty(
        shape=[50, 50, 50], dtype="float32").pin_memory()
    self.stream = cuda.Stream()
def test_dygraph_api_attr(self):
    paddle.disable_static()
    shape = [200, 3]
    dtype = 'float64'
    out = paddle.empty(shape=shape, dtype=dtype)
    self.__check_out__(out, dtype)
    paddle.enable_static()
def test_dygraph_api_out_3(self):
    paddle.disable_static()
    shape_data = np.array([200, 3]).astype('int64')
    shape = paddle.to_tensor(shape_data)
    out = paddle.empty(shape=shape)
    self.__check_out__(out)
    paddle.enable_static()
def test_init_process_group(self):
    with _test_eager_guard():
        paddle.distributed.init_parallel_env()
        paddle.distributed.new_group()
        group = paddle.distributed.new_group([-1, -2])
        assert group.process_group == None

        group = paddle.distributed.collective.Group(-1, 2, 0, [-1, -2])
        ret = paddle.distributed.barrier(group)
        assert ret == None

    paddle.enable_static()
    in_tensor = paddle.empty((1, 2))
    in_tensor2 = paddle.empty((1, 2))
    paddle.distributed.broadcast(in_tensor, src=0)
    paddle.distributed.all_gather([in_tensor, in_tensor2], in_tensor)
    print("test ok\n")
def init_mems(self, batch_size, d_model):
    if self.mem_len > 0:
        mems = []
        for _ in range(self.n_layer + 1):
            empty = paddle.empty(
                shape=[batch_size, 0, d_model], dtype=global_dtype)
            mems.append(empty)
        return mems
    else:
        return None
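# A minimal usage sketch for init_mems (hypothetical; the `model` object and
# `global_dtype` are assumed from the surrounding Transformer-XL style code,
# not defined here): with mem_len > 0, each memory slot starts as an
# uninitialized placeholder whose time dimension is 0 and grows as new hidden
# states are appended.
#
#   mems = model.init_mems(batch_size=4, d_model=512)
#   assert len(mems) == model.n_layer + 1
#   assert list(mems[0].shape) == [4, 0, 512]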
def test_uniform_random_inplace_op_empty_tensor(self):
    places = ['cpu']
    if fluid.core.is_compiled_with_cuda():
        places.append('gpu')
    test_shapes = [(200, 0), (0, 200)]
    for place in places:
        paddle.set_device(place)
        for test_shape in test_shapes:
            tensor = paddle.empty(shape=test_shape)
            tensor.uniform_()
            tensor_shape_np = np.array(tensor.shape)
            origin_shape = np.array(test_shape)
            self.assertTrue((tensor_shape_np == origin_shape).all())
def _alltoall(in_tensor_list, group=None, use_calc_stream=True):
    if group is not None and not group.is_member():
        return
    if in_dygraph_mode():
        group = paddle.distributed.collective._get_default_group(
        ) if group is None else group
        out = paddle.empty(in_tensor_list.shape, in_tensor_list.dtype)
        task = group.process_group.alltoall(in_tensor_list, out)
        task.wait()
        return out
    else:
        ring_id = 0 if group is None else group.id
        return paddle._C_ops.alltoall(in_tensor_list, 'use_calc_stream',
                                      use_calc_stream, 'ring_id', ring_id)
def _all_gather(tensor, group=None, use_calc_stream=True):
    if group is not None and not group.is_member():
        return
    if in_dygraph_mode():
        group = paddle.distributed.collective._get_default_group(
        ) if group is None else group
        tensor_shape = list(tensor.shape)
        tensor_shape[0] *= group.nranks
        out = paddle.empty(tensor_shape, tensor.dtype)
        task = group.process_group.all_gather(tensor, out)
        task.wait()
        return out
    else:
        ring_id = 0 if group is None else group.id
        nranks = paddle.distributed.collective._get_global_group(
        ).nranks if group is None else group.nranks
        return paddle._C_ops.c_allgather(tensor, 'use_calc_stream',
                                         use_calc_stream, 'ring_id', ring_id,
                                         'nranks', nranks)
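# Shape sketch for _all_gather (illustrative only; the nranks value and tensor
# shape below are made-up examples, and running the collective requires a
# launched distributed job): the eager branch pre-allocates the receive buffer
# with paddle.empty, scaling dim 0 by the group size before the collective
# fills it.
#
#   per-rank tensor: [8, 1024]
#   nranks = 4  ->  out buffer: paddle.empty([32, 1024], tensor.dtype)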
def _c_allgather(x, nranks, ring_id=0, use_calc_stream=False):
    op_type = 'c_allgather'

    if in_dygraph_mode():
        group = paddle.distributed.collective._get_default_group()
        tensor_shape = list(x.shape)
        tensor_shape[0] *= nranks
        out = paddle.empty(tensor_shape, x.dtype)
        task = group.process_group.all_gather(x, out)
        task.wait()
        return out

    if _in_legacy_dygraph():
        attrs = ('nranks', nranks, 'ring_id', ring_id, 'use_calc_stream',
                 use_calc_stream)
        return _C_ops.c_allgather(x, *attrs)

    helper = LayerHelper(op_type, **locals())
    out_shape = list(x.shape[:])
    if out_shape[0] > 0:
        out_shape[0] *= nranks
    out = helper.create_variable(
        name=unique_name.generate_with_ignorable_key('.'.join(
            [x.name, op_type])),
        shape=out_shape,
        dtype=x.dtype,
        type=x.type,
        persistable=x.persistable)
    helper.append_op(type=op_type,
                     inputs={'X': [x]},
                     outputs={'Out': [out]},
                     attrs={
                         'nranks': nranks,
                         'ring_id': ring_id,
                         'use_calc_stream': use_calc_stream
                     })
    return out
def test_dtype():
    shape = [200, 3]
    dtype = 'uint8'
    result = paddle.empty(shape=shape, dtype=dtype)
def setUp(self):
    self.src = paddle.rand(shape=[100, 50, 50, 5], dtype="float32")
    self.dst = paddle.empty(
        shape=[200, 50, 50, 5], dtype="float32").pin_memory()
    self.stream = cuda.Stream()
def _local_scatter(inp, pos):
    if pos.shape != [0]:
        inp_buf = paddle.index_select(inp, pos, 0)
    else:
        inp_buf = paddle.empty([0, inp.shape[1]], dtype=inp.dtype)
    return inp_buf
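# Hedged usage sketch for _local_scatter (the values below are made up for
# illustration, not taken from the original tests): rows of `inp` are gathered
# by `pos`; an empty `pos` yields a [0, hidden] placeholder so downstream ops
# still receive a valid 2-D tensor.
import numpy as np
import paddle

inp = paddle.rand([6, 4], dtype="float32")
pos = paddle.to_tensor(np.array([2, 0], dtype="int64"))
print(_local_scatter(inp, pos).shape)        # [2, 4]
empty_pos = paddle.to_tensor(np.array([], dtype="int64"))
print(_local_scatter(inp, empty_pos).shape)  # [0, 4]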
def taylor(M: int, nbar=4, sll=30, norm=True, sym: bool = True,
           dtype: str = 'float64') -> Tensor:
    """Compute a Taylor window.
    The Taylor window taper function approximates the Dolph-Chebyshev window's
    constant sidelobe level for a parameterized number of near-in sidelobes.
    Parameters:
        M(int): window size.
        nbar, sll, norm: the window-specific parameters.
        sym(bool): whether to return a symmetric window. The default value is True.
        dtype(str): the datatype of the returned tensor.
    Returns:
        Tensor: the window tensor.
    Notes:
        This function is consistent with scipy.signal.windows.taylor().
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    # Original text uses a negative sidelobe level parameter and then negates
    # it in the calculation of B. To keep consistent with other methods we
    # assume the sidelobe level parameter to be positive.
    B = 10**(sll / 20)
    A = _acosh(B) / math.pi
    s2 = nbar**2 / (A**2 + (nbar - 0.5)**2)
    ma = paddle.arange(1, nbar, dtype=dtype)

    Fm = paddle.empty((nbar - 1, ), dtype=dtype)
    signs = paddle.empty_like(ma)
    signs[::2] = 1
    signs[1::2] = -1
    m2 = ma * ma
    for mi in range(len(ma)):
        numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 /
                                        (A**2 + (ma - 0.5)**2))
        if mi == 0:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:])
        elif mi == len(ma) - 1:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi])
        else:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi]) * paddle.prod(
                1 - m2[mi] / m2[mi + 1:])

        Fm[mi] = numer / denom

    def W(n):
        return 1 + 2 * paddle.matmul(
            Fm.unsqueeze(0),
            paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2. + 0.5) / M))

    w = W(paddle.arange(0, M, dtype=dtype))

    # normalize (Note that this is not described in the original text [1])
    if norm:
        scale = 1.0 / W((M - 1) / 2)
        w *= scale
    w = w.squeeze()
    return _truncate(w, needs_trunc)
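# A small usage sketch for taylor() (assuming the private helpers
# _len_guards/_extend/_truncate/_acosh referenced above are available from the
# same module): compute a 51-point symmetric Taylor window; with norm=True the
# window is rescaled so its center value is 1.0.
#
#   w = taylor(51, nbar=4, sll=30, norm=True, sym=True, dtype='float64')
#   print(w.shape)         # [51]
#   print(float(w.max()))  # close to 1.0 because of the norm=True rescaling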
def test_fixed_random_number(self):
    if not paddle.is_compiled_with_cuda():
        return

    # Note(zhouwei): The number of threads is determined by
    # 'multiProcessorCount * maxThreadsPerMultiProcessor'. So, different GPUs
    # have different numbers of threads, which results in different random
    # values. Only test on V100 GPU here.
    if not "V100" in paddle.device.cuda.get_device_name():
        return

    print("Test Fixed Random number on V100 GPU------>")
    paddle.disable_static()
    paddle.set_device('gpu')
    paddle.seed(2021)

    x = paddle.empty([64, 3, 1024, 1024], dtype="float32")
    x.exponential_(1.0)
    x_np = x.numpy()
    expect = [
        0.80073667, 0.2249291, 0.07734892, 1.25392, 0.14013891, 0.45736602,
        1.9735607, 0.30490234, 0.57100505, 0.8115938
    ]
    self.assertTrue(np.allclose(x_np[0, 0, 0, 0:10], expect))
    expect = [
        1.4296371e+00, 9.5411777e-01, 5.2575850e-01, 2.4805880e-01,
        1.2322118e-04, 8.4604341e-01, 2.1111444e-01, 1.4143821e+00,
        2.8194717e-01, 1.1360573e+00
    ]
    self.assertTrue(np.allclose(x_np[16, 1, 300, 200:210], expect))
    expect = [
        1.3448033, 0.35146526, 1.7380928, 0.32012638, 0.10396296, 0.51344526,
        0.15308502, 0.18712929, 0.03888268, 0.20771872
    ]
    self.assertTrue(np.allclose(x_np[32, 1, 600, 500:510], expect))
    expect = [
        0.5107464, 0.20970327, 2.1986802, 1.580056, 0.31036147, 0.43966478,
        0.9056133, 0.30119267, 1.4797124, 1.4319834
    ]
    self.assertTrue(np.allclose(x_np[48, 2, 900, 800:810], expect))
    expect = [
        3.4640615, 1.1019983, 0.41195083, 0.22681557, 0.291846, 0.53617656,
        1.5791925, 2.4645927, 0.04094889, 0.9057725
    ]
    self.assertTrue(np.allclose(x_np[63, 2, 1023, 1000:1010], expect))

    x = paddle.empty([10, 10], dtype="float32")
    x.exponential_(3.0)
    x_np = x.numpy()
    expect = [
        0.02831675, 0.1691551, 0.6798956, 0.69347525, 0.0243443, 0.22180498,
        0.30574575, 0.9839696, 0.2834912, 0.59420055
    ]
    self.assertTrue(np.allclose(x_np[5, 0:10], expect))

    x = paddle.empty([16, 2, 1024, 768], dtype="float64")
    x.exponential_(0.25)
    x_np = x.numpy()
    expect = [
        10.0541229, 12.67860643, 1.09850734, 7.35289643, 2.65471225,
        3.86217432, 2.97902086, 2.92744479, 2.67927152, 0.19667352
    ]
    self.assertTrue(np.allclose(x_np[0, 0, 0, 100:110], expect))
    expect = [
        0.68328125, 3.1454553, 0.92158376, 1.95842188, 1.05296941,
        12.93242051, 5.20255978, 3.3588624, 1.57377174, 5.73194183
    ]
    self.assertTrue(np.allclose(x_np[4, 0, 300, 190:200], expect))
    expect = [
        1.37973974, 3.45036798, 7.94625406, 1.62610973, 0.31032122,
        4.13596493, 1.98494535, 1.13207041, 8.30592769, 2.81460147
    ]
    self.assertTrue(np.allclose(x_np[8, 1, 600, 300:310], expect))
    expect = [
        2.27710811, 12.25003028, 2.96409124, 4.72405788, 0.67917249,
        4.35856718, 0.46870976, 2.31120149, 9.61595826, 4.64446271
    ]
    self.assertTrue(np.allclose(x_np[12, 1, 900, 500:510], expect))
    expect = [
        0.95883744, 1.57316361, 15.22524512, 20.49559882, 13.70008548,
        3.29430143, 3.90390424, 0.9146657, 0.80972249, 0.33376219
    ]
    self.assertTrue(np.allclose(x_np[15, 1, 1023, 750:760], expect))

    x = paddle.empty([512, 768], dtype="float64")
    x.exponential_(0.3)
    x_np = x.numpy()
    expect = [
        8.79266704, 4.79596009, 2.75480243, 6.04670011, 0.35379556,
        0.76864868, 3.17428251, 0.26556859, 12.22485885, 10.51690383
    ]
    self.assertTrue(np.allclose(x_np[0, 200:210], expect))
    expect = [
        5.6341126, 0.52243418, 5.36410796, 6.83672002, 11.9243311, 5.85985566,
        5.75169548, 0.13877972, 6.1348385, 3.82436519
    ]
    self.assertTrue(np.allclose(x_np[300, 400:410], expect))
    expect = [
        4.94883581, 0.56345306, 0.85841585, 1.92287801, 6.10036656,
        1.19524847, 3.64735434, 5.19618716, 2.57467974, 3.49152791
    ]
    self.assertTrue(np.allclose(x_np[500, 700:710], expect))

    x = paddle.empty([10, 10], dtype="float64")
    x.exponential_(4.0)
    x_np = x.numpy()
    expect = [
        0.15713826, 0.56395964, 0.0680941, 0.00316643, 0.27046853, 0.19852724,
        0.12776634, 0.09642974, 0.51977551, 1.33739699
    ]
    self.assertTrue(np.allclose(x_np[5, 0:10], expect))

    paddle.enable_static()
def __init__(self,
             rank,
             local_rank,
             world_size,
             batch_size,
             resume,
             margin_softmax,
             num_classes,
             sample_rate=1.0,
             embedding_size=512,
             prefix="./"):
    super(PartialFC, self).__init__()
    self.num_classes: int = num_classes
    self.rank: int = rank
    self.local_rank: int = local_rank
    self.world_size: int = world_size
    self.batch_size: int = batch_size
    self.margin_softmax: callable = margin_softmax
    self.sample_rate: float = sample_rate
    self.embedding_size: int = embedding_size
    self.prefix: str = prefix
    self.num_local: int = num_classes // world_size + int(
        rank < num_classes % world_size)
    self.class_start: int = num_classes // world_size * rank + min(
        rank, num_classes % world_size)
    self.num_sample: int = int(self.sample_rate * self.num_local)

    self.weight_name = os.path.join(
        self.prefix, "rank:{}_softmax_weight.pkl".format(self.rank))
    self.weight_mom_name = os.path.join(
        self.prefix, "rank:{}_softmax_weight_mom.pkl".format(self.rank))

    if resume:
        try:
            self.weight: paddle.Tensor = paddle.load(self.weight_name)
            print("softmax weight resume successfully!")
        except (FileNotFoundError, KeyError, IndexError):
            self.weight = paddle.normal(
                0, 0.01, (self.num_local, self.embedding_size))
            print("softmax weight resume fail!")

        try:
            self.weight_mom: paddle.Tensor = paddle.load(self.weight_mom_name)
            print("softmax weight mom resume successfully!")
        except (FileNotFoundError, KeyError, IndexError):
            self.weight_mom: paddle.Tensor = paddle.zeros_like(self.weight)
            print("softmax weight mom resume fail!")
    else:
        self.weight = paddle.normal(0, 0.01,
                                    (self.num_local, self.embedding_size))
        self.weight_mom: paddle.Tensor = paddle.zeros_like(self.weight)
        print("softmax weight init successfully!")
        print("softmax weight mom init successfully!")

    self.index = None
    if int(self.sample_rate) == 1:
        self.update = lambda: 0
        self.sub_weight = paddle.create_parameter(
            shape=self.weight.shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.Assign(self.weight))
        self.sub_weight_mom = self.weight_mom
    else:
        self.sub_weight = paddle.create_parameter(
            shape=[1, 1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Assign(
                paddle.empty((1, 1))))
def _p2p_helper(tensor_send_next, tensor_send_prev, recv_prev, recv_next):
    global _hcg

    tensor_recv_prev = None
    tensor_recv_next = None

    # send / recv message
    recv_shape_msg = _send_recv_meta.recv_shape_message
    recv_dtype_msg = _send_recv_meta.recv_dtype_message
    recv_stop_gradient = _send_recv_meta.recv_stop_gradient

    send_shape_msg = _send_recv_meta.send_shape_message
    send_dtype_msg = _send_recv_meta.send_dtype_message

    # model parallel message
    mp_group = _hcg.get_model_parallel_group()
    mp_degree = _hcg.get_model_parallel_world_size()
    mp_rank = _hcg.get_model_parallel_rank()

    if recv_prev:
        if isinstance(recv_shape_msg, tuple):
            tensor_recv_prev = []
            for idx, shape in enumerate(recv_shape_msg):
                tmp = paddle.empty(shape=shape,
                                   dtype=number_2_dtype(recv_dtype_msg[idx]))
                tmp.stop_gradient = recv_stop_gradient[idx]
                tensor_recv_prev.append(tmp)
            tensor_recv_prev = tuple(tensor_recv_prev)
        else:
            tensor_recv_prev = paddle.empty(
                shape=recv_shape_msg, dtype=number_2_dtype(recv_dtype_msg))
            tensor_recv_prev.stop_gradient = recv_stop_gradient

    if recv_next:
        if isinstance(send_shape_msg, tuple):
            tensor_recv_next = []
            for idx, shape in enumerate(send_shape_msg):
                tensor_recv_next.append(
                    paddle.empty(shape=shape,
                                 dtype=number_2_dtype(send_dtype_msg[idx])))
            tensor_recv_next = tuple(tensor_recv_next)
        else:
            tensor_recv_next = paddle.empty(
                shape=send_shape_msg, dtype=number_2_dtype(send_dtype_msg))

    # start to p2p communicate
    if tensor_send_prev is not None:
        if isinstance(tensor_send_prev, tuple):
            for d in tensor_send_prev:
                paddle.distributed.wait(d, use_calc_stream=True)
                send_partial(d, dst=0, nranks=mp_degree, rank_id=mp_rank,
                             group=_hcg.send_prev_group, use_calc_stream=False)
        else:
            paddle.distributed.wait(tensor_send_prev, use_calc_stream=True)
            send_partial(tensor_send_prev, dst=0, nranks=mp_degree,
                         rank_id=mp_rank, group=_hcg.send_prev_group,
                         use_calc_stream=False)

    if tensor_recv_prev is not None:
        if isinstance(tensor_recv_prev, tuple):
            for d in tensor_recv_prev:
                recv_partial(d, src=0, nranks=mp_degree, rank_id=mp_rank,
                             group=_hcg.recv_prev_group, use_calc_stream=True)
                allgather_partial(d, nranks=mp_degree, rank_id=mp_rank,
                                  group=mp_group, use_calc_stream=True)
        else:
            recv_partial(tensor_recv_prev, src=0, nranks=mp_degree,
                         rank_id=mp_rank, group=_hcg.recv_prev_group,
                         use_calc_stream=True)
            allgather_partial(tensor_recv_prev, nranks=mp_degree,
                              rank_id=mp_rank, group=mp_group,
                              use_calc_stream=True)

    if tensor_send_next is not None:
        if isinstance(tensor_send_next, tuple):
            for d in tensor_send_next:
                paddle.distributed.wait(d, use_calc_stream=True)
                send_partial(d, dst=1, nranks=mp_degree, rank_id=mp_rank,
                             group=_hcg.send_next_group, use_calc_stream=False)
        else:
            paddle.distributed.wait(tensor_send_next, use_calc_stream=True)
            send_partial(tensor_send_next, dst=1, nranks=mp_degree,
                         rank_id=mp_rank, group=_hcg.send_next_group,
                         use_calc_stream=False)

    if tensor_recv_next is not None:
        if isinstance(tensor_recv_next, tuple):
            for d in tensor_recv_next:
                recv_partial(d, src=1, nranks=mp_degree, rank_id=mp_rank,
                             group=_hcg.recv_next_group, use_calc_stream=True)
                allgather_partial(d, nranks=mp_degree, rank_id=mp_rank,
                                  group=mp_group, use_calc_stream=True)
        else:
            recv_partial(tensor_recv_next, src=1, nranks=mp_degree,
                         rank_id=mp_rank, group=_hcg.recv_next_group,
                         use_calc_stream=True)
            allgather_partial(tensor_recv_next, nranks=mp_degree,
                              rank_id=mp_rank, group=mp_group,
                              use_calc_stream=True)

    return tensor_recv_prev, tensor_recv_next
def idx_empty(var):
    var_shape = list(var.shape)
    var_shape[0] = 0
    return paddle.empty(var_shape, dtype=var.dtype)
def test_dygraph_api_out(self):
    paddle.disable_static()
    shape = [200, 3]
    out = paddle.empty(shape=shape)
    self.__check_out__(out)
    paddle.enable_static()
def __init__(
        self,
        embed_dim,
        # vision
        image_resolution,
        vision_layers,
        vision_width,
        vision_patch_size,
        # text
        context_length,
        vocab_size,
        transformer_width,
        transformer_heads,
        transformer_layers,
):
    super().__init__()

    self.context_length = context_length
    self.embed_dim = embed_dim

    if isinstance(vision_layers, (tuple, list)):
        vision_heads = vision_width * 32 // 64
        self.visual = ModifiedResNet(
            layers=vision_layers,
            output_dim=embed_dim,
            heads=vision_heads,
            input_resolution=image_resolution,
            width=vision_width,
        )
    else:
        vision_heads = vision_width // 64
        self.visual = VisualTransformer(
            input_resolution=image_resolution,
            patch_size=vision_patch_size,
            width=vision_width,
            layers=vision_layers,
            heads=vision_heads,
            output_dim=embed_dim,
        )

    self.transformer = Transformer(
        width=transformer_width,
        layers=transformer_layers,
        heads=transformer_heads,
        attn_mask=self.build_attention_mask(),
    )

    self.vocab_size = vocab_size
    self.token_embedding = nn.Embedding(vocab_size, transformer_width)
    positional_embedding = self.create_parameter(
        shape=(self.context_length, transformer_width),
        default_initializer=Assign(
            paddle.empty((self.context_length, transformer_width))),
    )
    self.add_parameter("positional_embedding", positional_embedding)
    self.ln_final = nn.LayerNorm(transformer_width)

    text_projection = self.create_parameter(
        shape=(transformer_width, embed_dim),
        default_initializer=Assign(
            paddle.empty((transformer_width, embed_dim))),
    )
    self.add_parameter("text_projection", text_projection)

    logit_scale = self.create_parameter(
        shape=(1,), default_initializer=Assign(paddle.ones([1])))
    self.add_parameter("logit_scale", logit_scale)

    self.initialize_parameters()