def run_trainer(self, args):
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    endpoints = args["endpoints"].split(",")
    rank = args["trainerid"]
    current_endpoint = args["currentendpoint"]
    nranks = 2
    paddle.distributed.init_parallel_env()
    if args['backend'] == 'nccl':
        device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
        place = fluid.CUDAPlace(
            device_id)  #if args.use_gpu else fluid.CPUPlace()
    elif args['backend'] == 'bkcl':
        device_id = int(os.getenv("FLAGS_selected_xpus", "0"))
        place = fluid.XPUPlace(device_id)
    else:
        place = fluid.CPUPlace()
    in_feat = 2
    n_expert = 2
    world_size = 2
    tot_expert = n_expert * world_size
    paddle.disable_static()
    # Call paddle.distributed.alltoall() under legacy dygraph
    _enable_legacy_dygraph()
    np.random.seed(os.getpid())
    # how many rows this rank routes to each of the tot_expert global experts
    local_expert_count = np.random.randint(1, 4,
                                           size=tot_expert).astype("int64")
    local_expert_count = paddle.to_tensor(local_expert_count)
    global_expert_count = []
    # exchange the per-expert counts with the peer rank: each rank ends up
    # with the counts routed to its own experts by every rank
    paddle.distributed.alltoall(
        paddle.split(local_expert_count, 2, axis=0), global_expert_count)
    global_expert_count = paddle.concat(global_expert_count, axis=0)
    global_expert_count = global_expert_count.numpy()
    local_expert_count = local_expert_count.numpy()
    # total number of rows this rank's experts will process in the forward pass
    fwd_expert_count = sum(global_expert_count)
    np.random.seed(os.getpid())
    local_input_buf = np.random.rand(fwd_expert_count,
                                     in_feat).astype("float32")
    paddle.enable_static()
    if args['static_mode']:
        result = self.get_model(train_prog, startup_prog, rank)
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        fetch_list = []
        for elem in result:
            fetch_list.append(elem.name)
        out = exe.run(train_prog,
                      feed={
                          'local_expert_count': local_expert_count,
                          'global_expert_count': global_expert_count,
                          'local_input_buf': local_input_buf
                      },
                      fetch_list=fetch_list)
    sys.stdout.buffer.write(pickle.dumps(out))
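# A minimal sketch (not part of the test above) of what the alltoall exchange
# computes in this 2-rank setup, written with plain NumPy so it can run without
# a distributed launch. The function and variable names are hypothetical; it
# only illustrates the routing pattern: chunk j of rank i's counts ends up on
# rank j, so each rank gathers the counts destined for its own experts.
import numpy as np


def simulate_alltoall_2ranks(counts_rank0, counts_rank1, n_expert=2):
    # split each rank's counts into one chunk per destination rank
    chunks0 = [counts_rank0[:n_expert], counts_rank0[n_expert:]]
    chunks1 = [counts_rank1[:n_expert], counts_rank1[n_expert:]]
    # rank r receives chunk r from every rank and concatenates them
    global_on_rank0 = np.concatenate([chunks0[0], chunks1[0]])
    global_on_rank1 = np.concatenate([chunks0[1], chunks1[1]])
    return global_on_rank0, global_on_rank1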
def fill_tensor(queue, event):
    # make sure this runs under legacy dygraph
    if in_dygraph_mode():
        _enable_legacy_dygraph()
    data = queue.get()
    with paddle.no_grad():
        # fill both received tensors in place, then signal the waiting process
        data[0][:] = 5
        data[1][:] = 5
    event.set()
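# Hedged usage sketch (not part of the original file): fill_tensor is written
# to run as the target of a separate process. The parent sends a list of two
# tensors through the queue, waits on the event, and only then inspects them.
# How the tensors are actually shared across processes is an assumption and is
# omitted here; the names below are illustrative only.
import multiprocessing as mp

import paddle


def example_parent():
    queue = mp.Queue()
    event = mp.Event()
    tensors = [paddle.zeros([2, 3]), paddle.zeros([2, 3])]
    proc = mp.Process(target=fill_tensor, args=(queue, event))
    proc.start()
    queue.put(tensors)  # hand both tensors to the child
    event.wait()  # block until the child has filled them
    proc.join()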
def test_backward_downscale_in_infer(self):
    _enable_legacy_dygraph()
    for place in self.places:
        with fluid.dygraph.guard(place):
            input = paddle.uniform([40, 40], dtype="float32")
            input.stop_gradient = False
            out, mask = core.ops.dropout(input, 'dropout_prob', 0.5)
            out.backward()
            self.assertTrue(
                np.array_equal(input.gradient(),
                               self.cal_grad_downscale_in_infer(mask.numpy())))
def test_backward_upscale_train(self):
    _enable_legacy_dygraph()
    for place in self.places:
        with fluid.dygraph.guard(place):
            prob = 0.5
            input = paddle.uniform([40, 40], dtype="float32")
            input.stop_gradient = False
            out, mask = core.ops.dropout(input, 'dropout_prob', prob,
                                         "dropout_implementation",
                                         "upscale_in_train")
            out.backward()
            self.assertTrue(
                np.allclose(input.gradient(),
                            self.cal_grad_upscale_train(mask.numpy(), prob)))
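# The two helpers asserted against above (cal_grad_downscale_in_infer and
# cal_grad_upscale_train) are not shown in this excerpt. Assuming the standard
# dropout gradient formulas, they would look roughly like the sketch below:
# with "downscale_in_infer" the input gradient is just the saved mask, while
# "upscale_in_train" additionally scales the kept elements by 1 / (1 - prob).
def cal_grad_downscale_in_infer(self, mask):
    return mask.astype("float32")


def cal_grad_upscale_train(self, mask, prob):
    return mask.astype("float32") / (1 - prob)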
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import unicode_literals
from __future__ import print_function

import numpy as np
import paddle.fluid as fluid
import os
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.framework import _global_flags
from paddle.fluid.framework import _enable_legacy_dygraph

_enable_legacy_dygraph()


def check():
    print("check: _global_flags()['FLAGS_use_mkldnn']=",
          _global_flags()["FLAGS_use_mkldnn"])
    print("check: fluid.get_flags('FLAGS_use_mkldnn')=",
          fluid.get_flags(['FLAGS_use_mkldnn']))
    print("check: DNNL_VERBOSE=", os.environ['DNNL_VERBOSE'])
    print("check: FLAGS_tracer_mkldnn_ops_on=",
          _global_flags()['FLAGS_tracer_mkldnn_ops_on'])
    print("check: FLAGS_tracer_mkldnn_ops_off=",
          _global_flags()['FLAGS_tracer_mkldnn_ops_off'])
    a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
    b_np = np.random.uniform(-5, 5, (10, 20, 30)).astype(np.float32)
    helper = LayerHelper(fluid.unique_name.generate(str("test")), act="relu")
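# Hedged usage sketch (not part of the original script): check() only reads
# flag state, so a caller has to populate it first. FLAGS_use_mkldnn can be
# toggled through fluid.set_flags(), while DNNL_VERBOSE is read straight from
# the process environment, so it must be set before check() runs or the
# os.environ lookup raises KeyError. The values below are illustrative only.
if __name__ == '__main__':
    os.environ.setdefault('DNNL_VERBOSE', '1')
    fluid.set_flags({'FLAGS_use_mkldnn': True})
    check()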