def check_grad(self,
               inputs_to_check,
               output_names,
               no_grad_set=None,
               numeric_grad_delta=0.005,
               in_place=False,
               max_relative_error=0.005,
               user_defined_grads=None):
    self.scope = core.Scope()
    op_inputs = self.inputs if hasattr(self, "inputs") else dict()
    op_outputs = self.outputs if hasattr(self, "outputs") else dict()
    op_attrs = self.attrs if hasattr(self, "attrs") else dict()
    self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs,
                        op_attrs)
    if no_grad_set is None:
        no_grad_set = set()
    if not isinstance(output_names, list):
        output_names = [output_names]

    # numeric gradients: either user-supplied or estimated by perturbation
    numeric_grads = user_defined_grads or [
        get_numeric_gradient(
            self.scope,
            self.op,
            self.inputs,
            input_to_check,
            output_names,
            delta=numeric_grad_delta,
            in_place=in_place) for input_to_check in inputs_to_check
    ]

    # compare the analytic gradients against the numeric ones on CPU
    cpu_place = core.CPUPlace()
    cpu_analytic_grads = self._get_gradient(inputs_to_check, cpu_place,
                                            output_names, no_grad_set)
    self.__assert_is_close(numeric_grads, cpu_analytic_grads,
                           inputs_to_check, max_relative_error,
                           "Gradient Check On %s" % str(cpu_place))

    # repeat the comparison on GPU when available and supported by the op
    if core.is_compile_gpu() and self.op.support_gpu():
        gpu_place = core.GPUPlace(0)
        gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place,
                                                output_names, no_grad_set)
        self.__assert_is_close(numeric_grads, gpu_analytic_grads,
                               inputs_to_check, max_relative_error,
                               "Gradient Check On %s" % str(gpu_place))
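# A minimal standalone sketch of the central-difference idea that
# get_numeric_gradient applies above. This version works on a plain Python
# function f returning a scalar, rather than on tensors in a scope; the name
# numeric_gradient is illustrative and not part of the test framework.
def numeric_gradient(f, x, delta=0.005):
    """Estimate df/dx element-wise via (f(x + d) - f(x - d)) / (2 * d)."""
    grad = np.zeros_like(x)
    flat_x = x.reshape(-1)  # view into x, so writes below perturb x itself
    flat_g = grad.reshape(-1)
    for i in range(flat_x.size):
        orig = flat_x[i]
        flat_x[i] = orig + delta
        pos = f(x)
        flat_x[i] = orig - delta
        neg = f(x)
        flat_x[i] = orig  # restore the original value
        flat_g[i] = (pos - neg) / (2.0 * delta)
    return grad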
def test_sparse_sgd(self):
    places = [core.CPUPlace()]
    if core.is_compile_gpu():
        places.append(core.GPUPlace(0))
    for place in places:
        self.check_with_place(place)
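# A hedged sketch of what check_with_place might do for the sparse case:
# build a SelectedRows gradient, a dense parameter and a learning rate, run
# the "sgd" operator, then verify that only the touched rows moved. The
# SelectedRows accessors (scope.var(...).get_selected_rows(), set_height,
# set_rows) and the attribute names are assumptions about the core API of
# this era, not code taken from this file.
def check_with_place(self, place):
    scope = core.Scope()
    height, rows, row_numel = 10, [0, 4, 7], 12

    # sparse gradient: values only for the listed rows
    grad = scope.var('Grad').get_selected_rows()
    grad.set_height(height)
    grad.set_rows(rows)
    grad_np = np.ones((len(rows), row_numel)).astype("float32")
    grad.get_tensor().set(grad_np, place)

    # dense parameter and scalar learning rate
    param = scope.var('Param').get_tensor()
    param.set(np.full((height, row_numel), 5.0).astype("float32"), place)
    lr = scope.var('LearningRate').get_tensor()
    lr.set(np.full((1), 2.0).astype("float32"), place)

    sgd_op = Operator(
        "sgd",
        Param='Param',
        Grad='Grad',
        ParamOut='Param',
        LearningRate='LearningRate')
    sgd_op.run(scope, core.DeviceContext.create(place))

    result = np.array(param)
    # a touched row moves by -lr * grad; untouched rows stay at 5.0
    self.assertAlmostEqual(5.0 - 2.0 * 1.0, result[rows[0], 0])
    self.assertAlmostEqual(5.0, result[1, 0])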
def test_forward_backward(self):
    def test_with_place(place, data_format, shape):
        # attrs
        epsilon = 0.00001
        momentum = 0.9

        if len(shape) == 2:
            x_shape = shape
            c = shape[1]
        else:
            n, h, w, c = shape[0], shape[1], shape[2], shape[3]
            if data_format == "NHWC":
                x_shape = [n, h, w, c]
            elif data_format == "NCHW":
                x_shape = [n, c, h, w]
            else:
                raise ValueError("Unknown data format: %s" % data_format)
        scale_shape = [c]

        x_val = np.random.random_sample(x_shape).astype(np.float32)
        scale_val = np.random.random_sample(scale_shape).astype(np.float32)
        bias_val = np.random.random_sample(scale_shape).astype(np.float32)

        mean = np.zeros(scale_shape).astype(np.float32)
        variance = np.ones(scale_shape).astype(np.float32)

        # run forward
        y_out, saved_mean, var_ref = _reference_training(
            x_val, scale_val, bias_val, epsilon, data_format)

        # update moving mean and variance
        mean_out = saved_mean * (1. - momentum) + momentum * mean
        variance_out = var_ref * (1. - momentum) + momentum * variance
        saved_variance = 1. / np.sqrt(var_ref + epsilon)

        # for the gradient test, back-propagate a one-hot output gradient
        y_grad = np.zeros(x_shape).astype(np.float32)
        if len(y_grad.shape) == 2:
            y_grad[0, 0] = 1.
        else:
            y_grad[0, 0, 0, 0] = 1.

        x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
            x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
            data_format)

        scope = core.Scope()

        # create input
        x_tensor = create_or_get_tensor(scope, "x_val", x_val, place)
        scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val,
                                            place)
        bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val, place)
        mean_tensor = create_or_get_tensor(scope, "mean", mean, place)
        variance_tensor = create_or_get_tensor(scope, "variance", variance,
                                               place)

        # create output
        y_tensor = create_or_get_tensor(scope, "y_out", None, place)
        saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
                                                 place)
        saved_variance_tensor = create_or_get_tensor(scope, "saved_variance",
                                                     None, place)
        mean_out_tensor = mean_tensor
        variance_out_tensor = variance_tensor

        batch_norm_op = Operator(
            "batch_norm",
            # inputs
            X="x_val",
            Scale="scale_val",
            Bias="bias_val",
            Mean="mean",
            Variance="variance",
            # outputs
            Y="y_out",
            MeanOut="mean",
            VarianceOut="variance",
            SavedMean="saved_mean",
            SavedVariance="saved_variance",
            # attrs
            is_test=False,
            tensor_format=data_format,
            momentum=momentum,
            epsilon=epsilon)

        ctx = core.DeviceContext.create(place)
        batch_norm_op.run(scope, ctx)

        # check forward result
        self.__assert_close(y_tensor, y_out, "y_out")
        self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean")
        self.__assert_close(saved_variance_tensor, saved_variance,
                            "saved_variance")
        self.__assert_close(mean_out_tensor, mean_out, "mean_out")
        # a looser tolerance is used on GPU
        if isinstance(place, core.GPUPlace):
            atol = 5e-2
        else:
            atol = 1e-4
        self.__assert_close(variance_out_tensor, variance_out,
                            "variance_out", atol)
        print "op test forward passed: ", str(place), data_format

        # run backward
        batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
        set_output_grad(
            scope,
            ["y_out", "mean", "variance", "saved_mean", "saved_variance"],
            place,
            feed_dict={"y_out": y_grad})
        batch_norm_op_grad.run(scope, ctx)

        x_grad_tensor = create_or_get_tensor(scope, grad_var_name("x_val"),
                                             None, place)
        scale_grad_tensor = create_or_get_tensor(scope,
                                                 grad_var_name("scale_val"),
                                                 None, place)
        bias_grad_tensor = create_or_get_tensor(scope,
                                                grad_var_name("bias_val"),
                                                None, place)

        # check gradient output
        self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
        self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
        self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
        print "op test backward passed: ", str(place), data_format

    places = [core.CPUPlace()]
    if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
        places.append(core.GPUPlace(0))
    for place in places:
        for data_format in ["NCHW", "NHWC"]:
            test_with_place(place, data_format, [2, 3, 4, 5])
            test_with_place(place, data_format, [2, 3])
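# For reference, the NHWC backward formulas that _reference_grad is assumed
# to implement above. This is standard mini-batch batch-norm math (the same
# expressions the cuDNN documentation gives), written as a sketch rather than
# the helper's actual code; the function name is hypothetical.
def _reference_grad_nhwc(x, grad_y, scale, mean, var, epsilon):
    # reduce over every axis except the trailing channel axis
    axis = (0, 1, 2) if x.ndim == 4 else (0, )
    grad_bias = np.sum(grad_y, axis=axis)
    grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon),
                        axis=axis)
    grad_x = scale * (grad_y - np.mean(grad_y, axis=axis) -
                      (x - mean) * np.mean(grad_y * (x - mean), axis=axis) /
                      (var + epsilon)) / np.sqrt(var + epsilon)
    return grad_x, grad_scale, grad_bias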
def check_output(self, atol=1e-5):
    places = [core.CPUPlace()]
    if core.is_compile_gpu() and core.op_support_gpu(self.op_type):
        places.append(core.GPUPlace(0))
    for place in places:
        self.check_output_with_place(place, atol)
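# A minimal usage sketch for check_output/check_grad above. The "mul"
# operator and the 32x84 / 84x100 shapes are illustrative; OpTest is assumed
# to be the base class that provides both helpers.
class TestMulOpExample(OpTest):
    def setUp(self):
        self.op_type = "mul"
        self.inputs = {
            'X': np.random.random((32, 84)).astype("float32"),
            'Y': np.random.random((84, 100)).astype("float32")
        }
        self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        # check dX and dY against numeric gradients of Out
        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)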
def main():
    cost = model()
    sgd_optimizer = SGDOptimizer(learning_rate=0.2)
    opts = sgd_optimizer.minimize(cost)

    if USE_GPU:
        place = core.GPUPlace(0)
    else:
        place = core.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)

    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def func_feed(feeding, data):
        feed_tensors = {}
        for (key, idx) in feeding.iteritems():
            tensor = core.LoDTensor()
            if key != "category_id" and key != "movie_title":
                if key == "score":
                    numpy_data = np.array(map(lambda x: x[idx],
                                              data)).astype("float32")
                else:
                    numpy_data = np.array(map(lambda x: x[idx],
                                              data)).astype("int64")
            else:
                # variable-length sequence inputs: concatenate the samples
                # and record per-sample lengths as cumulative LoD offsets
                numpy_data = map(lambda x: np.array(x[idx]).astype("int64"),
                                 data)
                lod_info = [len(item) for item in numpy_data]
                offset = 0
                lod = [offset]
                for item in lod_info:
                    offset += item
                    lod.append(offset)
                numpy_data = np.concatenate(numpy_data, axis=0)
                tensor.set_lod([lod])
            numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
            tensor.set(numpy_data, place)
            feed_tensors[key] = tensor
        return feed_tensors

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            outs = exe.run(framework.default_main_program(),
                           feed=func_feed(feeding, data),
                           fetch_list=[cost])
            out = np.array(outs[0])
            if out[0] < 6.0:
                # if the average cost drops below 6.0, treat training as
                # successful and stop
                exit(0)
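# A tiny standalone illustration of the LoD offsets that func_feed builds
# above (pure numpy; the sequence values are made up and the function name
# is hypothetical): three sequences of lengths 3, 1 and 2 concatenate into a
# flat tensor of 6 rows, and the offsets [0, 3, 4, 6] mark where each
# sequence begins and ends.
def _lod_offsets_demo():
    seqs = [np.array([1, 2, 3]), np.array([4]), np.array([5, 6])]
    lod = [0]
    for s in seqs:
        lod.append(lod[-1] + len(s))
    flat = np.concatenate(seqs, axis=0)
    assert lod == [0, 3, 4, 6]
    assert flat.shape == (6, )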
def main():
    cost, u, m = model()
    optimizer = SGD(learning_rate=0.2)
    # optimizer = Adam(learning_rate=1e-4)
    opts = optimizer.minimize(cost)

    if USE_GPU:
        place = core.GPUPlace(0)
    else:
        place = core.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    print framework.default_main_program().block(0).vars.get(
        "embedding_0.tmp_0")

    train_reader = paddle.batch(
        paddle.dataset.movielens.train(), batch_size=BATCH_SIZE)

    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        # 'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def func_feed(feeding, data):
        feed_tensors = {}
        for (key, idx) in feeding.iteritems():
            tensor = core.LoDTensor()
            if key != "category_id" and key != "movie_title":
                if key == "score":
                    numpy_data = np.array(map(lambda x: x[idx],
                                              data)).astype("float32")
                else:
                    numpy_data = np.array(map(lambda x: x[idx],
                                              data)).astype("int64")
            else:
                # variable-length sequence inputs: concatenate the samples
                # and record per-sample lengths as cumulative LoD offsets
                numpy_data = map(lambda x: np.array(x[idx]).astype("int64"),
                                 data)
                lod_info = [len(item) for item in numpy_data]
                offset = 0
                lod = [offset]
                for item in lod_info:
                    offset += item
                    lod.append(offset)
                numpy_data = np.concatenate(numpy_data, axis=0)
                tensor.set_lod([lod])
            numpy_data = numpy_data.reshape([numpy_data.shape[0], 1])
            tensor.set(numpy_data, place)
            feed_tensors[key] = tensor
        return feed_tensors

    PASS_NUM = 5
    for pass_id in range(PASS_NUM):
        batch_id = 0
        ts = time.time()
        for data in train_reader():
            outs = exe.run(framework.default_main_program(),
                           feed=func_feed(feeding, data),
                           fetch_list=[cost, u, m])
            out = np.array(outs[0])
            if batch_id % 100 == 0:
                print("pass %d, batch %d, cost: %f" %
                      (pass_id, batch_id, out[0]))
                print(outs[1])
                print(outs[2])
            batch_id += 1
        print("pass %d, cost: %f, time: %f" %
              (pass_id, out[0], time.time() - ts))
def test_uniform_random_gpu(self):
    if core.is_compile_gpu():
        self.uniform_random_test(place=core.GPUPlace(0))
def test_gpu(self):
    if core.is_compile_gpu():
        self.gaussian_random_test(place=core.GPUPlace(0))
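# A hedged sketch of the statistical check that gaussian_random_test (and,
# analogously, uniform_random_test) is assumed to perform: run the operator
# once and compare sample moments against the requested distribution. The
# Operator attrs (shape, mean, std, seed) and the scope accessors mirror the
# Operator/DeviceContext pattern of the batch_norm test above and are
# assumptions, not code from this file.
def gaussian_random_test(self, place):
    scope = core.Scope()
    scope.var("Out").get_tensor()
    op = Operator(
        "gaussian_random",
        Out="Out",
        shape=[1000, 784],
        mean=0.0,
        std=1.0,
        seed=10)
    ctx = core.DeviceContext.create(place)
    op.run(scope, ctx)
    tensor = np.array(scope.find_var("Out").get_tensor())
    # with 1000 * 784 samples the moments should be close to (0, 1)
    self.assertAlmostEqual(tensor.mean(), 0.0, delta=0.1)
    self.assertAlmostEqual(tensor.std(), 1.0, delta=0.1)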