def _create_variable_like_keras_dense_layer(self, name, shape, dtype):
  initializer = functools.partial(
      init_ops_v2.GlorotUniform(), shape, dtype=dtype)
  return variables.Variable(
      initial_value=initializer, name=name, trainable=True)
def testGlorotUniform(self):
  shape = (5, 6, 4, 2)
  fan_in, fan_out = init_ops_v2._compute_fans(shape)
  std = np.sqrt(2. / (fan_in + fan_out))
  self._range_test(
      init_ops_v2.GlorotUniform(seed=123),
      shape,
      target_mean=0.,
      target_std=std)
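# Illustrative sketch (assumes only NumPy; the fan values are example numbers,
# not taken from init_ops_v2._compute_fans): Glorot uniform draws from
# U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)), so its standard
# deviation is limit / sqrt(3) = sqrt(2 / (fan_in + fan_out)), which is the
# target_std asserted above.
import numpy as np

fan_in, fan_out = 120, 60
limit = np.sqrt(6. / (fan_in + fan_out))
samples = np.random.default_rng(123).uniform(-limit, limit, size=1000000)
print(np.std(samples))                   # empirical std of the samples
print(np.sqrt(2. / (fan_in + fan_out)))  # analytic target, approximately equal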
def create_variable_like_keras_layer(name, shape, dtype):
  """Utility for creating variables that work like variables in Keras layers."""
  initializer = functools.partial(
      init_ops_v2.GlorotUniform(), shape, dtype=dtype)
  return variables.Variable(
      initial_value=initializer, name=name, trainable=True)
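# Illustrative usage sketch (assumes the TF-internal imports already used in
# this file; the helper name and the 'kernel' variable are hypothetical).
# Passing a callable as initial_value defers initialization until the variable
# is actually built, which mirrors how Keras layers create their weights.
def _example_build_kernel():
  return create_variable_like_keras_layer(
      name='kernel', shape=(4, 2), dtype=dtypes.float32)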
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus):
  d, master_target, config = self._get_test_object(task_type, task_id, num_gpus)
  with ops.Graph().as_default(), \
       self.cached_session(config=config, target=master_target) as sess, \
       d.scope():
    initializer = functools.partial(
        init_ops_v2.GlorotUniform(), (1, 1), dtype=dtypes.float32)
    kernel = variables.Variable(
        initial_value=initializer,
        name='gpu_%d/kernel' % d.extended._num_devices_per_worker,
        trainable=True)

    def loss_fn(x):
      y = array_ops.reshape(
          gen_math_ops.mat_mul(x, kernel), []) - constant_op.constant(1.)
      return y * y

    # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for
    # multiple graphs (b/111216820).
    def grad_fn(x):
      loss = loss_fn(x)
      var_list = (
          variables.trainable_variables() + ops.get_collection(
              ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
      grads = gradients.gradients(loss, var_list)
      ret = list(zip(grads, var_list))
      return ret

    def update(v, g):
      return v.assign_sub(0.05 * g, use_locking=True)

    one = constant_op.constant([[1.]])

    def step():
      """Perform one optimization step."""
      # Run forward & backward to get gradients, variables list.
      g_v = d.extended.call_for_each_replica(grad_fn, args=[one])
      # Update the variables using the gradients and the update() function.
      before_list = []
      after_list = []
      for g, v in g_v:
        fetched = d.extended.read_var(v)
        before_list.append(fetched)
        with ops.control_dependencies([fetched]):
          # TODO(yuefengz): support non-Mirrored variable as destinations.
          g = d.extended.reduce_to(
              reduce_util.ReduceOp.SUM, g, destinations=v)
          with ops.control_dependencies(
              d.extended.update(v, update, args=(g,), group=False)):
            after_list.append(d.extended.read_var(v))
      return before_list, after_list

    before_out, after_out = step()

    if (d.extended._local_device_type == 'GPU' and
        context.num_gpus() < d.extended._num_devices_per_worker):
      return True

    sess.run(variables.global_variables_initializer())

    for i in range(10):
      b, a = sess.run((before_out, after_out))
      if i == 0:
        before, = b
      after, = a

    error_before = abs(before - 1)
    error_after = abs(after - 1)
    # Error should go down
    self.assertLess(error_after, error_before)
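# Illustrative sketch (plain Python, no TF; ignores the cross-replica SUM
# reduction): with x == 1 the loss above is (kernel - 1)**2, its gradient is
# 2 * (kernel - 1), and the update kernel -= 0.05 * grad shrinks |kernel - 1|
# by a factor of 0.9 per step, which is the "error should go down" property
# the test asserts.
kernel = 0.3  # arbitrary starting value
error_before = abs(kernel - 1)
for _ in range(10):
  grad = 2. * (kernel - 1.)
  kernel -= 0.05 * grad
error_after = abs(kernel - 1)
assert error_after < error_before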
def _test_minimize_loss_graph(self, task_type, task_id, num_gpus):
  d, master_target, sess_config = self._get_test_objects(
      task_type, task_id, num_gpus)
  if task_type:
    # Multi-worker
    assert hasattr(d.extended, '_cluster_spec') and d.extended._cluster_spec
    num_workers = len(d.extended._cluster_spec.as_dict().get(WORKER))
    if CHIEF in d.extended._cluster_spec.as_dict():
      num_workers += 1
  else:
    # local
    num_workers = 1

  with ops.Graph().as_default(), \
       self.cached_session(target=master_target,
                           config=sess_config) as sess, \
       d.scope():
    initializer = functools.partial(
        init_ops_v2.GlorotUniform(), (1, 1), dtype=dtypes.float32)
    kernel = variables.Variable(
        initial_value=initializer, name='kernel', trainable=True)

    def loss_fn(x):
      y = array_ops.reshape(
          math_ops.matmul(x, kernel), []) - constant_op.constant(1.)
      return y * y

    # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for
    # multiple graphs (b/111216820).
    def grad_fn(x):
      loss = loss_fn(x)
      var_list = (
          variables.trainable_variables() + ops.get_collection(
              ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES))
      grads = gradients.gradients(loss, var_list)
      ret = list(zip(grads, var_list))
      return ret

    def update(v, g):
      return v.assign_sub(0.05 * g, use_locking=True)

    one = constant_op.constant([[1.]])

    def step():
      """Perform one optimization step."""
      # Run forward & backward to get gradients, variables list.
      g_v = d.extended.call_for_each_replica(grad_fn, args=(one,))
      # Update the variables using the gradients and the update() function.
      before_list = []
      after_list = []
      for g, v in g_v:
        fetched = d.extended.read_var(v)
        before_list.append(fetched)
        with ops.control_dependencies([fetched]):
          # TODO(yuefengz): support non-Mirrored variable as destinations.
          g = d.extended.reduce_to(
              reduce_util.ReduceOp.SUM, g, destinations=v)
          with ops.control_dependencies(
              d.extended.update(v, update, args=(g,), group=False)):
            after_list.append(d.extended.read_var(v))
      return before_list, after_list

    before_out, after_out = step()

    if (not task_type or
        multi_worker_util.is_chief(d.extended._cluster_spec, task_type,
                                   task_id)):
      self.evaluate(variables.global_variables_initializer())

    # Workers wait for the chief worker to finish initializing variables.
    self._init_condition.acquire()
    self._init_reached += 1
    while self._init_reached != num_workers:
      self._init_condition.wait()
    self._init_condition.notify_all()
    self._init_condition.release()

    for i in range(10):
      b, a = sess.run((before_out, after_out))
      if i == 0:
        before, = b
      after, = a

    error_before = abs(before - 1)
    error_after = abs(after - 1)
    # Error should go down
    self.assertLess(error_after, error_before)
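# Illustrative sketch (standard library only; the helper name is hypothetical):
# the chief/worker synchronization above is a simple barrier built from a
# threading.Condition and a shared counter, similar to this:
import threading

def make_barrier(num_workers):
  cond = threading.Condition()
  reached = [0]

  def wait():
    # Each worker increments the counter, then blocks until every worker
    # has arrived; the last arrival wakes the rest.
    with cond:
      reached[0] += 1
      while reached[0] != num_workers:
        cond.wait()
      cond.notify_all()

  return wait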