def test_serialization_deserialization(self):
    """Check that network weights survive a serialize/deserialize round trip.

    A fully connected network with randomly drawn (seeded) dimensions is
    built, its weights are flattened with ``networks.serialize_weights`` and
    then restored with ``networks.deserialize_weights``. The flattened array
    must be one-dimensional, hold exactly as many entries as all weights
    combined, and the restored values must match the originals exactly.
    """
    np.random.seed(12345)
    # Use the session managed by the test case, as the sibling tests do,
    # rather than constructing a raw tf.Session by hand.
    with self.session() as sess:
        dim_in = np.random.randint(1, 100)
        dim_out = np.random.randint(1, 100)
        num_layers = np.random.randint(1, 10)
        layer_sizes = np.random.randint(1, 100, num_layers)
        gen = networks.FullyConnectedNetworkGenerator(dim_in, dim_out,
                                                      layer_sizes)
        weights = gen.construct_network_weights()
        sess.run(tf.global_variables_initializer())

        # Serialize weights to a 1-d numpy array
        serialized_weights = networks.serialize_weights(sess, weights)
        self.assertEqual(
            len(serialized_weights.shape),
            1,
            msg='Serialized weights should have dimension 1.')

        # The flat array must contain every element of every weight tensor.
        expected_length = sum(
            np.prod(weights[name].shape) for name in weights)
        self.assertEqual(
            expected_length,
            serialized_weights.shape[0],
            msg='Serialized weights should have the same length as original weights.')

        # Deserialize and compare weights
        deserialized_weights = networks.deserialize_weights(
            weights, serialized_weights)
        for name in weights:
            original_weight = weights[name].eval(session=sess)
            np.testing.assert_array_equal(
                original_weight,
                deserialized_weights[name],
                err_msg='Weights should be exactly the same before/after serialization')
def test_fully_connected(self):
    """Feed random batches through networks of varying depth; expect no NaNs.

    For ten randomly drawn (seeded) input/output dimension pairs, networks
    with 1, 2 and 10 randomly sized layers are built, initialised and
    evaluated on a batch of standard-normal inputs. The test fails if any
    output value is NaN.
    """
    np.random.seed(12345)
    num_trials = 10
    batch_size = 100
    depths = [1, 2, 10]

    with self.session() as sess:
        for _ in range(num_trials):
            dim_in = np.random.randint(1, 100)
            dim_out = np.random.randint(1, 100)
            # One placeholder per trial, shared by all depths of that trial.
            net_input = tf.placeholder(
                tf.float32, shape=(None, dim_in), name='input')

            for depth in depths:
                hidden_sizes = np.random.randint(1, 100, depth)
                generator = networks.FullyConnectedNetworkGenerator(
                    dim_in, dim_out, hidden_sizes)
                net_weights = generator.construct_network_weights()
                net_output = generator.construct_network(
                    net_input, net_weights)

                # Initialise the freshly created variables before running.
                sess.run(tf.global_variables_initializer())

                random_batch = np.random.randn(batch_size, dim_in)
                result = sess.run(
                    net_output, feed_dict={net_input: random_batch})
                self.assertFalse(np.any(np.isnan(result)))
def test_learn_simple_policy(self):
    """Fit a Gaussian "policy" that maps four input clusters to four targets.

    Inputs are drawn from 2D Gaussians centred on the corners
    (-1, -1), (-1, 1), (1, -1) and (1, 1); each cluster has its own
    one-dimensional target (0, 1, 2 and 3). The policy is trained by
    maximising the log-likelihood of the targets, and afterwards the mean
    absolute error of the policy mean at the cluster centres must be
    approximately zero (one decimal place).
    """
    input_means = np.array([[-1., -1], [-1, 1], [1, -1], [1, 1]])
    input_std = .1
    output_means = np.array([[0.], [1], [2], [3]])
    samples_per_cluster = 100
    train_steps = 1000

    # Build the graph: network -> policy -> negative log-likelihood loss.
    network_generator = networks.FullyConnectedNetworkGenerator(
        2, 1, (64, 64), tf.nn.relu)
    weights = network_generator.construct_network_weights()
    net_in = tf.placeholder(tf.float32, shape=(None, 2), name='input')
    net_out = network_generator.construct_network(net_in, weights)
    policy = policies.GaussianPolicy(net_in, net_out, 1, -5.)
    actions = tf.placeholder(tf.float32, shape=(None, 1), name='actions')
    log_lik = policy.log_likelihood_op(actions)
    optimizer = tf.train.AdamOptimizer(0.001)
    train_op = optimizer.minimize(-tf.reduce_mean(log_lik))
    pol_mean, _ = policy.mean_op()

    with self.session() as sess:
        sess.run(tf.global_variables_initializer())

        for _ in range(train_steps):
            # Noisy copies of each cluster centre, paired with its target.
            centres = np.repeat(input_means, samples_per_cluster, axis=0)
            noise = np.random.normal(0, input_std, centres.shape)
            batch_inputs = centres + noise
            batch_targets = np.repeat(
                output_means, samples_per_cluster, axis=0)
            sess.run(
                train_op,
                feed_dict={net_in: batch_inputs, actions: batch_targets})

        predicted_means = sess.run(pol_mean, feed_dict={net_in: input_means})
        mae = np.mean(np.abs(output_means - predicted_means))
        self.assertAlmostEqual(mae, 0, places=1)
MOVE_POINT_ROTATE_MAML = dict( random_seed=random.randint(0, 1000000), num_outer_iterations=1000, task_generator=functools.partial(move_point_env.MovePointEnv, start_pos=(0, 0), end_pos=(1, 0), goal_reached_distance=-1, trial_length=10), task_env_modifiers=[{ '_action_rotation': i } for i in np.linspace(-np.pi, np.pi, 5000)], network_generator=networks.FullyConnectedNetworkGenerator( dim_input=2, dim_output=2, layer_sizes=( 50, 50, ), activation_fn=tf.nn.tanh), input_dims=2, pol_log_std_init=-3., output_dims=2, reward_disc=0.9, learn_offset=False, policy=policies.GaussianPolicy, tasks_batch_size=10, num_inner_rollouts=25, outer_optimizer_algo=tf.train.AdamOptimizer, advantage_function='returns-values', whiten_values=False, always_full_rollouts=False,