def testBuildDummyQNetIncorrectTFVars(self):
    """build_dummy_q_net must raise AssertionError for a lone weights variable.

    The variable list holds a single 3x1 'weights' variable and nothing
    else; presumably the builder rejects it because no bias accompanies
    the weights (confirm against utils.build_dummy_q_net).
    """
    lone_weights = [
        tf.Variable([[.1], [.2], [.3]], name='weights', dtype=tf.float32)
    ]
    placeholders = utils.create_placeholders_for_q_net(lone_weights)

    state = [[3.]]
    action = [[2., 1.]]
    with self.assertRaises(AssertionError):
        utils.build_dummy_q_net(state, action, placeholders, lone_weights)
    def testBuildDummyQNetTwoLayers(self):
        """Checks the output of a dummy Q-net built from two weight/bias pairs.

        Four variables (hidden weights/bias, output weights/bias) are turned
        into placeholders, the network is built for state [[3.]] and action
        [[2., 1.]], and the same numpy values are fed back in. The result
        must be close to [[.6]].
        """
        # (variable name, value) pairs; the order here is the order in which
        # the variables are created and handed to the network builder.
        layer_params = [
            ('hidden_weights', np.array([[-.1, .1], [-.2, .2], [-.3, .3]])),
            ('hidden_bias', np.array([.3, .3])),
            ('output_weights', np.array([[.5], [.5]])),
            ('output_bias', np.array([-.05])),
        ]
        tf_vars = [
            tf.Variable(value, name=label, dtype=tf.float32)
            for label, value in layer_params
        ]
        ph_dict = utils.create_placeholders_for_q_net(tf_vars)
        output = utils.build_dummy_q_net([[3.]], [[2., 1.]], ph_dict, tf_vars)

        with self.test_session() as sess:
            # Placeholders are looked up by '<variable name>_ph'.
            feeds = {
                ph_dict['{}_ph'.format(var.name)]: value
                for var, (_, value) in zip(tf_vars, layer_params)
            }
            q_value = sess.run(output, feed_dict=feeds)
            # NOTE(review): .6 is consistent with input [3, 2, 1] (state and
            # action concatenated), a ReLU hidden layer giving [0, 1.3], and a
            # linear output 1.3 * .5 - .05 -- confirm against build_dummy_q_net.
            self.assertAllClose([[.6]], q_value)
Example #3
0
  def testBuildDummyQNetSingleLinearUnit(self):
    """Checks the output of a dummy Q-net with one weights/bias pair.

    The network is built for state [[3.]] and action [[2., 1.]] from a 3x1
    weights variable and a scalar bias; feeding the same numpy values through
    the placeholders must yield a result close to [[.5]].
    """
    # (variable name, value) pairs, in the order the variables are created.
    unit_params = [
        ('weights', np.array([[.1], [.2], [.3]])),
        ('bias', np.array([-.5])),
    ]
    tf_vars = [
        tf.Variable(value, name=label, dtype=tf.float32)
        for label, value in unit_params
    ]
    ph_dict = utils.create_placeholders_for_q_net(tf_vars)
    output = utils.build_dummy_q_net([[3.]], [[2., 1.]], ph_dict, tf_vars)

    with self.test_session() as sess:
      # Placeholders are looked up by '<variable name>_ph'.
      feeds = {
          ph_dict['{}_ph'.format(var.name)]: value
          for var, (_, value) in zip(tf_vars, unit_params)
      }
      q_value = sess.run(output, feed_dict=feeds)
      # NOTE(review): .5 is consistent with 3*.1 + 2*.2 + 1*.3 - .5, i.e. state
      # and action concatenated into one input -- confirm against utils.
      self.assertAllClose([[.5]], q_value)