Example #1
def test_Sum(tmpdir):
    in1_data = np.asarray([[1., 2., 3., 4.]], np.float32)
    in2_data = np.asarray([[0., 5., -3., 2.]], np.float32)

    in1 = C.input_variable(np.shape(in1_data))
    in2 = C.input_variable(np.shape(in2_data))
    model = C.sum([in1, in2])

    verify_two_input(model, in1_data, in2_data, tmpdir, 'Sum_2')
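
This test assumes the usual imports (import numpy as np, import cntk as C) and a verify_two_input helper defined in the surrounding test module, which is not shown here. As a minimal self-contained sketch of what the model itself computes, the same C.sum node can also be evaluated directly; the eval call below is illustrative and not part of the original test:

import numpy as np
import cntk as C

in1_data = np.asarray([[1., 2., 3., 4.]], np.float32)
in2_data = np.asarray([[0., 5., -3., 2.]], np.float32)

in1 = C.input_variable(np.shape(in1_data))
in2 = C.input_variable(np.shape(in2_data))
model = C.sum([in1, in2])

# Element-wise sum of the two inputs: [[1., 7., 0., 6.]]
print(model.eval({in1: in1_data, in2: in2_data}))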
Example #2
def test_Sum(tmpdir, dtype):
    with C.default_options(dtype = dtype):
        in1_data = np.asarray([[1., 2., 3., 4.]], dtype = dtype)
        in2_data = np.asarray([[0., 5., -3., 2.]], dtype = dtype)

        in1 = C.input_variable(np.shape(in1_data))
        in2 = C.input_variable(np.shape(in2_data))
        model = C.sum([in1, in2])

        verify_two_input(model, in1_data, in2_data, tmpdir, 'Sum_2')
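
Example #2 differs from Example #1 only in that the data type is injected through C.default_options; in a pytest-based suite such a dtype argument is typically supplied by a fixture or parametrization. The sketch below shows one way this variant could be driven; the parametrize decorator, the float32/float64 choices, and the final assert are assumptions for illustration, not part of the original snippet:

import numpy as np
import pytest
import cntk as C

@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_Sum_dtype(dtype):
    with C.default_options(dtype=dtype):
        in1 = C.input_variable((1, 4))
        in2 = C.input_variable((1, 4))
        model = C.sum([in1, in2])
        # default_options propagates the dtype to the input variables,
        # so the summed output carries the same precision.
        assert model.output.dtype == dtype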
def _build_network(self, pretrained_policy):
    # Observation input plus the training signals fed in by the agent.
    self.input = C.input_variable(self.observation_space_shape,
                                  name='image frame')
    self.target = C.input_variable((1,), name='q_target')
    self.action_index = C.input_variable((1,))
    # One-hot encoding of the action that was actually taken.
    one_hot_action = C.ops.squeeze(
        C.one_hot(self.action_index, self.num_actions))
    if pretrained_policy is None:
        # Small fully connected policy network with a softmax head.
        h = C.layers.Dense(64, activation=C.relu,
                           name='dense_1')(self.input)
        h = C.layers.Dense(32, activation=C.relu, name='dense_2')(h)
        self.action_probabilities = C.layers.Dense(self.num_actions,
                                                   activation=C.softmax,
                                                   name='dense_3')(h)
    else:
        # Reuse a previously saved policy instead of building one from scratch.
        self.action_probabilities = C.Function.load(pretrained_policy)(
            self.input)
    # Probability the policy assigns to the selected action.
    selected_action_probability = C.ops.times_transpose(
        self.action_probabilities, one_hot_action)
    self.log_probability = C.ops.log(selected_action_probability)
    # Policy-gradient style objective: log-probability weighted by the target.
    self.loss = C.sum(self.log_probability * self.target)
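
A rough sketch of how the loss built above could be hooked into a CNTK learner and trainer follows; the method names, learning rate, and minibatch argument names are assumptions for illustration and not part of the original class:

def _build_trainer(self, learning_rate=1e-3):
    # Hypothetical companion method: wire the policy-gradient loss from
    # _build_network into an Adam learner and a CNTK Trainer.
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.adam(self.action_probabilities.parameters,
                     lr=lr_schedule, momentum=0.9)
    self.trainer = C.Trainer(self.action_probabilities, self.loss, learner)

def _train_on_batch(self, frames, action_indices, targets):
    # One update step: feed observed frames, the indices of the actions
    # taken, and the per-step targets into the loss defined above.
    self.trainer.train_minibatch({self.input: frames,
                                  self.action_index: action_indices,
                                  self.target: targets})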