def test_Sum(tmpdir):
    """Build ``C.sum`` over two float32 inputs and pass it to ``verify_two_input``.

    Both operands are fixed ``(1, 4)`` float32 arrays. One input variable is
    created per array (using the array's shape), the two variables are
    combined with ``C.sum``, and the resulting model is handed to
    ``verify_two_input`` together with the raw arrays, ``tmpdir`` and the
    label ``'Sum_2'``.
    """
    lhs_values = np.asarray([[1., 2., 3., 4.]], np.float32)
    rhs_values = np.asarray([[0., 5., -3., 2.]], np.float32)

    # One input variable per operand, created in operand order.
    operands = [
        C.input_variable(np.shape(values))
        for values in (lhs_values, rhs_values)
    ]

    verify_two_input(C.sum(operands), lhs_values, rhs_values, tmpdir, 'Sum_2')
def test_Sum(tmpdir, dtype):
    """Build ``C.sum`` over two inputs of the given ``dtype`` and verify it.

    Everything runs inside ``C.default_options(dtype=dtype)``. The two
    ``(1, 4)`` operand arrays are created with that same ``dtype``; the
    summed model, the arrays, ``tmpdir`` and the label ``'Sum_2'`` are then
    passed to ``verify_two_input``.
    """
    with C.default_options(dtype=dtype):
        first_operand = np.asarray([[1., 2., 3., 4.]], dtype=dtype)
        second_operand = np.asarray([[0., 5., -3., 2.]], dtype=dtype)

        first_var = C.input_variable(np.shape(first_operand))
        second_var = C.input_variable(np.shape(second_operand))

        verify_two_input(
            C.sum([first_var, second_var]),
            first_operand,
            second_operand,
            tmpdir,
            'Sum_2',
        )
def test_Sum(tmpdir, dtype):
    """Sum two ``dtype`` inputs with ``C.sum`` and hand the model to the verifier.

    The operand arrays and the input variables are created while
    ``C.default_options(dtype=dtype)`` is active. ``verify_two_input``
    receives the model, both arrays, ``tmpdir`` and the label ``'Sum_2'``.
    """
    with C.default_options(dtype=dtype):
        samples = (
            np.asarray([[1., 2., 3., 4.]], dtype=dtype),
            np.asarray([[0., 5., -3., 2.]], dtype=dtype),
        )
        # Input variables take their shape from the matching sample array.
        variables = [C.input_variable(np.shape(sample)) for sample in samples]
        model = C.sum(variables)
        verify_two_input(model, samples[0], samples[1], tmpdir, 'Sum_2')
def _build_network(self, pretrained_policy):
    """Create the input variables, the action-probability output and the loss.

    Sets ``self.input``, ``self.target``, ``self.action_index``,
    ``self.action_probabilities``, ``self.log_probability`` and ``self.loss``.

    Args:
        pretrained_policy: ``None`` to build a fresh 64 -> 32 ->
            ``self.num_actions`` dense stack with a softmax output;
            otherwise a value passed to ``C.Function.load`` whose loaded
            function is applied to ``self.input``.
    """
    self.input = C.input_variable(
        self.observation_space_shape, name='image frame')
    self.target = C.input_variable((1, ), name='q_target')
    self.action_index = C.input_variable((1, ))

    # One-hot encoding of the chosen action, squeezed before it is used to
    # pick out that action's probability below.
    action_one_hot = C.one_hot(self.action_index, self.num_actions)
    action_mask = C.ops.squeeze(action_one_hot)

    if pretrained_policy is not None:
        policy = C.Function.load(pretrained_policy)
        self.action_probabilities = policy(self.input)
    else:
        # NOTE(review): all three layers share the name 'dense_1'; this looks
        # unintended, but the names are kept as-is -- confirm before changing.
        first_hidden = C.layers.Dense(
            64, activation=C.relu, name='dense_1')
        second_hidden = C.layers.Dense(
            32, activation=C.relu, name='dense_1')
        output_layer = C.layers.Dense(
            self.num_actions, activation=C.softmax, name='dense_1')

        features = first_hidden(self.input)
        features = second_hidden(features)
        self.action_probabilities = output_layer(features)

    chosen_probability = C.ops.times_transpose(
        self.action_probabilities, action_mask)
    self.log_probability = C.ops.log(chosen_probability)

    # NOTE(review): no negation is applied here; whether the trainer is
    # expected to minimise or maximise this quantity is not visible -- confirm.
    weighted_log_probability = self.log_probability * self.target
    self.loss = C.sum(weighted_log_probability)