Code Example #1
File: network.py  Project: BYU-PCCL/Jedi
    def __init__(self, args, environment):
        self.args = args
        self.environment = environment
        self.tensorboard_process = None
        self.training_iterations = 0
        self.batch_loss = 0
        self.learning_rate = 0

        self.sess = self.start_session(args)
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.learning_rate_op = self.build_learning_rate(step=self.global_step)

        with op.context(floatx=tf.float32, floatsafe=False):

            self.train_op = None
            self.priority_op = None
            self.loss_op = None
            self.agent_output_action = None
            self.agent_output = None
            self.agent_network = None

            self.build_networks()

            assert (self.train_op is not None and
                    self.priority_op is not None and
                    self.loss_op is not None and
                    self.agent_output_action is not None and
                    self.agent_output is not None), 'Network implementation must define the operations found on this line'

            self.assign_ops = self.build_assign_ops()

        self.initialize()
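
build_networks() is left to concrete subclasses, which must populate the attributes checked by the assertion above. Below is a hypothetical sketch of such a subclass; the base-class name Network, the self.inputs.states / self.inputs.targets tensors, and the Adam optimizer are all assumptions for illustration, not taken from the project.

    # Hypothetical sketch (not from the project): a subclass populates the
    # attributes asserted in __init__ inside build_networks().
    class ExampleNetwork(Network):
        def build_networks(self):
            self.agent_network = self.build(self.inputs.states)  # assumed input tensor
            self.agent_output = self.agent_network
            self.agent_output_action = tf.argmax(self.agent_output, 1)
            self.loss_op = tf.reduce_mean(tf.square(self.inputs.targets - self.agent_output))
            self.priority_op = tf.abs(self.inputs.targets - self.agent_output)
            self.train_op = tf.train.AdamOptimizer(self.learning_rate_op).minimize(
                self.loss_op, global_step=self.global_step)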
Code Example #2
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):
        with op.context(default_activation_fn='relu'):
            conv1, w1, b1 = op.conv2d(states, size=8, filters=32, stride=4, name='conv1')
            conv2, w2, b2 = op.conv2d(conv1, size=4, filters=64, stride=2, name='conv2')
            conv3, w3, b3 = op.conv2d(conv2, size=3, filters=64, stride=1, name='conv3')
            fc4, w4, b4 = op.linear(op.flatten(conv3, name="fc4"), 512, name='fc4')
            output, w5, b5 = op.linear(fc4, self.environment.get_num_actions(), activation_fn='none', name='output')

            return output
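
This trunk matches the three-layer convolutional network popularized by the Atari DQN papers. Assuming 84x84 input frames and VALID padding (both assumptions, since op.conv2d is a project-specific helper whose defaults are not shown here), the spatial sizes come out to (84-8)/4+1 = 20, (20-4)/2+1 = 9, and (9-3)/1+1 = 7, so op.flatten hands 7*7*64 = 3136 features to the 512-unit fc4 layer.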
Code Example #3
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):
        with op.context(default_activation_fn='relu'):
            fc1,    w1, b1 = op.linear(op.flatten(states, name="fc1_flatten"), 500, name='fc1')
            fc2,    w2, b2 = op.linear(fc1, 500, name='fc2')
            value,  w3, b3 = op.linear(fc2, self.environment.get_num_actions(), activation_fn='none', name='value')
            advantages, w4, b4 = op.linear(fc2, self.environment.get_num_actions(), activation_fn='none', name='advantages')

            # Dueling DQN - http://arxiv.org/pdf/1511.06581v3.pdf
            output = value + (advantages - op.mean(advantages, keep_dims=True))

        return output
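
The aggregation before the return is the dueling combination from the paper cited in the comment: Q(s, a) = V(s) + (A(s, a) - mean over a' of A(s, a')), where subtracting the mean advantage keeps the value/advantage split identifiable. A minimal sketch of the same step with stock TensorFlow ops, assuming op.mean reduces over the action axis with keep_dims (an assumption about the project helper):

        # Minimal sketch with standard TF ops; value and advantages are both
        # [batch, num_actions] in this particular example.
        output = value + (advantages - tf.reduce_mean(advantages, 1, keep_dims=True))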
Code Example #4
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):
        with op.context(default_activation_fn='relu'):
            conv1,    w1, b1 = op.conv2d(states, size=8, filters=32, stride=4, name='conv1')
            conv2,    w2, b2 = op.conv2d(conv1, size=4, filters=64, stride=2, name='conv2')
            conv3,    w3, b3 = op.conv2d(conv2, size=3, filters=64, stride=1, name='conv3')
            fc4,      w4, b4 = op.linear(op.flatten(conv3, name="fc4"), 512, name='fc4')
            output,   w5, b5 = op.linear(fc4, self.environment.get_num_actions(), activation_fn='none', name='output')
            raw_sigma, w6, b6 = op.linear(fc4, self.environment.get_num_actions(), name='variance')

            raw_sigma += 0.0001  # to avoid divide by zero
            sigma = tf.exp(raw_sigma)

        return output, sigma
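
Since exp() is strictly positive, sigma is guaranteed to be a valid scale regardless of the small offset added to raw_sigma. These excerpts do not show how the (output, sigma) pair is consumed; one hypothetical use is a Gaussian negative log-likelihood:

        # Hypothetical usage sketch (not from the project): per-action Gaussian NLL,
        # assuming `targets` is a placeholder with the same shape as `output`.
        nll = tf.reduce_mean(tf.log(sigma) + tf.square(targets - output) / (2.0 * tf.square(sigma)))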
Code Example #5
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):

        with tf.variable_scope('net'), op.context(default_activation_fn='relu'):
            conv1,     w1, b1 = op.conv2d(states, size=8, filters=32, stride=4, name='conv1')
            conv2,     w2, b2 = op.conv2d(conv1, size=4, filters=64, stride=2, name='conv2')
            conv3,     w3, b3 = op.conv2d(conv2, size=3, filters=64, stride=1, name='conv3')
            fc4,       w4, b4 = op.linear(op.flatten(conv3), 256, name='fc4')

            h,         w5, b5 = op.linear(fc4, 256, name='h')
            h1,        w6, b6 = op.linear(h, 256, name='h1')
            hhat,      w7, b7 = op.linear(h1, 256, name='hhat')

            fc8,       w8, b8 = op.linear(op.merge(h, hhat, name="fc8"), 256, name='fc8')
            output,    w9, b9 = op.linear(fc8, self.environment.get_num_actions(), activation_fn='none', name='output')

        with tf.name_scope('prediction'), tf.variable_scope('net', reuse=True), op.context(default_activation_fn='relu'):
            hhat_conv1, _, _ = op.conv2d(self.inputs.lookaheads, size=8, filters=32, stride=4, name='conv1')
            hhat_conv2, _, _ = op.conv2d(hhat_conv1, size=4, filters=64, stride=2, name='conv2')
            hhat_conv3, _, _ = op.conv2d(hhat_conv2, size=3, filters=64, stride=1, name='conv3')
            hhat_truth, _, _ = op.linear(op.flatten(hhat_conv3), 256, name='fc4')

            self.constraint_error = tf.reduce_mean((hhat - hhat_truth)**2, reduction_indices=1, name='prediction_error')

        return output
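
The prediction block rebuilds the convolutional trunk under tf.variable_scope('net', reuse=True), so the lookahead path resolves the same conv1..fc4 weights as the main path instead of creating a second copy. The sharing pattern in isolation (a minimal standalone sketch, independent of the project code):

        # Minimal sketch of TF1 variable sharing: with reuse=True the second
        # get_variable call returns the existing variable rather than a new one.
        with tf.variable_scope('net'):
            w = tf.get_variable('w', shape=[3, 3])
        with tf.variable_scope('net', reuse=True):
            w_shared = tf.get_variable('w')  # same underlying variable as w
        assert w is w_shared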
Code Example #6
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):
        with op.context(default_activation_fn='relu'):
            conv1, w1, b1 = op.conv2d(states, size=8, filters=32, stride=4, name='conv1')
            conv2, w2, b2 = op.conv2d(conv1, size=4, filters=64, stride=2, name='conv2')
            conv3, w3, b3 = op.conv2d(conv2, size=3, filters=64, stride=1, name='conv3')
            conv3_flatten = op.flatten(conv3, name="conv3_flatten")

            fc4_value, w4, b4 = op.linear(conv3_flatten, 512, name='fc4_value')
            value, w5, b5 = op.linear(fc4_value, 1, activation_fn='none', name='value')

            fc4_advantage, w6, b6 = op.linear(conv3_flatten, 512, name='fc4_advantages')
            advantages, w7, b7 = op.linear(fc4_advantage, self.environment.get_num_actions(), activation_fn='none', name='advantages')

            # Dueling DQN - http://arxiv.org/pdf/1511.06581v3.pdf
            output = value + (advantages - op.mean(advantages, keep_dims=True))

            return output
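
Compared with Code Example #3, this variant follows the cited dueling architecture more closely: each head gets its own 512-unit layer, value is a single scalar per state ([batch, 1]) while advantages is [batch, num_actions], so the addition broadcasts the state value across the action dimension while the mean-subtraction centers the advantages.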
Code Example #7
File: network.py  Project: BYU-PCCL/Jedi
    def build(self, states):
        with op.context(default_activation_fn='relu'):
            # Common Perception
            l1,     w1, b1 = op.conv2d(states, size=8, filters=32, stride=4, name='conv1')

            # A Side
            l2a,    w2, b2 = op.conv2d(l1, size=4, filters=64, stride=2, name='a_conv2')
            l2a_fc, w3, b3 = op.linear(op.flatten(l2a, name="a_fc4"), 32, activation_fn='none', name='a_fc3')

            # B Side
            l2b,    w4, b4 = op.conv2d(l1, size=4, filters=64, stride=2, name='b_conv2')
            l2b_fc, w5, b5 = op.linear(op.flatten(l2b, name="b_fc4"), 32, activation_fn='none', name='b_fc3')

            # Causal Matrix
            l2a_fc_e = op.expand(l2a_fc, 2, name='a')  # now ?x32x1
            l2b_fc_e = op.expand(l2b_fc, 1, name='b')  # now ?x1x32
            causes = op.flatten(tf.batch_matmul(l2a_fc_e, l2b_fc_e, name='causes'))

            l4,      w6, b6 = op.linear(causes, 512, name='l4')
            output,  w7, b7 = op.linear(l4, self.environment.get_num_actions(), activation_fn='none', name='output')

            return output
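
The "causal matrix" is a batched outer product: expanding one 32-dimensional activation into a column ([?, 32, 1]) and the other into a row ([?, 1, 32]) makes the batched matrix multiply produce every pairwise product, which op.flatten turns into 32*32 = 1024 features for l4. A minimal sketch of the same construction with stock TensorFlow ops, where a and b stand in for l2a_fc and l2b_fc:

            # Minimal sketch with standard TF ops; a and b are [batch, 32].
            a_col = tf.expand_dims(a, 2)           # [batch, 32, 1]
            b_row = tf.expand_dims(b, 1)           # [batch, 1, 32]
            outer = tf.batch_matmul(a_col, b_row)  # [batch, 32, 32]; entry (i, j) = a_i * b_j
            features = tf.reshape(outer, [-1, 32 * 32])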