def __init__(self, cfg_parser: ConfigurationManager):
        super().__init__(cfg_parser, QuantileRegressionAgent.head)
        self.cfg = cfg_parser.parse_and_return_dictionary(
            "AGENT", QuantileRegressionAgent.required_params)

        # Build the quantile-regression loss op and run the shared prepare() step with it.
        self.loss = self.build_loss_op()

        self.prepare(self.loss)
Example #2
    def __init__(self, cfg_parser: ConfigurationManager, agent: Agent):
        act_plcy_cfg = cfg_parser.parse_and_return_dictionary(
            "POLICY.EXPLORATION_STRATEGY", Policy.required_params)

        if act_plcy_cfg["TYPE"] == "EPSILON_GREEDY":
            self.policy = EpsilonGreedy(cfg_parser, agent)
        elif act_plcy_cfg["TYPE"] == "SOFTMAX":
            self.policy = SoftMax(agent)
        else:
            raise NotImplementedError(
                "Unknown exploration strategy: " + act_plcy_cfg["TYPE"])
Example #3
    def __init__(self, config_parser: ConfigurationManager,
                 net: network.GeneralNetwork):
        super().__init__(config_parser, net)

        self.cfg = config_parser.parse_and_return_dictionary(
            "HEAD", SoftmaxFixedAtomsDistributionalHead.required_params)

        self.q_dist = tf.nn.softmax(self.q_dist,
                                    name="state_action_value_dist",
                                    axis=-1)
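A NumPy sketch of what that softmax does to the [batch, actions, atoms] tensor (the shapes here are made up for illustration): each action's atom logits become a probability distribution.
import numpy as np

logits = np.random.randn(2, 4, 51)                       # [batch, num_actions, NB_ATOMS]
e = np.exp(logits - logits.max(axis=-1, keepdims=True))  # numerically stable softmax over the atom axis
probs = e / e.sum(axis=-1, keepdims=True)
# probs[b, a] now sums to 1 for every batch element b and action a.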
Example #4
    def __init__(self, env: Environment, config_parser: ConfigurationManager):
        gym.Wrapper.__init__(self, env.env)
        self.cfg = config_parser.parse_and_return_dictionary(
            "ENVIRONMENT", NetworkActionToEnvAction.required_params)

        if "ACTION_SPECIFICATIONS" in self.cfg:
            self.actions = self.cfg["ACTION_SPECIFICATIONS"]
        else:
            self.actions = list(
                range(config_parser.parsed_json["DEFAULT_NUM_ACTIONS"]))
Example #5
    def __init__(self, config_parser: ConfigurationManager):
        self.cfg = config_parser.parse_and_return_dictionary(
            "ENVIRONMENT", Head.required_params)

        if "ACTION_SPECIFICATIONS" in self.cfg:
            self.num_actions = len(self.cfg["ACTION_SPECIFICATIONS"])
        else:
            self.num_actions = config_parser.parsed_json["DEFAULT_NUM_ACTIONS"]

        self.greedy_action = None
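For context, a minimal sketch of the config keys this head (and the wrapper in Example #4) reads; the key names come from the snippets above, the concrete values are placeholders.
example_env_cfg = {
    "ENVIRONMENT": {
        "ACTION_SPECIFICATIONS": [0, 2, 3]   # explicit subset of usable actions
    },
    "DEFAULT_NUM_ACTIONS": 6                 # fallback when no subset is given
}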
Example #6
    def __init__(self, cfg_parser: ConfigurationManager, head):
        super().__init__()

        from util.util import build_train_and_target_general_network_with_head, get_session
        self.sess = get_session(cfg_parser)

        self.train_network_base, self.train_network, \
            self.target_network_base, self.target_network, self.copy_op, self.saver = \
                build_train_and_target_general_network_with_head(head, cfg_parser)

        from memory.experience_replay import ExperienceReplay
        self.experience_replay = ExperienceReplay(cfg_parser)

        self.cfg_parser = cfg_parser

        from function_approximator.head import QNetworkHead
        self.train_network: QNetworkHead
        self.target_network: QNetworkHead

        cfg_parser["NUM_ACTIONS"] = self.train_network.num_actions

        self.cfg = cfg_parser.parse_and_return_dictionary(
            "AGENT", BaseDQNBasedAgent.required_params)

        self.train_step = None

        self.action_placeholder = tf.placeholder(name="action",
                                                 dtype=tf.int32,
                                                 shape=[
                                                     None,
                                                 ])
        self.reward_placeholder = tf.placeholder(name="reward",
                                                 dtype=tf.float32,
                                                 shape=(None, ))
        # TODO: Store terminals as bool instead of uint8 to save memory (check that casting to float still works).
        self.terminal_placeholder = tf.placeholder(name="terminal",
                                                   dtype=tf.uint8,
                                                   shape=(None, ))

        self.predict_calls = 0
        self.train_calls = 0
        self.num_updates = tf.Variable(initial_value=0,
                                       dtype=tf.int32,
                                       trainable=False)

        self.batch_dim_range = tf.range(tf.shape(self.train_network_base.x)[0],
                                        dtype=tf.int32)

        self.policy = None
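The batch_dim_range tensor defined above is the kind of index helper used to pick out the chosen action's value for every sample in the batch; a NumPy sketch of that indexing pattern (names and shapes assumed):
import numpy as np

q_values = np.arange(12).reshape(3, 4)     # [batch=3, num_actions=4]
actions = np.array([1, 0, 3])              # chosen action per sample
batch_idx = np.arange(q_values.shape[0])   # analogue of batch_dim_range
chosen_q = q_values[batch_idx, actions]    # -> array([1, 4, 11])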
Example #7
    def __init__(self, cfg_parser: ConfigurationManager):
        super().__init__(cfg_parser, CategoricalAgent.head)
        self.cfg = cfg_parser.parse_and_return_dictionary(
            "AGENT", CategoricalAgent.required_params)

        self.cfg["NB_ATOMS"] = self.cfg_parser["HEAD.NB_ATOMS"]

        self.Z, self.delta_z = np.linspace(self.cfg["V_MIN"],
                                           self.cfg["V_MAX"],
                                           self.cfg["NB_ATOMS"],
                                           retstep=True)

        self.loss = self.build_loss_op()

        self.prepare(self.loss)
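As a standalone illustration of the support computed above, here is the same np.linspace call with the usual C51 defaults (V_MIN=-10, V_MAX=10, 51 atoms); these numbers are placeholders, not taken from this repository's config.
import numpy as np

Z, delta_z = np.linspace(-10.0, 10.0, 51, retstep=True)
# Z: the 51 evenly spaced atom values spanning [V_MIN, V_MAX]
# delta_z: the constant gap between neighbouring atoms (0.4 here)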
Example #8
    def __init__(self, config_parser: ConfigurationManager):
        self.cfg = config_parser.parse_and_return_dictionary(
            "NETWORK", GeneralNetwork.required_params)

        if "STATE_DIMENSIONS" in self.cfg:
            obs_shape = [int(i) for i in self.cfg["STATE_DIMENSIONS"]]
        else:
            obs_shape = config_parser.parsed_json["DEFAULT_OBS_DIMS"]

        # Input
        self.x = tf.placeholder(name="state",
                                dtype=tf.float32,
                                shape=(None, *obs_shape))

        # Convolutional Layers
        self.conv_outputs = []
        for CONV_LAYER_SPEC in self.cfg["CONVOLUTIONAL_LAYERS_SPEC"]:
            self.conv_outputs.append(
                layers.conv2d(
                    name="conv_layer_" + str(len(self.conv_outputs) + 1),
                    inputs=self.x
                    if len(self.conv_outputs) == 0 else self.conv_outputs[-1],
                    filters=CONV_LAYER_SPEC["filters"],
                    kernel_size=CONV_LAYER_SPEC["kernel_size"],
                    strides=CONV_LAYER_SPEC["strides"],
                    activation=tf.nn.relu))

        if len(self.cfg["CONVOLUTIONAL_LAYERS_SPEC"]) > 0:
            # Flatten
            self.flattened_conv_output = tf.layers.flatten(
                name="conv_output_flattener", inputs=self.conv_outputs[-1])

            last_out = self.flattened_conv_output
        else:
            last_out = self.x

        # Hidden Layer
        self.dense_outputs = []
        for DENSE_LAYER_SPEC in self.cfg["DENSE_LAYERS_SPEC"]:
            self.dense_outputs.append(
                layers.dense(
                    name="fc_layer_" + str(len(self.dense_outputs) + 1),
                    inputs=last_out if len(
                        self.dense_outputs) == 0 else self.dense_outputs[-1],
                    units=DENSE_LAYER_SPEC,
                    activation=tf.nn.relu))

        self.last_op = self.dense_outputs[-1]
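A hypothetical "NETWORK" block matching the keys the loops above read: each convolutional spec is a dict and each dense spec is just a unit count, as the code's indexing implies. The concrete numbers are the usual Atari-style choices, used purely as an example.
example_network_cfg = {
    "NETWORK": {
        "STATE_DIMENSIONS": [84, 84, 4],   # optional; otherwise DEFAULT_OBS_DIMS is used
        "CONVOLUTIONAL_LAYERS_SPEC": [
            {"filters": 32, "kernel_size": 8, "strides": 4},
            {"filters": 64, "kernel_size": 4, "strides": 2},
            {"filters": 64, "kernel_size": 3, "strides": 1}
        ],
        "DENSE_LAYERS_SPEC": [512]         # one fully connected layer with 512 units
    }
}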
Example #9
def get_session(cfg_params: ConfigurationManager):
    required_params = []

    tf_params = cfg_params.parse_and_return_dictionary("TENSORFLOW",
                                                       required_params)

    config = tf.ConfigProto()

    if "ALLOW_GPU_GROWTH" not in tf_params or not tf_params["ALLOW_GPU_GROWTH"]:
        config.gpu_options.allow_growth = True

    if "INTRA_OP_PARALLELISM" in tf_params:
        config.intra_op_parallelism_threads = tf_params["INTRA_OP_PARALLELISM"]
    if "INTER_OP_PARALLELISM" in tf_params:
        config.inter_op_parallelism_threads = tf_params["INTER_OP_PARALLELISM"]

    return tf.Session(config=config)
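The corresponding "TENSORFLOW" block would look roughly like the following; only the three keys checked above matter, and the values are placeholders.
example_tf_cfg = {
    "TENSORFLOW": {
        "ALLOW_GPU_GROWTH": True,      # grow GPU memory on demand instead of pre-allocating
        "INTRA_OP_PARALLELISM": 4,     # thread pool for a single op
        "INTER_OP_PARALLELISM": 2      # thread pool across independent ops
    }
}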
Example #10
    def __init__(self, config_parser: ConfigurationManager,
                 net: network.GeneralNetwork):
        super().__init__(config_parser)

        self.cfg = config_parser.parse_and_return_dictionary(
            "HEAD", FixedAtomsDistributionalHead.required_params)

        # State-Action-Value Distributions (as a flattened vector)
        self.flattened_dist = layers.dense(name="flattened_dists",
                                           inputs=net.last_op,
                                           units=self.num_actions *
                                           self.cfg["NB_ATOMS"],
                                           activation=None)

        # Unflatten
        self.q_dist = tf.reshape(self.flattened_dist,
                                 [-1, self.num_actions, self.cfg["NB_ATOMS"]],
                                 name="per_action_dist")

        self.q = tf.reduce_mean(self.q_dist, axis=-1)

        self.greedy_action = tf.cast(tf.squeeze(tf.argmax(self.q, axis=-1)),
                                     dtype=tf.int32)
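A quick NumPy sketch of the reshape above, with made-up sizes, showing how the flat dense output becomes one distribution per action.
import numpy as np

batch, num_actions, nb_atoms = 4, 6, 51
flat = np.zeros((batch, num_actions * nb_atoms))          # output of the dense layer
per_action = flat.reshape(batch, num_actions, nb_atoms)   # one NB_ATOMS-long distribution per action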
Example #11
    def __init__(self, config_parser: ConfigurationManager):
        self.cfg = config_parser.parse_and_return_dictionary(
            "EXPERIENCE_REPLAY", ExperienceReplay.required_params)

        self.memory = deque(maxlen=self.cfg["EXPERIENCE_REPLAY_SIZE"])
Example #12
    def __init__(self, config_parser: ConfigurationManager,
                 net: network.GeneralNetwork):
        super().__init__(config_parser)

        self.cfg = config_parser.parse_and_return_dictionary(
            "HEAD", IQNHead.required_params)

        self.psi = net.last_op

        self.num_samples = tf.placeholder(dtype=tf.int32,
                                          shape=[],
                                          name="num_samples")

        # Preprocessed tau (choose number of samples and pass through beta as necessary)
        from action_policy.distorted_expectation import distorted_expectation, get_uniform_dist
        self.uniform_tau = get_uniform_dist(psi=self.psi,
                                            N_placeholder=self.num_samples)

        self.distorted_tau = distorted_expectation(
            config_parser, psi=self.psi, N_placeholder=self.num_samples)
        import math as m
        pi = tf.constant(m.pi)

        cos_embed = tf.layers.Dense(units=self.cfg["EMBEDDING_SIZE"],
                                    activation=tf.nn.relu,
                                    name="cosine_embedding")

        self.distorted_tau_phi = cos_embed(
            tf.cos(
                tf.einsum(
                    'bn,j->bnj', self.distorted_tau,
                    tf.range(self.cfg["EMBEDDING_SIZE"], dtype=tf.float32)) *
                pi))

        # Element-wise product of each distorted-tau embedding with the state embedding psi.
        mul_distorted = tf.einsum('bnj,bj->bnj', self.distorted_tau_phi,
                                  self.psi)

        ###

        self.uniform_tau_phi = cos_embed(
            tf.cos(
                tf.einsum(
                    'bn,j->bnj', self.uniform_tau,
                    tf.range(self.cfg["EMBEDDING_SIZE"], dtype=tf.float32)) *
                pi))

        mul_uniform = tf.einsum('bnj,bj->bnj', self.uniform_tau_phi, self.psi)

        ###

        q_dist_layer = tf.layers.Dense(units=self.num_actions,
                                       activation=None,
                                       name="q_dist")

        self.q_dist = tf.transpose(q_dist_layer(mul_uniform), perm=[0, 2, 1])

        self.q_undistorted = tf.reduce_mean(self.q_dist, axis=-1)

        self.q_dist_distorted = tf.transpose(q_dist_layer(mul_distorted),
                                             perm=[0, 2, 1])

        self.q = tf.reduce_mean(self.q_dist_distorted, axis=-1)

        self.greedy_action = tf.cast(tf.squeeze(tf.argmax(self.q, axis=-1)),
                                     dtype=tf.int32)
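For intuition about the cosine features built above, here is an equivalent NumPy computation with assumed shapes (the batch size, sample count N, and embedding size are made-up values):
import numpy as np

batch, N, embedding_size = 2, 8, 64
tau = np.random.uniform(size=(batch, N))             # quantile samples in [0, 1)
j = np.arange(embedding_size, dtype=np.float32)      # embedding indices 0..embedding_size-1
phi_input = np.cos(np.einsum('bn,j->bnj', tau, j) * np.pi)  # shape [batch, N, embedding_size]
# phi_input is what the "cosine_embedding" dense layer consumes for each tau sample.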