コード例 #1
0
    def __init__(self, name=None, dim_out=None, observation_dimensions=None, num_actions=None, gate_fun=None,
                 fully_connected_layers=None, SEED=None, model_dictionary=None, reward_path=False):
        """
        Builds the TensorFlow graph of a fully connected network made of one or two parallel paths
        whose outputs are concatenated and fed to a shared output layer.

        The settings come from model_dictionary when it is given; otherwise a new dictionary is
        assembled from the keyword arguments. Either way it is kept in self._model_dictionary.

        name - prefix handed to layers.fully_connected for every layer.
        dim_out - one list of layer output sizes per path (two lists are read when reward_path is set).
        observation_dimensions - dimensions of one observation; their product is the input width.
        num_actions - number of outputs of the output layer.
        gate_fun - gate function handed to every hidden layer.
        fully_connected_layers - number of hidden layers built in each path.
        SEED - seed handed to every weight initializer (always taken from the argument).
        model_dictionary - stored settings; when not None it replaces every argument except SEED.
        reward_path - when true, a second path is built next to the first one.
        """
        super().__init__()

        # Model dictionary for saving and restoring
        if model_dictionary is not None:
            self._model_dictionary = model_dictionary
        else:
            self._model_dictionary = {
                "model_name": name,
                "output_dims": dim_out,
                "observation_dimensions": observation_dimensions,
                "num_actions": num_actions,
                "gate_fun": gate_fun,
                "full_layers": fully_connected_layers,
                "reward_path": reward_path,
            }
        settings = self._model_dictionary

        # From here on only the stored settings are used
        fully_connected_layers = settings["full_layers"]
        name = settings["model_name"]
        dim_out = settings["output_dims"]
        gate_fun = settings["gate_fun"]
        reward_path = settings["reward_path"]

        # Reward path flag: one path by default, two when the reward path is enabled
        path_count = 2 if reward_path else 1

        # Dimensions: every path starts from the flattened observation
        input_width = np.prod(settings["observation_dimensions"])
        dim_in = [[input_width] + dim_out[path][:-1] for path in range(path_count)]
        actions = settings["num_actions"]
        row_and_action_number = 2

        # Placeholders
        self.x_frames = tf.placeholder(tf.float32, shape=(None, input_width))           # input frames
        self.x_actions = tf.placeholder(tf.int32, shape=(None, row_and_action_number))  # input actions
        self.y = tf.placeholder(tf.float32, shape=None)                                 # target
        self.isampling = tf.placeholder(tf.float32, shape=None)                         # importance sampling term

        # Variables for training: one list of [W, b, W, b, ...] per path
        self.train_vars = []
        path_outputs = []

        for path in range(path_count):
            # Fully connected layers of this path, chained starting at the input frames
            path_vars = []
            layer_input = self.x_frames
            for depth in range(fully_connected_layers):
                fan_in = dim_in[path][depth]
                fan_out = dim_out[path][depth]
                initializer = tf.random_normal_initializer(stddev=1.0 / np.sqrt(fan_in), seed=SEED)
                W, b, _, layer_input = layers.fully_connected(
                    name, "full_{}_{}".format(depth + 1, path), layer_input, fan_in, fan_out,
                    initializer, gate_fun)
                path_vars += [W, b]
            path_outputs.append(layer_input)
            self.train_vars.append(path_vars)

        combined_y_hat = tf.concat(path_outputs, 1)

        # Output layer: fully connected, its input width is the sum of the last layer of every path
        final_dim_in = sum(dim_out[path][-1] for path in range(path_count))
        W, b, _, self.y_hat = layers.fully_connected(
            name, "output_layer", combined_y_hat, final_dim_in, actions,
            tf.random_normal_initializer(stddev=1.0 / np.sqrt(final_dim_in), seed=SEED), linear_transfer)
        # The output layer variables are shared, so they are appended to the list of every path
        for path_vars in self.train_vars:
            path_vars.extend([W, b])

        # Pick the entries of y_hat indexed by x_actions, then scale by the importance sampling term
        # (each x_actions row presumably holds a (row, action) pair - confirm against the caller)
        selected_y_hat = tf.multiply(tf.gather_nd(self.y_hat, self.x_actions), self.isampling)
        scaled_target = tf.multiply(self.y, self.isampling)
        # Temporal difference error
        self.td_error = tf.subtract(selected_y_hat, scaled_target)
        # Loss: sum of the squared TD errors
        self.train_loss = tf.reduce_sum(tf.pow(self.td_error, 2))
コード例 #2
0
    def __init__(self, config=None, name="default", SEED=None):
        """
        Builds the TensorFlow graph of a network made of RBF convolutional layers, RBF fully
        connected layers and a linear output layer.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        dim_out                 list            [10,10,10]          the output dimensions of each layer, i.e. neurons
        filter_dims             list            [2,2]               the dimensions of each filter
        strides                 list            [4, 2]              strides use by each convolutional layer
        obs_dims                list            [4,84,84]           the dimensions of the observations seen by the agent
        num_actions             int             2                   the number of actions available to the agent
        gate_fun                tf gate fun     tf.nn.relu          the gate function used across the whole network
        conv_layers             int             2                   number of convolutional layers
        full_layers             int             1                   number of fully connected layers
        max_pool                bool            True                indicates whether to max pool between each conv layer
        frames_format           str             "NCHW"              Specifies the format of the frames fed to the network
        norm_factor             float           1                   Normalizes the frames by the value provided

        Other Parameters:
        name - name of the network. Should be a string.
        SEED - seed handed to every weight initializer.

        Any frames_format other than "NHWC" is handled as "NCHW".
        """
        super().__init__()

        assert isinstance(config, Config)
        self.dim_out = check_attribute_else_default(config, 'dim_out', [10,10,10])
        self.filter_dims = check_attribute_else_default(config, 'filter_dims', [2,2])
        self.strides = check_attribute_else_default(config, 'strides', [4,2])
        channels, height, width = check_attribute_else_default(config, 'obs_dims', [4, 84, 84])
        num_actions = check_attribute_else_default(config, 'num_actions', 2)
        self.gate_fun = check_attribute_else_default(config, 'gate_fun', tf.nn.relu)
        self.convolutional_layers = check_attribute_else_default(config, 'conv_layers', 2)
        self.fully_connected_layers = check_attribute_else_default(config, 'full_layers', 1)
        self.max_pool = check_attribute_else_default(config, 'max_pool', True)
        self.frames_format = check_attribute_else_default(config, 'frames_format', 'NCHW')
        self.norm_factor = check_attribute_else_default(config, 'norm_factor', 1.)

        self.name = name
        row_and_action_number = 2
        total_layers = self.convolutional_layers + self.fully_connected_layers

        # Pooling window/strides must follow the layout of the tensor being pooled, and the layout
        # has to be passed to tf.nn.max_pool explicitly because its default data_format is "NHWC".
        channels_last = self.frames_format == "NHWC"
        if channels_last:
            pool_data_format = "NHWC"
            pool_window = [1, 2, 2, 1]
        else:  # Format = "NCHW"
            pool_data_format = "NCHW"
            pool_window = [1, 1, 2, 2]

        # Placeholders
        self.x_frames = tf.placeholder(tf.float32, shape=(None, channels, height, width))   # input frames
        # self.x_frames is rebound to the normalized tensor; the rest of the graph starts from it
        self.x_frames = tf.divide(self.x_frames, self.norm_factor)
        self.x_actions = tf.placeholder(tf.int32, shape=(None, row_and_action_number))      # input actions
        self.y = tf.placeholder(tf.float32, shape=None)                                     # target

        # Variables for training
        self.train_vars = []

        # Convolutional layers
        dim_in_conv = [channels] + self.dim_out[:self.convolutional_layers - 1]
        current_s_hat = self.x_frames
        if channels_last:
            # Frames are declared channels-first above, so move the channels to the last axis
            current_s_hat = tf.transpose(current_s_hat, [0, 2, 3, 1])

        for i in range(self.convolutional_layers):
            # Shape of the RBF centers, derived from the input height/width and the layer stride
            if channels_last:
                out_height = np.ceil(current_s_hat.shape[1]._value / self.strides[i])
                out_width = np.ceil(current_s_hat.shape[2]._value / self.strides[i])
                centers_shape = np.array((out_height, out_width, self.dim_out[i]), dtype=np.uint32)
            else:  # Format = "NCHW"
                out_height = np.ceil(current_s_hat.shape[2]._value / self.strides[i])
                out_width = np.ceil(current_s_hat.shape[3]._value / self.strides[i])
                centers_shape = np.array((self.dim_out[i], out_height, out_width), dtype=np.uint32)
            centers = tf.constant(np.random.uniform(0, 1, size=centers_shape), dtype=tf.float32)
            stddev = tf.constant(1/self.dim_out[i], dtype=tf.float32)
            # layer n: convolutional
            W, b, z_hat, r_hat = layers.convolution_2d_rbf(
                self.name, "conv_rbf_"+str(i+1), current_s_hat, self.filter_dims[i], dim_in_conv[i], self.dim_out[i],
                tf.random_normal_initializer(stddev=1.0 / np.sqrt(self.filter_dims[i]**2 * dim_in_conv[i] + 1),
                                             seed=SEED),
                center=centers, stddev=stddev, stride=self.strides[i], format=self.frames_format)
            # layer n + 1/2: pool (2x2 window over height and width only)
            if self.max_pool:
                s_hat = tf.nn.max_pool(
                    r_hat, ksize=pool_window, strides=pool_window, padding="SAME",
                    data_format=pool_data_format)
            else:
                s_hat = r_hat

            current_s_hat = s_hat
            self.train_vars.extend([W, b])

        # Fully connected layers
        shape = current_s_hat.get_shape().as_list()
        current_y_hat = tf.reshape(current_s_hat, [-1, shape[1] * shape[2] * shape[3]])
        # shape[-3:] are the last 3 dimensions, i.e. everything but the batch dimension
        dim_in_fully = [np.prod(shape[-3:])] + self.dim_out[self.convolutional_layers: total_layers-1]
        dim_out_fully = self.dim_out[self.convolutional_layers:]
        for j in range(self.fully_connected_layers):
            centers_shape = (dim_in_fully[j], dim_out_fully[j])
            centers = tf.constant(np.random.uniform(low=0, high=1, size=centers_shape), dtype=tf.float32)
            stddev = tf.constant(1/dim_out_fully[j], dtype=tf.float32)

            # layer n + m: fully connected
            W, b, z_hat, y_hat = layers.fully_connected_rbf(
                self.name, "full_rbf_"+str(j+1), current_y_hat, dim_in_fully[j], dim_out_fully[j],
                tf.random_normal_initializer(stddev=1.0 / np.sqrt(dim_in_fully[j]), seed=SEED),
                center=centers, stddev=stddev)

            current_y_hat = y_hat
            self.train_vars.extend([W, b])

        # Output layer: fully connected, fed with self.dim_out[-1] inputs
        W, b, z_hat, self.y_hat = layers.fully_connected(
            self.name, "output_layer", current_y_hat, self.dim_out[-1], num_actions,
            tf.random_normal_initializer(stddev=1.0 / np.sqrt(self.dim_out[-1]), seed=SEED), linear_transfer)
        self.train_vars.extend([W, b])
        # Wrapped in an outer list, so train_vars is a list holding a single list of variables
        self.train_vars = [self.train_vars]

        # Pick the entries of y_hat indexed by x_actions (no importance sampling is applied here)
        y_hat = tf.gather_nd(self.y_hat, self.x_actions)
        y = self.y
        # Temporal Difference Error
        self.td_error = tf.subtract(y, y_hat)
        # Loss: sum of the squared TD errors
        self.train_loss = tf.reduce_sum(tf.pow(self.td_error, 2))
コード例 #3
0
    def __init__(self, name=None, dim_out=None, filter_dims=None, observation_dimensions=None, num_actions=None,
                 gate_fun=None, convolutional_layers=None, fully_connected_layers=None, SEED=None,
                 model_dictionary=None, eta=1.0, reward_path=False):
        """
        Builds the TensorFlow graph of a convolutional network made of one or two parallel paths
        (convolution + max pooling, then fully connected layers) feeding a shared output layer,
        trained on a squared TD error plus an L2 penalty.

        The settings come from model_dictionary when it is given; otherwise a new dictionary is
        assembled from the keyword arguments. Either way it is kept in self._model_dictionary.

        name - prefix handed to the layer constructors.
        dim_out - one list of layer output sizes per path (two lists are read when reward_path is set).
        filter_dims - filter size of each convolutional layer.
        observation_dimensions - (height, width, channels) of one observation.
        num_actions - number of outputs of the output layer.
        gate_fun - gate function handed to every hidden layer.
        convolutional_layers - number of convolutional layers built in each path.
        fully_connected_layers - number of fully connected layers built in each path.
        SEED - seed handed to every weight initializer (always taken from the argument).
        model_dictionary - stored settings; when not None it replaces every argument except SEED.
        eta - factor multiplying the L2 penalty in the training loss.
        reward_path - when true, a second path is built next to the first one.
        """
        super().__init__()
        if model_dictionary is not None:
            self._model_dictionary = model_dictionary
        else:
            self._model_dictionary = {
                "model_name": name,
                "output_dims": dim_out,
                "filter_dims": filter_dims,
                "observation_dimensions": observation_dimensions,
                "num_actions": num_actions,
                "gate_fun": gate_fun,
                "conv_layers": convolutional_layers,
                "full_layers": fully_connected_layers,
                "eta": eta,
                "reward_path": reward_path,
            }
        settings = self._model_dictionary

        # From here on only the stored settings are used
        eta = settings["eta"]
        fully_connected_layers = settings["full_layers"]
        convolutional_layers = settings["conv_layers"]
        name = settings["model_name"]
        dim_out = settings["output_dims"]
        gate_fun = settings["gate_fun"]
        filter_dims = settings["filter_dims"]
        reward_path = settings["reward_path"]

        # Reward path flag: one path by default, two when the reward path is enabled
        path_count = 2 if reward_path else 1

        # Dimensions
        height, width, channels = settings["observation_dimensions"]
        actions = settings["num_actions"]
        row_and_action_number = 2
        total_layers = convolutional_layers + fully_connected_layers

        # Placeholders
        self.x_frames = tf.placeholder(tf.float32, shape=(None, height, width, channels))   # input frames
        self.x_actions = tf.placeholder(tf.int32, shape=(None, row_and_action_number))      # input actions
        self.y = tf.placeholder(tf.float32, shape=None)                                     # target
        self.isampling = tf.placeholder(tf.float32, shape=None)                             # importance sampling term

        # Variables for training: one list of [W, b, W, b, ...] per path
        self.train_vars = []
        path_outputs = []

        for path in range(path_count):
            path_vars = []

            # Convolutional layers, each followed by a 2x2 max pool
            conv_dim_in = [channels] + dim_out[path][:convolutional_layers - 1]
            feature_maps = self.x_frames
            for layer in range(convolutional_layers):
                filter_size = filter_dims[layer]
                fan_in = conv_dim_in[layer]
                fan_out = dim_out[path][layer]
                initializer = tf.random_normal_initializer(
                    stddev=1.0 / np.sqrt(filter_size**2 * fan_in + 1), seed=SEED)
                # layer n: convolutional
                W, b, _, activation = layers.convolution_2d(
                    name, "conv_{}_{}".format(layer + 1, path), feature_maps, filter_size, fan_in, fan_out,
                    initializer, gate_fun)
                # layer n + 1/2: pool
                feature_maps = tf.nn.max_pool(
                    activation, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
                path_vars += [W, b]

            # Fully connected layers, fed with the flattened feature maps
            shape = feature_maps.get_shape().as_list()
            layer_input = tf.reshape(feature_maps, [-1, shape[1] * shape[2] * shape[3]])
            # shape[-3:] are the last 3 dimensions, i.e. everything but the batch dimension
            full_dim_in = [np.prod(shape[-3:])] + dim_out[path][convolutional_layers: total_layers-1]
            full_dim_out = dim_out[path][convolutional_layers:]
            for layer in range(fully_connected_layers):
                fan_in = full_dim_in[layer]
                fan_out = full_dim_out[layer]
                initializer = tf.random_normal_initializer(stddev=1.0 / np.sqrt(fan_in), seed=SEED)
                # layer n + m: fully connected
                W, b, _, layer_input = layers.fully_connected(
                    name, "full_{}_{}".format(layer + 1, path), layer_input, fan_in, fan_out,
                    initializer, gate_fun)
                path_vars += [W, b]

            path_outputs.append(layer_input)
            self.train_vars.append(path_vars)

        combined_y_hat = tf.concat(path_outputs, 1)

        # Output layer: fully connected, its input width is the sum of the last layer of every path
        final_dim_in = sum(dim_out[path][-1] for path in range(path_count))
        W, b, _, self.y_hat = layers.fully_connected(
            name, "output_layer", combined_y_hat, final_dim_in, actions,
            tf.random_normal_initializer(stddev=1.0 / np.sqrt(final_dim_in), seed=SEED), linear_transfer)
        # The output layer variables are shared, so they are appended to the list of every path
        for path_vars in self.train_vars:
            path_vars.extend([W, b])

        # Scale the whole output and the target by the importance sampling term.
        # NOTE(review): no gather on self.x_actions is applied here, so the error covers every
        # output of y_hat and the x_actions placeholder is unused by the loss - confirm intended.
        scaled_y_hat = tf.multiply(self.y_hat, self.isampling)
        scaled_target = tf.multiply(self.y, self.isampling)
        # Temporal difference error
        self.td_error = tf.subtract(scaled_y_hat, scaled_target)
        self.squared_td_error = tf.reduce_sum(tf.pow(self.td_error, 2))

        # Regularizer: sum of the L2 losses of every variable in every per-path list.
        # NOTE(review): the output layer W and b sit in each list, so with two paths they are
        # added twice - confirm intended.
        regularizer = sum(tf.nn.l2_loss(variable) for path_vars in self.train_vars for variable in path_vars)

        # Loss
        self.train_loss = self.squared_td_error + (eta * regularizer)
コード例 #4
0
    def __init__(self, config=None, name="default", SEED=None):
        """
        Builds the TensorFlow graph of a fully connected network followed by a linear output layer.

        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        dim_out                 list            [10,10,10]          the output dimensions of each layer, i.e. neurons
        obs_dims                list            [2]                 the dimensions of the observations seen by the agent
        num_actions             int             3                   the number of actions available to the agent
        gate_fun                tf gate fun     tf.nn.relu          the gate function used across the whole network
        full_layers             int             3                   number of fully connected layers

        Other Parameters:
        name - name of the network. Should be a string.
        SEED - seed handed to every weight initializer.
        """
        super().__init__()

        assert isinstance(config, Config)
        self.dim_out = check_attribute_else_default(config, 'dim_out', [10,10,10])
        self.obs_dims = check_attribute_else_default(config, 'obs_dims', [2])
        self.num_actions = check_attribute_else_default(config, 'num_actions', 3)
        self.gate_fun = check_attribute_else_default(config, 'gate_fun', tf.nn.relu)
        self.full_layers = check_attribute_else_default(config, 'full_layers', 3)

        self.name = name

        # Dimensions: the first layer reads the flattened observation, each later layer reads
        # the output of the previous one
        dim_in = [np.prod(self.obs_dims)] + self.dim_out[:-1]
        input_width = dim_in[0]
        last_hidden_width = self.dim_out[-1]
        row_and_action_number = 2

        # Placeholders
        self.x_frames = tf.placeholder(tf.float32, shape=(None, input_width))           # input frames
        self.x_actions = tf.placeholder(tf.int32, shape=(None, row_and_action_number))  # input actions
        self.y = tf.placeholder(tf.float32, shape=None)                                 # target

        # Variables for training
        collected_vars = []

        # Fully connected layers, chained starting at the input frames
        layer_input = self.x_frames
        for depth in range(self.full_layers):
            fan_in = dim_in[depth]
            fan_out = self.dim_out[depth]
            initializer = tf.random_normal_initializer(stddev=1.0 / np.sqrt(fan_in), seed=SEED)
            W, b, _, layer_input = layers.fully_connected(
                self.name, "full_{}".format(depth + 1), layer_input, fan_in, fan_out,
                initializer, self.gate_fun)
            collected_vars += [W, b]

        # Output layer: fully connected, fed with self.dim_out[-1] inputs
        W, b, _, self.y_hat = layers.fully_connected(
            self.name, "output_layer", layer_input, last_hidden_width, self.num_actions,
            tf.random_normal_initializer(stddev=1.0 / np.sqrt(last_hidden_width), seed=SEED), linear_transfer)
        collected_vars += [W, b]
        # Stored as a list holding a single list of variables
        self.train_vars = [collected_vars]

        # Pick the entries of y_hat indexed by x_actions (no importance sampling is applied here)
        selected_y_hat = tf.gather_nd(self.y_hat, self.x_actions)
        # Temporal difference error
        self.td_error = tf.subtract(self.y, selected_y_hat)
        # Loss: sum of the squared TD errors
        self.train_loss = tf.reduce_sum(tf.pow(self.td_error, 2))