Example #1
    def _create_placeholders_for_vars(self, scope, graph_keys=tf.GraphKeys.TRAINABLE_VARIABLES):
     var_list = tf.get_collection(graph_keys, scope=scope)
     placeholders = []
     for var in var_list:
         var_name = remove_scope_from_name(var.name, scope.split('/')[0])
         placeholders.append((var_name, tf.placeholder(tf.float32, shape=var.shape, name="%s_ph" % var_name)))
     return OrderedDict(placeholders)
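Every example below keys its parameter dict through remove_scope_from_name, whose definition is not part of this listing. A minimal sketch of the behaviour these snippets assume (strip the enclosing scope prefix and the ':0' tensor suffix) might look like:

def remove_scope_from_name(var_name, scope):
    # hypothetical reconstruction, e.g. 'policy/mean_network/kernel:0'
    # with scope 'policy' -> 'mean_network/kernel'
    name = var_name.split(scope + '/', 1)[-1]  # drop the leading scope prefix
    return name.split(':')[0]                  # drop the ':0' tensor suffix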
Example #2
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # build the actual policy network
            args = create_rnn(
                name='rnn',
                cell_type=self._cell_type,
                output_dim=self.output_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=self.output_nonlinearity,
                input_dim=(
                    None,
                    None,
                    self.input_dim,
                ),
                input_var=self.input_var,
                state_var=self.state_var,
            )

            self.input_var, self.state_var, self.output_var, self.next_state_var, self.cell = args

        current_scope = tf.get_default_graph().get_name_scope()
        trainable_policy_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
        self._params = OrderedDict([
            (remove_scope_from_name(var.name, current_scope), var)
            for var in trainable_policy_vars
        ])
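At sampling time a recurrent policy built like this is usually stepped one timestep at a time, feeding the previous hidden state back in. A hedged sketch of a single step, assuming an instance named policy, an observation array obs, a live default session, and a single-tensor hidden state (e.g. a GRU cell):

import tensorflow as tf

sess = tf.get_default_session()
# initial hidden state from the cell built by create_rnn
hidden = sess.run(policy.cell.zero_state(batch_size=1, dtype=tf.float32))
out, next_hidden = sess.run(
    [policy.output_var, policy.next_state_var],
    feed_dict={policy.input_var: obs[None, None, :],  # (batch=1, time=1, input_dim)
               policy.state_var: hidden})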
Example #3
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # build the actual policy network

            self.input_var, self.cnn_output_var = create_cnn(
                name='cnn',
                hidden_nonlinearity=self.hidden_nonlinearity,
                kernel_sizes=self.kernel_sizes,
                strides=self.strides,
                num_filters=self.num_filters,
                input_dim=(None, ) + self.input_dim,
                input_var=self.input_var,
            )
            _, self.output_var = create_mlp(
                name='mlp',
                output_dim=self.output_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=self.output_nonlinearity,
                input_var=self.cnn_output_var,
                batch_normalization=self.batch_normalization,
            )

        current_scope = tf.get_default_graph().get_name_scope()
        trainable_policy_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
        self._params = OrderedDict([
            (remove_scope_from_name(var.name, current_scope), var)
            for var in trainable_policy_vars
        ])
Example #4
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name):
            # build the actual policy network
            rnn_outs = create_rnn(name='mean_network',
                                  cell_type=self._cell_type,
                                  output_dim=self.action_dim,
                                  hidden_sizes=self.hidden_sizes,
                                  hidden_nonlinearity=self.hidden_nonlinearity,
                                  output_nonlinearity=self.output_nonlinearity,
                                  input_dim=(None, None, self.obs_dim,),
                                  )

            self.obs_var, self.hidden_var, self.mean_var, self.next_hidden_var, self.cell = rnn_outs

            with tf.variable_scope("log_std_network"):
                log_std_var = tf.get_variable(name='log_std_var',
                                              shape=(1, self.action_dim,),
                                              dtype=tf.float32,
                                              initializer=tf.constant_initializer(self.init_log_std),
                                              trainable=self.learn_std
                                              )

                self.log_std_var = tf.maximum(log_std_var, self.min_log_std, name='log_std')

            # symbolically define sampled action and distribution
            self._dist = DiagonalGaussian(self.action_dim)

            # save the policy's trainable variables in dicts
            current_scope = tf.get_default_graph().get_name_scope()
            trainable_policy_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.policy_params = OrderedDict([(remove_scope_from_name(var.name, current_scope), var) for var in trainable_policy_vars])
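Here log_std_var is a single (1, action_dim) variable rather than a network output, so it broadcasts over the batch and time dimensions of mean_var when actions are sampled. A sketch of the reparameterized sample this sets up (Example #7 below builds the same expression explicitly), assuming it is added inside build_graph:

            # hypothetical reparameterized sample from the diagonal Gaussian above;
            # the (1, action_dim) log-std broadcasts against the mean's leading dims
            eps = tf.random_normal(shape=tf.shape(self.mean_var))
            self.action_var = self.mean_var + eps * tf.exp(self.log_std_var)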
Example #5
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        # with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        # build the actual policy network
        self.input_var, self.output_var = create_mlp(
            name='mlp',
            output_dim=self.output_dim,
            hidden_sizes=self.hidden_sizes,
            hidden_nonlinearity=self.hidden_nonlinearity,
            output_nonlinearity=self.output_nonlinearity,
            input_dim=(
                None,
                self.input_dim,
            ),
            input_var=self.input_var,
            batch_normalization=self.batch_normalization,
        )

        # save the policy's trainable variables in dicts
        # current_scope = tf.get_default_graph().get_name_scope()
        current_scope = self.name
        trainable_policy_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
        self._params = OrderedDict([
            (remove_scope_from_name(var.name, current_scope), var)
            for var in trainable_policy_vars
        ])
Example #6
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name):
            # build the actual policy network
            rnn_outs = create_rnn(
                name='probs_network',
                cell_type=self._cell_type,
                output_dim=self.action_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                input_dim=(
                    None,
                    None,
                    self.obs_dim,
                ),
            )

            self.obs_var, self.hidden_var, self.probs_var, self.next_hidden_var, self.cell = rnn_outs

            # symbolically define sampled action and distribution
            self._dist = Discrete(self.action_dim)

            # save the policy's trainable variables in dicts
            current_scope = tf.get_default_graph().get_name_scope()
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.policy_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])
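For the discrete case, the softmax output probs_var is what gets sampled from at rollout time. A minimal sketch, assuming probs is the (1, 1, action_dim) array already fetched from policy.probs_var for a single step:

import numpy as np

# hypothetical action draw for one timestep of one trajectory
action = np.random.choice(policy.action_dim, p=probs[0, 0])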
Example #7
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # build the actual policy network
            self.obs_var, self.mean_var = create_mlp(
                name='mean_network',
                output_dim=self.action_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=self.output_nonlinearity,
                input_dim=(
                    None,
                    self.obs_dim,
                ),
            )

            with tf.variable_scope("log_std_network", reuse=tf.AUTO_REUSE):
                log_std_var = tf.get_variable(
                    name='log_std_var',
                    shape=(
                        1,
                        self.action_dim,
                    ),
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(self.init_log_std),
                    trainable=self.learn_std,
                )

                self.log_std_var = tf.maximum(log_std_var,
                                              self.min_log_std,
                                              name='log_std')

            # symbolically define sampled action and distribution
            self.action_var = self.mean_var + tf.random_normal(
                shape=tf.shape(self.mean_var)) * tf.exp(log_std_var)
            self._dist = DiagonalGaussian(self.action_dim)

            # save the policy's trainable variables in dicts
            # current_scope = tf.get_default_graph().get_name_scope()
            current_scope = self.name
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.policy_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])

            self.policy_params_ph = self._create_placeholders_for_vars(
                scope=self.name + "/mean_network")
            log_std_network_phs = self._create_placeholders_for_vars(
                scope=self.name + "/log_std_network")
            self.policy_params_ph.update(log_std_network_phs)
            self.policy_params_keys = self.policy_params_ph.keys()
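The placeholders returned by _create_placeholders_for_vars are typically paired with tf.assign ops so new parameter values can be pushed into the graph from numpy. A sketch of that pattern, assuming (as the code above implies) that policy_params and policy_params_ph share the same keys:

            # hypothetical setter built from the placeholder dict above
            assign_ops = [tf.assign(self.policy_params[key], self.policy_params_ph[key])
                          for key in self.policy_params_keys]

            # later, with new_values a dict mapping the same keys to numpy arrays:
            # feed_dict = {self.policy_params_ph[key]: new_values[key]
            #              for key in self.policy_params_keys}
            # tf.get_default_session().run(assign_ops, feed_dict=feed_dict)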
Example #8
    def __init__(self,
                 latent_dim,
                 img_size=(64, 64),
                 channels=3,
                 lr=1e-4,
                 step=0,
                 batch_size=32):
        """
        VAE Class

        Args:
            ds (int): dimension of the latent space
            img_size (tuple (int, int)): size of the image
            channels (int): number of channels [3 for rgb, 1 for grayscale]
            sess (tf.Session): tf.Session
            lr (float): learning rate
            out_dir (Path): output of the data directory
            step (int): initial training step
            batch_size (int): batch size
        """
        Serializable.quick_init(self, locals())

        self.latent_dim = latent_dim
        self.img_size = img_size
        self.n_channels = channels
        self.do = img_size[0] * img_size[1] * channels
        self.batch_shape = [-1, img_size[0], img_size[1], channels]
        self.lr = lr
        self.batch_size = batch_size

        self._assign_ops = None
        self._assign_phs = None

        self.writer = tf.summary.FileWriter(logger.get_dir())
        with tf.variable_scope('vae', reuse=tf.AUTO_REUSE):

            self.initialize_placeholders()
            self.initialize_objective()
            self.global_step = step

            with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
                self.z = tf.placeholder(tf.float32, [None, self.latent_dim])
                self.decoder = self.decode_sym(self.z).probs

            current_scope = tf.get_default_graph().get_name_scope()
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.vae_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])
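Because the decoder is wired to a standalone self.z placeholder, sampling from a trained model just means feeding standard-normal latents through it. A usage sketch, assuming an instance named vae, a live default session, and that decode_sym(...).probs returns flattened per-pixel probabilities:

import numpy as np
import tensorflow as tf

# hypothetical: decode a batch of 16 random latent vectors into images
z = np.random.randn(16, vae.latent_dim).astype(np.float32)
flat = tf.get_default_session().run(vae.decoder, feed_dict={vae.z: z})
images = flat.reshape(vae.batch_shape)  # (-1, height, width, channels)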
Example #9
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # build the actual policy network
            self.obs_var, self.output_var = create_mlp(
                name='network',
                output_dim=2 * self.action_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=self.output_nonlinearity,
                input_dim=(
                    None,
                    self.obs_dim,
                ),
            )

            self.mean_var, self.log_std_var = tf.split(self.output_var,
                                                       2,
                                                       axis=-1)

            self.log_std_var = tf.clip_by_value(self.log_std_var,
                                                LOG_SIG_MIN,
                                                LOG_SIG_MAX,
                                                name='log_std')

            # symbolically define sampled action and distribution
            self.action_var = self.mean_var + tf.random_normal(
                shape=tf.shape(self.mean_var)) * tf.exp(self.log_std_var)

            self._dist = DiagonalGaussian(self.action_dim,
                                          squashed=self.squashed)

            # save the policy's trainable variables in dicts
            current_scope = self.name
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.policy_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])

            self.policy_params_ph = self._create_placeholders_for_vars(
                scope=self.name + "/network")
            self.policy_params_keys = self.policy_params_ph.keys()
Example #10
    def build_graph(self):
        """
        Builds computational graph for the value function
        """
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            # build the actual policy network
            self.input_var, self.output_var = create_mlp(name='v_network',
                                                     output_dim=1,
                                                     hidden_sizes=self.hidden_sizes,
                                                     hidden_nonlinearity=self.hidden_nonlinearity,
                                                     output_nonlinearity=self.output_nonlinearity,
                                                     input_dim=(None, self.obs_dim + self.action_dim,),
                                                     )

            # save the policy's trainable variables in dicts
            # current_scope = tf.get_default_graph().get_name_scope()
            current_scope = self.name
            trainable_vfun_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.vfun_params = OrderedDict([(remove_scope_from_name(var.name, current_scope), var) for var in trainable_vfun_vars])

            self.vfun_params_ph = self._create_placeholders_for_vars(scope=self.name + "/v_network")
            self.vfun_params_keys = self.vfun_params_ph.keys()
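The input dimension obs_dim + action_dim implies that observations and actions are concatenated along the last axis before being fed in. A hedged usage sketch, assuming an instance named qfun and numpy batches obs_batch of shape (batch, obs_dim) and act_batch of shape (batch, action_dim):

import numpy as np
import tensorflow as tf

# hypothetical value query for a batch of (observation, action) pairs
inputs = np.concatenate([obs_batch, act_batch], axis=-1)
values = tf.get_default_session().run(qfun.output_var,
                                      feed_dict={qfun.input_var: inputs})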
Example #11
    def build_graph(self):
        """
        Builds computational graph for policy
        """
        with tf.variable_scope(self.name):

            instr_embedding = self._get_instr_embedding(obs.instr)
            x = torch.transpose(torch.transpose(obs.image, 1, 3), 2, 3)
            x = self.image_conv(x)
            for controler in self.controllers:
                x = controler(x, instr_embedding)
            x = F.relu(self.film_pool(x))
            x = x.reshape(x.shape[0], -1)

            hidden = (memory[:, :self.semi_memory_size],
                      memory[:, self.semi_memory_size:])
            hidden = self.memory_rnn(x, hidden)
            embedding = hidden[0]
            memory = torch.cat(hidden, dim=1)

            embedding = torch.cat((embedding, instr_embedding), dim=1)
            x = self.actor(embedding)
            dist = Categorical(logits=F.log_softmax(x, dim=1))

            # memory_rnn = tf.nn.rnn_cell.LSTMCell(self.memory_dim)  # TODO: set these

            rnn_outs = create_rnn(
                name='probs_network',
                cell_type='lstm',
                output_dim=self.action_dim,
                hidden_sizes=self.hidden_sizes,
                hidden_nonlinearity=self.hidden_nonlinearity,
                output_nonlinearity=tf.nn.softmax,
                input_dim=(
                    None,
                    None,
                    self.obs_dim,
                ),
            )

            # obs_var, hidden_var, probs_var, next_hidden_var, cell = create_rnn(name='probs_network2',
            #                       cell_type='lstm',
            #                       output_dim=self.action_dim,
            #                       hidden_sizes=self.hidden_sizes,
            #                       hidden_nonlinearity=self.hidden_nonlinearity,
            #                       output_nonlinearity=tf.nn.softmax,
            #                       input_dim=(None, None, self.obs_dim,),
            #                       )

            from tensorflow.keras import datasets, layers, models
            import matplotlib.pyplot as plt

            # x = "INPUT"
            # input_conv = nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
            #
            # model = models.Sequential()
            # model.add(layers.Conv2D(128, (2,2), padding='SAME', input_shape=(8, 8, 3)))
            # model.add(layers.BatchNormalization())
            # model.add(layers.ReLU())
            # model.add(layers.MaxPooling2D((2, 2), strides=2))
            # model.add(layers.Conv2D(128, (3, 3), padding='SAME'))
            # model.add(layers.BatchNormalization())
            # model.add(layers.ReLU())
            # model.add(layers.MaxPooling2D((2, 2), strides=2))
            # model.compile('rmsprop', 'mse')

            film_pool = layers.MaxPooling2D((2, 2), strides=2)
            word_embedding = layers.Embedding(
                obs_space["instr"], self.instr_dim)  # TODO: get this!
            gru_dim = self.instr_dim  # TODO: set this
            gru_dim //= 2
            # tf.keras.layers.GRUCell takes only the hidden size; batch_first and
            # bidirectional are torch.nn.GRU options, not Keras ones
            instr_rnn = tf.keras.layers.GRUCell(gru_dim)
            self.final_instr_dim = self.instr_dim

            # tf.keras.layers.LSTMCell takes only the state size; the input size is
            # inferred from the tensors the cell is applied to
            memory_rnn = tf.keras.layers.LSTMCell(self.memory_dim)  # TODO: set this

            # Resize image embedding
            self.embedding_size = self.semi_memory_size  # TODO: set this
            # if self.use_instr and not "filmcnn" in arch:
            #     self.embedding_size += self.final_instr_dim  # TODO: consider keeping this!

            num_module = 2
            self.controllers = []
            for ni in range(num_module):
                if ni < num_module - 1:
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim,
                        out_features=128,
                        in_channels=128,
                        imm_channels=128)
                else:
                    mod = ExpertControllerFiLM(
                        in_features=self.final_instr_dim,
                        out_features=self.image_dim,
                        in_channels=128,
                        imm_channels=128)
                self.controllers.append(mod)
                self.add_module('FiLM_Controler_' + str(ni), mod)
            #
            # Define actor's model
            self.actor = nn.Sequential(nn.Linear(self.embedding_size, 64),
                                       nn.Tanh(),
                                       nn.Linear(64, action_space.n))

            # rnn_outs = create_rnn(name='probs_network',
            #                       cell_type=self._cell_type,
            #                       output_dim=self.action_dim,
            #                       hidden_sizes=self.hidden_sizes,
            #                       hidden_nonlinearity=self.hidden_nonlinearity,
            #                       output_nonlinearity=tf.nn.softmax,
            #                       input_dim=(None, None, self.obs_dim,),
            #                       )

            self.obs_var, self.hidden_var, self.probs_var, self.next_hidden_var, self.cell = rnn_outs
            # averaging with the second `probs_network2` RNN is disabled while that
            # network is commented out above, since its `probs_var` is never defined here
            # self.probs_var = (self.probs_var + probs_var) / 2

            # symbolically define sampled action and distribution
            self._dist = Discrete(self.action_dim)

            # save the policy's trainable variables in dicts
            current_scope = tf.get_default_graph().get_name_scope()
            trainable_policy_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=current_scope)
            self.policy_params = OrderedDict([
                (remove_scope_from_name(var.name, current_scope), var)
                for var in trainable_policy_vars
            ])