Example #1
    def __init__(self, env, args):
        # TODO: Create a suitable model.
        #
        # Apart from the model defined in `reinforce`, define also another
        # model for computing baseline (with one output, using a dense layer
        # without activation).
        #
        # Using Adam optimizer with given `args.learning_rate` for both models
        # is a good default.
        inputs = tf.keras.Input(shape=env.observation_space.shape)
        baseline_inputs = tf.keras.Input(shape=env.observation_space.shape)

        x = tf.keras.layers.Conv2D(filters=args.cnn_filters,
                                   kernel_size=8,
                                   strides=2)(inputs)
        x = tf.keras.layers.ReLU()(x)
        # x = tf.keras.layers.Conv2D(filters=args.cnn_filters * 2, kernel_size=4, strides=2)(x)
        # x = tf.keras.layers.ReLU()(x)
        policy_features = tf.keras.layers.Flatten()(x)

        x = tf.keras.layers.Conv2D(filters=args.cnn_filters,
                                   kernel_size=8,
                                   strides=2)(baseline_inputs)
        x = tf.keras.layers.ReLU()(x)
        # x = tf.keras.layers.Conv2D(filters=args.cnn_filters * 2, kernel_size=4, strides=2)(x)
        # x = tf.keras.layers.ReLU()(x)
        baseline_features = tf.keras.layers.Flatten()(x)

        hidden = policy_features
        hidden_b = baseline_features
        for i in range(args.hidden_layers):
            hidden = tf.keras.layers.Dense(args.hidden_layer_size,
                                           activation=args.activation,
                                           kernel_regularizer='l2')(hidden)
            hidden = tf.keras.layers.Dropout(args.dropout)(hidden)

            hidden_b = tf.keras.layers.Dense(args.hidden_layer_size,
                                             activation=args.activation,
                                             kernel_regularizer='l2')(hidden_b)
            hidden_b = tf.keras.layers.Dropout(args.dropout)(hidden_b)

        out = tf.keras.layers.Dense(env.action_space.n,
                                    activation='softmax')(hidden)
        out_b = tf.keras.layers.Dense(1)(hidden_b)
        out_b = tf.keras.layers.Flatten()(out_b)

        self._model = tf.keras.Model(inputs, out)
        # self._model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(), optimizer=tf.keras.optimizers.Adam(0, clipnorm=args.grad_clipping))
        self._model.compile(
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            optimizer=RAdamOptimizer(args.learning_rate))

        self._baseline_model = tf.keras.Model(baseline_inputs, out_b)

        loss = tf.keras.losses.Huber()
        # self._baseline_model.compile(loss=loss, optimizer=tf.keras.optimizers.Adam(0, clipnorm=args.grad_clipping))
        self._baseline_model.compile(loss=loss,
                                     optimizer=RAdamOptimizer(
                                         args.learning_rate))
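
A minimal sketch of how these two compiled models would be driven in a
REINFORCE-with-baseline update; the `train` method below and its `states`,
`actions`, and `returns` batches are assumptions, not part of the original
example:

    def train(self, states, actions, returns):
        # Advantage = return - baseline; it weights the policy's
        # sparse categorical cross-entropy via `sample_weight`.
        baseline = self._baseline_model.predict(states, verbose=0)[:, 0]
        self._model.train_on_batch(states, actions,
                                   sample_weight=returns - baseline)
        # The baseline model regresses the returns (Huber loss, compiled above).
        self._baseline_model.train_on_batch(states, returns)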
Example #2
    def __init__(self, env, args):
        # TODO: Analogously to paac, your model should contain two components:
        # - actor, which predicts distribution over the actions
        # - critic, which predicts the value function
        #
        # The given states are tile encoded, so they are integral indices of
        # tiles intersecting the state. Therefore, you should convert them
        # to a dense encoding (one-hot-like, with `args.tiles` ones).
        # (Or you can even use embeddings for better efficiency.)
        #
        # The actor computes `mus` and `sds`, each of shape [batch_size, actions].
        # Compute each independently using states as input, adding a fully connected
        # layer with `args.hidden_layer_size` units and ReLU activation. Then:
        # - For `mus`, add a fully connected layer with `actions` outputs.
        #   To avoid `mus` moving out of the required range, you should apply a
        #   properly scaled `tf.tanh` activation.
        # - For `sds`, add a fully connected layer with `actions` outputs
        #   and `tf.nn.softplus` activation.
        #
        # The critic should be a usual one, passing states through one hidden
        # layer with `args.hidden_layer_size` ReLU units and then predicting
        # the value function.
        policy_in = tf.keras.Input(shape=(args.tiles,))
        x = tf.keras.layers.Embedding(env.observation_space.nvec[-1],
                                      args.hidden_layer_size,
                                      input_length=args.tiles)(policy_in)
        x = tf.keras.layers.GlobalAveragePooling1D(
            data_format="channels_last")(x)
        x = tf.keras.layers.Dense(args.hidden_layer_size, activation='relu')(x)

        self.mu = tf.keras.layers.Dense(
            1, activation=lambda x: tf.constant(2.0) * tf.tanh(x))(x)
        self.sd = tf.keras.layers.Dense(
            1, activation=tf.keras.activations.softplus)(x)
        policy_out = tf.keras.layers.Concatenate()([self.mu, self.sd])

        self.actor = tf.keras.Model(policy_in, policy_out)
        self.policy_optimizer = RAdamOptimizer(args.learning_rate)

        value_in = tf.keras.Input(shape=(args.tiles,))
        x = tf.keras.layers.Embedding(env.observation_space.nvec[-1],
                                      args.hidden_layer_size,
                                      input_length=args.tiles)(value_in)
        x = tf.keras.layers.GlobalAveragePooling1D(
            data_format="channels_last")(x)
        x = tf.keras.layers.Dense(args.hidden_layer_size, activation='relu')(x)
        value_out = tf.keras.layers.Dense(1)(x)
        self.critic = tf.keras.Model(value_in, value_out)
        self.critic.compile(optimizer=RAdamOptimizer(args.learning_rate),
                            loss=tf.keras.losses.MeanSquaredError())
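
Because the actor above is not compiled, it would be updated manually with the
stored `policy_optimizer`; a minimal sketch under that assumption (Example #6
shows a fuller variant):

    def policy_update(self, states, actions, advantages):
        # Assumed helper: one policy-gradient step using the RAdam optimizer.
        with tf.GradientTape() as tape:
            mus, sds = tf.unstack(self.actor(states), axis=1)
            dist = tfp.distributions.Normal(mus, sds)
            loss = -tf.reduce_mean(dist.log_prob(actions) * advantages)
        grads = tape.gradient(loss, self.actor.trainable_variables)
        self.policy_optimizer.apply_gradients(
            zip(grads, self.actor.trainable_variables))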
Example #3
File: paac.py Project: kubic71/mff
    def __init__(self, env, args):
        policy_in = tf.keras.Input(shape=env.observation_space.shape)
        x = tf.keras.layers.Dense(args.hidden_layer_size,
                                  activation='relu')(policy_in)
        policy_out = tf.keras.layers.Dense(env.action_space.n,
                                           activation='softmax')(x)

        self.policy = tf.keras.Model(policy_in, policy_out)

        self.policy.compile(
            optimizer=RAdamOptimizer(args.learning_rate),
            loss=tf.keras.losses.SparseCategoricalCrossentropy())

        value_in = tf.keras.Input(shape=env.observation_space.shape)
        x = tf.keras.layers.Dense(args.hidden_layer_size,
                                  activation='relu')(value_in)
        value_out = tf.keras.layers.Dense(1)(x)

        self.value = tf.keras.Model(value_in, value_out)

        self.value.compile(optimizer=RAdamOptimizer(args.learning_rate),
                           loss=tf.keras.losses.MeanSquaredError())
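
A minimal PAAC-style update sketch for these two compiled models; the `train`
method and its arguments are assumptions, not taken from paac.py:

    def train(self, states, actions, returns):
        # Advantage = return - predicted value; it weights the policy's
        # cross-entropy loss via `sample_weight`.
        advantages = returns - self.value.predict(states, verbose=0)[:, 0]
        self.policy.train_on_batch(states, actions, sample_weight=advantages)
        self.value.train_on_batch(states, returns)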
Example #4
 def fit(self,
         dishsize=[250, 150, 50, 20],
         misdishsize=[200, 100, 50, 20],
         glr=3e-6,
         dlr=4e-6):
     tf.reset_default_graph()
     self.X = tf.placeholder(tf.float32, [None, self.n_dim], name="X")
     self.missX = tf.placeholder(tf.float32, [None, self.raw_dim],
                                 name="missX")
     self.Z = tf.placeholder(tf.float32, [None, self.g_dim], name="Z")
     self.Conditions = tf.placeholder(tf.float32, [None, self.n_control],
                                      name="Condition")
     self.batch_size = tf.placeholder(tf.int32, None, name="BatchSize")
     self.is_training = tf.placeholder(tf.bool)
      ## Generate data
     values = generator(self.Z,
                        self.n_dim,
                        self.Conditions,
                        bn=True,
                        is_training=self.is_training,
                        onehot_key_store=self.onehot_key_store)
     out, G_sample, G_x, G_arg_x = values
      ## Check for missing values in the generated data
     G_sample_stop = tf.stop_gradient(G_sample)
     G_miss = MissGenerator(G_sample_stop,
                            self.raw_dim,
                            self.Conditions,
                            reuse=False)
     delta = tf.constant(0.5)
      ## Replace missing entries in the real data with 1.5
     imputedX = tf.where(tf.math.is_nan(self.X),
                         tf.ones_like(self.X) * 1.5, self.X)
      ## Build a missing indicator by thresholding the generated missingness probabilities
     miss_indicator = tf.where(G_miss > delta, tf.ones_like(G_miss),
                               tf.zeros_like(G_miss))
     self.miss_indicator2, NumMissGenerator = MissGeneratorByVar(
         miss_indicator, self.overall_where)
      ## Missing values: G_sample (as probabilities), G_x (one-hot)
     miss_G_sample = G_sample * (1 - self.miss_indicator2) + tf.constant(
         [1.5]) * NumMissGenerator
     self.miss_G_sample_eval = G_x * (
         1 - self.miss_indicator2) + tf.constant([1.5]) * NumMissGenerator
     _, real_logit = discriminator(imputedX,
                                   self.Conditions,
                                   gpu_n=1,
                                   hsize=dishsize,
                                   reuse=False)
     _, fake_logit = discriminator(miss_G_sample,
                                   self.Conditions,
                                   gpu_n=1,
                                   hsize=dishsize,
                                   reuse=True)
     miss_real_logit = MissDiscriminator(self.missX,
                                         self.Conditions,
                                         gpu_n=0,
                                         hsize=misdishsize,
                                         reuse=False)
     miss_fake_logit = MissDiscriminator(G_miss,
                                         self.Conditions,
                                         gpu_n=0,
                                         hsize=misdishsize,
                                         reuse=True)
     _ = [
         tf.summary.histogram(i.name, i)
         for i in tf.get_collection("weight_variables")
     ]
     ###########################################
     e = tf.random_uniform([self.batch_size, 1], 0, 1)
     x_hat = e * imputedX + (1 - e) * miss_G_sample
     grad = tf.gradients(
         discriminator(x_hat,
                       self.Conditions,
                       hsize=dishsize,
                       reuse=True,
                       gpu_n=1), x_hat)[0]
     slopes = tf.sqrt(1e-8 + tf.reduce_sum(tf.square(grad), axis=[1]))
     gradient_penalty = 5 * tf.reduce_mean((slopes - 1.)**2)
     loss_func = "gan-gp"
     ##lsgan | agan | gan | gan-gp | dragan | hinge
     with tf.variable_scope("Discriminator_Loss"):
         with tf.variable_scope("Original_Loss"):
             self.disc_loss = discriminator_loss(Ra=True,
                                                 loss_func=loss_func,
                                                 real=real_logit,
                                                 fake=fake_logit)
             self.disc_loss += gradient_penalty
         with tf.variable_scope("Indicator_Loss"):
             self.miss_disc_loss = discriminator_loss(Ra=True,
                                                      loss_func=loss_func,
                                                      real=miss_real_logit,
                                                      fake=miss_fake_logit)
         # if loss_func in ["wgan-gp", "gan-gp"] :
     with tf.variable_scope("Generator_Loss"):
         with tf.variable_scope("Original_Loss"):
             self.gen_loss = generator_loss(Ra=True,
                                            loss_func=loss_func,
                                            real=real_logit,
                                            fake=fake_logit)
         with tf.variable_scope("Indicator_Loss"):
             self.miss_gen_loss = generator_loss(Ra=True,
                                                 loss_func=loss_func,
                                                 real=miss_real_logit,
                                                 fake=miss_fake_logit)
     ######################################################################
     tf.summary.scalar(f"gradient_penalty_loss", gradient_penalty)
     tf.summary.scalar(f"disc_loss", self.disc_loss)
     tf.summary.scalar(f"miss_disc_loss", self.miss_disc_loss)
     tf.summary.scalar(f"generate_loss", self.gen_loss)
     tf.summary.scalar(f"miss_generate_loss", self.miss_gen_loss)
     t_vars = tf.trainable_variables()
     self.global_step = tf.get_variable(
         'global_step', [],
         initializer=tf.constant_initializer(0),
         trainable=False)
     gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope="GAN/Generator")
     disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope="GAN/Discriminator")
     miss_gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                       scope="GAN/MissGenerator")
     miss_disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope="GAN/MissDiscriminator")
     #         self.gen_loss = gen_loss + miss_gen_loss
     # tf.train.RMSPropOptimizer
     glearning_rate = tf.train.exponential_decay(
         glr,
         self.global_step,
         decay_steps=100,
         decay_rate=0.999,
         staircase=False,
     )
     dlearning_rate = tf.train.exponential_decay(
         dlr,
         self.global_step,
         decay_steps=100,
         decay_rate=0.999,
         staircase=False,
     )
     with tf.variable_scope("Optimizer"):
         self.gen_step = RAdamOptimizer(
             learning_rate=glearning_rate).minimize(
                 self.gen_loss,
                 var_list=gen_vars)  # G Train step + miss_gen_vars
         self.miss_gen_step = RAdamOptimizer(
             learning_rate=glearning_rate).minimize(
                 self.miss_gen_loss, var_list=miss_gen_vars)  # G Train step
         # + miss_disc_vars
         self.disc_step = tf.train.RMSPropOptimizer(
             learning_rate=dlearning_rate).minimize(
                 self.disc_loss, var_list=disc_vars)  # D Train step
         #     miss_gen_step = RAdamOptimizer(learning_rate=learning_rate).minimize(miss_gen_loss,
         #                                                                     var_list = + gen_vars ) # G Train step
         self.miss_disc_step = RAdamOptimizer(
             learning_rate=dlearning_rate).minimize(
                 self.miss_disc_loss,
                 var_list=miss_disc_vars)  # D Train step
     print("fitting!!")
Example #5
    def fit(self):
        self.update_dict_(self.env)
        self.update_dict_(self.__dict__)
        select_w_init = np.random.randint(0, 2, size=1)[0]
        seed_n = np.random.randint(1, 1000, size=1)[0]
        #         self.patience = patience
        #         self.cut_off = cutoff
        #         self.ck_max_norm = max_norm
        #         self.ck_SN = SN
        #         self.Gact = Gact
        #         self.Dact = Dact
        #         self.epoch = epoch + 1
        #         trainX, trainM = TrainSet
        #         testX, testM = ValidSet
        #         self.p_hint = hint
        #         self.mb_size = mb_size
        #         self.alpha = alpha
        self.relu_w_init = [
            tf.keras.initializers.he_uniform(seed=seed_n),
            tf.keras.initializers.he_normal(seed=seed_n)
        ][select_w_init]
        self.tanh_w_init = [
            tf.keras.initializers.glorot_normal(seed=seed_n),
            tf.keras.initializers.glorot_uniform(seed=seed_n)
        ][select_w_init]
        self.s_elu_w_init = [
            tf.keras.initializers.lecun_normal(seed=seed_n),
            tf.keras.initializers.lecun_uniform(seed=seed_n)
        ][select_w_init]
        self.nomal_w_init = tf.keras.initializers.truncated_normal(seed=seed_n)
        self.ck_max_norm = self.max_norm
        self.ck_SN = self.SN
        self.p_hint = self.hint
        weight_regularizer = self.weight_regularizer
        lr = self.lr
        trainX, trainM = self.TrainSet
        testX, testM = self.ValidSet
        self.trainX = typecheck(trainX)
        ##################### The matrix is 1 - missing => marks the non-missing part
        self.trainM = typecheck(1 - 1 * trainM)
        self.testX = typecheck(testX)
        self.testM = typecheck(1 - 1 * testM)
        self.total_X = np.concatenate((trainX, testX))
        self.total_M = np.concatenate((self.trainM, self.testM))
        self.Train_No, self.Dim = self.trainX.shape
        self.total = self.Dim

        ## modeling
        tf.reset_default_graph()
        self.define()
        ## M marks the entries that are NOT missing!
        ## Use the real values where observed and the generated values where missing
        result = self.generator(self.New_X)
        Logit, G_sample, OnehotResult, ArgResult = result

        if self.fac_var == []:
            for v, col in enumerate(self.in_var):
                value = tf.slice(G_sample, [0, v], [-1, 1])  #
                tf.summary.histogram("Input_" + col.replace(" ", "_"), value)
        else:
            self.ArgResult = tf.identity(ArgResult, name="Arg_G")
            for v, col in enumerate(self.in_var):
                value = tf.slice(ArgResult, [0, v], [-1, 1])  #
                tf.summary.histogram("Input_" + col.replace(" ", "_"), value)
        Hat_New_X = self.M * self.New_X + (1 - self.M) * G_sample

        ## M marks the missing part
        # imputed = self.New_X * (1-self.M) + G_sample * self.M
        self.Hat_New_X = tf.identity(Hat_New_X, name="imputed")
        self.G_sample = tf.identity(G_sample, name="generated")
        # Discriminator
        D_prob = self.discriminator(Hat_New_X, self.H)
        t_vars = tf.trainable_variables()
        if weight_regularizer > 0:
            G_L2 = []
            D_L2 = []
            for v in t_vars:
                if re.search('Weight', v.name):
                    if re.search("Generator", v.name):
                        print("G : ", v.name)
                        G_L2.append(tf.nn.l2_loss(v))
                    elif re.search("Discriminator", v.name):
                        print("D : ", v.name)
                        D_L2.append(tf.nn.l2_loss(v))
            self.Generator_W_l2 = tf.add_n(G_L2) * weight_regularizer
            self.Discriminator_W_l2 = tf.add_n(D_L2) * weight_regularizer
        else:
            self.Generator_W_l2 = tf.constant(0.0)
            self.Discriminator_W_l2 = tf.constant(0.0)
        for var in t_vars:
            tf.summary.histogram(var.op.name, var)
        self.D_1 = -self.M * tf.log(D_prob + 1e-8)
        self.D_2 = -(1 - self.M) * tf.log(1. - D_prob + 1e-8)
        self.D_3 = tf.reduce_mean(self.D_1 + self.D_2)
        self.D_loss = self.D_3 + self.Discriminator_W_l2
        self.G_loss1 = -tf.reduce_mean((1 - self.M) * tf.log(D_prob + 1e-8))
        ## non-missing part -> missing part
        if self.fac_var == []:
            Logit = self.G_final_Act(Logit)
        else:
            pass
        self.MSE_train_loss = self.CatNumEmb_Loss(Logit, self.X, self.M,
                                                  self.cond, self.key_cond,
                                                  self.weight_info)
        #         self.MSE_train_loss_2 =self.CatNumEmb_Loss(Logit , self.X , self.M , seg = "Test")
        self.G_loss = \
            self.G_loss1 + self.alpha * self.MSE_train_loss + self.Generator_W_l2
        #         self.MSE_test_loss =\
        #         tf.reduce_mean( tf.square( (1-self.M) * self.X - (1-self.M) * G_sample ) )

        with tf.variable_scope("Original/Loss"):
            tf.summary.scalar("Total_G_loss", self.G_loss)
            tf.summary.scalar("Not_Missing_Loss", self.MSE_train_loss)
            tf.summary.scalar("D_Loss", self.D_loss)

        self.clip_all_weights = tf.get_collection("max_norm")
        gen_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope="GAN/Generator")
        disc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope="GAN/Discriminator")
        self.global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.lr = tf.train.cosine_decay_restarts(lr,
                                                 self.global_step,
                                                 first_decay_steps=100,
                                                 t_mul=1.2,
                                                 m_mul=0.95,
                                                 alpha=0.5)
        self.D_solver = RAdamOptimizer(learning_rate=self.lr,
                                       beta1=0.5,
                                       beta2=0.5,
                                       weight_decay=0.0).minimize(
                                           self.D_loss, var_list=disc_vars)
        self.G_solver = tf.train.RMSPropOptimizer(
            learning_rate=self.lr).minimize(self.G_loss, var_list=gen_vars)
        comment = "{} \n{}{}{}\n{}".format("=" * 56, " " * 24, "Model fitting",
                                           " " * 24, "=" * 56)
        print(comment)
Example #6
class Network:
    def __init__(self, env, args):
        # TODO: Analogously to paac, your model should contain two components:
        # - actor, which predicts distribution over the actions
        # - critic, which predicts the value function
        #
        # The given states are tile encoded, so they are integral indices of
        # tiles intersecting the state. Therefore, you should convert them
        # to a dense encoding (one-hot-like, with `args.tiles` ones).
        # (Or you can even use embeddings for better efficiency.)
        #
        # The actor computes `mus` and `sds`, each of shape [batch_size, actions].
        # Compute each independently using states as input, adding a fully connected
        # layer with `args.hidden_layer_size` units and ReLU activation. Then:
        # - For `mus`, add a fully connected layer with `actions` outputs.
        #   To avoid `mus` moving out of the required range, you should apply a
        #   properly scaled `tf.tanh` activation.
        # - For `sds`, add a fully connected layer with `actions` outputs
        #   and `tf.nn.softplus` activation.
        #
        # The critic should be a usual one, passing states through one hidden
        # layer with `args.hidden_layer_size` ReLU units and then predicting
        # the value function.
        policy_in = tf.keras.Input(shape=(args.tiles,))
        x = tf.keras.layers.Embedding(env.observation_space.nvec[-1],
                                      args.hidden_layer_size,
                                      input_length=args.tiles)(policy_in)
        x = tf.keras.layers.GlobalAveragePooling1D(
            data_format="channels_last")(x)
        x = tf.keras.layers.Dense(args.hidden_layer_size, activation='relu')(x)

        self.mu = tf.keras.layers.Dense(
            1, activation=lambda x: tf.constant(2.0) * tf.tanh(x))(x)
        self.sd = tf.keras.layers.Dense(
            1, activation=tf.keras.activations.softplus)(x)
        policy_out = tf.keras.layers.Concatenate()([self.mu, self.sd])

        self.actor = tf.keras.Model(policy_in, policy_out)
        self.policy_optimizer = RAdamOptimizer(args.learning_rate)

        value_in = tf.keras.Input(shape=(args.tiles,))
        x = tf.keras.layers.Embedding(env.observation_space.nvec[-1],
                                      args.hidden_layer_size,
                                      input_length=args.tiles)(value_in)
        x = tf.keras.layers.GlobalAveragePooling1D(
            data_format="channels_last")(x)
        x = tf.keras.layers.Dense(args.hidden_layer_size, activation='relu')(x)
        value_out = tf.keras.layers.Dense(1)(x)
        self.critic = tf.keras.Model(value_in, value_out)
        self.critic.compile(optimizer=RAdamOptimizer(args.learning_rate),
                            loss=tf.keras.losses.MeanSquaredError())

    @wrappers.typed_np_function(np.float32, np.float32, np.float32)
    @tf.function
    def train(self, states, actions, returns):

        with tf.GradientTape() as critic_tape:
            pred_values = self.critic(states)
            critic_loss = self.critic.loss(returns, pred_values)

        critic_grads = critic_tape.gradient(critic_loss,
                                            self.critic.trainable_variables)
        self.critic.optimizer.apply_gradients(
            zip(critic_grads, self.critic.trainable_variables))

        with tf.GradientTape() as policy_tape:
            pred_actions = self.actor(states)
            mus = pred_actions[:, 0]
            sds = pred_actions[:, 1]

            # mus = tf.clip_by_value(mus, clip_value_min=-1, clip_value_max=1)
            # sds = tf.clip_by_value(sds, clip_value_min=0, clip_value_max=1)
            action_distribution = tfp.distributions.Normal(mus, sds)

            advantage = returns - pred_values[:, 0]
            nll = -action_distribution.log_prob(actions[:, 0])
            loss = nll * advantage
            policy_loss = tf.math.reduce_mean(loss)

            # entropy penalization
            entropy = tf.math.reduce_mean(tf.math.log(sds))
            # policy_loss -= args.beta * entropy

            # print(policy_loss)

        # print("Policy_loss", policy_loss)
        # print(self.actor.trainable_variables)
        policy_grad = policy_tape.gradient(policy_loss,
                                           self.actor.trainable_variables)
        # print(policy_grad)
        self.policy_optimizer.apply_gradients(
            zip(policy_grad, self.actor.trainable_variables))

        # TODO: Run the model on given `states` and compute
        # sds, mus and predicted values. Then create `action_distribution` using
        # `tfp.distributions.Normal` class and computed mus and sds.
        # In PyTorch, the corresponding class is `torch.distributions.normal.Normal`.
        #
        # TODO: Compute total loss as a sum of three losses:
        # - negative log likelihood of the `actions` in the `action_distribution`
        #   (using the `log_prob` method). You then need to sum the log probabilities
        #   of actions in a single batch example (using `tf.math.reduce_sum` with `axis=1`).
        #   Finally multiply the resulting vector by (returns - predicted values)
        #   and compute its mean. Note that the gradient must not flow through
        #   the predicted values (you can use `tf.stop_gradient` if necessary).
        # - negative value of the distribution entropy (use `entropy` method of
        #   the `action_distribution`) weighted by `args.entropy_regularization`.
        # - mean square error of the `returns` and predicted values.
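
        # A sketch (assumed, not part of the original) of the combined
        # single-tape loss the TODO above describes; `entropy_regularization`
        # would come from `args`:
        #
        #   values = self.critic(states)[:, 0]
        #   dist = tfp.distributions.Normal(mus, sds)
        #   nll = tf.math.reduce_sum(-dist.log_prob(actions), axis=1)
        #   loss = (tf.math.reduce_mean(
        #               nll * tf.stop_gradient(returns - values))
        #           - entropy_regularization * tf.math.reduce_mean(dist.entropy())
        #           + tf.math.reduce_mean(tf.square(returns - values)))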

    @wrappers.typed_np_function(np.float32)
    @tf.function
    def predict_actions(self, states):
        # TODO: Return predicted action distributions (mus and sds).
        mus_sds = tf.transpose(self.actor(states), (1, 0))
        # return tf.clip_by_value(mus_sds[0], -1, 1), tf.clip_by_value(mus_sds[1], 0, 1)
        return mus_sds

    @wrappers.typed_np_function(np.float32)
    @tf.function
    def predict_values(self, states):
        # TODO: Return predicted state-action values.
        return self.critic(states)[:, 0]
Example #7
    def create_graph(self):
        """Creates graph for training"""
        RSE_network.is_training = True
        self.base_cost = 0.0
        self.accuracy = 0
        num_sizes = len(self.bins)
        self.cost_list = []
        sum_weight = 0
        self.bin_losses = []
        saturation_loss = []
        total_mean_loss = 0

        # Create all bins and calculate losses for them

        with vs.variable_scope("var_lengths"):
            for seqLength, itemCount, ind in zip(self.bins, self.count_list,
                                                 range(num_sizes)):
                x_in = tf.compat.v1.placeholder(cnf.input_type,
                                                [itemCount, seqLength])
                y_in = tf.compat.v1.placeholder("int64",
                                                [itemCount, seqLength])
                self.x_input.append(x_in)
                self.y_input.append(y_in)
                RSE_network.saturation_costs = []
                RSE_network.gate_mem = []
                RSE_network.reset_mem = []
                RSE_network.candidate_mem = []
                RSE_network.prev_mem_list = []
                RSE_network.residual_list = []
                RSE_network.info_alpha = []

                if self.use_two_gpus:
                    device = "/device:GPU:" + (
                        "0" if seqLength >= self.bins[-1] else "1")
                    with tf.device(device):
                        c, a, mem1, logits, per_item_cost, _, _ = self.create_loss(
                            x_in, y_in, seqLength)
                else:
                    c, a, mem1, logits, per_item_cost, _, _ = self.create_loss(
                        x_in, y_in, seqLength)

                weight = 1.0

                sat_cost = (
                    tf.add_n(RSE_network.saturation_costs) /
                    (seqLength * len(RSE_network.saturation_costs) * itemCount)
                    if len(RSE_network.saturation_costs) > 0 else 0)
                saturation_loss.append(sat_cost * weight)
                self.bin_losses.append(per_item_cost)
                self.base_cost += c * weight
                sum_weight += weight
                self.accuracy += a
                self.cost_list.append(c)

                mean_loss = tf.reduce_mean(input_tensor=tf.square(mem1))
                total_mean_loss += mean_loss

                tf.compat.v1.get_variable_scope().reuse_variables()

        # calculate the total loss
        self.base_cost /= sum_weight
        self.accuracy /= num_sizes
        total_mean_loss /= num_sizes
        tf.compat.v1.summary.scalar("base/loss", self.base_cost)
        tf.compat.v1.summary.scalar("base/error", 1 - self.accuracy)
        tf.compat.v1.summary.scalar("base/error_longest", 1 - a)
        tf.compat.v1.summary.histogram("logits", logits)

        if cnf.task != "musicnet":
            if RSE_network.gate_mem:
                gate_img = tf.stack(RSE_network.gate_mem)
                gate_img = gate_img[:, 0:1, :, :]
                gate_img = tf.cast(gate_img * 255, dtype=tf.uint8)
                tf.compat.v1.summary.image("gate",
                                           tf.transpose(a=gate_img,
                                                        perm=[3, 0, 2, 1]),
                                           max_outputs=16)
            if RSE_network.reset_mem:
                reset_img = tf.stack(RSE_network.reset_mem)
                reset_img = tf.clip_by_value(reset_img, -2, 2)
                tf.compat.v1.summary.histogram("reset", reset_img)
                reset_img = reset_img[:, 0:1, :, :]
                tf.compat.v1.summary.image(
                    "reset",
                    tf.transpose(a=reset_img, perm=[3, 0, 2, 1]),
                    max_outputs=16,
                )
            if RSE_network.prev_mem_list:
                prev_img = tf.stack(RSE_network.prev_mem_list)
                prev_img = prev_img[:, 0:1, :, :]
                prev_img = tf.cast(prev_img * 255, dtype=tf.uint8)
                tf.compat.v1.summary.image(
                    "prev_mem",
                    tf.transpose(a=prev_img, perm=[3, 0, 2, 1]),
                    max_outputs=16,
                )
            if RSE_network.residual_list:
                prev_img = tf.stack(RSE_network.residual_list)
                prev_img = prev_img[:, 0:1, :, :]
                prev_img = tf.cast(prev_img * 255, dtype=tf.uint8)
                tf.compat.v1.summary.image(
                    "residual_mem",
                    tf.transpose(a=prev_img, perm=[3, 0, 2, 1]),
                    max_outputs=16,
                )
            if RSE_network.info_alpha:
                prev_img = tf.stack(RSE_network.info_alpha)
                prev_img = prev_img[:, 0:1, :, :]
                tf.compat.v1.summary.image(
                    "info_alpha",
                    tf.transpose(a=prev_img, perm=[3, 0, 2, 1]),
                    max_outputs=16,
                )

            candidate_img = tf.stack(RSE_network.candidate_mem)
            candidate_img = candidate_img[:, 0:1, :, :]
            candidate_img = tf.cast((candidate_img + 1.0) * 127.5,
                                    dtype=tf.uint8)
            tf.compat.v1.summary.image(
                "candidate",
                tf.transpose(a=candidate_img, perm=[3, 0, 2, 1]),
                max_outputs=16,
            )

            mem1 = mem1[:, 0:1, :, :]
            tf.compat.v1.summary.image("mem",
                                       tf.transpose(a=mem1, perm=[3, 0, 2, 1]),
                                       max_outputs=16)

        saturation = tf.reduce_sum(
            input_tensor=tf.stack(saturation_loss)) / sum_weight
        tf.compat.v1.summary.scalar("base/activation_mean",
                                    tf.sqrt(total_mean_loss))

        self.sat_loss = saturation * self.saturation_weight
        cost = self.base_cost + self.sat_loss

        tvars = [v for v in tf.compat.v1.trainable_variables()]
        for var in tvars:
            name = var.name.replace("var_lengths", "")
            tf.compat.v1.summary.histogram(name + "/histogram", var)

        regvars = [var for var in tvars if "CvK" in var.name]
        print(regvars)
        reg_costlist = [
            tf.reduce_sum(input_tensor=tf.square(var)) for var in regvars
        ]
        reg_cost = tf.add_n(reg_costlist)
        tf.compat.v1.summary.scalar("base/regularize_loss", reg_cost)

        # optimizer

        self.local_lr = self.learning_rate

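        # Assumed reading of the configuration below: RAdam with decoupled
        # L2 decay applied only to the conv kernels in `regvars`, warmup for
        # the first `cnf.num_warmup_steps` steps, and gradient clipping.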
        optimizer = RAdamOptimizer(
            self.local_lr,
            epsilon=1e-5,
            L2_decay=0.01,
            L1_decay=0.00,
            decay_vars=regvars,
            total_steps=cnf.training_iters,
            warmup_proportion=cnf.num_warmup_steps / cnf.training_iters,
            clip_gradients=True,
        )

        self.optimizer = optimizer.minimize(cost, global_step=self.global_step)

        # some values for printout
        max_vals = []

        for var in tvars:
            var_v = optimizer.get_slot(var, "v")
            max_vals.append(tf.sqrt(var_v))

        self.gnorm = tf.linalg.global_norm(max_vals)
        tf.compat.v1.summary.scalar("base/gnorm", self.gnorm)
        self.cost_list = tf.stack(self.cost_list)
Example #8
    def load_model(self):
        # placeholders
        self.x = tf.compat.v1.placeholder(tf.int32, shape=[self.batch_size, None])
        self.y = tf.compat.v1.placeholder(tf.int32, shape=[self.batch_size, None])
        self.mems_i = [tf.compat.v1.placeholder(tf.float32, [self.mem_len, self.batch_size, self.d_model]) for _ in
                       range(self.n_layer)]
        # model
        self.global_step = tf.compat.v1.train.get_or_create_global_step()

        initializer = tf.compat.v1.keras.initializers.glorot_normal()
        proj_initializer = tf.compat.v1.keras.initializers.glorot_normal()

        with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope()):
            xx = tf.transpose(self.x, [1, 0])
            yy = tf.transpose(self.y, [1, 0])
            loss, self.logits, self.new_mem = modules.transformer(
                dec_inp=xx,
                target=yy,
                mems=self.mems_i,
                n_token=self.n_token,
                n_layer=self.n_layer,
                d_model=self.d_model,
                d_embed=self.d_embed,
                n_head=self.n_head,
                d_head=self.d_head,
                d_inner=self.d_ff,
                dropout=self.dropout,
                dropatt=self.dropout,
                initializer=initializer,
                proj_initializer=proj_initializer,
                is_training=self.is_training,
                mem_len=self.mem_len,
                rezero=self.rezero,
                cutoffs=[],
                div_val=-1,
                tie_projs=[],
                same_length=False,
                clamp_len=-1,
                input_perms=None,
                target_perms=None,
                head_target=None,
                untie_r=False,
                proj_same_dim=True)
            variables = tf.trainable_variables()
        self.avg_loss = tf.reduce_mean(loss)
        grads = tf.gradients(self.avg_loss, variables)
        grads_and_vars = list(zip(grads, variables))
        # vars
        decay_lr = tf.compat.v1.train.cosine_decay(
            self.learning_rate,
            global_step=self.global_step,
            decay_steps=400000,
            alpha=0.004)

        optimizer = RAdamOptimizer(decay_lr)
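        # Wrap the optimizer so TF rewrites the graph for automatic mixed
        # precision (float16 compute with loss scaling handled internally):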
        optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
        self.train_op = optimizer.apply_gradients(grads_and_vars, self.global_step)

        # saver
        self.saver = tf.compat.v1.train.Saver()
        config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
        self.sess = tf.compat.v1.Session(config=config)
        self.saver.restore(self.sess, self.checkpoint_path)
Example #9
    def create_graph(self):
        """Creates graph for training"""
        self.cost = 0.0
        self.accuracy = 0
        num_sizes = len(self.bins)
        self.cost_list = []
        self.bin_losses = []

        # Create all bins and calculate losses for them

        with vs.variable_scope("var_lengths"):
            for seqLength, itemCount, ind in zip(self.bins, self.count_list, range(num_sizes)):
                x_in = tf.placeholder("int64", [itemCount, seqLength])
                y_in = tf.placeholder("int64", [itemCount, seqLength])
                self.x_input.append(x_in)
                self.y_input.append(y_in)
                network.saturation_costs = []
                network.gate_mem = []
                network.reset_mem = []
                network.candidate_mem = []
                network.prev_mem_list = []

                if self.use_two_gpus:
                    device = "/device:GPU:" + ("0" if seqLength >= self.bins[-1] else "1")
                    with tf.device(device):
                        c, a, mem1, _, perItemCost, _ = self.create_loss(x_in, y_in, seqLength)
                else:
                    c, a, mem1, _, perItemCost, _ = self.create_loss(x_in, y_in, seqLength)

                # /seqLength
                self.bin_losses.append(perItemCost)
                self.cost += c
                self.accuracy += a
                self.cost_list.append(c)
                tf.get_variable_scope().reuse_variables()

        # calculate the total loss
        self.cost /= num_sizes
        self.accuracy /= num_sizes

        # tensorboard output
        tf.summary.scalar("base/loss", self.cost)
        tf.summary.scalar("base/accuracy", self.accuracy)
        tf.summary.scalar("base/accuracy_longest", a)

        gate_img = tf.stack(network.gate_mem)
        gate_img = gate_img[:, 0:1, :, :]
        gate_img = tf.cast(gate_img * 255, dtype=tf.uint8)
        tf.summary.image("gate", tf.transpose(gate_img, [3, 0, 2, 1]), max_outputs=16)
        reset_img = tf.stack(network.reset_mem)
        reset_img = reset_img[:, 0:1, :, :]
        reset_img = tf.cast(reset_img * 255, dtype=tf.uint8)
        tf.summary.image("reset", tf.transpose(reset_img, [3, 0, 2, 1]), max_outputs=16)
        if network.prev_mem_list:
            prev_img = tf.stack(network.prev_mem_list)
            prev_img = prev_img[:, 0:1, :, :]
            prev_img = tf.cast(prev_img * 255, dtype=tf.uint8)
            tf.summary.image("prev_mem", tf.transpose(prev_img, [3, 0, 2, 1]), max_outputs=16)

        candidate_img = tf.stack(network.candidate_mem)
        candidate_img = candidate_img[:, 0:1, :, :]
        candidate_img = tf.cast((candidate_img + 1.0) * 127.5, dtype=tf.uint8)
        tf.summary.image("candidate", tf.transpose(candidate_img, [3, 0, 2, 1]), max_outputs=16)

        mem1 = mem1[:, 0:1, :, :]
        tf.summary.image("mem", tf.transpose(mem1, [3, 0, 2, 1]), max_outputs=16)

        tvars = tf.trainable_variables()
        for var in tvars:
            name = var.name.replace("var_lengths", "")
            tf.summary.histogram(name + '/histogram', var)

        # we use a small L2 regularization, although it is questionable if it helps
        regularizable_vars = [var for var in tvars if "CvK" in var.name]
        reg_costlist = [tf.reduce_sum(tf.square(var)) for var in regularizable_vars]
        reg_cost = tf.add_n(reg_costlist)
        tf.summary.scalar("base/regularize_loss", reg_cost)
        optimizer = RAdamOptimizer(self.learning_rate,
                                   epsilon=1e-5,
                                   L2_decay=0.01,
                                   decay_vars=regularizable_vars,
                                   total_steps=cnf.training_iters,
                                   warmup_proportion=0.0)  # Adam optimizer works as well
        self.optimizer = optimizer.minimize(self.cost, global_step=self.global_step)

        # some values for printout
        max_vals = []

        for var in tvars:
            varV = optimizer.get_slot(var, "v")
            max_vals.append(varV)

        self.gnorm = tf.global_norm(max_vals)
        self.cost_list = tf.stack(self.cost_list)
Example #10
    folder_best_model = args.model_path
    name_best_model = os.path.join(folder_best_model, 'best')
    dataset_path = args.dataset
    loader = Loader.Loader(dataFolderPath=dataset_path, n_classes=n_classes, problemType='segmentation',
                           width=width, height=height, channels=channels_image, channels_events=channels_events)

    if not os.path.exists(folder_best_model):
        os.makedirs(folder_best_model)

    # build model and optimizer
    model = Segception.Segception_small(num_classes=n_classes, weights=None, input_shape=(None, None, channels))

    # optimizer
    learning_rate = tfe.Variable(lr)
    #optimizer = tf.train.AdamOptimizer(learning_rate)
    optimizer = RAdamOptimizer(learning_rate)

    # Init models (optional, just for get_params function)
    init_model(model, input_shape=(batch_size, width, height, channels))

    variables_to_restore = model.variables  # [x for x in model.variables if 'block1_conv1' not in x.name]
    variables_to_save = model.variables
    variables_to_optimize = model.variables

    # Init saver. Can also use: ckpt = tfe.Checkpoint(model=model, optimizer=optimizer, learning_rate=learning_rate, global_step=global_step)
    saver_model = tfe.Saver(var_list=variables_to_save)
    restore_model = tfe.Saver(var_list=variables_to_restore)

    # restore if model saved and show number of params
    # restore_state(restore_model, name_best_model)
    get_params(model)
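
A sketch (assumed, not in the original script) of one eager-mode training step
with this optimizer; `x`, `y`, and the loss choice are illustrative:

    def train_step(model, optimizer, x, y):
        # One gradient update with RAdamOptimizer in eager execution.
        with tf.GradientTape() as tape:
            logits = model(x, training=True)
            loss = tf.losses.sparse_softmax_cross_entropy(labels=y,
                                                          logits=logits)
        grads = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))
        return loss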