def gauss_sample(gauss_params, quant_chann, use_log_scales=True):
    """Sample from the Gaussian described by ``gauss_params`` and quantize it.

    ``mean_std_from_out_params`` turns the raw parameters into a mean and a
    standard deviation. One sample is drawn from the resulting ``Normal``,
    clipped to ``[-1, 1 - 2 / quant_chann]`` and passed through
    ``utils.cast_quantize``.

    :param gauss_params: raw distribution parameters, forwarded unchanged to
        ``mean_std_from_out_params``.
    :param quant_chann: quantization setting; it fixes the upper clip bound
        and is forwarded to ``utils.cast_quantize``.
    :param use_log_scales: forwarded to ``mean_std_from_out_params``.
    :return: the output of ``utils.cast_quantize`` for the clipped sample.
    """
    loc, scale = mean_std_from_out_params(gauss_params, use_log_scales)
    raw_sample = Normal(loc=loc, scale=scale).sample()
    # Upper bound is one quantization step (2 / quant_chann) below +1.
    upper_bound = 1. - 2. / quant_chann
    clipped = tf.clip_by_value(raw_sample, -1., upper_bound)
    return utils.cast_quantize(clipped, quant_chann)
Exemple #2
0
def make_dists_and_sample(latent_sample_seq):
    """Build Gaussians from packed parameters and draw one sample from each.

    The last axis of ``latent_sample_seq`` is split in half: the first half is
    used as the means and the second half is exponentiated to obtain the
    scales, i.e. it is treated as log standard deviations.

    :param latent_sample_seq: tensor whose last axis has a statically known
        size and holds the means followed by the log standard deviations.
    :return: tuple ``(latent_dists, sample)`` with the ``Normal`` object and
        the squeezed sample tensor.
    """
    half = int(latent_sample_seq.get_shape().as_list()[-1] / 2)
    means = latent_sample_seq[..., :half]
    log_stds = latent_sample_seq[..., half:]
    latent_dists = Normal(loc=means, scale=tf.exp(log_stds))
    # One draw per distribution. sample([1]) prepends a length-1 axis; squeeze
    # is called without an axis, so it drops every length-1 axis of the result.
    single_draw = latent_dists.sample([1])
    return latent_dists, tf.squeeze(single_draw)
Exemple #3
0
 def _sample(self, mu, std_dev):
     """
     Draw one sample from a Gaussian with the given parameters.

     :param mu: Gaussian mean.
     :param std_dev: Standard deviation of the Gaussian.
     :return: Sample z, with all length-1 axes squeezed away.
     """
     gaussian = Normal(loc=mu, scale=std_dev)
     # sample([1]) prepends a length-1 axis; squeeze (no axis given) removes
     # it together with any other length-1 axis.
     single_draw = gaussian.sample([1])
     return tf.squeeze(single_draw)
Exemple #4
0
def main_pendulum(logdir,
                  seed,
                  n_iter,
                  gamma,
                  min_timesteps_per_batch,
                  initial_stepsize,
                  desired_kl,
                  vf_type,
                  vf_params,
                  animate=False):
    """Train a Gaussian policy on ``Pendulum-v0`` with a policy-gradient loop.

    Each iteration collects rollouts with the current policy, estimates
    advantages as discounted return minus the value-function prediction,
    refits the value function, takes one Adam step on the surrogate loss and
    finally adapts the step size from the measured KL divergence.

    :param logdir: output directory handed to ``logz.configure_output_dir``.
    :param seed: seed applied to both TensorFlow and NumPy.
    :param n_iter: number of training iterations.
    :param gamma: discount factor passed to ``discount``.
    :param min_timesteps_per_batch: rollouts are collected until the batch
        holds more than this many timesteps.
    :param initial_stepsize: starting step size for the Adam optimizer.
    :param desired_kl: KL target; the step size is divided by 1.5 when the
        measured KL exceeds twice this value and multiplied by 1.5 when it is
        below half of it.
    :param vf_type: ``'linear'`` or ``'nn'``; selects the value function.
    :param vf_params: keyword arguments for the value-function constructor.
    :param animate: when true, render the first episode of every tenth
        iteration.
    :raises ValueError: if ``vf_type`` is not ``'linear'`` or ``'nn'``.
    """
    # Fail fast: previously an unknown vf_type left `vf` unbound and only
    # surfaced after a whole batch of rollouts had been collected.
    if vf_type not in ('linear', 'nn'):
        raise ValueError("unknown vf_type: %r" % (vf_type,))

    tf.set_random_seed(seed)
    np.random.seed(seed)
    env = gym.make("Pendulum-v0")
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]
    logz.configure_output_dir(logdir)
    if vf_type == 'linear':
        vf = LinearValueFunction(**vf_params)
    else:
        vf = NnValueFunction(ob_dim=ob_dim, **vf_params)

    # batch of observations
    sy_ob_no = tf.placeholder(shape=[None, ob_dim],
                              name="ob",
                              dtype=tf.float32)
    # batch of actions
    sy_ac_n = tf.placeholder(shape=[None], name="ac", dtype=tf.float32)
    # batch of advantage function estimates
    sy_adv_n = tf.placeholder(shape=[None], name="adv", dtype=tf.float32)

    # 2-layer network mapping observations to hidden features
    sy_h1 = lrelu(dense(sy_ob_no, 32, "h1",
                        weight_init=normc_initializer(1.0)))
    sy_h2 = lrelu(dense(sy_h1, 32, "h2", weight_init=normc_initializer(1.0)))
    # Mean control output
    sy_mean_na = dense(sy_h2,
                       ac_dim,
                       "mean",
                       weight_init=normc_initializer(0.1))
    # State-independent log standard deviation, one entry per action dim
    logstd_a = tf.get_variable("logstdev", [ac_dim])

    # define action distribution
    sy_ac_distr = Normal(mu=tf.squeeze(sy_mean_na),
                         sigma=tf.exp(logstd_a),
                         validate_args=True)
    # sampled actions, used for defining the policy
    # (NOT computing the policy gradient)
    sy_sampled_ac = tf.squeeze(sy_ac_distr.sample(sample_shape=[ac_dim]))

    sy_logprob_n = sy_ac_distr.log_pdf(sy_ac_n)

    # used for computing KL and entropy, JUST FOR DIAGNOSTIC PURPOSES
    sy_oldmean_na = tf.placeholder(shape=[None, ac_dim],
                                   name='oldmean',
                                   dtype=tf.float32)
    sy_oldlogstd_a = tf.placeholder(shape=[ac_dim],
                                    name="oldlogstdev",
                                    dtype=tf.float32)
    sy_ac_olddistr = Normal(mu=tf.squeeze(sy_oldmean_na),
                            sigma=tf.exp(sy_oldlogstd_a),
                            validate_args=True)

    sy_kl = tf.reduce_mean(
        tf.contrib.distributions.kl(sy_ac_distr, sy_ac_olddistr))
    sy_ent = tf.reduce_mean(sy_ac_distr.entropy())

    # Loss function that we'll differentiate to get the policy gradient
    # ("surr" is for "surrogate loss")
    sy_surr = -tf.reduce_mean(sy_adv_n * sy_logprob_n)

    # Symbolic, so the stepsize can change between optimizer steps.
    sy_stepsize = tf.placeholder(shape=[], dtype=tf.float32)
    update_op = tf.train.AdamOptimizer(sy_stepsize).minimize(sy_surr)

    sess = tf.Session()
    sess.__enter__()  # equivalent to `with sess:`
    tf.global_variables_initializer().run()  #pylint: disable=E1101

    total_timesteps = 0
    stepsize = initial_stepsize

    for i in range(n_iter):
        print("********** Iteration %i ************" % i)

        # Collect paths until we have enough timesteps
        timesteps_this_batch = 0
        paths = []
        while True:
            ob = env.reset()
            # NOTE(review): never updated below, so every path records False.
            terminated = False
            obs, acs, rewards = [], [], []
            animate_this_episode = (len(paths) == 0 and (i % 10 == 0)
                                    and animate)
            while True:
                if animate_this_episode:
                    env.render()
                obs.append(ob)
                ac = sess.run(sy_sampled_ac, feed_dict={sy_ob_no: ob[None]})
                acs.append(ac)
                ob, rew, done, _ = env.step([ac])
                rewards.append(rew)
                if done:
                    break

            path = {
                "observation": np.array(obs),
                "terminated": terminated,
                "reward": np.array(rewards),
                "action": np.array(acs)
            }
            paths.append(path)
            timesteps_this_batch += pathlength(path)
            if timesteps_this_batch > min_timesteps_per_batch:
                break

        total_timesteps += timesteps_this_batch

        # Estimate advantage function
        vtargs, vpreds, advs = [], [], []
        for path in paths:
            rew_t = path["reward"]
            return_t = discount(rew_t, gamma)
            vpred_t = vf.predict(path["observation"])
            adv_t = return_t - vpred_t
            advs.append(adv_t)
            vtargs.append(return_t)
            vpreds.append(vpred_t)

        # Build arrays for policy update
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_n = np.concatenate([path["action"] for path in paths])
        adv_n = np.concatenate(advs)
        standardized_adv_n = (adv_n - adv_n.mean()) / (adv_n.std() + 1e-8)
        vtarg_n = np.concatenate(vtargs)
        vpred_n = np.concatenate(vpreds)
        vf.fit(ob_no, vtarg_n)

        # Snapshot the policy parameters BEFORE the update. Fetching them in
        # the same sess.run as update_op leaves the read/assign order
        # unspecified, so the "old" values could already be the updated ones
        # and the KL diagnostic below would be meaningless.
        oldmean_na, oldlogstdev = sess.run([sy_mean_na, logstd_a],
                                           feed_dict={sy_ob_no: ob_no})

        # Policy update
        sess.run(update_op,
                 feed_dict={
                     sy_ob_no: ob_no,
                     sy_ac_n: ac_n,
                     sy_adv_n: standardized_adv_n,
                     sy_stepsize: stepsize
                 })
        kl, ent = sess.run(
            [sy_kl, sy_ent],
            feed_dict={
                sy_ob_no: ob_no,
                sy_oldmean_na: oldmean_na,
                sy_oldlogstd_a: oldlogstdev
            })

        # Adapt the step size so the measured KL stays near desired_kl.
        if kl > desired_kl * 2:
            stepsize /= 1.5
            print('stepsize -> %s' % stepsize)
        elif kl < desired_kl / 2:
            stepsize *= 1.5
            print('stepsize -> %s' % stepsize)
        else:
            print('stepsize OK')

        # Log diagnostics
        logz.log_tabular("EpRewMean",
                         np.mean([path["reward"].sum() for path in paths]))
        logz.log_tabular("EpLenMean",
                         np.mean([pathlength(path) for path in paths]))
        logz.log_tabular("KLOldNew", kl)
        logz.log_tabular("Entropy", ent)
        logz.log_tabular("EVBefore", explained_variance_1d(vpred_n, vtarg_n))
        logz.log_tabular("EVAfter",
                         explained_variance_1d(vf.predict(ob_no), vtarg_n))
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        # If you're overfitting, EVAfter will be way larger than EVBefore.
        # Note that we fit value function AFTER using it to compute the
        # advantage function to avoid introducing bias
        logz.dump_tabular()
# Build two training objectives inside the "cost" name scope: a mean squared
# error and a Monte Carlo estimate of the (negated) ELBO.
with tf.name_scope("cost"):
    # Mean squared error between the targets `y` and the prediction `y_mu`.
    RSEcost = tf.reduce_mean(
        tf.square(y - y_mu))  # squared error averaged over all elements

    #    #negative log-likelihood (same as maximum-likelihood) -- disabled
    #    y_sigma  = tf.sqrt(tfmixedmodel(Xtf, tf.square(std_encoder1), Ztf, tf.square(std_encoder2)))
    #    NLLcost  = - tf.reduce_sum(-0.5 * tf.log(2. * np.pi) - tf.log(y_sigma)
    #                               -0.5 * tf.square((y - y_mu)/y_sigma))

    # Mean-field variational inference using the ELBO.
    # One accumulator per Monte Carlo sample: p_* collects the log prior plus
    # log likelihood, q_* collects the variational log density.
    p_log_prob = [0.0] * n_samples
    q_log_prob = [0.0] * n_samples
    for s in range(n_samples):
        # Draw the coefficients from Normal(beta_mu, std_encoder1).
        # NOTE(review): log_prob is evaluated on `beta_tf`, not on the
        # `beta_tf_copy` that produced the sample; `beta_tf` is defined
        # outside this snippet -- confirm both describe the same distribution.
        beta_tf_copy = Normal(loc=beta_mu, scale=std_encoder1)
        beta_sample = beta_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(beta_tf.log_prob(beta_sample))
        # Same for the second coefficient block, scored under `b_tf`.
        # NOTE(review): same `b_tf` vs `b_tf_copy` question as above.
        b_tf_copy = Normal(loc=b_mu, scale=std_encoder2)
        b_sample = b_tf_copy.sample()
        q_log_prob[s] += tf.reduce_sum(b_tf.log_prob(b_sample))

        # A single prior Normal(priormu, priorsigma) scores both samples.
        priormodel = Normal(loc=priormu, scale=priorsigma)
        # Linear predictor built from the two sampled coefficient blocks.
        y_sample = tf.matmul(Xtf, beta_sample) + tf.matmul(Ztf, b_sample)
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(beta_sample))
        p_log_prob[s] += tf.reduce_sum(priormodel.log_prob(b_sample))
        # Likelihood of the observed `y` around the sampled predictor.
        modelcopy = Normal(loc=y_sample, scale=priorliksigma)
        p_log_prob[s] += tf.reduce_sum(modelcopy.log_prob(y))

    p_log_prob = tf.stack(p_log_prob)
    q_log_prob = tf.stack(q_log_prob)
    # Despite its name, `ELBO` holds the NEGATED sample mean of (log p - log q),
    # i.e. a quantity to be minimized.
    ELBO = -tf.reduce_mean(p_log_prob - q_log_prob)
Exemple #6
0
    def _build_ad_nn(self, tensor_io):
        """Build the recurrent actor-critic graph and, when training, its losses.

        The critic runs the ``state`` input through a one-layer LSTM and two
        ``fc_selu`` layers before a scalar value head. The actor reads the
        critic's last hidden layer through ``tf.stop_gradient`` and produces
        means and standard deviations for steering and acceleration, from
        which a ``Normal`` is built and sampled once.

        :param tensor_io: ``TensorIO`` instance that supplies the input
            tensors and receives the output tensors.
        :return: ``None`` when the current context is not training (the
            outputs are registered through ``tensor_io.setOutputTensors``
            instead); otherwise the tuple ``(loss_policy, loss_value)``.
        """
        from drlutils.dataflow.tensor_io import TensorIO
        assert (isinstance(tensor_io, TensorIO))
        from drlutils.model.base import get_current_nn_context
        from tensorpack.tfutils.common import get_global_step_var
        # NOTE(review): `global_step` is only referenced by commented-out code
        # further down in this method.
        global_step = get_global_step_var()
        nnc = get_current_nn_context()
        is_training = nnc.is_training
        i_state = tensor_io.getInputTensor('state')
        i_agentIdent = tensor_io.getInputTensor('agentIdent')
        i_sequenceLength = tensor_io.getInputTensor('sequenceLength')
        i_resetRNN = tensor_io.getInputTensor('resetRNN')
        l = i_state
        # l = tf.Print(l, [i_state, tf.shape(i_state)], 'State = ')
        # l = tf.Print(l, [i_agentIdent, tf.shape(i_agentIdent)], 'agentIdent = ')
        # l = tf.Print(l, [i_sequenceLength, tf.shape(i_sequenceLength)], 'SeqLen = ')
        # l = tf.Print(l, [i_resetRNN, tf.shape(i_resetRNN)], 'resetRNN = ')
        with tf.variable_scope('critic', reuse=nnc.reuse) as vs:

            def _get_cell():
                # Single 256-unit LSTM cell; the dropout wrapper is disabled.
                cell = tf.nn.rnn_cell.BasicLSTMCell(256)
                # if is_training:
                #     cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=0.9)
                return cell

            cell = tf.nn.rnn_cell.MultiRNNCell([_get_cell() for _ in range(1)])
            rnn_outputs = self._buildRNN(
                l,
                cell,
                tensor_io.batchSize,
                i_agentIdent=i_agentIdent,
                i_sequenceLength=i_sequenceLength,
                i_resetRNN=i_resetRNN,
            )
            # Collapse every leading axis so each row is one feature vector.
            rnn_outputs = tf.reshape(
                rnn_outputs, [-1, rnn_outputs.get_shape().as_list()[-1]])
            l = rnn_outputs
            from ad_cur.autodrive.model.selu import fc_selu
            for lidx in range(2):
                l = fc_selu(
                    l,
                    200,
                    keep_prob=1.,  # training uses sensor inputs only, so no key information may be dropped
                    is_training=is_training,
                    name='fc-{}'.format(lidx))
            value = tf.layers.dense(l, 1, name='fc-value')
            value = tf.squeeze(value, [1], name="value")
            # Remember the critic's trainable variables the first time the
            # graph is built; they are used for L2 regularization below.
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope('actor', reuse=nnc.reuse) as vs:
            # Actor gradients must not flow back into the critic's layers.
            l = tf.stop_gradient(l)
            l = tf.layers.dense(l,
                                128,
                                activation=tf.nn.relu6,
                                name='fc-actor')
            # tanh output scaled by 0.5, so the steering mean lies in (-0.5, 0.5).
            mu_steering = 0.5 * tf.layers.dense(
                l, 1, activation=tf.nn.tanh, name='fc-mu-steering')
            mu_accel = tf.layers.dense(l,
                                       1,
                                       activation=tf.nn.tanh,
                                       name='fc-mu-accel')
            mus = tf.concat([mu_steering, mu_accel], axis=-1)

            # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
            # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
            # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
            # NOTE(review): this helper is defined but never called in this method.
            def saturating_sigmoid(x):
                """Saturating sigmoid: 1.2 * sigmoid(x) - 0.1 cut to [0, 1]."""
                with tf.name_scope("saturating_sigmoid", [x]):
                    y = tf.sigmoid(x)
                    return tf.minimum(1.0, tf.maximum(0.0, 1.2 * y - 0.1))

            # Sigmoid outputs scaled to (0, 0.1) for steering and (0, 0.25)
            # for acceleration.
            sigma_steering_ = 0.1 * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering')
            sigma_accel_ = 0.25 * tf.layers.dense(
                l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel')

            if not nnc.is_evaluating:
                # Look up externally created offset tensors by name.
                # NOTE(review): the steering lookup is overwritten by a
                # constant three statements later, so only the acceleration
                # lookup takes effect -- confirm this is intended.
                sigma_beta_steering = tf.get_default_graph(
                ).get_tensor_by_name('actor/sigma_beta_steering:0')
                sigma_beta_accel = tf.get_default_graph().get_tensor_by_name(
                    'actor/sigma_beta_accel:0')
                sigma_beta_steering = tf.constant(1e-4)
                # sigma_beta_steering_exp = tf.train.exponential_decay(0.3, global_step, 1000, 0.5, name='sigma/beta/steering/exp')
                # sigma_beta_accel_exp = tf.train.exponential_decay(0.5, global_step, 5000, 0.5, name='sigma/beta/accel/exp')
            else:
                sigma_beta_steering = tf.constant(1e-4)
                sigma_beta_accel = tf.constant(1e-4)
            sigma_steering = (sigma_steering_ + sigma_beta_steering)
            sigma_accel = (sigma_accel_ + sigma_beta_accel)

            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
            # if is_training:
            #     pass
            #     # Without sigma_beta, convergence is slow and unstable. Suspected reasons:
            #     #   1. Exploring as widely as possible early in training keeps the network out of local optima.
            #     #   2. A sigma that is too small early on makes normal_dist's log_prob too large, which produces
            #     #      oversized gradient updates; the network is deformed from the start and hard to recover.
            #
            # if is_training:
            #     sigmas += sigma_beta_steering
            # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
            # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigmas_orig = sigmas
            # sigmas = sigmas + sigma_beta_steering
            # sigmas = tf.minimum(sigmas + 0.1, 100)
            # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
            # sigma_steering += sigma_beta_steering
            # sigma_accel += sigma_beta_accel

            # mus = tf.concat([mu_steering, mu_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            # An extra 0.01 is added to the scale of the distribution itself.
            dists = Normal(mus, sigmas + 0.01)
            # Draw one sample and drop the leading sample axis.
            policy = tf.squeeze(dists.sample([1]), [0])
            # Clip the sample to mus +/- 2 * sigmas. Note that `sigmas` here
            # does not include the 0.01 added to the distribution's scale.
            policy = tf.clip_by_value(policy, mus - 2 * sigmas,
                                      mus + 2 * sigmas)
            if is_training:
                self._addMovingSummary(
                    tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                    tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                    tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                    tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                    tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                    tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                    # sigma_beta_accel,
                    # sigma_beta_steering,
                )
            # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
            #                    'mu/sigma/sigma.orig/act=', summarize=4)
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
        # Outside training only the outputs are registered; no losses are
        # built and the method returns None.
        if not is_training:
            tensor_io.setOutputTensors(policy, value, mus, sigmas)
            return

        i_actions = tensor_io.getInputTensor("action")
        # i_actions = tf.Print(i_actions, [i_actions], 'actions = ')
        # Merge the first two axes so the rows line up with the flattened
        # RNN outputs.
        i_actions = tf.reshape(i_actions,
                               [-1] + i_actions.get_shape().as_list()[2:])
        log_probs = dists.log_prob(i_actions)
        # exp_v = tf.transpose(
        #     tf.multiply(tf.transpose(log_probs), advantage))
        # exp_v = tf.multiply(log_probs, advantage)
        i_advantage = tensor_io.getInputTensor("advantage")
        i_advantage = tf.reshape(i_advantage,
                                 [-1] + i_advantage.get_shape().as_list()[2:])
        # Weight each action dimension's log-probability by the advantage.
        exp_v = log_probs * tf.expand_dims(i_advantage, -1)
        entropy = dists.entropy()
        # Non-trainable entropy-bonus coefficient, initialized to 0.01.
        entropy_beta = tf.get_variable(
            'entropy_beta',
            shape=[],
            initializer=tf.constant_initializer(0.01),
            trainable=False)
        exp_v = entropy_beta * entropy + exp_v
        # Sum over action dimensions, negate, then average over rows.
        loss_policy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1),
                                     name='loss/policy')

        i_futurereward = tensor_io.getInputTensor("futurereward")
        i_futurereward = tf.reshape(i_futurereward, [-1] +
                                    i_futurereward.get_shape().as_list()[2:])
        loss_value = tf.reduce_mean(0.5 * tf.square(value - i_futurereward))

        # Mean policy entropy, summed over action dimensions. It is reported
        # as a summary only and is not returned.
        loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1),
                                      name='xentropy_loss')

        from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
        # L2 penalty over the critic variables, added to the value loss.
        loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4),
                                                   self._weights_critic)
        loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
        loss_value += loss_l2_regularizer
        loss_value = tf.identity(loss_value, name='loss/value')

        # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])

        self._addParamSummary([('.*', ['rms', 'absmax'])])
        pred_reward = tf.reduce_mean(value, name='predict_reward')
        import tensorpack.tfutils.symbolic_functions as symbf
        advantage = symbf.rms(i_advantage, name='rms_advantage')
        self._addMovingSummary(
            loss_policy,
            loss_value,
            loss_entropy,
            pred_reward,
            advantage,
            loss_l2_regularizer,
            tf.reduce_mean(policy[:, 0], name='actor/steering/mean'),
            tf.reduce_mean(policy[:, 1], name='actor/accel/mean'),
        )
        return loss_policy, loss_value
# Fix TensorFlow's graph-level random seed so sampling below is repeatable.
tf.set_random_seed(1)


def F(x):
    """Evaluate the objective ``x**2 - 2*x + 1`` at ``x``.

    Works for any operand that supports ``**``, ``*``, ``-`` and ``+`` with
    numbers, so scalars and NumPy arrays are both accepted.
    """
    squared_term = x**2
    linear_term = 2 * x
    return squared_term - linear_term + 1


def get_fitness(value):
    """Return the fitness for an objective value, which is its negation."""
    fitness = -value
    return fitness


# Search distribution: a Gaussian whose scalar mean (initially -30) and scalar
# scale (initially 1) are both trainable variables.
mean = tf.Variable(tf.constant(-30.), dtype=tf.float32)
sigma = tf.Variable(tf.constant(1.), dtype=tf.float32)
N_dist = Normal(loc=mean, scale=sigma)
# Op that draws POP_SIZE samples from the current search distribution.
make_kids = N_dist.sample([POP_SIZE])

# Placeholders for a population and the fitness of each of its members.
tfkids = tf.placeholder(tf.float32, [POP_SIZE, DNA_SIZE])
tfkids_fit = tf.placeholder(tf.float32, [POP_SIZE])
# Negated mean of log-probability weighted by fitness, minimized by plain
# gradient descent with learning rate LR.
# NOTE(review): log_prob(tfkids) has shape [POP_SIZE, DNA_SIZE] while
# tfkids_fit has shape [POP_SIZE]; the product relies on broadcasting --
# confirm the shapes line up as intended for the DNA_SIZE in use.
loss = -tf.reduce_mean(N_dist.log_prob(tfkids) * tfkids_fit)
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss)

# Plot the objective F over [-70, 70] as the background of the figure.
x = np.linspace(-70, 70, 100)
plt.plot(x, F(x))
plt.xlim(-70, 70)
plt.ylim(-100, 1000)

# Create the session and initialize the two variables defined above.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
Exemple #8
0
    def _get_NN_prediction(self, state):
        """Build the critic and actor sub-graphs and sample one action.

        The critic is a stack of eight 200-unit ``fc_selu`` layers followed by
        a one-unit linear value head. The actor reads the critic's last hidden
        layer through ``tf.stop_gradient`` and emits a mean and a standard
        deviation for steering and for acceleration. An exponentially decaying
        offset is added to each standard deviation before the ``Normal`` is
        built and sampled once.

        :param state: input tensor fed to the first critic layer.
        :return: tuple ``(actions, value, dists)`` -- the sampled actions, the
            value-head output and the ``Normal`` distribution object.
        """
        from tensorpack.tfutils import symbolic_functions
        is_training = get_current_tower_context().is_training
        hidden = state
        with tf.variable_scope('critic') as critic_scope:

            from autodrive.model.selu import fc_selu
            for depth in range(8):
                hidden = fc_selu(
                    hidden,
                    200,
                    # keep_prob=1.: training uses sensor inputs only, so no
                    # key information may be dropped.
                    keep_prob=1.,
                    is_training=is_training,
                    name='fc-{}'.format(depth))
            value = tf.layers.dense(
                hidden,
                1,
                name='fc-value',
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
            # Record the critic's trainable variables on first construction.
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=critic_scope.name)

        with tf.variable_scope('actor') as actor_scope:
            # Actor gradients must not reach the critic's layers.
            hidden = tf.stop_gradient(hidden)

            def scalar_head(layer_name, activation):
                """One-unit dense layer on ``hidden`` with a stddev-0.01 init."""
                return tf.layers.dense(
                    hidden,
                    1,
                    activation=activation,
                    name=layer_name,
                    kernel_initializer=tf.truncated_normal_initializer(
                        stddev=0.01))

            mu_steering = 0.5 * scalar_head('fc-mu-steering', tf.nn.tanh)
            mu_accel = scalar_head('fc-mu-accel', tf.nn.tanh)
            mus = tf.concat([mu_steering, mu_accel], axis=-1)

            sigma_steering_ = 0.5 * scalar_head('fc-sigma-steering',
                                                tf.nn.sigmoid)
            sigma_accel_ = 1. * scalar_head('fc-sigma-accel', tf.nn.sigmoid)

            # Offsets that decay with the global step. An earlier note in this
            # code explains why: without such an offset convergence was slow
            # and unstable, presumably because (1) wide early exploration
            # avoids local optima and (2) a too-small early sigma inflates
            # log_prob and therefore the gradient updates.
            from tensorpack.tfutils.common import get_global_step_var
            sigma_beta_steering_exp = tf.train.exponential_decay(
                0.001,
                get_global_step_var(),
                1000,
                0.5,
                name='sigma/beta/steering/exp')
            sigma_beta_accel_exp = tf.train.exponential_decay(
                0.5,
                get_global_step_var(),
                5000,
                0.5,
                name='sigma/beta/accel/exp')
            sigma_steering = sigma_steering_ + sigma_beta_steering_exp
            sigma_accel = sigma_accel_ + sigma_beta_accel_exp
            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas + 1e-3)
            # Draw one sample and drop the leading sample axis. The sample is
            # returned unclipped.
            single_draw = dists.sample([1])
            actions = tf.squeeze(single_draw, [0])
            if is_training:
                moving_stats = [
                    tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                    tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                    tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                    tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                    tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                    tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                    sigma_beta_accel_exp,
                    sigma_beta_steering_exp,
                ]
                summary.add_moving_summary(*moving_stats)
            # Record the actor's trainable variables on first construction.
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=actor_scope.name)

        return actions, value, dists
Exemple #9
0
    def _get_NN_prediction(self, state):
        """Construct the critic and actor networks and draw one action sample.

        Critic: eight 200-unit ``fc_selu`` layers and a one-unit linear value
        head. Actor: reads the critic's final hidden layer through
        ``tf.stop_gradient`` and predicts a mean and a standard deviation for
        steering and for acceleration. Each standard deviation gets an
        exponentially decaying offset before a ``Normal`` is built from the
        result and sampled once.

        :param state: input tensor for the first critic layer.
        :return: tuple ``(actions, value, dists)`` holding the sampled
            actions, the value-head output and the ``Normal`` distribution.
        """
        from tensorpack.tfutils import symbolic_functions
        tower_ctx = get_current_tower_context()
        is_training = tower_ctx.is_training
        net = state
        with tf.variable_scope('critic') as critic_vs:

            from autodrive.model.selu import fc_selu
            for layer_no in range(8):
                # keep_prob=1.: training uses sensor inputs only, so no key
                # information may be dropped.
                net = fc_selu(net,
                              200,
                              keep_prob=1.,
                              is_training=is_training,
                              name='fc-{}'.format(layer_no))
            value_init = tf.truncated_normal_initializer(stddev=0.1)
            value = tf.layers.dense(net,
                                    1,
                                    name='fc-value',
                                    kernel_initializer=value_init)
            # Cache the critic's trainable variables the first time through.
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=critic_vs.name)

        with tf.variable_scope('actor') as actor_vs:
            # Block gradients from the actor into the critic's layers.
            net = tf.stop_gradient(net)

            def one_unit(name, act):
                """Dense layer with one output on ``net``, stddev-0.01 init."""
                init = tf.truncated_normal_initializer(stddev=0.01)
                return tf.layers.dense(net,
                                       1,
                                       activation=act,
                                       name=name,
                                       kernel_initializer=init)

            mu_steering = 0.5 * one_unit('fc-mu-steering', tf.nn.tanh)
            mu_accel = one_unit('fc-mu-accel', tf.nn.tanh)
            mus = tf.concat([mu_steering, mu_accel], axis=-1)

            sigma_steering_ = 0.5 * one_unit('fc-sigma-steering',
                                             tf.nn.sigmoid)
            sigma_accel_ = 1. * one_unit('fc-sigma-accel', tf.nn.sigmoid)

            # Step-dependent offsets for the standard deviations. An earlier
            # note in this code reports that training without such an offset
            # converged slowly and unstably, presumably because (1) broad
            # early exploration avoids local optima and (2) a very small early
            # sigma makes log_prob, and hence the gradient updates, too large.
            from tensorpack.tfutils.common import get_global_step_var
            sigma_beta_steering_exp = tf.train.exponential_decay(
                0.001,
                get_global_step_var(),
                1000,
                0.5,
                name='sigma/beta/steering/exp')
            sigma_beta_accel_exp = tf.train.exponential_decay(
                0.5,
                get_global_step_var(),
                5000,
                0.5,
                name='sigma/beta/accel/exp')
            sigma_steering = sigma_steering_ + sigma_beta_steering_exp
            sigma_accel = sigma_accel_ + sigma_beta_accel_exp
            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas + 1e-3)
            # One draw per row; remove the leading sample axis. No clipping
            # is applied to the sample.
            actions = tf.squeeze(dists.sample([1]), [0])
            if is_training:
                tracked = (
                    tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                    tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                    tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                    tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                    tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                    tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                    sigma_beta_accel_exp,
                    sigma_beta_steering_exp,
                )
                summary.add_moving_summary(*tracked)
            # Cache the actor's trainable variables the first time through.
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=actor_vs.name)

        return actions, value, dists
Exemple #10
0
class VariationalAutoEncoder(object):
    """Variational auto-encoder built as a TensorFlow 1.x graph.

    The encoder maps the input ``x`` to a diagonal Gaussian ``q(z|x)``; the
    decoder maps a sample ``z`` (optionally concatenated with the per-example
    count sum ``n``) to the parameters of the reconstruction distribution
    ``p(x|z)``.  The quantity that is maximised is the lower bound
    ``mean(log p(x|z)) - mean(KL(q(z|x) || p(z)))`` with a standard normal
    ``p(z)``.

    Constructing an instance immediately adds the placeholders and the
    inference, loss and training ops to the current default graph.
    """

    def __init__(self,
                 feature_size,
                 latent_size,
                 hidden_sizes,
                 reconstruction_distribution=None,
                 number_of_reconstruction_classes=None,
                 use_batch_norm=True,
                 use_count_sum=True,
                 epsilon=1e-6):
        """Store the configuration and build the graph.

        Args:
            feature_size: width of the input placeholder ``x``.
            latent_size: number of outputs of the ``MU`` and ``SIGMA`` layers.
            hidden_sizes: sizes of the encoder hidden layers; the decoder
                uses the same sizes in reverse order.
            reconstruction_distribution: key into the module-level
                ``distributions`` mapping; also used to build ``name``.
            number_of_reconstruction_classes: when truthy, ``p(x|z)`` is
                wrapped in ``Categorized`` with this many classes.
            use_batch_norm: forwarded to the hidden ``dense_layer`` calls.
            use_count_sum: when true, a placeholder ``n`` is created and
                concatenated to ``z`` before decoding.
            epsilon: margin that keeps every reconstruction parameter
                strictly inside its declared support.
        """
        super(VariationalAutoEncoder, self).__init__()

        self.feature_size = feature_size
        self.latent_size = latent_size
        self.hidden_sizes = hidden_sizes

        self.reconstruction_distribution_name = reconstruction_distribution
        self.reconstruction_distribution = distributions[
            reconstruction_distribution]

        self.k_max = number_of_reconstruction_classes

        self.use_batch_norm = use_batch_norm
        self.use_count_sum = use_count_sum

        self.epsilon = epsilon

        # Placeholders fed at session.run time.
        self.x = tf.placeholder(tf.float32, [None, self.feature_size],
                                'x')  # counts

        if self.use_count_sum:
            self.n = tf.placeholder(tf.float32, [None, 1],
                                    'N')  # total counts sum

        self.learning_rate = tf.placeholder(tf.float32, [], 'learning_rate')
        # NOTE(review): warm_up_weight is created but not used by any op
        # built in this class.
        self.warm_up_weight = tf.placeholder(tf.float32, [], 'warm_up_weight')

        self.is_training = tf.placeholder(tf.bool, [], 'phase')

        self.inference()
        self.loss()
        self.training()

        self.summary = tf.summary.merge_all()

        for parameter in tf.trainable_variables():
            print(parameter.name, parameter.get_shape())

    @property
    def name(self):
        """Return a string identifying the model configuration.

        The name is assembled from the reconstruction distribution name
        (spaces replaced by underscores), an optional ``_sum`` marker, the
        latent size, the hidden sizes and an optional ``_bn`` marker.
        """
        model_name = self.reconstruction_distribution_name.replace(" ", "_")

        if self.use_count_sum:
            model_name += "_sum"

        model_name += "_l_" + str(self.latent_size) + "_h_" + "_".join(
            map(str, self.hidden_sizes))

        if self.use_batch_norm:
            model_name += "_bn"

        return model_name

    @staticmethod
    def _clipped_activation(activation_function, lower, upper):
        """Return an activation that clips ``activation_function`` to [lower, upper].

        Using a factory binds the three values at creation time, so the
        returned callable does not depend on loop variables of the caller.
        """
        def activation(x):
            return tf.clip_by_value(activation_function(x), lower, upper)

        return activation

    def inference(self):
        """Build encoder, stochastic layer and decoder.

        Sets ``q_z_given_x``, ``z_mean``, ``z``, ``p_x_given_z`` and
        ``x_tilde_mean`` and registers histogram summaries for all trainable
        variables.
        """
        encoder = self.x

        with tf.variable_scope("ENCODER"):
            for i, hidden_size in enumerate(self.hidden_sizes):
                encoder = dense_layer(inputs=encoder,
                                      num_outputs=hidden_size,
                                      activation_fn=relu,
                                      use_batch_norm=self.use_batch_norm,
                                      is_training=self.is_training,
                                      scope='{:d}'.format(i + 1))

        with tf.variable_scope("Z"):
            z_mu = dense_layer(inputs=encoder,
                               num_outputs=self.latent_size,
                               activation_fn=None,
                               use_batch_norm=False,
                               is_training=self.is_training,
                               scope='MU')

            # The pre-activation is treated as a log standard deviation and
            # clipped to [-3, 3] before exponentiation.
            z_sigma = dense_layer(
                inputs=encoder,
                num_outputs=self.latent_size,
                activation_fn=lambda x: tf.exp(tf.clip_by_value(x, -3, 3)),
                use_batch_norm=False,
                is_training=self.is_training,
                scope='SIGMA')

            self.q_z_given_x = Normal(mu=z_mu, sigma=z_sigma)

            # Mean of z
            self.z_mean = self.q_z_given_x.mean()

            # Stochastic layer
            self.z = self.q_z_given_x.sample()

        # Decoder - Generative model, p(x|z)

        if self.use_count_sum:
            decoder = tf.concat([self.z, self.n], axis=1, name='Z_N')
        else:
            decoder = self.z

        with tf.variable_scope("DECODER"):
            for i, hidden_size in enumerate(reversed(self.hidden_sizes)):
                decoder = dense_layer(
                    inputs=decoder,
                    num_outputs=hidden_size,
                    activation_fn=relu,
                    use_batch_norm=self.use_batch_norm,
                    is_training=self.is_training,
                    scope='{:d}'.format(len(self.hidden_sizes) - i))

        # Reconstruction distribution parameterisation

        with tf.variable_scope("X_TILDE"):

            x_theta = {}
            parameters = self.reconstruction_distribution["parameters"]

            for parameter in parameters:
                settings = parameters[parameter]
                p_min, p_max = settings["support"]

                # One output layer per distribution parameter, squashed by
                # the parameter's own activation and kept inside its support.
                x_theta[parameter] = dense_layer(
                    inputs=decoder,
                    num_outputs=self.feature_size,
                    activation_fn=self._clipped_activation(
                        settings["activation function"],
                        p_min + self.epsilon,
                        p_max - self.epsilon),
                    is_training=self.is_training,
                    scope=parameter.upper())

            self.p_x_given_z = self.reconstruction_distribution["class"](
                x_theta)

            if self.k_max:

                x_logits = dense_layer(inputs=decoder,
                                       num_outputs=self.feature_size *
                                       self.k_max,
                                       activation_fn=None,
                                       is_training=self.is_training,
                                       scope="P_K")

                x_logits = tf.reshape(x_logits,
                                      [-1, self.feature_size, self.k_max])

                self.p_x_given_z = Categorized(
                    dist=self.p_x_given_z, cat=Categorical(logits=x_logits))

            self.x_tilde_mean = self.p_x_given_z.mean()

        # Add histogram summaries for the trainable parameters
        for parameter in tf.trainable_variables():
            tf.summary.histogram(parameter.name, parameter)

    def loss(self):
        """Build the lower bound and register scalar summaries.

        Sets ``loss_op`` to ``mean(sum(log p(x|z))) - mean(sum(KL(q || p)))``,
        where the sums run over axis 1 and the means over the batch.
        """
        # Recognition prior
        p_z_mu = tf.constant(0.0, dtype=tf.float32)
        p_z_sigma = tf.constant(1.0, dtype=tf.float32)
        p_z = Normal(p_z_mu, p_z_sigma)

        ## Reconstruction error
        log_p_x_given_z = tf.reduce_mean(tf.reduce_sum(
            self.p_x_given_z.log_prob(self.x), axis=1),
                                         name='reconstruction_error')
        tf.add_to_collection('losses', log_p_x_given_z)

        ## Regularisation
        KL_qp = tf.reduce_mean(tf.reduce_sum(kl(self.q_z_given_x, p_z),
                                             axis=1),
                               name="kl_divergence")
        tf.add_to_collection('losses', KL_qp)

        # Averaging over samples.
        self.loss_op = tf.subtract(log_p_x_given_z, KL_qp, name='lower_bound')
        tf.add_to_collection('losses', self.loss_op)

        # Add scalar summaries for the losses
        for loss_term in tf.get_collection('losses'):
            tf.summary.scalar(loss_term.op.name, loss_term)

    def _build_train_op(self):
        """Create ``global_step`` and the Adam op that maximises ``loss_op``."""
        optimiser = tf.train.AdamOptimizer(self.learning_rate)

        # Create a variable to track the global step.
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)

        # loss_op is a lower bound to maximise, so its negation is minimised;
        # the same op also increments the global step counter.
        self.train_op = optimiser.minimize(-self.loss_op,
                                           global_step=self.global_step)

    def training(self):
        """Build ``train_op``, ordered after any pending ``UPDATE_OPS``.

        Ops in the ``UPDATE_OPS`` collection (e.g. batch-norm moving averages)
        are made control dependencies of the training step.
        """
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        if update_ops:
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                self._build_train_op()
        else:
            self._build_train_op()

    def _evaluation_batches(self, counts, count_sums, number_of_examples,
                            batch_size):
        """Yield ``(subset, size, feed_dict)`` for consecutive batches.

        ``subset`` is the slice of examples in the batch, ``size`` its actual
        length (the last batch may be shorter) and ``feed_dict`` the
        evaluation-mode feed for that batch.
        """
        for start in range(0, number_of_examples, batch_size):
            stop = min(start + batch_size, number_of_examples)
            subset = slice(start, stop)
            feed_dict = {self.x: counts[subset], self.is_training: False}
            if self.use_count_sum:
                feed_dict[self.n] = count_sums[subset]
            yield subset, stop - start, feed_dict

    def _mean_loss(self, session, counts, count_sums, number_of_examples,
                   batch_size):
        """Return ``loss_op`` averaged over all examples.

        ``loss_op`` is a per-batch mean, so each batch value is weighted by
        the batch length; this keeps a shorter final batch from distorting
        the average.
        """
        weighted_sum = 0
        for _, size, feed_dict in self._evaluation_batches(
                counts, count_sums, number_of_examples, batch_size):
            weighted_sum += session.run(self.loss_op,
                                        feed_dict=feed_dict) * size
        return weighted_sum / number_of_examples

    def train(self,
              train_data,
              valid_data,
              number_of_epochs=50,
              batch_size=100,
              learning_rate=1e-3,
              log_directory=None,
              reset_training=False):
        """Train the model and checkpoint it after every epoch.

        Args:
            train_data: object exposing ``counts`` and ``number_of_examples``.
            valid_data: same interface as ``train_data``; used for the
                per-epoch validation score.
            number_of_epochs: number of passes over ``train_data``.
            batch_size: number of examples per training/evaluation batch.
            learning_rate: value fed to the ``learning_rate`` placeholder.
            log_directory: directory for TensorBoard summaries and the
                ``model.ckpt`` checkpoint; required despite the ``None``
                default.
            reset_training: when true, an existing ``log_directory`` is
                deleted (recursively) before training.

        Raises:
            TypeError: if ``log_directory`` is ``None``.
        """
        import shutil

        if log_directory is None:
            raise TypeError("log_directory must be a directory path, not None")

        n_train = n_valid = None
        if self.use_count_sum:
            n_train = train_data.counts.sum(axis=1).reshape(-1, 1)
            n_valid = valid_data.counts.sum(axis=1).reshape(-1, 1)

        if reset_training and os.path.exists(log_directory):
            # rmtree also copes with sub-directories left by earlier runs.
            shutil.rmtree(log_directory)

        # Train

        M = train_data.number_of_examples

        self.saver = tf.train.Saver()
        checkpoint_file = os.path.join(log_directory, 'model.ckpt')

        with tf.Session() as session:

            summary_writer = tf.summary.FileWriter(log_directory,
                                                   session.graph)

            try:
                session.run(tf.global_variables_initializer())

                for epoch in range(number_of_epochs):
                    shuffled_indices = numpy.random.permutation(M)
                    for i in range(0, M, batch_size):

                        step = session.run(self.global_step)

                        start_time = time()

                        # Feeding in batch to model
                        subset = shuffled_indices[i:(i + batch_size)]
                        feed_dict_batch = {
                            self.x: train_data.counts[subset],
                            self.is_training: True,
                            self.learning_rate: learning_rate
                        }

                        # Adding the sum of counts per cell to the generator
                        # after the sample layer.
                        if self.use_count_sum:
                            feed_dict_batch[self.n] = n_train[subset]

                        # Run the stochastic batch training operation.
                        _, batch_loss = session.run(
                            [self.train_op, self.loss_op],
                            feed_dict=feed_dict_batch)

                        # Duration of one training step.
                        duration = time() - start_time

                        # Evaluation printout and TensorBoard summary
                        if step % 10 == 0:
                            print('Step {:d}: loss = {:.2f} ({:.3f} sec)'.
                                  format(int(step), batch_loss, duration))
                            summary_str = session.run(
                                self.summary, feed_dict=feed_dict_batch)
                            summary_writer.add_summary(summary_str, step)
                            summary_writer.flush()

                    # Saving model parameters
                    print('Checkpoint reached: Saving model')
                    self.saver.save(session, checkpoint_file)
                    print('Done saving model')

                    # Evaluation
                    print('Evaluating epoch {:d}'.format(epoch))

                    train_loss = self._mean_loss(session, train_data.counts,
                                                 n_train, M, batch_size)
                    print('Done evaluating training set')

                    valid_loss = self._mean_loss(
                        session, valid_data.counts, n_valid,
                        valid_data.number_of_examples, batch_size)
                    print('Done evaluating validation set')

                    print("Epoch %d: ELBO: %g (Train), %g (Valid)" %
                          (epoch + 1, train_loss, valid_loss))
            finally:
                # Release the event file even if training is interrupted.
                summary_writer.close()

    def evaluate(self, test_set, batch_size=100, log_directory=None):
        """Evaluate the model on ``test_set`` from the latest checkpoint.

        Args:
            test_set: object exposing ``counts``, ``number_of_examples`` and
                ``number_of_features``.
            batch_size: number of examples per evaluation batch.
            log_directory: directory searched for a checkpoint state.

        Returns:
            A tuple ``(recon_mean_test, z_mu_test, metrics_test)``: the mean
            of ``p(x|z)`` per example, the mean of ``q(z|x)`` per example and
            a dict with the example-averaged lower bound under ``"LL_test"``.
        """
        checkpoint = tf.train.get_checkpoint_state(log_directory)

        n_test = None
        if self.use_count_sum:
            n_test = test_set.counts.sum(axis=1).reshape(-1, 1)

        # train() normally creates the saver; build one here so that a model
        # can be evaluated from a checkpoint without training first.
        saver = getattr(self, 'saver', None)
        if saver is None:
            saver = self.saver = tf.train.Saver()

        with tf.Session() as session:

            if checkpoint and checkpoint.model_checkpoint_path:
                saver.restore(session, checkpoint.model_checkpoint_path)

            number_of_examples = test_set.number_of_examples

            weighted_lower_bound = 0
            recon_mean_test = numpy.empty(
                [number_of_examples, test_set.number_of_features])
            z_mu_test = numpy.empty([number_of_examples, self.latent_size])

            for subset, size, feed_dict_batch in self._evaluation_batches(
                    test_set.counts, n_test, number_of_examples, batch_size):
                lower_bound_batch, recon_mean_batch, z_mu_batch = session.run(
                    [self.loss_op, self.x_tilde_mean, self.z_mean],
                    feed_dict=feed_dict_batch)
                # Weight by batch length: loss_op is a per-batch mean.
                weighted_lower_bound += lower_bound_batch * size
                recon_mean_test[subset] = recon_mean_batch
                z_mu_test[subset] = z_mu_batch

            lower_bound_test = weighted_lower_bound / number_of_examples

            metrics_test = {"LL_test": lower_bound_test}

            print(metrics_test)

            return recon_mean_test, z_mu_test, metrics_test
Exemple #11
0
def gaussian_layer(x,
                   in_dim,
                   out_dim,
                   scope,
                   activation_fn=tf.nn.relu,
                   reuse=False,
                   use_mean=False,
                   store=False,
                   use_stored=False,
                   prior_stddev=1.0,
                   l2_const=0.0):
    """Fully-connected layer with a factorised Gaussian weight posterior.

    The weight matrix is drawn as ``w_mean + noise * w_stddev`` with
    ``noise ~ N(0, 1)`` and ``w_stddev = softplus(w_row) + eps``.  The
    returned KL term is the closed-form divergence between that posterior and
    a zero-mean Gaussian with standard deviation ``prior_stddev``.  The bias
    ``b`` is a plain variable and does not contribute to the KL term.

    Args:
        x: batch of input
        in_dim: input dimension
        out_dim: output dimension
        scope: tensorflow variable scope name
        activation_fn: activation function
        reuse: passed to ``tf.variable_scope`` to reuse existing variables
        use_mean: use the mean of approximate posterior, instead of sampling
        store: save the sampled weights into the non-trainable ``w_last``
            variable before they are used
        use_stored: use the weights previously saved in ``w_last`` instead
            of a fresh sample (ignored when ``store`` is true)
        prior_stddev: standard deviation of the zero-mean Gaussian prior
        l2_const: accepted for interface compatibility; not used here
    Returns:
        ``(out, kl)``: the layer output and the KL divergence of the weights.
        ``kl`` is the Python float ``0.0`` when ``use_mean`` is true.
    """

    prior_var = prior_stddev**2

    with tf.variable_scope(scope, reuse=reuse):

        # xi / ni are initialisers defined elsewhere in this module.
        w_mean = tf.get_variable('w_mean',
                                 shape=[in_dim, out_dim],
                                 initializer=xi())
        w_row = tf.get_variable('w_row',
                                shape=[in_dim, out_dim],
                                initializer=ni(-3.0, 0.1))
        w_stddev = tf.nn.softplus(w_row, name='w_std') + eps

        # Sample standard normal noise directly at the weight shape; a scalar
        # distribution avoids materialising in_dim * out_dim Python floats
        # (and the matching constants in the graph) for loc and scale.
        w_std_sample = tf.identity(Normal(0.0,
                                          1.0).sample([in_dim, out_dim]),
                                   name='w_std_sample')

        # reparametrization trick applied to the weights
        w_sample = w_mean + w_std_sample * w_stddev
        b = tf.get_variable('b',
                            shape=[out_dim],
                            dtype=tf.float32,
                            initializer=xi())

        # to store the previous theta value
        w_last = tf.get_variable('w_last',
                                 initializer=tf.zeros([in_dim, out_dim]),
                                 trainable=False)

        if use_mean:
            out = activation_fn(tf.matmul(x, w_mean) + b, name='activation')
            return out, 0.0

        if store:
            # The assignment must run before the output is computed so that
            # w_last holds the sample used for this forward pass.
            store_op = tf.assign(w_last, w_sample)
            with tf.control_dependencies([store_op]):
                out = activation_fn(tf.matmul(x, w_sample) + b,
                                    name='activation')
        elif use_stored:
            out = activation_fn(tf.matmul(x, w_last) + b, name='activation')
        else:
            out = activation_fn(tf.matmul(x, w_sample) + b,
                                name='activation')

        # KL(N(w_mean, w_stddev^2) || N(0, prior_var)) summed over all
        # in_dim * out_dim weights.
        D = in_dim * out_dim
        kl = tf.log(prior_stddev) * D - \
             tf.reduce_sum(tf.log(w_stddev+eps)) + \
             0.5*(-D +
                 (tf.reduce_sum(w_stddev**2) +
                  tf.reduce_sum(w_mean**2)) / prior_var)
        return out, kl
Exemple #12
0
    def fit(self,
            data,
            epochs=1000,
            max_seconds=600,
            activation=tf.nn.elu,
            batch_norm_decay=0.9,
            learning_rate=1e-5,
            batch_sz=1024,
            adapt_lr=False,
            print_progress=True,
            show_fig=True):
        """Build the VAE graph and train it on static and time-series features.

        The graph consists of a static encoder, an optional RNN encoder over
        the time-series input, a Gaussian latent layer, a static decoder with
        a Bernoulli likelihood and an optional RNN decoder with a Bernoulli
        likelihood over the time series.  The minimised cost is the negative
        lower bound summed over the batch.

        Args:
            data: mapping with ``'X_train_static_mins'`` (array of shape
                ``(N, D)``) and ``'X_train_time_0'`` (time-major array of
                shape ``(T1, N, D1)``).
            epochs: maximum number of passes over the data.
            max_seconds: training stops after the first epoch that ends
                later than this many seconds after the start.
            activation: activation of the hidden fully-connected layers.
            batch_norm_decay: ``decay`` passed to ``batch_norm``.
            learning_rate: initial Adam learning rate.
            batch_sz: number of examples per training batch.
            adapt_lr: when true, the learning rate is multiplied by 0.75
                whenever the tracked cost exceeds the best cost by over 1%.
            print_progress: print the tracked cost and learning-rate changes.
            show_fig: plot the tracked costs when training ends.

        Side effects:
            Sets the placeholders ``X``, ``X_time``, ``train``,
            ``rnn_keep_p_encode`` and ``rnn_keep_p_decode``, the
            ``posterior_predictive*`` tensors and ``init_op`` on ``self``;
            saves a checkpoint to ``self.save_file`` and writes summaries to
            ``self.tensorboard`` when those are set.
        """
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # static features
        X = data['X_train_static_mins']
        N, D = X.shape
        self.X = tf.placeholder(tf.float32, shape=(None, D), name='X')

        # timeseries features
        X_time = data['X_train_time_0']
        T1, N1, D1 = X_time.shape
        assert N == N1
        self.X_time = tf.placeholder(tf.float32,
                                     shape=(T1, None, D1),
                                     name='X_time')
        self.train = tf.placeholder(tf.bool, shape=(), name='train')
        # Recurrent keep probabilities default to 1.0 (no dropout) so that
        # runs which do not feed them behave like evaluation.
        self.rnn_keep_p_encode = tf.placeholder_with_default(
            1.0, shape=(), name='rnn_keep_p_encode')
        self.rnn_keep_p_decode = tf.placeholder_with_default(
            1.0, shape=(), name='rnn_keep_p_decode')
        adp_learning_rate = tf.placeholder(tf.float32,
                                           shape=(),
                                           name='adp_learning_rate')

        he_init = variance_scaling_initializer()
        bn_params = {
            'is_training': self.train,
            'decay': batch_norm_decay,
            'updates_collections': None
        }
        latent_size = self.encoder_layer_sizes[-1]

        inputs = self.X
        with tf.variable_scope('static_encoder'):
            for layer_size, keep_p in zip(self.encoder_layer_sizes[:-1],
                                          self.encoder_dropout[:-1]):
                inputs = dropout(inputs, keep_p, is_training=self.train)
                inputs = fully_connected(inputs,
                                         layer_size,
                                         weights_initializer=he_init,
                                         activation_fn=activation,
                                         normalizer_fn=batch_norm,
                                         normalizer_params=bn_params)

        if self.rnn_encoder_layer_sizes:
            with tf.variable_scope('rnn_encoder'):
                # The keep probability is the placeholder, not the configured
                # float, so evaluation runs can switch dropout off.
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_keep_p_encode)
                    for s in self.rnn_encoder_layer_sizes
                ])
                time_inputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                        self.X_time,
                                                        swap_memory=True,
                                                        time_major=True,
                                                        dtype=tf.float32)
                # (T1, batch, units) -> (batch, T1 * units)
                time_inputs = tf.transpose(time_inputs, perm=(1, 0, 2))
                time_inputs = tf.reshape(
                    time_inputs,
                    shape=(-1, self.rnn_encoder_layer_sizes[-1] * T1))

            inputs = tf.concat([inputs, time_inputs], axis=1)

        with tf.variable_scope('latent_space'):
            inputs = dropout(inputs,
                             self.encoder_dropout[-1],
                             is_training=self.train)
            loc = fully_connected(inputs,
                                  latent_size,
                                  weights_initializer=he_init,
                                  activation_fn=None,
                                  normalizer_fn=batch_norm,
                                  normalizer_params=bn_params)
            scale = fully_connected(inputs,
                                    latent_size,
                                    weights_initializer=he_init,
                                    activation_fn=tf.nn.softplus,
                                    normalizer_fn=batch_norm,
                                    normalizer_params=bn_params)

            # Reparameterisation: z = loc + scale * e with e ~ N(0, I).
            standard_normal = Normal(loc=np.zeros(latent_size,
                                                  dtype=np.float32),
                                     scale=np.ones(latent_size,
                                                   dtype=np.float32))
            e = standard_normal.sample(tf.shape(loc)[0])
            outputs = e * scale + loc

            static_output_size = self.decoder_layer_sizes[0]
            if self.rnn_decoder_layer_sizes:
                time_output_size = self.rnn_decoder_layer_sizes[0] * T1
                output_size = static_output_size + time_output_size
            else:
                output_size = static_output_size
            outputs = fully_connected(outputs,
                                      output_size,
                                      weights_initializer=he_init,
                                      activation_fn=activation,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)
            if self.rnn_decoder_layer_sizes:
                outputs, time_outputs = tf.split(
                    outputs, [static_output_size, time_output_size], axis=1)

        with tf.variable_scope('static_decoder'):
            for layer_size, keep_p in zip(self.decoder_layer_sizes,
                                          self.decoder_dropout[:-1]):
                outputs = dropout(outputs, keep_p, is_training=self.train)
                outputs = fully_connected(outputs,
                                          layer_size,
                                          weights_initializer=he_init,
                                          activation_fn=activation,
                                          normalizer_fn=batch_norm,
                                          normalizer_params=bn_params)
            outputs = dropout(outputs,
                              self.decoder_dropout[-1],
                              is_training=self.train)
            outputs = fully_connected(outputs,
                                      D,
                                      weights_initializer=he_init,
                                      activation_fn=None,
                                      normalizer_fn=batch_norm,
                                      normalizer_params=bn_params)

            X_hat = Bernoulli(logits=outputs)
            self.posterior_predictive = X_hat.sample()
            self.posterior_predictive_probs = tf.nn.sigmoid(outputs)

        if self.rnn_decoder_layer_sizes:
            with tf.variable_scope('rnn_decoder'):
                # Work on a local copy: appending to the instance attribute
                # would grow the decoder by one layer on every call to fit().
                decoder_sizes = list(self.rnn_decoder_layer_sizes) + [D1]
                time_output_size = decoder_sizes[0]
                time_outputs = tf.reshape(time_outputs,
                                          shape=(-1, T1, time_output_size))
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                rnn_cell = MultiRNNCell([
                    LayerNormBasicLSTMCell(
                        s,
                        activation=tf.tanh,
                        dropout_keep_prob=self.rnn_keep_p_decode)
                    for s in decoder_sizes
                ])
                time_outputs, states = tf.nn.dynamic_rnn(rnn_cell,
                                                         time_outputs,
                                                         swap_memory=True,
                                                         time_major=True,
                                                         dtype=tf.float32)
                time_outputs = tf.transpose(time_outputs, perm=(1, 0, 2))
                time_outputs = tf.reshape(time_outputs, shape=(-1, T1 * D1))
                X_hat_time = Bernoulli(logits=time_outputs)
                posterior_predictive_time = X_hat_time.sample()
                posterior_predictive_time = tf.reshape(
                    posterior_predictive_time, shape=(-1, T1, D1))
                self.posterior_predictive_time = tf.transpose(
                    posterior_predictive_time, perm=(1, 0, 2))
                self.posterior_predictive_probs_time = tf.nn.sigmoid(
                    time_outputs)

        # Closed-form KL(N(loc, scale^2) || N(0, 1)) per latent dimension.
        kl_div = -tf.log(scale) + 0.5 * (scale**2 + loc**2) - 0.5
        kl_div = tf.reduce_sum(kl_div, axis=1)

        expected_log_likelihood = tf.reduce_sum(X_hat.log_prob(self.X), axis=1)
        # X_hat_time only exists when the RNN decoder is built, so the
        # time-series likelihood is gated on the decoder configuration.
        if self.rnn_decoder_layer_sizes:
            X_time_trans = tf.transpose(self.X_time, perm=(1, 0, 2))
            X_time_reshape = tf.reshape(X_time_trans, shape=(-1, T1 * D1))
            expected_log_likelihood_time = tf.reduce_sum(
                X_hat_time.log_prob(X_time_reshape), axis=1)
            elbo = -tf.reduce_sum(expected_log_likelihood +
                                  expected_log_likelihood_time - kl_div)
        else:
            elbo = -tf.reduce_sum(expected_log_likelihood - kl_div)
        # NOTE: despite its name, `elbo` is the negative lower bound (a cost).
        train_op = tf.train.AdamOptimizer(
            learning_rate=adp_learning_rate).minimize(elbo)

        tf.summary.scalar('elbo', elbo)
        if self.save_file:
            saver = tf.train.Saver()

        if self.tensorboard:
            for v in tf.trainable_variables():
                tf.summary.histogram(v.name, v)
            train_merge = tf.summary.merge_all()
            writer = tf.summary.FileWriter(self.tensorboard)

        self.init_op = tf.global_variables_initializer()
        n = 0
        # At least one batch per epoch, so a data set smaller than batch_sz
        # is still trained on instead of being skipped silently.
        n_batches = max(1, N // batch_sz)
        costs = list()
        min_cost = np.inf

        # Full-data, dropout-free feed used for cost tracking and summaries.
        eval_feed = {
            self.X: X,
            self.X_time: X_time,
            self.rnn_keep_p_encode: 1.0,
            self.rnn_keep_p_decode: 1.0,
            self.train: False
        }
        # The periodic evaluation is only worth its cost when something
        # consumes the result; printing alone must not decide that.
        track_cost = bool(print_progress or adapt_lr or show_fig
                          or self.tensorboard)

        t0 = dt.now()
        with tf.Session() as sess:
            sess.run(self.init_op)
            for epoch in range(epochs):
                idxs = shuffle(range(N))
                X_train = X[idxs]
                X_train_time = X_time[:, idxs]

                for batch in range(n_batches):
                    n += 1
                    start = batch * batch_sz
                    stop = (batch + 1) * batch_sz
                    X_batch = X_train[start:stop]
                    X_batch_time = X_train_time[:, start:stop]

                    sess.run(train_op,
                             feed_dict={
                                 self.X: X_batch,
                                 self.X_time: X_batch_time,
                                 self.rnn_keep_p_encode:
                                 self.rnn_encoder_dropout,
                                 self.rnn_keep_p_decode:
                                 self.rnn_decoder_dropout,
                                 self.train: True,
                                 adp_learning_rate: learning_rate
                             })

                    if n % 100 != 0 or not track_cost:
                        continue

                    cost = sess.run(elbo, feed_dict=eval_feed)
                    cost /= N
                    costs.append(cost)

                    if adapt_lr and epoch > 0:
                        if cost < min_cost:
                            min_cost = cost
                        elif cost > min_cost * 1.01:
                            learning_rate *= 0.75
                            if print_progress:
                                print('Updating Learning Rate',
                                      learning_rate)

                    if print_progress:
                        print('Epoch:', epoch, 'Batch:', batch, 'Cost:', cost)

                    if self.tensorboard:
                        train_sum = sess.run(train_merge,
                                             feed_dict=eval_feed)
                        writer.add_summary(train_sum, n)

                # total_seconds() keeps counting past 24 hours, unlike the
                # `.seconds` field which wraps around.
                seconds = int((dt.now() - t0).total_seconds())
                if seconds > max_seconds:
                    if print_progress:
                        print('Breaking after', seconds, 'seconds')
                    break

            if self.save_file:
                saver.save(sess, self.save_file)

            if self.tensorboard:
                writer.add_graph(sess.graph)
                # Flush pending events and release the event file.
                writer.close()

        if show_fig:
            plt.plot(costs)
            plt.title('Costs and Scores')
            plt.show()
Exemple #13
0
    def step(self,
             time,
             inputs,
             input_latent_sample,
             states,
             use_inference,
             name=None):
        """Perform a decoding step.

        Args:
          time: scalar `int32`.
          inputs: A (structure of) input tensors for the current frame, or
              None if no current frame is available.
          input_latent_sample: If not None, used as the latent sample instead
              of sampling a new one; no inference/prior cell is run then.
          states: Dict of state tensors / TensorArrays, indexed with the keys
              'inference', 'prior' and 'output'.
          use_inference: If True, the latent is sampled from the inference
              distribution; otherwise from the prior.
          name: Name scope for any created operations.

        Returns:
          A tuple `(output_frame, inference_dist, prior_dist, states,
          z_sample)`. `inference_dist` and `prior_dist` are the raw cell
          outputs (means concatenated with log std-devs along the last axis)
          or None when they were not computed. `states` always holds the
          'output' state; 'inference' is only present when a latent was
          sampled in this step, and 'prior' only when the learned prior cell
          was run.

        Raises:
          ValueError: If no previous input has been set, or if
              `use_inference` is True but `inputs` is None so that no
              inference distribution exists to sample from.
        """
        if self._prev_inputs is None:
            raise ValueError("Need previous input for VariationalDecoder!")

        cell_outputs, cell_states = dict(), dict()

        with ops.name_scope(name, "VariationalDecoderStep",
                            (time, inputs, states)):

            if input_latent_sample is None:
                # Predict the inference distribution from the current frame,
                # if there is one.
                if inputs is not None:
                    cell_outputs['inference'], cell_states['inference'] = \
                        self._cells['inference'](
                            self._maybe_encode_inputs(inputs),
                            states['inference'])
                else:
                    cell_outputs['inference'] = None
                    cell_states['inference'] = None

                # Predict the learned prior from the previous frame, or fall
                # back to a fixed unit Gaussian (mean 0, log std-dev 0).
                if not self._fixed_prior:
                    cell_outputs['prior'], cell_states['prior'] = \
                        self._cells['prior'](
                            self._maybe_encode_inputs(self._prev_inputs),
                            states['prior'])
                else:
                    # NOTE(review): no 'prior' entry is written to
                    # cell_states on this path -- confirm callers expect that.
                    prior_shape = [self._batch_size, self._sample_dim]
                    means = tf.zeros(prior_shape)
                    log_std_dev = tf.log(tf.constant(1.0, shape=prior_shape))
                    cell_outputs['prior'] = tf.concat([means, log_std_dev],
                                                      axis=1)

                # Pick the distribution parameters to sample from.
                if use_inference:
                    dist_params = cell_outputs['inference']
                    if dist_params is None:
                        raise ValueError(
                            "use_inference=True requires `inputs`, but no "
                            "inference distribution was computed!")
                else:
                    dist_params = cell_outputs['prior']

                # First half of the last axis holds means, second half log
                # std-devs.
                means = dist_params[..., :self._sample_dim]
                std_dev = tf.exp(dist_params[..., self._sample_dim:])

                z_dists = Normal(loc=means, scale=std_dev)
                # Draw one sample per distribution and drop ONLY the leading
                # sample axis. An unrestricted squeeze would also remove the
                # batch or latent axis whenever either has size 1.
                z_sample = tf.squeeze(z_dists.sample([1]), axis=0)
                if (tf.flags.FLAGS.trajectory_space
                        and not tf.flags.FLAGS.trajectory_autoencoding):
                    # Append one zero-valued entry along the last axis.
                    pad_shape = z_sample.get_shape().as_list()[:-1] + [1]
                    z_sample = tf.concat(
                        [z_sample, tf.zeros(pad_shape, dtype=tf.float32)],
                        axis=-1)
            else:
                z_sample = input_latent_sample
                cell_outputs['inference'] = None
                cell_outputs['prior'] = None

            # Reconstruct the output with the output cell and optional
            # output layer.
            if self._use_cdna_model:
                decoder_input = [
                    self._prev_inputs, self._first_image, z_sample,
                    self._is_training
                ]
            else:
                decoder_input = tf.concat((self._prev_inputs, z_sample),
                                          axis=-1)
            cell_outputs['output'], cell_states['output'] = \
                self._cells['output'](decoder_input, states['output'])
            if self._output_layer is not None:
                cell_outputs['output'] = self._output_layer(
                    cell_outputs['output'])

        return cell_outputs['output'], cell_outputs['inference'], \
               cell_outputs['prior'], cell_states, z_sample