Example no. 1
    def update_inference(self):
        self.save_params()

        if self.bayes:
            # pair each latent variable with its approximating distribution for this scope
            pqz = Normal(loc=0., scale=self.prior_std)
            self.inference.latent_vars[self.scope] = {pqz: self.qz}
            for w, b, qw, qb in zip(self.prev_eW, self.prev_eB, self.eW,
                                    self.eB):
                self.inference.latent_vars[self.scope].update({w: qw, b: qb})

            for w, b, qw, qb in zip(self.prev_dW, self.prev_dB, self.dW,
                                    self.dB):
                self.inference.latent_vars[self.scope].update({w: qw, b: qb})

            self.inference.latent_vars[self.scope].update({
                self.prev_sigma_w:
                self.sigma_w,
                self.prev_sigma_b:
                self.sigma_b
            })

        else:
            pqz = Normal(loc=self.prev_eH[-1],
                         scale=tf.maximum(self.prev_z_sigma, 1e-4))
            self.inference.latent_vars[self.scope] = {pqz: self.qz}
        self.inference.reinitialize(self)
Example no. 2
	def prepare_loss(self, global_step):
		self.global_step = global_step
		print( "Preparing loss {}".format(self.id) )
		self.state_value_batch = self.critic_batch
		# [Policy distribution]
		old_policy_distributions = []
		new_policy_distributions = []
		policy_loss_builder = []
		for h,policy_head in enumerate(self.policy_heads):
			if is_continuous_control(policy_head['depth']):
				# Old policy
				old_policy_batch = tf.transpose(self.old_policy_batch[h], [1, 0, 2])
				old_policy_distributions.append( Normal(old_policy_batch[0], old_policy_batch[1]) )
				# New policy
				new_policy_batch = tf.transpose(self.actor_batch[h], [1, 0, 2])
				new_policy_distributions.append( Normal(new_policy_batch[0], new_policy_batch[1]) )
			else: # discrete control
				old_policy_distributions.append( Categorical(self.old_policy_batch[h]) ) # Old policy
				new_policy_distributions.append( Categorical(self.actor_batch[h]) ) # New policy
			builder = self._get_policy_loss_builder(new_policy_distributions[h], old_policy_distributions[h], self.old_action_batch[h], self.old_action_mask_batch[h] if self.has_masked_actions else None)
			policy_loss_builder.append(builder)
		# [Actor loss]
		self.policy_loss = sum(self._get_policy_loss(b) for b in policy_loss_builder)
		# [Debug variables]
		self.policy_kl_divergence = sum(b.approximate_kullback_leibler_divergence() for b in policy_loss_builder)
		self.policy_clipping_frequency = sum(b.get_clipping_frequency() for b in policy_loss_builder)/len(policy_loss_builder) # take average because clipping frequency must be in [0,1]
		self.policy_entropy_regularization = sum(b.get_entropy_regularization() for b in policy_loss_builder)
		# [Critic loss]
		value_loss_builder = self._get_value_loss_builder()
		self.value_loss = self._get_value_loss(value_loss_builder)
		# [Entropy regularization]
		if flags.entropy_regularization:
			self.policy_loss += -self.policy_entropy_regularization
		# [Constraining Replay]
		if self.constrain_replay:
			constrain_loss = sum(
				0.5*builder.reduce_function(tf.squared_difference(new_distribution.mean(), tf.stop_gradient(old_action))) 
				for builder, new_distribution, old_action in zip(policy_loss_builder, new_policy_distributions, self.old_action_batch)
			)
			self.policy_loss += tf.cond(
				pred=self.is_replayed_batch[0], 
				true_fn=lambda: constrain_loss,
				false_fn=lambda: tf.constant(0., dtype=self.parameters_type)
			)
		# [Total loss]
		self.total_loss = self.policy_loss + self.value_loss
		if flags.intrinsic_reward:
			self.total_loss += self.intrinsic_reward_loss
Example no. 3
 def prepare_loss(self):
     with tf.device(self.device):
         print("    [{}]Preparing loss".format(self.id))
         # [Policy distribution]
         if self.is_continuous_control():
             # Old policy
             old_policy_batch = tf.transpose(self.old_policy_batch,
                                             [1, 0, 2])
             old_policy_distributions = Normal(old_policy_batch[0],
                                               old_policy_batch[1])
             # New policy
             new_policy_batch = tf.transpose(self.policy_batch, [1, 0, 2])
             new_policy_distributions = Normal(new_policy_batch[0],
                                               new_policy_batch[1])
         else:  # discrete control
             old_policy_distributions = Categorical(
                 self.old_policy_batch)  # Old policy
             new_policy_distributions = Categorical(
                 self.policy_batch)  # New policy
         # [Actor loss]
         policy_loss_builder = PolicyLoss(
             cliprange=self.clip,
             cross_entropy=new_policy_distributions.cross_entropy(
                 self.old_action_batch),
             old_cross_entropy=old_policy_distributions.cross_entropy(
                 self.old_action_batch),
             advantage=self.advantage_batch,
             # entropy=self.fentropy,
             entropy=new_policy_distributions.entropy(),
             beta=self.beta)
         self.policy_loss = policy_loss_builder.get()
         # [Critic loss]
         value_loss_builder = ValueLoss(cliprange=self.clip,
                                        value=self.value_batch,
                                        old_value=self.old_value_batch,
                                        reward=self.cumulative_reward_batch)
         self.value_loss = flags.value_coefficient * value_loss_builder.get()  # usually critic has lower learning rate
         # [Extra loss]
         self.extra_loss = tf.constant(0.)
         if self.predict_reward:
             self.extra_loss += self._reward_prediction_loss()
         # [Debug variables]
         self.policy_kl_divergence = policy_loss_builder.approximate_kullback_leibler_divergence()
         self.policy_clipping_frequency = policy_loss_builder.get_clipping_frequency()
         self.policy_entropy_contribution = policy_loss_builder.get_entropy_contribution()
         self.total_loss = self.policy_loss + self.value_loss + self.extra_loss
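The PolicyLoss and ValueLoss builders are not included in these examples. As a point of reference, the sketch below is a minimal NumPy version of the clipped-surrogate objective that the arguments above (cliprange, cross_entropy, old_cross_entropy, advantage, entropy, beta) suggest; the function name and the final reduction are assumptions, not code from the repository.

import numpy as np

def clipped_surrogate_loss(cross_entropy, old_cross_entropy, advantage,
                           entropy, cliprange=0.2, beta=0.01):
    # cross_entropy = -log pi_new(a) and old_cross_entropy = -log pi_old(a),
    # so the probability ratio pi_new/pi_old is exp(old_cross_entropy - cross_entropy)
    ratio = np.exp(old_cross_entropy - cross_entropy)
    clipped_ratio = np.clip(ratio, 1. - cliprange, 1. + cliprange)
    # PPO-style clipped surrogate: maximise the pessimistic bound (minimise its negation),
    # with an entropy bonus weighted by beta
    surrogate = np.minimum(ratio * advantage, clipped_ratio * advantage)
    return -np.mean(surrogate) - beta * np.mean(entropy)

Under this reading, get_clipping_frequency() would presumably report how often the ratio leaves [1 - cliprange, 1 + cliprange], which is why the multi-head variant in Example no. 2 averages it across policy heads.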
Example no. 4
def gen_stein_coreset(init, core_y_data, qW, qB, n_samples, ac_fn, conv_W=None, LR=False, noise_std=0.001, sess=None):
    # coreset inputs are free variables, optimised later via the SVGD gradients returned below
    stein_core_x = tf.get_variable('stein_cx', initializer=init.astype(np.float32), dtype=tf.float32)
    print('gen stein coreset')
    if LR:
        # Gaussian likelihood over a single linear layer
        stein_core_y = Normal(loc=tf.matmul(stein_core_x, qW) + qB, scale=noise_std)
    elif conv_W is not None:
        ## to do: change to general function ##
        h = forward_cifar_model(stein_core_x, conv_W)
        stein_core_y = forward_nets(qW, qB, h, ac_fn=ac_fn, bayes=True, num_samples=10)[-1]
    else:
        stein_core_y = forward_nets(qW, qB, stein_core_x, ac_fn=ac_fn, bayes=True, num_samples=10)[-1]
    # log-likelihood of the coreset labels, averaged along the leading axis, and its gradient w.r.t. the inputs
    lnp = tf.reduce_mean(stein_core_y.log_prob(core_y_data), axis=0)
    dlnp = tf.gradients(lnp, stein_core_x)
    svgd = SVGD()
    #print('shape check', stein_core_x.shape)
    core_sgrad = svgd.gradients(stein_core_x, dlnp[0])

    return stein_core_x, stein_core_y, core_sgrad
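The SVGD class used above is not shown. For orientation, this is a NumPy re-implementation of the standard Stein variational gradient with an RBF kernel and the median-heuristic bandwidth (Liu & Wang, 2016); it mirrors what svgd.gradients(stein_core_x, dlnp[0]) is expected to compute, but it is a sketch, not the repository's implementation.

import numpy as np
from scipy.spatial.distance import pdist, squareform

def svgd_gradients(x, dlnp, h=-1.):
    # x: (n, d) particles, dlnp: (n, d) gradients of log p evaluated at the particles
    sq_dists = squareform(pdist(x)) ** 2
    if h < 0:  # median heuristic for the RBF bandwidth
        h = np.sqrt(0.5 * np.median(sq_dists) / np.log(x.shape[0] + 1))
    kxy = np.exp(-sq_dists / (2 * h ** 2))                            # kernel matrix k(x_j, x_i)
    dxkxy = (x * kxy.sum(axis=1, keepdims=True) - kxy @ x) / h ** 2   # sum_j grad_{x_j} k(x_j, x_i)
    # phi(x_i) = (1/n) sum_j [ k(x_j, x_i) * dlnp_j + grad_{x_j} k(x_j, x_i) ]
    return (kxy @ dlnp + dxkxy) / x.shape[0]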
Example no. 5
 # feature entropy measures how much the input is uncommon
 def get_feature_entropy(self, input, scope, name=""):
     with tf.device(self.device):
         batch_norm, _ = self._batch_norm_layer(input=input,
                                                scope=scope,
                                                name=name,
                                                share_trainables=False)
         fentropy = Normal(
             batch_norm.moving_mean,
             tf.sqrt(batch_norm.moving_variance)).cross_entropy(input)
         fentropy = tf.layers.flatten(fentropy)
         if len(fentropy.get_shape()) > 1:
             fentropy = tf.reduce_mean(fentropy, axis=-1)
         return fentropy
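The Normal wrapper's cross_entropy method is not shown in these snippets; in Example no. 3 it is applied to an action batch, so it presumably evaluates the elementwise negative log-likelihood of its argument. Under that assumption, the feature-entropy measure reduces to the following NumPy computation: the further the input lies from the batch-norm running statistics, the larger the value.

import numpy as np

def feature_entropy(x, moving_mean, moving_variance, eps=1e-8):
    # elementwise Gaussian negative log-likelihood under the running statistics
    var = moving_variance + eps
    nll = 0.5 * (np.log(2. * np.pi * var) + (x - moving_mean) ** 2 / var)
    # flatten per sample and average over features, as in the TF code above
    return nll.reshape(nll.shape[0], -1).mean(axis=-1)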
Example no. 6
 def reward(i):
     global node_types
     sigma_val = {
         'V1': 5,
         'V2': 10,
         'V3': 20,
         'V4': 40,
         'G1': 100,
         'G2': 120,
         'G3': 140,
         'G4': 160,
         'G5': 180
     }
     return Normal(mu=0, sigma=sigma_val[node_types[i]])
Example no. 7
def create_env_structure(leaves):
    sigma_val = {'V1': 5, 'V2': 10, 'V3': 20, 'V4': 40, 'G1': 100, 'G2': 120, 'G3': 140, 'G4': 160, 'G5': 180}
    goal_structure = ['G1', 'G2', 'G3', 'G4', 'G5']
    x = 17
    TREE = [[(i*x)+1 for i in range(leaves)]]
    dist = ['V1']
    for i in range(leaves):
        TREE += [[(i*x)+2, (i*x)+8, (i*x)+13], [(i*x)+3, (i*x)+7], [(i*x)+4], [], [(i*x)+4], [(i*x)+4], [(i*x)+5, (i*x)+6], [(i*x)+9, (i*x)+12], [(i*x)+4], [(i*x)+4], [(i*x)+4], [(i*x)+10, (i*x)+11], [(i*x)+14, (i*x)+17], [(i*x)+4], [(i*x)+4], [(i*x)+4], [(i*x)+15, (i*x)+16]]

        g = goal_structure.pop(0)
        goal_structure.append(g)
        dist += ['V1', 'V2', 'V3', g, 'V4', 'V4', 'V3', 'V2', 'V3', 'V4', 'V4', 'V3', 'V2', 'V3', 'V4', 'V4', 'V3']
    INIT = tuple([Normal(mu=0, sigma=sigma_val[d]) for d in dist])
    return TREE, INIT
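A quick usage sketch, assuming the same mu/sigma-style Normal as in the call above: every leaf appends 17 nodes, so both TREE and INIT grow as 1 + 17 * leaves.

# hypothetical usage of create_env_structure
TREE, INIT = create_env_structure(leaves=3)
assert len(TREE) == 1 + 17 * 3      # root children list plus 17 adjacency lists per leaf branch
assert len(INIT) == len(TREE)       # one reward distribution per node
print(TREE[0])                      # children of the root: [1, 18, 35]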
Example no. 8
 def sample_actions(self):
     with tf.device(self.device):
         if self.is_continuous_control():
             new_policy_batch = tf.transpose(self.policy_batch, [1, 0, 2])
             sample_batch = Normal(new_policy_batch[0],
                                   new_policy_batch[1]).sample()
             action_batch = tf.clip_by_value(sample_batch, -1, 1)  # Sample action batch in forward direction, use old action in backward direction
         else:  # discrete control
             action_batch = Categorical(self.policy_batch).sample()  # Sample action batch in forward direction, use old action in backward direction
         # Give the output a self-explanatory name so it can easily be retrieved from the frozen graph
         tf.identity(action_batch, name="action")
         return action_batch
Example no. 9
	def sample_actions(self):
		action_batch = []
		hot_action_batch = []
		for h,actor_head in enumerate(self.actor_batch):
			if is_continuous_control(self.policy_heads[h]['depth']):
				new_policy_batch = tf.transpose(actor_head, [1, 0, 2])
				sample_batch = Normal(new_policy_batch[0], new_policy_batch[1]).sample()
				action = tf.clip_by_value(sample_batch, -1,1)
				action_batch.append(action) # Sample action batch in forward direction, use old action in backward direction
				hot_action_batch.append(action)
			else: # discrete control
				distribution = Categorical(actor_head)
				action = distribution.sample(one_hot=False) # Sample action batch in forward direction, use old action in backward direction
				action_batch.append(action)
				hot_action_batch.append(distribution.get_sample_one_hot(action))
		# Give the output a self-explanatory name so it can easily be retrieved from the frozen graph
		# tf.identity(action_batch, name="action")
		return action_batch, hot_action_batch
Example no. 10
    def get_goal_reward(self, goal):
        """
        Returns the reward distribution for a goal state taking the best path to the goal into account.
        """

        max_path_reward = self.get_max_path_reward(goal)
        # Update the distribution by the value along the path
        goal_state = self._state[goal]
        if hasattr(goal_state, "sample"):
            if hasattr(goal_state, "mu") and hasattr(goal_state, "sigma"):
                return Normal(goal_state.mu + max_path_reward,
                              goal_state.sigma)
            elif hasattr(goal_state, "vals") and hasattr(goal_state, "probs"):
                vals = tuple(
                    [value + max_path_reward for value in goal_state.vals])
                return Categorical(vals, goal_state.probs)
            else:
                print(f"Type {type(goal_state)} not supported.")
                raise NotImplementedError()
        else:
            return goal_state + max_path_reward
Example no. 11
    def __init__(self,
                 ob_readings: dict,
                 wound: Wound,
                 priors: list = None,
                 t_units='minutes'):
        """
        Perform inference on observed bias readings to infer the posterior distribution over the
        attractant dynamics parameters {q, D, τ, R0, κ, m, b0}.

        A dictionary specifying the observed bias readings must be provided, along with an
        instantiated wound (which can be a PointWound, a CellsOnWoundMargin or a CellsInsideWound).

        The observed bias readings should be a dictionary with elements of the following form:

        {(r1, t1): (mu1, sig1), (r2, t2): (mu2, sig2) ... }

        r and t specify the spatial and temporal location where the observed bias has been measured
        (these could be the mid-points of their respective bins), and mu and sig represent the mean
        and standard deviation of the posterior of the observed bias at this location.

        DISTANCES SHOULD BE MEASURED IN MICRONS

        Time can be measured in minutes or seconds; specify this with the t_units argument.

        The parameters are measured in the following units:

        q:      Mmol / min
        D:      µm^2 / min
        τ:      min
        R0:     Mmol / µm^2
        kappa:  Mmol / µm^2
        m:      µm^2 / Mmol
        b0:     unitless


        Parameters
        ----------
        ob_readings     The observed bias readings
        wound           A Wound class, which the observed bias is assumed to be generated from
        priors          A list of distributions, one element per parameter, specifying the priors
        t_units         The units which time is measured in, in the ob_readings dictionary keys
        """

        super().__init__()

        self.wound = wound

        assert t_units in ['seconds', 'minutes'], \
            't_units must be either "seconds" or "minutes" but it is {}'.format(t_units)

        # the total number of readings
        self.TS = len(ob_readings)

        # extract a list of rs, ts, mus and sigs
        self.r = np.array([r for r, t in ob_readings.keys()])
        self.t = np.array([t for r, t in ob_readings.keys()])
        mus = np.array([mu for mu, sig in ob_readings.values()])
        sigs = np.array([sig for mu, sig in ob_readings.values()])

        # convert to minutes
        if t_units == 'seconds':
            self.t /= 60

        # this is our multivariate Gaussian observed bias distribution
        self.ob_dists = multivariate_normal(mus, sigs**2)

        # these are the default priors
        if priors is None:
            self.priors = [
                Normal(5 * 60, 4 * 60),
                Normal(400, 300),
                Normal(60, 16),
                Normal(0.3, 0.2),
                Normal(0.1, 0.2),
                Normal(4, 4),
                Normal(0.001, 0.0005)
            ]
        else:
            assert isinstance(priors, list)
            assert len(priors) == 7
            self.priors = priors
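For reference, the observed-bias dictionary described in the docstring has (r, t) keys and (mu, sig) values, and the optional priors list must contain seven distributions, one per parameter. A hypothetical construction is shown below; the numbers are illustrative only, Normal is the same distribution class used for the default priors above, and the enclosing class and Wound classes are not shown here, so the sketch stops at the constructor arguments.

# (r [microns], t [minutes]) -> (posterior mean, posterior std) of the observed bias
ob_readings = {
    (25.0, 10.0): (0.30, 0.05),
    (50.0, 10.0): (0.22, 0.04),
    (25.0, 30.0): (0.18, 0.04),
    (50.0, 30.0): (0.12, 0.03),
}

# optional: seven priors, one per parameter {q, D, tau, R0, kappa, m, b0};
# if omitted, the defaults defined in __init__ are used
priors = [Normal(5 * 60, 4 * 60), Normal(400, 300), Normal(60, 16),
          Normal(0.3, 0.2), Normal(0.1, 0.2), Normal(4, 4), Normal(0.001, 0.0005)]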