def env_init():
    global local_observation, this_reward_observation, arms, numarms
    local_observation = np.zeros(1)
    arms = np.zeros(numarms)
    for i in range(numarms):
        arms[i] = randn(0.0, 0.5)  # true value of each arm
    this_reward_observation = (0.0, local_observation, False)
def _test(self, model):
    np.random.seed(0)
    logger.info("Starting test gradient for model %s" % model)
    np.random.seed(1)
    x = np.round(randn(1), 1)
    grad = model.d_log_likelihood(x)
    grad_approx = model.d_log_likelihood_approx(x)
    # analytic gradient should agree with the numerical approximation to within 1%
    np.testing.assert_allclose(grad, grad_approx, rtol=0.01)
def env_init():
    global local_observation, this_reward_observation, bandit_action_values
    local_observation = np.zeros(1)
    this_reward_observation = (0.0, local_observation, False)

    # Create the bandit problem for the current run
    bandit_action_values = [randn(0.0, 1.0) for action in range(10)]
def __init__(self, num_tags):
    super(CRF, self).__init__()
    self.num_tags = num_tags

    # matrix of transition scores from j to i
    self.transition = nn.Parameter(randn(num_tags, num_tags))
    self.transition.data[START_TAG_IDX, :] = -10000.  # no transition to START
    self.transition.data[:, STOP_TAG_IDX] = -10000.   # no transition from END except to PAD
    self.transition.data[:, PAD_IDX] = -10000.        # no transition from PAD except to PAD
    self.transition.data[PAD_IDX, :] = -10000.        # no transition to PAD except from END
    self.transition.data[PAD_IDX, STOP_TAG_IDX] = 0.
    self.transition.data[PAD_IDX, PAD_IDX] = 0.
def env_step(this_action):  # returns (floating point, NumPy array, Boolean), this_action: NumPy array
    global local_observation, this_reward_observation, arms  # , nStatesSimpleEnv
    episode_over = False

    atp1 = this_action[0]  # how to extract the action
    stp1 = randInRange(nStatesSimpleEnv)  # state transitions are uniform random
    the_reward = randn(0.0, 1.0) + arms[int(atp1)]  # reward: selected arm's value plus N(0, 1) Gaussian noise
    # if rand_un() < 0.05:
    #     episode_over = True  # termination is random

    local_observation[0] = stp1
    this_reward_observation = (the_reward, this_reward_observation[1], episode_over)

    return this_reward_observation
def __init__(self, num_tags):
    super().__init__()
    self.num_tags = num_tags

    # matrix of transition scores from j to i
    self.trans = nn.Parameter(randn(num_tags, num_tags))
    self.trans.data[SOS_IDX, :] = -10000.  # no transition to SOS
    self.trans.data[:, EOS_IDX] = -10000.  # no transition from EOS except to PAD
    self.trans.data[:, PAD_IDX] = -10000.  # no transition from PAD except to PAD
    self.trans.data[PAD_IDX, :] = -10000.  # no transition to PAD except from EOS
    self.trans.data[PAD_IDX, EOS_IDX] = 0.
    self.trans.data[PAD_IDX, PAD_IDX] = 0.
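# A self-contained sketch (an assumption, not taken from the source) of the transition-masking
# idea used by the two CRF constructors above. It assumes randn here is torch.randn and uses
# hypothetical tag indices SOS_IDX, EOS_IDX, PAD_IDX chosen only for this example.
import torch
import torch.nn as nn

SOS_IDX, EOS_IDX, PAD_IDX = 3, 4, 5

trans = nn.Parameter(torch.randn(6, 6))  # trans[i, j]: score of transitioning from tag j to tag i
trans.data[SOS_IDX, :] = -10000.         # nothing may transition into SOS
trans.data[:, EOS_IDX] = -10000.         # nothing may follow EOS ...
trans.data[PAD_IDX, EOS_IDX] = 0.        # ... except PAD
print(trans.data[SOS_IDX])               # the whole row is -10000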
def env_step(this_action):  # returns (floating point, NumPy array, Boolean), this_action: NumPy array
    global local_observation, this_reward_observation  # , nStatesSimpleEnv
    episode_over = False

    # Get a reward from the current action's reward distribution
    atp1 = int(this_action[0])  # how to extract the action
    the_reward = randn(bandit_action_values[atp1], 1.0)  # rewards drawn from (q*, 1) Gaussian
    stp1 = randInRange(nStatesSimpleEnv)  # state transitions are uniform random

    local_observation[0] = stp1
    this_reward_observation = (the_reward, this_reward_observation[1], episode_over)

    return this_reward_observation
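# A rough usage sketch (an assumption, not part of the source) for the bandit env_init/env_step
# pair above. It stubs in the helpers and globals those functions rely on: randn as a
# (mean, std) Gaussian sampler, randInRange as a uniform integer sampler, and
# nStatesSimpleEnv; the stub definitions are hypothetical.
import numpy as np

def randn(mu, sigma):
    return np.random.normal(mu, sigma)

def randInRange(n):
    return np.random.randint(n)

nStatesSimpleEnv = 1

env_init()                                             # sets up bandit_action_values and local_observation
reward, observation, done = env_step(np.array([3.0]))  # pull arm 3
print(reward, observation, done)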
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    return mu + randn(n) * sigma
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    # pick a mixture component uniformly at random for each sample
    n_gaussian = np.random.randint(len(mu), size=n)
    mu0, sigma0 = mu[n_gaussian], sigma[n_gaussian]
    return mu0 + randn(n) * sigma0
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    # randomly flip the sign of mu: each sample comes from N(+mu, sigma^2) or N(-mu, sigma^2)
    n_gaussian = np.random.randint(2, size=n)
    return (1 - 2 * n_gaussian) * mu + randn(n) * sigma
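# A hypothetical wrapper (an assumption, not from the source) showing how the mixture-style
# sampler above could be used. The GaussianMixture class name and its params layout are
# invented for this sketch, and randn is assumed to be numpy.random.randn.
import numpy as np
from numpy.random import randn

class GaussianMixture:
    def __init__(self, mu, sigma):
        # equally weighted components; mu and sigma are 1-D arrays of the same length
        self.params = {"mu": np.asarray(mu), "sigma": np.asarray(sigma)}

    def random(self, n=1):
        mu, sigma = self.params["mu"], self.params["sigma"]
        n_gaussian = np.random.randint(len(mu), size=n)  # pick a component for each sample
        mu0, sigma0 = mu[n_gaussian], sigma[n_gaussian]
        return mu0 + randn(n) * sigma0

samples = GaussianMixture(mu=[-3.0, 3.0], sigma=[0.5, 0.5]).random(n=1000)
print(samples.mean(), samples.std())  # bimodal: mean near 0, spread dominated by the +/-3 modes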