def env_init():
    global local_observation, this_reward_observation, arms, numarms
    local_observation = np.zeros(1)
    # Draw each arm's true value from a N(0, 0.5) Gaussian
    arms = np.zeros(numarms)
    for i in range(numarms):
        arms[i] = randn(0.0, 0.5)
    this_reward_observation = (0.0, local_observation, False)
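Note that the randn(mean, std) signature used throughout these environment snippets does not match NumPy's np.random.randn, which takes array dimensions rather than distribution parameters. The snippets presumably rely on a project-local Gaussian helper along these lines (a minimal sketch, assuming that signature):

import numpy as np

# Hypothetical helper matching the randn(mean, std) calls above;
# NumPy's own np.random.randn(d0, d1, ...) takes dimensions instead
def randn(mu, sigma):
    return mu + sigma * np.random.randn()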
Example #2
def _test(self, model):
    np.random.seed(0)
    logger.info("Starting test gradient for model %s" % model)
    np.random.seed(1)
    x = np.round(randn(1), 1)
    grad = model.d_log_likelihood(x)
    grad_approx = model.d_log_likelihood_approx(x)
    # Third positional argument is rtol: the analytic gradient must match
    # the numerical approximation to within 1%
    np.testing.assert_allclose(grad, grad_approx, 0.01)
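The test compares a model's analytic gradient of the log-likelihood against a numerical approximation. A central-difference version of such an approximation could look like the following sketch (the scalar log_likelihood interface and eps value are illustrative assumptions, not from the original):

import numpy as np

# Hypothetical finite-difference gradient check
def d_log_likelihood_approx(model, x, eps=1e-6):
    # Central difference: (f(x + eps) - f(x - eps)) / (2 * eps)
    return (model.log_likelihood(x + eps) - model.log_likelihood(x - eps)) / (2 * eps)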
Example #3
def env_init():
    global local_observation, this_reward_observation, bandit_action_values
    local_observation = np.zeros(1)

    this_reward_observation = (0.0, local_observation, False)

    # Create the bandit problem for the current run: one true action value q*
    # per arm, drawn from a standard normal
    bandit_action_values = [randn(0.0, 1.0) for _ in range(10)]
Example #5
    def __init__(self, num_tags):
        super(CRF, self).__init__()
        self.num_tags = num_tags

        # matrix of transition scores from j to i
        self.transition = nn.Parameter(randn(num_tags, num_tags))
        self.transition.data[START_TAG_IDX, :] = -10000.  # no transition to START
        self.transition.data[:, STOP_TAG_IDX] = -10000.  # no transition from STOP except to PAD
        self.transition.data[:, PAD_IDX] = -10000.  # no transition from PAD except to PAD
        self.transition.data[PAD_IDX, :] = -10000.  # no transition to PAD except from STOP
        self.transition.data[PAD_IDX, STOP_TAG_IDX] = 0.
        self.transition.data[PAD_IDX, PAD_IDX] = 0.
Example #6
def env_step(this_action): # returns (floating point, NumPy array, Boolean), this_action: NumPy array
    global local_observation, this_reward_observation, arms#, nStatesSimpleEnv
    episode_over = False

    atp1 = this_action[0] # how to extract the chosen action
    stp1 = randInRange(nStatesSimpleEnv) # state transitions are uniform random
    the_reward = randn(0.0, 1.0) + arms[int(atp1)] # reward: unit Gaussian centred on the arm's true value
    #if rand_un() < 0.05:
    #    episode_over = True # termination is random

    local_observation[0] = stp1
    this_reward_observation = (the_reward, this_reward_observation[1], episode_over)

    return this_reward_observation
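Paired with the first env_init above, a driver loop might look like the following sketch (the uniform-random agent and the concrete values of numarms and nStatesSimpleEnv are illustrative assumptions; randn and randInRange are the project-local helpers these snippets rely on):

import numpy as np

numarms = 10          # assumed: number of bandit arms
nStatesSimpleEnv = 1  # assumed: single-state bandit setting

env_init()
for t in range(1000):
    # Stand-in agent: pick an arm uniformly at random
    action = np.array([np.random.randint(numarms)])
    the_reward, observation, episode_over = env_step(action)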
Example #7
    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags

        # matrix of transition scores from j to i
        self.trans = nn.Parameter(randn(num_tags, num_tags))
        self.trans.data[SOS_IDX, :] = -10000.  # no transition to SOS
        self.trans.data[:, EOS_IDX] = -10000.  # no transition from EOS except to PAD
        self.trans.data[:, PAD_IDX] = -10000.  # no transition from PAD except to PAD
        self.trans.data[PAD_IDX, :] = -10000.  # no transition to PAD except from EOS
        self.trans.data[PAD_IDX, EOS_IDX] = 0.
        self.trans.data[PAD_IDX, PAD_IDX] = 0.
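Since trans is indexed "from j to i" (trans[i, j] scores a move from tag j to tag i), scoring a gold tag path against emission scores could look like this sketch (the path_score helper is illustrative, not from the original, and omits the SOS/EOS boundary transitions):

import torch

# Hypothetical scorer; emissions: (seq_len, num_tags), tags: (seq_len,) tag ids
def path_score(trans, emissions, tags):
    score = emissions[0, tags[0]]
    for t in range(1, len(tags)):
        # trans[i, j] = score of transitioning from tag j to tag i
        score = score + trans[tags[t], tags[t - 1]] + emissions[t, tags[t]]
    return score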
Example #8
def env_step(this_action):  # returns (floating point, NumPy array, Boolean), this_action: NumPy array
    global local_observation, this_reward_observation  #, nStatesSimpleEnv
    episode_over = False

    # Get a reward from the chosen action's reward distribution
    atp1 = int(this_action[0])  # how to extract the chosen action
    the_reward = randn(bandit_action_values[atp1], 1.0)  # reward drawn from a (q*, 1) Gaussian

    stp1 = randInRange(nStatesSimpleEnv)  # state transitions are uniform random

    local_observation[0] = stp1
    this_reward_observation = (the_reward, this_reward_observation[1], episode_over)

    return this_reward_observation
Example #9
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    # Draw n samples from N(mu, sigma^2)
    return mu + randn(n) * sigma
Example #10
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    # Pick a mixture component per sample, then draw from that Gaussian
    n_gaussian = np.random.randint(len(mu), size=n)
    mu0, sigma0 = mu[n_gaussian], sigma[n_gaussian]
    return mu0 + randn(n) * sigma0
Example #13
def random(self, n=1):
    mu, sigma = self.params["mu"], self.params["sigma"]
    # Flip a fair coin per sample to choose the +mu or -mu component
    n_gaussian = np.random.randint(2, size=n)
    return (1 - 2 * n_gaussian) * mu + randn(n) * sigma
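These sampler methods all assume a wrapper object exposing a params dict. A minimal usage sketch for the symmetric two-component mixture above (the class name and parameter values are illustrative, not from the original):

import numpy as np
from numpy.random import randn

class SymmetricGaussianMixture:
    # Two Gaussian components with modes at +mu and -mu
    def __init__(self, mu, sigma):
        self.params = {"mu": mu, "sigma": sigma}

    def random(self, n=1):
        mu, sigma = self.params["mu"], self.params["sigma"]
        n_gaussian = np.random.randint(2, size=n)  # 0 -> +mu, 1 -> -mu
        return (1 - 2 * n_gaussian) * mu + randn(n) * sigma

samples = SymmetricGaussianMixture(mu=3.0, sigma=0.5).random(n=5)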