Code Example #1
File: main.py  Project: LiamHz/tiny-autodiff
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# MLP is the autodiff multilayer perceptron defined elsewhere in the
# tiny-autodiff project; its import is not shown in this excerpt.

def main():
  lr = 0.001
  input_size = 50
  output_size = 10
  n_iterations = 100

  # Both input and "ground truth" are random vectors
  x = np.random.random(input_size)
  y = np.random.random(output_size)

  # Randomly initialize neural network weights
  #weights = to_value(np.random.random((input_size, output_size)))
  nn = MLP(input_size, output_size, [5, 10, 20])
  print(nn.layers[0])

  losses = []
  for i in tqdm(range(n_iterations)):
    y_pred = nn(x)
    # Squared-error loss; y_pred is a vector of autodiff values, so the
    # summed loss exposes .data and supports .backward()
    loss = np.sum((y - y_pred) * (y - y_pred))
    losses.append(loss.data)

    # Backpropagate, then take a gradient-descent step on every parameter
    loss.backward()
    for p in nn.parameters():
      p.data -= lr * p.grad

    # Reset gradients before the next iteration
    nn.zero_grad()

  plt.plot(losses)
  plt.ylabel('Loss')
  plt.xlabel('Iteration')
  plt.title('Multilayer perceptron fitting random noise')
  plt.show()
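The loop above depends on nn(x) returning autodiff values: loss.data reads the scalar result and loss.backward() fills p.grad for every parameter. The actual Value type lives in the tiny-autodiff project and is not part of this excerpt; the sketch below is a minimal micrograd-style stand-in for the interface the loop assumes, with illustrative names rather than the project's real implementation.

class Value:
    """Scalar that records the operations applied to it (illustrative sketch)."""
    def __init__(self, data, _children=()):
        self.data = data
        self.grad = 0.0
        self._backward = lambda: None
        self._prev = set(_children)

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other))
        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other))
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    __radd__ = __add__
    __rmul__ = __mul__

    def __neg__(self):
        return self * -1.0

    def __sub__(self, other):
        return self + (-other)

    def __rsub__(self, other):
        return (-self) + other

    def backward(self):
        # Topologically sort the graph, then apply the chain rule in reverse
        topo, visited = [], set()
        def build(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build(child)
                topo.append(v)
        build(self)
        self.grad = 1.0
        for v in reversed(topo):
            v._backward()

With a class like this, p.data -= lr * p.grad and nn.zero_grad() in the loop are just plain attribute updates on the leaf values.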
Code Example #2
import numpy as np
import torch as T
import torch.optim as optim

# NOTE: this example relies on the legacy stochastic-node API
# (.multinomial() without arguments plus .reinforce()), which was removed
# in PyTorch 0.4. MLP is assumed to be defined elsewhere in the project.

class Policy(object):
    def __init__(self, input_dim, n_actions, gamma=0.9):
        self.input_dim = input_dim
        self.n_actions = n_actions
        self.gamma = gamma

        # Two hidden layers of 32 units; one output score per action
        self.model = MLP(input_dim, [32, 32], n_actions)
        self.optim = optim.Adam(self.model.parameters(), lr=1e-2)

        # (action, reward) pairs collected over one episode
        self.action_reward = []

    def get_action(self, observation, stochastic=True):
        pred = self.model(observation)

        if stochastic:
            # Sample an action from the predicted distribution
            # (legacy stochastic node; the sample can later be .reinforce()d)
            return pred.multinomial()
        # Greedy choice: index of the highest-scoring action
        return pred[0].argmax()

    def update(self):
        # Compute discounted returns by walking the episode backwards, so
        # each action is credited only with the rewards that followed it
        R = 0
        rewards = []
        for action, reward in reversed(self.action_reward):
            R = reward + self.gamma * R
            rewards.insert(0, R)

        # Normalize returns to reduce the variance of the gradient estimate
        rewards = T.Tensor(rewards)
        rewards = (rewards - rewards.mean()) / (rewards.std() +
                                                np.finfo(np.float32).eps)

        # Attach each normalized return to its sampled action and
        # backpropagate through the stochastic graph (legacy .reinforce API)
        actions = []
        for (action, _), reward in zip(self.action_reward, rewards):
            action.reinforce(reward)
            actions.append(action)

        self.optim.zero_grad()
        T.autograd.backward(actions, [None for _ in actions])
        self.optim.step()

        # Clear the episode buffer
        self.action_reward = []

    def record(self, action, reward):
        self.action_reward.append((action, reward))
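The .multinomial()/.reinforce() calls above only exist in PyTorch releases before 0.4, where sampled actions were stochastic autograd nodes. On current PyTorch the same REINFORCE update is normally written with torch.distributions and an explicit loss. The sketch below is one such equivalent; everything outside the torch API itself (the ModernPolicy class, its attribute names, and the assumption that model maps an observation tensor to one score per action) is illustrative.

import numpy as np
import torch as T
import torch.optim as optim
from torch.distributions import Categorical

class ModernPolicy(object):
    def __init__(self, model, gamma=0.9, lr=1e-2):
        self.model = model           # maps an observation tensor to one score per action
        self.gamma = gamma
        self.optim = optim.Adam(self.model.parameters(), lr=lr)
        self.log_prob_reward = []    # [log_prob, reward] pairs for the current episode

    def get_action(self, observation):
        dist = Categorical(logits=self.model(observation))  # softmax over action scores
        action = dist.sample()
        # Keep the log-probability; it carries the graph needed for the gradient
        self.log_prob_reward.append([dist.log_prob(action), None])
        return action.item()

    def record(self, reward):
        self.log_prob_reward[-1][1] = reward

    def update(self):
        # Discounted returns, computed backwards over the episode
        R, returns = 0.0, []
        for _, reward in reversed(self.log_prob_reward):
            R = reward + self.gamma * R
            returns.insert(0, R)
        returns = T.tensor(returns)
        returns = (returns - returns.mean()) / (returns.std() + np.finfo(np.float32).eps)

        # REINFORCE loss: -sum_t log pi(a_t | s_t) * G_t
        loss = -sum(lp * G for (lp, _), G in zip(self.log_prob_reward, returns))

        self.optim.zero_grad()
        loss.backward()
        self.optim.step()
        self.log_prob_reward = []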