Example 1
    def prepare_train_data_method(self,
                                  window_data: List = [],
                                  word2index: dict = {},
                                  weighting_dic: dict = {},
                                  X_ik: dict = {}):
        u_p = []
        v_p = []
        co_p = []
        weight_p = []
        # Reference
        # view
        #    http://pytorch.org/docs/master/tensors.html#torch.Tensor.view
        for pair in window_data:
            u_p.append(prepare_word(pair[0], word2index).view(1, -1))
            v_p.append(prepare_word(pair[1], word2index).view(1, -1))
            # Fall back to a count of 1 when the pair has no recorded co-occurrence.
            try:
                cooc = X_ik[pair]
            except KeyError:
                cooc = 1

            co_p.append(torch.log(Variable(FloatTensor([cooc]))).view(1, -1))
            weight_p.append(
                Variable(FloatTensor([weighting_dic[pair]])).view(1, -1))

        train_data = list(zip(u_p, v_p, co_p, weight_p))
        return train_data
Example 2
def GAE(reward, value, mask, gamma, lam):
    # Generalized Advantage Estimation over a rollout of shape (T, ...):
    #   delta_t = r_t + gamma * V_{t+1} * mask_t - V_t
    #   A_t     = delta_t + gamma * lam * A_{t+1} * mask_t
    adv = FloatTensor(reward.shape)
    delta = FloatTensor(reward.shape)

    # pre_value, pre_adv = 0, 0
    pre_value = torch.zeros(reward.shape[1:], device=device)
    pre_adv = torch.zeros(reward.shape[1:], device=device)
    for i in reversed(range(reward.shape[0])):
        delta[i] = reward[i] + gamma * pre_value * mask[i] - value[i]
        adv[i] = delta[i] + gamma * lam * pre_adv * mask[i]
        pre_adv = adv[i, ...]
        pre_value = value[i, ...]
    returns = value + adv
    adv = (adv - adv.mean()) / adv.std()
    return adv, returns
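A minimal usage sketch for the GAE function above, feeding one dummy trajectory of length T. It assumes FloatTensor and device are the usual module-level aliases, which this excerpt does not show:

import torch

device = torch.device('cpu')        # assumption: CPU-only run
FloatTensor = torch.FloatTensor     # assumption: alias used by the snippet above

T = 8                               # hypothetical trajectory length
reward = torch.rand(T, 1)
value = torch.rand(T, 1)
mask = torch.ones(T, 1)             # 1 while the episode continues
mask[-1] = 0                        # 0 at the terminal step

adv, returns = GAE(reward, value, mask, gamma=0.99, lam=0.95)
print(adv.shape, returns.shape)     # torch.Size([8, 1]) torch.Size([8, 1])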
Example 3
 def get_net_log_prob(self, net_input_state, net_input_discrete_action,
                      net_input_continuous_action):
     # net_name and action_name are free variables expected to be bound in the
     # enclosing scope (not shown in this excerpt).
     net = getattr(self, net_name)
     n_action_dim = getattr(self, 'n_' + action_name)
     discrete_action_dim = getattr(self, 'discrete_' + action_name + '_dim')
     sections = getattr(self, 'discrete_' + action_name + '_sections')
     continuous_action_log_std = getattr(
         self, net_name + '_' + action_name + '_std')
     discrete_action_probs_with_continuous_mean = net(net_input_state)
     discrete_actions_log_prob = 0
     continuous_actions_log_prob = 0
     if discrete_action_dim != 0:
         dist = MultiOneHotCategorical(
             discrete_action_probs_with_continuous_mean[
                 ..., :discrete_action_dim], sections)
         discrete_actions_log_prob = dist.log_prob(
             net_input_discrete_action)
     if n_action_dim - discrete_action_dim != 0:
         continuous_actions_mean = discrete_action_probs_with_continuous_mean[
             ..., discrete_action_dim:]
         continuous_log_std = continuous_action_log_std.expand_as(
             continuous_actions_mean)
         continuous_actions_std = torch.exp(continuous_log_std)
         continuous_dist = MultivariateNormal(
             continuous_actions_mean,
             torch.diag_embed(continuous_actions_std))
         continuous_actions_log_prob = continuous_dist.log_prob(
             net_input_continuous_action)
     return FloatTensor(discrete_actions_log_prob +
                        continuous_actions_log_prob).unsqueeze(-1)
Example 4
    def get_net_action(self, state, size=1):
        net = getattr(self, net_name)
        n_action_dim = getattr(self, 'n_' + action_name)
        discrete_action_dim = getattr(self, 'discrete_' + action_name + '_dim')
        sections = getattr(self, 'discrete_' + action_name + '_sections')
        continuous_action_log_std = getattr(
            self, net_name + '_' + action_name + '_std')
        # The network output packs the discrete-action distribution parameters
        # first, then the continuous-action means.
        discrete_action_probs_with_continuous_mean = net(state)
        discrete_actions = torch.empty((size, 0), device=self.device)
        continuous_actions = torch.empty((size, 0), device=self.device)
        discrete_actions_log_prob = 0
        continuous_actions_log_prob = 0
        if discrete_action_dim != 0:
            dist = MultiOneHotCategorical(
                discrete_action_probs_with_continuous_mean[
                    ..., :discrete_action_dim], sections)
            discrete_actions = dist.sample()
            discrete_actions_log_prob = dist.log_prob(discrete_actions)
        if n_action_dim - discrete_action_dim != 0:
            continuous_actions_mean = discrete_action_probs_with_continuous_mean[
                ..., discrete_action_dim:]
            continuous_log_std = continuous_action_log_std.expand_as(
                continuous_actions_mean)
            continuous_actions_std = torch.exp(continuous_log_std)
            continuous_dist = MultivariateNormal(
                continuous_actions_mean,
                torch.diag_embed(continuous_actions_std))
            continuous_actions = continuous_dist.sample()
            continuous_actions_log_prob = continuous_dist.log_prob(
                continuous_actions)

        return discrete_actions, continuous_actions, FloatTensor(
            discrete_actions_log_prob +
            continuous_actions_log_prob).unsqueeze(-1)
Example 5
    def get_policy_net_log_prob(self, net_input_state,
                                net_input_discrete_action,
                                net_input_continuous_action):
        net = self.policy
        n_action_dim = args.n_continuous_action + args.n_discrete_action
        discrete_action_dim = args.n_discrete_action
        sections = discrete_action_sections
        continuous_action_log_std = self.policy_net_action_std

        discrete_action_probs_with_continuous_mean = net(net_input_state)
        discrete_actions_log_prob = 0
        continuous_actions_log_prob = 0
        if discrete_action_dim != 0:
            dist = MultiOneHotCategorical(
                discrete_action_probs_with_continuous_mean[
                    ..., :discrete_action_dim], sections)
            discrete_actions_log_prob = dist.log_prob(
                net_input_discrete_action)
        if n_action_dim - discrete_action_dim != 0:
            continuous_actions_mean = discrete_action_probs_with_continuous_mean[
                ..., discrete_action_dim:]
            continuous_log_std = continuous_action_log_std.expand_as(
                continuous_actions_mean)
            continuous_actions_std = torch.exp(continuous_log_std)
            continuous_dist = MultivariateNormal(
                continuous_actions_mean,
                torch.diag_embed(continuous_actions_std))
            continuous_actions_log_prob = continuous_dist.log_prob(
                net_input_continuous_action)
        return FloatTensor(discrete_actions_log_prob +
                           continuous_actions_log_prob).unsqueeze(-1)
Example 6
    def get_action(self, state, num_trajs=1):
        net = self.policy
        n_action_dim = args.n_continuous_action + args.n_discrete_action
        discrete_action_dim = args.n_discrete_action
        sections = discrete_action_sections
        continuous_action_log_std = self.policy_net_action_std

        discrete_action_probs_with_continuous_mean = self.policy(state)
        discrete_actions = torch.empty((num_trajs, 0), device=device)
        continuous_actions = torch.empty((num_trajs, 0), device=device)
        discrete_actions_log_prob = 0
        continuous_actions_log_prob = 0
        if discrete_action_dim != 0:
            dist = MultiOneHotCategorical(
                discrete_action_probs_with_continuous_mean[
                    ..., :discrete_action_dim], sections)
            discrete_actions = dist.sample()
            discrete_actions_log_prob = dist.log_prob(discrete_actions)
        if n_action_dim - discrete_action_dim != 0:
            continuous_actions_mean = discrete_action_probs_with_continuous_mean[
                ..., discrete_action_dim:]
            continuous_log_std = continuous_action_log_std.expand_as(
                continuous_actions_mean)
            continuous_actions_std = torch.exp(continuous_log_std)
            continuous_dist = MultivariateNormal(
                continuous_actions_mean,
                torch.diag_embed(continuous_actions_std))
            continuous_actions = continuous_dist.sample()
            continuous_actions_log_prob = continuous_dist.log_prob(
                continuous_actions)

        return discrete_actions, continuous_actions, FloatTensor(
            discrete_actions_log_prob +
            continuous_actions_log_prob).unsqueeze(-1)
Example 7
def get_probs(logging_policy, x, clip_threshold, tao):
    # Forward pass on the logging policy
    logging_output = logging_policy(
        Variable(FloatTensor([normalize(x)]))).data[0]

    # Do a temperature softmax
    logging_output = logging_output * tao
    pi_o = F.softmax(logging_output, dim=0)
    pi_o, num_z = clip_and_renorm(pi_o, clip_threshold)

    return pi_o, num_z
Example 8
def sample_image(args, generator, n_row, batches_done):
    """Saves a grid of generated digits ranging from 0 to n_classes"""
    # Sample noise
    z = Variable(
        FloatTensor(np.random.normal(0, 1, (n_row**2, args.latent_dim))))
    # Get labels for the n rows
    labels = np.array([num for _ in range(n_row) for num in range(n_row)])
    labels = Variable(LongTensor(labels))
    gen_imgs = generator(z, labels)
    save_image(gen_imgs.data,
               "images/%d.png" % batches_done,
               nrow=n_row,
               normalize=True)
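Several of these snippets rely on module-level tensor aliases instead of plain torch.FloatTensor / torch.LongTensor. A typical setup, assumed here because the excerpt does not include it:

import torch

cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if cuda else torch.LongTensor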
Example 9
    def get_net_action(self, state, num_trajs=1):
        net = getattr(self, net_name)
        n_action_dim = getattr(self, 'n_' + action_name)
        onehot_action_dim = getattr(self, 'onehot_' + action_name + '_dim')
        multihot_action_dim = getattr(self, 'multihot_' + action_name + '_dim')
        sections = getattr(self, 'onehot_' + action_name + '_sections')
        continuous_action_log_std = getattr(
            self, net_name + '_' + action_name + '_std')
        # The network output packs the one-hot part, then the multi-hot logits,
        # then the continuous-action means.
        onehot_action_probs_with_continuous_mean = net(state)

        onehot_actions = torch.empty((num_trajs, 0), device=self.device)
        multihot_actions = torch.empty((num_trajs, 0), device=self.device)
        continuous_actions = torch.empty((num_trajs, 0), device=self.device)
        onehot_actions_log_prob = 0
        multihot_actions_log_prob = 0
        continuous_actions_log_prob = 0
        if onehot_action_dim != 0:
            dist = MultiOneHotCategorical(
                onehot_action_probs_with_continuous_mean[
                    ..., :onehot_action_dim], sections)
            onehot_actions = dist.sample()
            onehot_actions_log_prob = dist.log_prob(onehot_actions)
        if multihot_action_dim != 0:
            multihot_actions_prob = torch.sigmoid(
                onehot_action_probs_with_continuous_mean[
                    ...,
                    onehot_action_dim:onehot_action_dim + multihot_action_dim])
            dist = torch.distributions.bernoulli.Bernoulli(
                probs=multihot_actions_prob)
            multihot_actions = dist.sample()
            multihot_actions_log_prob = dist.log_prob(multihot_actions).sum(
                dim=1)
        if n_action_dim - onehot_action_dim - multihot_action_dim != 0:
            continuous_actions_mean = onehot_action_probs_with_continuous_mean[
                ..., onehot_action_dim + multihot_action_dim:]
            continuous_log_std = continuous_action_log_std.expand_as(
                continuous_actions_mean)
            continuous_actions_std = torch.exp(continuous_log_std)
            continuous_dist = MultivariateNormal(
                continuous_actions_mean,
                torch.diag_embed(continuous_actions_std))
            continuous_actions = continuous_dist.sample()
            continuous_actions_log_prob = continuous_dist.log_prob(
                continuous_actions)

        return onehot_actions, multihot_actions, continuous_actions, FloatTensor(
            onehot_actions_log_prob + multihot_actions_log_prob +
            continuous_actions_log_prob).unsqueeze(-1)
Example 10
 def get_net_log_prob(self, net_input_state, net_input_onehot_action,
                      net_input_multihot_action,
                      net_input_continuous_action):
     net = getattr(self, net_name)
     n_action_dim = getattr(self, 'n_' + action_name)
     onehot_action_dim = getattr(self, 'onehot_' + action_name + '_dim')
     multihot_action_dim = getattr(self, 'multihot_' + action_name + '_dim')
     sections = getattr(self, 'onehot_' + action_name + '_sections')
     continuous_action_log_std = getattr(
         self, net_name + '_' + action_name + '_std')
     onehot_action_probs_with_continuous_mean = net(net_input_state)
     onehot_actions_log_prob = 0
     multihot_actions_log_prob = 0
     continuous_actions_log_prob = 0
     if onehot_action_dim != 0:
         dist = MultiOneHotCategorical(
             onehot_action_probs_with_continuous_mean[
                 ..., :onehot_action_dim], sections)
         onehot_actions_log_prob = dist.log_prob(net_input_onehot_action)
     if multihot_action_dim != 0:
         multihot_actions_prob = torch.sigmoid(
             onehot_action_probs_with_continuous_mean[
                 ...,
                 onehot_action_dim:onehot_action_dim + multihot_action_dim])
         dist = torch.distributions.bernoulli.Bernoulli(
             probs=multihot_actions_prob)
         multihot_actions_log_prob = dist.log_prob(
             net_input_multihot_action).sum(dim=1)
     if n_action_dim - onehot_action_dim - multihot_action_dim != 0:
         continuous_actions_mean = onehot_action_probs_with_continuous_mean[
             ..., onehot_action_dim + multihot_action_dim:]
         continuous_log_std = continuous_action_log_std.expand_as(
             continuous_actions_mean)
         continuous_actions_std = torch.exp(continuous_log_std)
         continuous_dist = MultivariateNormal(
             continuous_actions_mean,
             torch.diag_embed(continuous_actions_std))
         continuous_actions_log_prob = continuous_dist.log_prob(
             net_input_continuous_action)
     return FloatTensor(onehot_actions_log_prob +
                        multihot_actions_log_prob +
                        continuous_actions_log_prob).unsqueeze(-1)
Example 11
def GAE(reward, value, mask, gamma, lam):
    # adv = FloatTensor(reward.shape, device=device)
    # delta = FloatTensor(reward.shape, device=device)

    # # pre_value, pre_adv = 0, 0
    # pre_value = torch.zeros(reward.shape[1:], device=device)
    # pre_adv = torch.zeros(reward.shape[1:], device=device)
    # for i in reversed(range(reward.shape[0])):
    #     delta[i] = reward[i] + gamma * pre_value * mask[i] - value[i]
    #     adv[i] = delta[i] + gamma * lam * pre_adv * mask[i]
    #     pre_adv = adv[i, ...]
    #     pre_value = value[i, ...]
    # returns = value + adv
    # adv = (adv - adv.mean()) / adv.std()

    reward = reward.reshape(-1, args.sample_traj_length, 1)
    value = value.reshape(-1, args.sample_traj_length, 1)
    mask = mask.reshape(-1, args.sample_traj_length, 1)

    adv = FloatTensor(reward.shape, device=device)
    delta = FloatTensor(reward.shape, device=device)

    # pre_value, pre_adv = 0, 0
    pre_value = torch.zeros((reward.shape[0], 1), device=device)
    pre_adv = torch.zeros((reward.shape[0], 1), device=device)
    for i in reversed(range(reward.shape[1])):
        delta[:,
              i] = reward[:, i] + gamma * pre_value * mask[:, i] - value[:, i]
        adv[:, i] = delta[:, i] + gamma * lam * pre_adv * mask[:, i]
        pre_adv = adv[:, i, ...]
        pre_value = value[:, i, ...]
    returns = value + adv
    adv = (adv - adv.mean()) / adv.std()
    returns = returns.reshape(-1, 1)
    adv = adv.reshape(-1, 1)

    return adv, returns
Example 12
def train(generator, discriminator, dataloader, args, cuda, adversarial_loss,
          auxiliary_loss):

    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=args.lr,
                                   betas=(args.b1, args.b2))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=args.lr,
                                   betas=(args.b1, args.b2))

    for epoch in range(args.n_epochs):
        for i, (imgs, labels) in enumerate(dataloader):

            batch_size = imgs.shape[0]

            # Adversarial ground truths
            valid = Variable(FloatTensor(batch_size, 1).fill_(1.0),
                             requires_grad=False)
            fake = Variable(FloatTensor(batch_size, 1).fill_(0.0),
                            requires_grad=False)

            # Configure input
            real_imgs = Variable(imgs.type(FloatTensor))
            labels = Variable(labels.type(LongTensor))

            # -----------------
            #  Train Generator
            # -----------------

            optimizer_G.zero_grad()

            # Sample noise as generator input
            z = Variable(
                FloatTensor(
                    np.random.normal(0, 1, (batch_size, args.latent_dim))))
            gen_labels = Variable(
                LongTensor(np.random.randint(0, args.n_classes, batch_size)))

            # Generate a batch of images
            gen_imgs = generator(z, gen_labels)

            # Loss measures generator's ability to fool the discriminator
            validity, pred_label = discriminator(gen_imgs)
            g_loss = 0.5 * adversarial_loss(validity, valid) + auxiliary_loss(
                pred_label, gen_labels)

            g_loss.backward()
            optimizer_G.step()

            # ---------------------
            #  Train Discriminator
            # ---------------------

            optimizer_D.zero_grad()

            # Loss for real images
            real_pred, real_aux = discriminator(real_imgs)
            d_real_loss = (adversarial_loss(real_pred, valid) +
                           auxiliary_loss(real_aux, labels)) / 2

            # Loss for fake images
            fake_pred, fake_aux = discriminator(gen_imgs.detach())
            d_fake_loss = (adversarial_loss(fake_pred, fake) +
                           auxiliary_loss(fake_aux, gen_labels)) / 2

            # Measure discriminator's ability to classify real from generated samples
            d_loss = (d_real_loss + d_fake_loss) / 2

            # Calculate discriminator accuracy
            pred = np.concatenate(
                [real_aux.data.cpu().numpy(),
                 fake_aux.data.cpu().numpy()],
                axis=0)
            gt = np.concatenate(
                [labels.data.cpu().numpy(),
                 gen_labels.data.cpu().numpy()],
                axis=0)
            d_acc = np.mean(np.argmax(pred, axis=1) == gt)

            d_loss.backward()
            optimizer_D.step()

            print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]" %
                  (epoch, args.n_epochs, i, len(dataloader), d_loss.item(),
                   g_loss.item()))

            batches_done = epoch * len(dataloader) + i
            if batches_done % args.sample_interval == 0:
                sample_image(args,
                             generator,
                             n_row=10,
                             batches_done=batches_done)
Example 13
def train(game,
          num_steps=60000000,
          lr=0.00025,
          gamma=0.99,
          C=20000,
          batch_size=32):

    env = wrappers.wrap(gym.make(GAMES[game]))
    num_actions = env.action_space.n

    Q1 = QNetwork(num_actions)
    Q2 = QNetwork(num_actions)
    Q2.load_state_dict(Q1.state_dict())

    if torch.cuda.is_available():
        Q1.cuda()
        Q2.cuda()

    epsilon = Epsilon(1, 0.05, 1000000)
    optimizer = torch.optim.Adam(Q1.parameters(), lr=lr)
    optimizer.zero_grad()

    state1 = env.reset()

    t, last_t, loss, episode, score = 0, 0, 0, 0, 0
    last_ts, scores = datetime.now(), collections.deque(maxlen=100)

    while t < num_steps:

        qvalues = Q1(state1)
        if random() < epsilon(t):
            action = env.action_space.sample()
        else:
            action = qvalues.data.max(dim=1)[1][0]

        q = qvalues[0][action]

        state2, reward, done, _info = env.step(action)
        score += reward

        if not done:
            y = gamma * Q2(state2).detach().max(dim=1)[0][0] + reward
            state1 = state2
        else:
            reward = FloatTensor([reward])
            y = torch.autograd.Variable(reward, requires_grad=False)
            state1 = env.reset()
            scores.append(score)
            score = 0
            episode += 1

        loss += torch.nn.functional.smooth_l1_loss(q, y)

        t += 1

        if done or t % batch_size == 0:
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss = 0

        if t % C == 0:
            Q2.load_state_dict(Q1.state_dict())
            torch.save(Q1.state_dict(), 'qlearning_{}.pt'.format(game))

        if t % 1000 == 0:
            ts = datetime.now()
            datestr = ts.strftime('%Y-%m-%dT%H:%M:%S.%f')
            avg = mean(scores) if scores else float('nan')
            steps_per_sec = (t - last_t) / (ts - last_ts).total_seconds()
            l = '{} step {} episode {} avg last 100 scores: {:.2f} ε: {:.2f}, steps/s: {:.0f}'
            print(l.format(datestr, t, episode, avg, epsilon(t),
                           steps_per_sec))
            last_t, last_ts = t, ts
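The Epsilon schedule used above is not part of the excerpt. A minimal sketch consistent with the call sites Epsilon(1, 0.05, 1000000) and epsilon(t), assuming a simple linear decay:

class Epsilon:
    """Linearly anneal from `start` to `end` over `steps` steps."""

    def __init__(self, start, end, steps):
        self.start, self.end, self.steps = start, end, steps

    def __call__(self, t):
        frac = min(t / self.steps, 1.0)
        return self.start + frac * (self.end - self.start)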
Example 14
    def observation(self, observation):
        # Convert the HxWxC frame to channels-first and add a batch dimension.
        x = FloatTensor(np.swapaxes(observation, 2, 0))
        return torch.autograd.Variable(x, requires_grad=False).unsqueeze(0)
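A minimal sketch of what the observation override above produces, assuming an 84x84x4 frame stack and FloatTensor = torch.FloatTensor (the wrapper class itself is not shown in the excerpt):

import numpy as np
import torch

FloatTensor = torch.FloatTensor

frame = np.zeros((84, 84, 4), dtype=np.float32)   # hypothetical HxWxC observation
x = FloatTensor(np.swapaxes(frame, 2, 0))         # channels-first: (4, 84, 84)
batch = torch.autograd.Variable(x, requires_grad=False).unsqueeze(0)
print(batch.shape)                                # torch.Size([1, 4, 84, 84])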