    def __init__(self,
                 indices_by_class,
                 batch_size,
                 pc_noclassif=0.5,
                 nb_indices_same_class=2):
        self.indices_by_class = copy.copy(indices_by_class)
        self.indices_no_class = self.indices_by_class.pop(0)
        self.batch_size = batch_size
        self.pc_noclassif = pc_noclassif
        self.nb_indices_same_class = nb_indices_same_class

        self.batch_size_classif = round(
            (1 - self.pc_noclassif) * self.batch_size)
        self.batch_size_noclassif = self.batch_size - self.batch_size_classif

        # Batch Sampler NoClassif
        self.batch_sampler_noclassif = BatchSampler(
            RandomSamplerValues(self.indices_no_class),
            self.batch_size_noclassif, True)

        # Batch Sampler Classif
        self.batch_sampler_classif = BatchSamplerClassif(
            RandomSamplerValues(self.indices_by_class),
            self.batch_size_classif, self.nb_indices_same_class)
    def __init__(self, indices_by_class, batch_size, nb_indices_same_class):
        if batch_size % nb_indices_same_class != 0:
            raise ValueError(
                'batch_size of BatchSamplerClassif ({}) must be divisible by nb_indices_same_class ({})'
                .format(batch_size, nb_indices_same_class))

        self.indices_by_class = indices_by_class
        self.batch_size = batch_size
        self.nb_indices_same_class = nb_indices_same_class

        self.batch_sampler_by_class = []
        for indices in indices_by_class:
            self.batch_sampler_by_class.append(
                BatchSampler(RandomSamplerValues(indices),
                             self.nb_indices_same_class, True))
Example #3
    def batch_generator(self, advantages, mini_batch_size):
        sampler = BatchSampler(SubsetRandomSampler(range(self.num_samples)), mini_batch_size, drop_last=True)
        for indices in sampler:
            obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
            actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
            value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
            return_batch = self.returns[:-1].view(-1, 1)[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1,1)[indices]
            adv_targ = advantages.view(-1, 1)[indices]

            yield obs_batch, actions_batch, value_preds_batch, return_batch,\
                                 masks_batch, old_action_log_probs_batch, adv_targ
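The idiom above, a BatchSampler wrapped around a SubsetRandomSampler over a flat index range, recurs in most of the examples below. A minimal, self-contained sketch of what it yields (index values are illustrative):

import torch
from torch.utils.data import BatchSampler, SubsetRandomSampler

data = torch.arange(20).view(10, 2).float()   # 10 rows of toy data

sampler = BatchSampler(SubsetRandomSampler(range(10)),
                       batch_size=4, drop_last=True)
for indices in sampler:
    print(indices)               # e.g. [7, 2, 9, 0] -- a shuffled list of row indices
    mini_batch = data[indices]   # advanced indexing gathers those rows, shape (4, 2)

With drop_last=True the two leftover indices are discarded, so exactly two batches of four are produced per pass.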
Example #4
 def __iter__(self):
     for page_indices in self.page_sampler:
         if self.key is not None:
             in_page_sampler = SortedSampler(
                 page_indices, key=lambda i: self.key(self.data_source[i])
             )
         else:
             in_page_sampler = SequentialSampler(page_indices)
         batch_sampler = BatchSampler(
             in_page_sampler, self.batch_size, self.drop_last
         )
         batches = list(batch_sampler)
         random.shuffle(batches)
         for batch_indices in batches:
             yield [page_indices[i] for i in batch_indices]
Example #5
    def feed_forward_generator(
        self,
        advantages: Optional[torch.Tensor],
        num_mini_batch: Optional[int] = None,
        mini_batch_size: Optional[int] = None,
    ) -> Generator[Tuple[torch.Tensor, ...], None, None]:
        num_steps, num_processes = self.rewards.size()[0:2]
        batch_size = num_processes * num_steps

        if mini_batch_size is None:
            assert batch_size >= num_mini_batch, (
                "PPO requires the number of processes ({}) "
                "* number of steps ({}) = {} "
                "to be greater than or equal to the number of PPO mini batches ({})."
                "".format(
                    num_processes, num_steps, num_processes * num_steps, num_mini_batch
                )
            )
            mini_batch_size = batch_size // num_mini_batch
        sampler = BatchSampler(
            SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=True
        )
        for indices in sampler:
            obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
            recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
                -1, self.recurrent_hidden_states.size(-1)
            )[indices]
            actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
            value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
            return_batch = self.returns[:-1].view(-1, 1)[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
            if advantages is None:
                adv_targ = None
            else:
                adv_targ = advantages.view(-1, 1)[indices]

            batch = (
                obs_batch,
                recurrent_hidden_states_batch,
                actions_batch,
                value_preds_batch,
                return_batch,
                masks_batch,
                old_action_log_probs_batch,
                adv_targ,
            )
            yield batch
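A sketch of the flattening trick used by the generator above, assuming the usual rollout-storage layout (num_steps + 1 time steps for observations, num_processes environments, then feature dimensions); the trailing bootstrap step is dropped with [:-1] before flattening:

import torch

num_steps, num_processes, obs_dim = 5, 3, 4
obs = torch.randn(num_steps + 1, num_processes, obs_dim)  # extra step holds bootstrap values

flat_obs = obs[:-1].view(-1, *obs.size()[2:])   # -> (num_steps * num_processes, obs_dim)
assert flat_obs.shape == (num_steps * num_processes, obs_dim)

indices = [0, 7, 11]                            # as yielded by BatchSampler
mini_batch = flat_obs[indices]                  # shape (3, obs_dim)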
Example #6
    def trainmodel(self):
        s = torch.tensor(self.memory.buffer['s'],
                         dtype=torch.double).to(device)
        a = torch.tensor(self.memory.buffer['a'],
                         dtype=torch.double).to(device)
        r = torch.tensor(self.memory.buffer['r'],
                         dtype=torch.double).to(device).view(-1, 1)
        s_ = torch.tensor(self.memory.buffer['s_'],
                          dtype=torch.double).to(device)
        r = (r - r.mean()) / (r.std() + 1e-5)
        old_a_logp = torch.tensor(self.memory.buffer['a_logp'],
                                  dtype=torch.double).to(device).view(-1, 1)

        with torch.no_grad():
            target_v = r + self.gamma * self.net(s_)[1]
            adv = target_v - self.net(s)[1]

        for _ in range(self.PPOepoch):
            for index in BatchSampler(
                    SubsetRandomSampler(range(self.memory.buffer_capacity)),
                    self.memory.batch_size, False):

                alpha, beta = self.net(s[index])[0]
                dist = Beta(alpha, beta)
                a_logp = dist.log_prob(a[index]).sum(dim=1)
                ratio = torch.exp(a_logp - old_a_logp[index])
                with torch.no_grad():
                    entrop = dist.entropy()

                surr1 = ratio * adv[index]
                surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
                                    1.0 + self.clip_param) * adv[index]
                action_loss = -torch.min(surr1, surr2).mean()
                value_loss = F.smooth_l1_loss(
                    self.net(s[index])[1], target_v[index])
                self.storeloss(action_loss, value_loss)
                action_loss = torch.clamp(action_loss, 0, 10)
                value_loss = torch.clamp(value_loss, 0, 10)
                loss = (action_loss + 2. * value_loss -
                        args.bound * entrop.mean())

                self.optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(self.net.parameters(),
                                         self.max_grad_norm)
                self.optimizer.step()

        torch.save(self.net.state_dict(), self.path_t7)
Example #7
    def update(self):
        if self.memory_count >= self.capacity:

            # convert inputs to torch tensors.
            state = torch.Tensor([t.old_state for t in self.memory]).float()
            action = torch.LongTensor([t.action
                                       for t in self.memory]).view(-1,
                                                                   1).long()
            reward = torch.Tensor([t.reward for t in self.memory]).float()
            next_state = torch.Tensor([t.new_state
                                       for t in self.memory]).float()

            # move to device.
            state = state.to(self.device)
            action = action.to(self.device)
            reward = reward.to(self.device)
            next_state = next_state.to(self.device)

            # normalize rewards.
            reward = (reward - reward.mean()) / (reward.std() + 1e-7)

            # update Q value
            with torch.no_grad():
                target_v = reward + self.gamma * self.target_net(
                    next_state).max(1)[0]

            batch_loss = 0

            # sample from replay buffer, update actor network.
            for index in BatchSampler(SubsetRandomSampler(
                    range(len(self.memory))),
                                      batch_size=self.batch_size,
                                      drop_last=False):
                # Q-value of the taken action for the sampled indices.
                v = (self.act_net(state).gather(1, action))[index]
                # Reuse v instead of recomputing the same forward pass.
                loss = self.loss_func(target_v[index].unsqueeze(1), v)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                batch_loss += loss.item()

                # update target Q network when sufficient iterations have passed.
                self.update_count += 1
                self.update_target_network_weights()

            self.losses.append(batch_loss / self.batch_size)
            self.rewards.append(reward.mean().item())
Example #8
    def __init__(
        self,
        dataset,
        batch_size=1,
        shuffle=False,
        sampler=None,
        batch_sampler=None,
        pad_idx=0,
        num_workers=None,
        pin_memory=False,
        drop_last=False,
        pre_pad=True,
        half=False,
        transpose=False,
        transpose_y=False,
    ):
        self.dataset, self.batch_size, self.num_workers = (
            dataset,
            batch_size,
            num_workers,
        )
        self.pin_memory, self.drop_last, self.pre_pad = pin_memory, drop_last, pre_pad
        self.transpose, self.transpose_y, self.pad_idx, self.half = (
            transpose,
            transpose_y,
            pad_idx,
            half,
        )

        if batch_sampler is not None:
            if batch_size > 1 or shuffle or sampler is not None or drop_last:
                raise ValueError("batch_sampler is mutually exclusive with "
                                 "batch_size, shuffle, sampler, and drop_last")

        if sampler is not None and shuffle:
            raise ValueError("sampler is mutually exclusive with shuffle")

        if batch_sampler is None:
            if sampler is None:
                sampler = (RandomSampler(dataset)
                           if shuffle else SequentialSampler(dataset))
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        if num_workers is None:
            self.num_workers = num_cpus()

        self.sampler = sampler
        self.batch_sampler = batch_sampler
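The argument checks above mirror the stock torch.utils.data.DataLoader contract. A quick illustration of the same mutual-exclusion rule against the standard DataLoader (toy dataset, illustrative only):

import torch
from torch.utils.data import (BatchSampler, DataLoader, SequentialSampler,
                              TensorDataset)

ds = TensorDataset(torch.arange(8).float())
bs = BatchSampler(SequentialSampler(ds), batch_size=4, drop_last=False)

loader = DataLoader(ds, batch_sampler=bs)           # fine: batch_sampler on its own
try:
    DataLoader(ds, batch_size=2, batch_sampler=bs)  # batch_size conflicts with batch_sampler
except ValueError as err:
    print(err)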
Example #9
def magent_feed_forward_generator(rollouts_list, advantages_list,
                                  num_mini_batch):
    num_steps, num_processes = rollouts_list[0].rewards.size()[0:2]
    batch_size = num_processes * num_steps

    mini_batch_size = int(
        (batch_size / num_mini_batch))  # size of minibatch for each agent

    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size,
                           drop_last=False)
    for indices in sampler:
        obs_batch = torch.cat([
            rollout.obs[:-1].view(-1,
                                  *rollout.obs.size()[2:])[indices]
            for rollout in rollouts_list
        ], 0)
        recurrent_hidden_states_batch = torch.cat([
            rollout.recurrent_hidden_states[:-1].view(
                -1, rollout.recurrent_hidden_states.size(-1))[indices]
            for rollout in rollouts_list
        ], 0)
        actions_batch = torch.cat([
            rollout.actions.view(-1, rollout.actions.size(-1))[indices]
            for rollout in rollouts_list
        ], 0)
        value_preds_batch = torch.cat([
            rollout.value_preds[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        return_batch = torch.cat([
            rollout.returns[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        masks_batch = torch.cat([
            rollout.masks[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        old_action_log_probs_batch = torch.cat([
            rollout.action_log_probs.view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        adv_targ = torch.cat([
            advantages.view(-1, 1)[indices] for advantages in advantages_list
        ], 0)

        yield obs_batch, recurrent_hidden_states_batch, actions_batch, value_preds_batch, return_batch,\
              masks_batch, old_action_log_probs_batch, adv_targ
    def feed_forward_generator(self,
                               fetch_normalized,
                               advantages,
                               num_mini_batch=None,
                               mini_batch_size=None):
        num_steps, num_processes = self.rewards.size()[0:2]
        batch_size = num_processes * num_steps

        if mini_batch_size is None:
            assert batch_size >= num_mini_batch, (
                "PPO requires the number of processes ({}) "
                "* number of steps ({}) = {} "
                "to be greater than or equal to the number of PPO mini batches ({})."
                "".format(num_processes, num_steps, num_processes * num_steps,
                          num_mini_batch))
            mini_batch_size = batch_size // num_mini_batch
        sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                               mini_batch_size,
                               drop_last=True)
        for indices in sampler:

            if fetch_normalized:
                obs_batch = self.normalized_obs[:-1].view(
                    -1,
                    *self.normalized_obs.size()[2:])[indices]
            else:
                obs_batch = self.raw_obs[:-1].view(
                    -1,
                    *self.raw_obs.size()[2:])[indices]

            recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
                -1, self.recurrent_hidden_states.size(-1))[indices]
            actions_batch = self.actions.view(-1,
                                              self.actions.size(-1))[indices]
            pretanh_actions_batch = self.pretanh_actions.view(
                -1, self.pretanh_actions.size(-1))[indices]
            value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
            return_batch = self.returns[:-1].view(-1, 1)[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1,
                                                                    1)[indices]
            if advantages is None:
                adv_targ = None
            else:
                adv_targ = advantages.view(-1, 1)[indices]

            yield obs_batch, recurrent_hidden_states_batch, actions_batch, pretanh_actions_batch, \
                value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #11
    def update(self):
        print(self.memory_count)

        for index in BatchSampler(SubsetRandomSampler(range(len(self.memory))),
                                  batch_size=self.batch_size,
                                  drop_last=False):
            losses = torch.tensor(0).float()
            for i, j in enumerate(index):
                # state = self.target_net.analysis_state(mem.state)
                mem = self.memory[j]
                reward = torch.tensor(mem.reward).float()
                action = mem.action
                if action == -1:
                    target_v = reward
                else:
                    with torch.no_grad():
                        # Double DQN
                        next_state = self.act_net.analysis_state(
                            mem.next_state)
                        action_candidate, Qsa_values = self.act_net(next_state)
                        action2 = action_candidate[Qsa_values.max(1)[1]]

                        next_state = self.target_net.analysis_state(
                            mem.next_state)
                        action_candidate2, Qsa_values2 = self.target_net(
                            next_state)
                        Qsa2 = Qsa_values2[0][action_candidate2.index(action2)]
                        target_v = reward + self.gamma * Qsa2

                        # Nature DQN
                        # target_v = reward + self.gamma * self.target_net(next_state)[1].max()
                state = self.act_net.analysis_state(mem.state)
                action_candidate, Qsa_values = self.act_net(state)
                Qsa = Qsa_values[0][action_candidate.index(action)]

                loss = self.loss_func(target_v, Qsa)
                losses += loss
            self.optimizer.zero_grad()
            losses.backward()
            self.optimizer.step()
            self.writer.add_scalar('loss/value_loss', losses / self.batch_size,
                                   self.update_count)
            self.update_count += 1
            if self.update_count % 1500 == 0:
                self.target_net.load_state_dict(self.act_net.state_dict())
                torch.save(
                    self.act_net.state_dict(), config.act_net_model_dir +
                    str(self.update_count) + ".model")
Example #12
    def train(self):

        epochs = 10

        state = torch.FloatTensor([t.state
                                   for t in self.buffer]).to(self.device)
        action = torch.LongTensor([t.action for t in self.buffer
                                   ]).view(-1, 1).to(self.device)

        reward = [t.reward for t in self.buffer]
        old_action_log_prob = torch.FloatTensor(
            [t.a_log_prob for t in self.buffer]).view(-1, 1).to(self.device)

        R = 0
        Gt = []
        for r in reward[::-1]:
            R = r + 0.99 * R
            Gt.insert(0, R)
        Gt = torch.FloatTensor(Gt).to(self.device)

        for n in range(epochs):
            for index in BatchSampler(
                    SubsetRandomSampler(range(len(self.buffer))), 32, False):
                Gt_index = Gt[index].view(-1, 1)
                V = self.Critic(state[index])
                delta = Gt_index - V
                advantage = delta.detach()

                action_prob = self.Actor(state[index]).gather(1, action[index])

                # NOTE: despite its name, a_log_prob is assumed to store the old
                # policy's action probability (not its log), so this quotient is
                # the PPO probability ratio.
                ratio = (action_prob / old_action_log_prob[index])
                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1 - 0.2, 1 + 0.2) * advantage

                actor_loss = -torch.min(surr1, surr2).mean()

                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.Actor.parameters(), 0.5)
                self.actor_optimizer.step()

                critic_loss = torch.nn.functional.mse_loss(Gt_index, V)

                self.critic_optimizer.zero_grad()
                critic_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.Critic.parameters(), 0.5)
                self.critic_optimizer.step()
        del self.buffer[:]
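For reference, the discounted-return computation inside train() above, restated as a standalone helper with the same 0.99 discount:

def discounted_returns(rewards, gamma=0.99):
    R = 0.0
    returns = []
    for r in reversed(rewards):   # walk the trajectory backwards
        R = r + gamma * R         # G_t = r_t + gamma * G_{t+1}
        returns.insert(0, R)      # prepend to keep time order
    return returns

print(discounted_returns([1.0, 0.0, 1.0]))   # [1.9801, 0.99, 1.0]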
    def _setup_train(self, dataset, batch_size, epoch_size, alpha,
                     exclude_idx):
        all_idx = np.arange(len(dataset))
        train_idx = [i for i in all_idx if i not in exclude_idx]

        if alpha is None:
            self.logger.info('No sample weighting selected.')
            subset = Subset(dataset, train_idx)
            sampler = BatchSampler(SequentialSampler(subset), batch_size,
                                   False)
            return sampler, len(train_idx)

        factory = SamplerFactory(self.verbose)
        sampler = factory.get(dataset.df, train_idx, batch_size, epoch_size,
                              alpha)
        return sampler, len(sampler) * batch_size
    def _generate_batches(self, *tensors: torch.Tensor):
        num_envs = tensors[0].shape[1]

        sampler = BatchSampler(
            SubsetRandomSampler(range(num_envs)),
            self.batch_size,
            drop_last=True
        )

        for indices in sampler:
            batch = []

            for t in tensors:
                batch.append(t[:, indices].view(-1, *t.shape[2:]))

            yield batch
Example #15
def generate_batch_data(data, batch_size, mini_batch_size):
    obs, act, ret, val, adv, logp = (data['obs'], data['act'], data['ret'],
                                     data['val'], data['adv'], data['logp'])

    # generate batch data
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size,
                           drop_last=True)
    for indices in sampler:
        obs_batch = obs[indices]
        act_batch = act[indices]
        ret_batch = ret[indices]
        val_batch = val[indices]
        adv_batch = adv[indices]
        logp_batch = logp[indices]
        yield obs_batch, act_batch, ret_batch, val_batch, adv_batch, logp_batch
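A hypothetical call to generate_batch_data above with dummy tensors, only to show the shapes of the yielded mini-batches (all names and sizes here are made up):

import torch

N, obs_dim, act_dim = 64, 8, 2
data = {
    'obs': torch.randn(N, obs_dim),
    'act': torch.randn(N, act_dim),
    'ret': torch.randn(N),
    'val': torch.randn(N),
    'adv': torch.randn(N),
    'logp': torch.randn(N),
}

for obs_b, act_b, ret_b, val_b, adv_b, logp_b in generate_batch_data(
        data, batch_size=N, mini_batch_size=16):
    print(obs_b.shape, act_b.shape, ret_b.shape)   # (16, 8), (16, 2), (16,)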
Example #16
    def sample(self, advantages, num_mini_batch):
        num_steps, num_processes = self.rewards.size()[0:2]
        batch_size = num_processes * num_steps
        assert batch_size >= num_mini_batch

        mini_batch_size = batch_size // num_mini_batch
        sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), mini_batch_size, drop_last=False)
        for indices in sampler:
            observations_batch = self.observations[:-1].view(-1, *self.observations.size()[2:])[indices]
            actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
            return_batch = self.returns[:-1].view(-1, self.actions.size(-1))[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1, self.actions.size(-1))[indices]
            adv = advantages.view(-1, self.actions.size(-1))[indices]

            yield observations_batch, actions_batch, return_batch, masks_batch, old_action_log_probs_batch, adv
Example #17
    def get_generator(self, minibatch_size):
        minibatch_size = min(self.sample_num, minibatch_size)
        sampler = BatchSampler(SubsetRandomSampler(range(self.sample_num)),
                               minibatch_size,
                               drop_last=True)
        for ind in sampler:
            obs_fov_batch = self.obs_fovs[ind]
            actions_batch = self.actions[ind]
            tids_batch = self.tids[ind]
            return_batch = self.returns[ind]
            log_probs_batch = self.lprobs[ind]
            advantage_batch = self.advs[ind]

            yield (
                obs_fov_batch, tids_batch
            ), actions_batch, return_batch, log_probs_batch, advantage_batch
    def feed_forward_generator(self,
                               advantages,
                               num_mini_batch=None,
                               mini_batch_size=None,
                               include_mask=None):
        num_steps, num_processes = self.rewards.size()[0:2]
        batch_size = num_processes * num_steps

        if mini_batch_size is None:
            assert batch_size >= num_mini_batch, (
                "PPO requires the number of processes ({}) "
                "* number of steps ({}) = {} "
                "to be greater than or equal to the number of PPO mini batches ({})."
                "".format(num_processes, num_steps, num_processes * num_steps,
                          num_mini_batch))
            mini_batch_size = batch_size // num_mini_batch

        # Only include examples which are valid.
        valid_indices = list(range(batch_size))
        if include_mask is not None:
            valid_indices = [i for i in valid_indices if include_mask[i] == 1]

        sampler = BatchSampler(SubsetRandomSampler(valid_indices),
                               mini_batch_size,
                               drop_last=True)
        for indices in sampler:
            obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
            recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
                -1, self.recurrent_hidden_states.size(-1))[indices]
            actions_batch = self.actions.view(-1,
                                              self.actions.size(-1))[indices]
            value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
            return_batch = self.returns[:-1].view(-1, 1)[indices]
            masks_batch = self.masks[:-1].view(-1, 1)[indices]
            old_action_log_probs_batch = self.action_log_probs.view(-1,
                                                                    1)[indices]
            if advantages is None:
                adv_targ = None
            else:
                adv_targ = advantages.view(-1, 1)[indices]

            yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
                value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, adv_targ
Example #19
def download_landmark_for_classification(data_folder, uniform_sampling=False):
    transform_train, transform_test = create_transformations_for_test_and_train(
    )
    new_test_dataset, new_train_dataset = create_new_train_and_test_datasets(
        transform_train, transform_test, data_folder)

    if uniform_sampling:
        number_of_samples_with_the_same_label_in_the_batch = (batch_size +
                                                              1) / 2
        train_loader = data.DataLoader(
            new_train_dataset,
            batch_sampler=BatchSampler(sampler=UniformSampler(
                new_train_dataset,
                batch_size=batch_size,
                number_of_samples_with_the_same_label_in_the_batch=
                number_of_samples_with_the_same_label_in_the_batch),
                                       batch_size=batch_size,
                                       drop_last=False),
            num_workers=8)
    else:
        train_loader = data.DataLoader(new_train_dataset,
                                       batch_size=batch_size,
                                       drop_last=False,
                                       shuffle=False,
                                       num_workers=8)

    print('train_loader.batch_size = ', train_loader.batch_size,
          ' train_loader.batch_sampler.batch_size =',
          train_loader.batch_sampler.batch_size, ' train_loader.dataset ',
          train_loader.dataset)
    # print('new_test_dataset.images_paths', new_test_dataset.images_paths)
    # print('new_test_dataset.images_labels', new_test_dataset.images_labels)
    # print('ful batch size = ', len(new_test_dataset.test_labels))
    test_loader = None
    # test_loader = data.DataLoader(new_test_dataset,
    #                              batch_size=batch_size,
    #                              drop_last=False,
    #                              shuffle=False,
    #                              num_workers=8)

    # print('new_train_dataset ', new_train_dataset.__len__())
    # print('new_test_dataset ', new_test_dataset.__len__())
    # print('new_train_dataset.images_paths', new_train_dataset.images_paths)
    # print('new_train_dataset.images_labels', new_train_dataset.images_labels)
    # print('ful batch size = ', len(new_train_dataset.test_labels))

    return train_loader, test_loader
Example #20
    def get_sequential_batch_generator(self, batch_size, num_steps):
        sampler = BatchSampler(SubsetRandomSampler(range(self.size -
                                                         num_steps)),
                               int(batch_size / self.num_envs),
                               drop_last=True)

        for indices in sampler:
            indices = np.array(indices)
            states = torch.zeros(batch_size,
                                 num_steps,
                                 *self.states.shape[2:],
                                 device=self.device)
            next_states = torch.zeros(batch_size,
                                      num_steps,
                                      *self.next_states.shape[2:],
                                      device=self.device)
            actions = torch.zeros(
                [batch_size, num_steps, self.actions.shape[-1]],
                device=self.device)
            rewards = torch.zeros([batch_size, num_steps, 1],
                                  device=self.device)
            masks = torch.zeros([batch_size, num_steps, 1], device=self.device)
            bad_masks = torch.zeros([batch_size, num_steps, 1],
                                    device=self.device)
            for step in range(num_steps):
                states[:, step, :].copy_(self.states[indices + step].view(
                    -1, *self.states.shape[2:]))
                next_states[:, step, :].copy_(
                    self.next_states[indices + step].view(
                        -1, *self.next_states.shape[2:]))
                actions[:, step, :].copy_(self.actions[indices + step].view(
                    -1, self.actions.shape[-1]))
                rewards[:, step, :].copy_(self.rewards[indices + step].view(
                    -1, 1))
                masks[:, step, :].copy_(self.masks[indices + step].view(-1, 1))
                bad_masks[:,
                          step, :].copy_(self.bad_masks[indices + step].view(
                              -1, 1))

            yield {
                'states': states,
                'actions': actions,
                'masks': masks,
                'next_states': next_states,
                'rewards': rewards,
                'bad_masks': bad_masks
            }
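The indices + step arithmetic above gathers overlapping, length-num_steps windows starting at each sampled index; a tiny illustration with toy values:

import numpy as np
import torch

T = 10
states = torch.arange(T).float().view(T, 1)   # stand-in for per-step storage
indices = np.array([0, 3, 5])                 # sampled window start points
for step in range(3):
    print(states[indices + step].view(-1))    # rows 0,3,5 then 1,4,6 then 2,5,7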
Example #21
    def update(self):
        self.training_step += 1

        state = torch.tensor([t.state for t in self.buffer], dtype=torch.float)
        action = torch.tensor([t.action for t in self.buffer],
                              dtype=torch.float).view(-1, 1)
        reward = torch.tensor([t.reward for t in self.buffer],
                              dtype=torch.float).view(-1, 1)
        next_state = torch.tensor([t.next_state for t in self.buffer],
                                  dtype=torch.float)
        old_action_log_prob = torch.tensor([t.a_log_prob for t in self.buffer],
                                           dtype=torch.float).view(-1, 1)

        reward = (reward - reward.mean()) / (reward.std() + 1e-10)
        with torch.no_grad():
            target_v = reward + args.gamma * self.critic_net(next_state)

        advantage = (target_v - self.critic_net(state)).detach()
        for _ in range(self.ppo_epoch):  # iteration ppo_epoch
            for index in BatchSampler(
                    SubsetRandomSampler(range(self.buffer_capacity)),
                    self.batch_size, True):
                # epoch iteration, PPO core!!!
                mu, sigma = self.actor_net(state[index])
                n = Normal(mu, sigma)
                action_log_prob = n.log_prob(action[index])
                ratio = torch.exp(action_log_prob - old_action_log_prob[index])

                L1 = ratio * advantage[index]
                L2 = torch.clamp(ratio, 1 - self.clip_param,
                                 1 + self.clip_param) * advantage[index]
                action_loss = -torch.min(L1, L2).mean()  # MAX->MIN desent
                self.actor_optimizer.zero_grad()
                action_loss.backward()
                nn.utils.clip_grad_norm_(self.actor_net.parameters(),
                                         self.max_grad_norm)
                self.actor_optimizer.step()

                value_loss = F.smooth_l1_loss(self.critic_net(state[index]),
                                              target_v[index])
                self.critic_net_optimizer.zero_grad()
                value_loss.backward()
                nn.utils.clip_grad_norm_(self.critic_net.parameters(),
                                         self.max_grad_norm)
                self.critic_net_optimizer.step()

        del self.buffer[:]
    def __init__(self,
                 dataset,
                 batch_size=1,
                 shuffle=False,
                 sampler=None,
                 batch_sampler=None,
                 num_workers=0,
                 collate_fn=default_collate,
                 pin_memory=False,
                 drop_last=False,
                 timeout=0,
                 worker_init_fn=None):
        self.dataset = dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.collate_fn = collate_fn
        self.pin_memory = pin_memory
        self.drop_last = drop_last
        self.timeout = timeout
        self.worker_init_fn = worker_init_fn

        if timeout < 0:
            raise ValueError('timeout option should be non-negative')

        if batch_sampler is not None:
            if batch_size > 1 or shuffle or sampler is not None or drop_last:
                raise ValueError('batch_sampler is mutually exclusive with '
                                 'batch_size, shuffle, sampler, and drop_last')

        if sampler is not None and shuffle:
            raise ValueError('sampler is mutually exclusive with shuffle')

        if self.num_workers < 0:
            raise ValueError('num_workers cannot be negative; '
                             'use num_workers=0 to disable multiprocessing.')

        if batch_sampler is None:
            if sampler is None:
                if shuffle:
                    sampler = RandomSampler(dataset)
                else:
                    sampler = SequentialSampler(dataset)
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self._init_workers()
def train() -> int:

    data_directory = os.environ['SM_CHANNEL_TRAINING']

    ratings_data = RatingsData(data_directory)

    sampler = BatchSampler(RandomSampler(ratings_data),
                           batch_size=100, drop_last=False)

    train_loader = DataLoader(ratings_data, sampler=sampler)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Recommender(ratings_data.get_dim()).to(device)

    model.train()

    model.init_params()

    optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)

    loss_fcn = nn.MSELoss()

    for batch in train_loader:
        # 'batch' is a tensor with shape (batch_size, 1, n_items)
        batch = batch.to(device)
        optimizer.zero_grad()
        output = model(batch)
        idx = batch != 0
        loss = loss_fcn(output[idx], batch[idx])

        # Add penalty to enforce orthonormal factors
        params = next(model.parameters())
        Id = torch.eye(model.get_dim())
        penalty = 0.1 * ((params.T @ params - Id)**2).sum() / model.get_dim()**2
        print(f"Penalty: {penalty}")
        
        loss_with_penalty = loss + penalty
        loss_with_penalty.backward()

        print(f"Loss: {loss.item()}")

        optimizer.step()

        print(next(model.parameters()))

    return 0 
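A quick check of the orthonormality penalty used in the training loop above: it is essentially zero when the factor matrix has orthonormal columns, e.g. the Q factor of a QR decomposition (toy shapes, illustrative only):

import torch

d = 4

def penalty(P):
    Id = torch.eye(d)
    return 0.1 * ((P.T @ P - Id) ** 2).sum() / d ** 2

W = torch.randn(10, d)                        # arbitrary factors -> positive penalty
Q, _ = torch.linalg.qr(torch.randn(10, d))    # orthonormal columns -> penalty ~ 0
print(penalty(W).item(), penalty(Q).item())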
Example #24
 def _get_sampler(self,
                  cursor,
                  is_test,
                  limit=None,
                  use_fast_sampler=False):
     if self.use_conll:
         return BatchSampler(RandomSampler(self._dataset),
                             self.train_params.batch_size, False)
     else:
         page_ids = self.page_id_order_test if is_test else self.page_id_order_train
         return MentionContextBatchSampler(
             cursor,
             page_ids,
             self.train_params.batch_size,
             self.train_params.min_mentions,
             limit=limit,
             use_fast_sampler=use_fast_sampler)
Example #25
 def __init__(self,
              dataset,
              batch_size=1,
              shuffle=False,
              num_workers=0,
              pin_memory=False,
              drop_last=False,
              batch_container=tuple):
     if num_workers != 0:
         print("warning: num_workers > 0: num_workers=0 is used instead")
     sampler = RandomSampler if shuffle else SequentialSampler
     self.pin_memory = pin_memory
     self.batch_sampler = BatchSampler(sampler=sampler(range(len(dataset))),
                                       batch_size=batch_size,
                                       drop_last=drop_last)
     self.dataset = dataset
     self.batch_container = batch_container
Example #26
    def sample_batch(self, batch_size=512):
        if not self.buffer_ready:
            self._finish_buffer()

        if self.is_recurrent:
            raise NotImplementedError("This is not supported yet")
        else:
            random_indices = SubsetRandomSampler(range(self.size))
            sampler = BatchSampler(random_indices, batch_size, drop_last=True)

            for i, indices in enumerate(sampler):
                states = self.states[indices]
                actions = self.actions[indices]
                returns = self.returns[indices]
                log_probs = self.log_probs[indices]
                advantages = self.advantages[indices]
                yield states, actions, returns, log_probs, advantages
Example #27
 def update(self):
     s = torch.tensor(self.buffer['s'], dtype=torch.double).to(device)
     for _ in range(args.train_epochs):
         print('New Epoch\n')
         for index in BatchSampler(
                 SubsetRandomSampler(range(args.buffer_capacity)),
                 args.batch_size, False):
             s_in = s[index]
             z, s_hat = self.AE(s_in)
             loss = self.criterion(s_hat, s_in)
             print("Loss:\t", loss.item())
             if args.tensorboard:
                 writer.add_scalar('Loss', loss.item(), self.step)
             self.optimizer.zero_grad()
             loss.backward()
             self.optimizer.step()
             self.step += 1
Example #28
 def get_predictions(self, dataset):
     batch_sampler = BatchSampler(sampler=self.get_base_sampler(
         len(dataset), shuffle=False),
                                  batch_size=256,
                                  drop_last=False)
     loader = torch.utils.data.DataLoader(dataset,
                                          batch_sampler=batch_sampler)
     preds = []
     with torch.no_grad():
         for (data, _) in tqdm.tqdm(loader, disable=self.logger.level > 15):
             if self.cuda_available:
                 data = data.cuda()
             prediction = self.model(data)
             if self.cuda_available:
                 prediction = prediction.detach().cpu()
             preds.append(prediction.data)
     return torch.cat(preds).numpy()
Example #29
 def __init__(self,
              dataset,
              batch_size=1,
              shuffle=False,
              batch_sampler=None,
              sampler=None,
              pad_idx=0,
              drop_last=False):
     self.dataset = dataset
     self.pad_idx, self.batch_size, self.shuffle = pad_idx, batch_size, shuffle
     if batch_sampler is None:
         if sampler is None:
             sampler = RandomSampler(
                 dataset) if shuffle else SequentialSampler(dataset)
         batch_sampler = BatchSampler(sampler, batch_size, drop_last)
     self.sampler = sampler
     self.batch_sampler = batch_sampler
Example #30
def data_loader(
    corpus,
    vocab,
    batch_size,
    sort_batches,
    num_data_workers=0,
    verbose=True,
):
    if sort_batches:
        if verbose:
            logging.info(
                'Using sort batch sampler. '
                'WARNING: While being more efficient '
                'than the standard one, the loss could be higher (as the data '
                'is not completely random)')
        source_lengths, target_lengths = corpus.get_lengths()
        batch_sampler = SortedBatchSampler(
            source_lengths=source_lengths,
            target_lengths=target_lengths,
            batch_size=batch_size,
            verbose=verbose,
        )
    else:
        if verbose:
            logging.info(
                'Using standard random batch sampler. '
                'WARNING: That might be inefficient as sentences in the batch '
                'might be of drastically different length')
        batch_sampler = BatchSampler(
            sampler=RandomSampler(corpus),
            batch_size=batch_size,
            drop_last=True,
        )

    return torch.utils.data.DataLoader(
        corpus,
        batch_sampler=batch_sampler,
        num_workers=num_data_workers,
        collate_fn=lambda samples: prepare_batch_from_parallel_samples(
            parallel_samples=samples,
            pad_token_id=vocab.pad_idx,
            eos_token_id=vocab.eos_idx,
            go_token_id=vocab.go_idx,
        ),
        pin_memory=True,
    )