def __init__(self, indices_by_class, batch_size, pc_noclassif=0.5,
             nb_indices_same_class=2):
    self.indices_by_class = copy.copy(indices_by_class)
    self.indices_no_class = self.indices_by_class.pop(0)
    self.batch_size = batch_size
    self.pc_noclassif = pc_noclassif
    self.nb_indices_same_class = nb_indices_same_class
    # Split the batch between class-labelled and unlabelled indices.
    self.batch_size_classif = round((1 - self.pc_noclassif) * self.batch_size)
    self.batch_size_noclassif = self.batch_size - self.batch_size_classif
    # Batch sampler for the "no class" indices.
    self.batch_sampler_noclassif = BatchSampler(
        RandomSamplerValues(self.indices_no_class),
        self.batch_size_noclassif, True)
    # Batch sampler for the class-labelled indices.
    self.batch_sampler_classif = BatchSamplerClassif(
        RandomSamplerValues(self.indices_by_class),
        self.batch_size_classif, self.nb_indices_same_class)

def __init__(self, indices_by_class, batch_size, nb_indices_same_class):
    if batch_size % nb_indices_same_class != 0:
        raise ValueError(
            'batch_size of BatchSamplerClassif ({}) must be divisible '
            'by nb_indices_same_class ({})'.format(batch_size,
                                                   nb_indices_same_class))
    self.indices_by_class = indices_by_class
    self.batch_size = batch_size
    self.nb_indices_same_class = nb_indices_same_class
    # One sampler per class, each yielding nb_indices_same_class indices
    # drawn at random from that class.
    self.batch_sampler_by_class = []
    for indices in indices_by_class:
        self.batch_sampler_by_class.append(
            BatchSampler(RandomSamplerValues(indices),
                         self.nb_indices_same_class, True))

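# Hedged, self-contained check of the plain-PyTorch BatchSampler semantics the
# two constructors above rely on. RandomSamplerValues and BatchSamplerClassif
# are project-specific, so a SequentialSampler stands in for them here.
from torch.utils.data import BatchSampler, SequentialSampler

# drop_last=True discards the trailing partial batch, which is what keeps
# every per-class batch exactly nb_indices_same_class indices long.
print(list(BatchSampler(SequentialSampler(range(7)), batch_size=2, drop_last=True)))
# -> [[0, 1], [2, 3], [4, 5]]
print(list(BatchSampler(SequentialSampler(range(7)), batch_size=2, drop_last=False)))
# -> [[0, 1], [2, 3], [4, 5], [6]]
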
def batch_generator(self, advantages, mini_batch_size):
    sampler = BatchSampler(SubsetRandomSampler(range(self.num_samples)),
                           mini_batch_size, drop_last=True)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        adv_targ = advantages.view(-1, 1)[indices]
        yield obs_batch, actions_batch, value_preds_batch, return_batch, \
            masks_batch, old_action_log_probs_batch, adv_targ

def __iter__(self):
    for page_indices in self.page_sampler:
        if self.key is not None:
            in_page_sampler = SortedSampler(
                page_indices, key=lambda i: self.key(self.data_source[i]))
        else:
            in_page_sampler = SequentialSampler(page_indices)
        batch_sampler = BatchSampler(in_page_sampler, self.batch_size,
                                     self.drop_last)
        # Shuffle whole batches while preserving the order within each batch.
        batches = list(batch_sampler)
        random.shuffle(batches)
        for batch_indices in batches:
            yield [page_indices[i] for i in batch_indices]

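# Self-contained sketch (standard PyTorch samplers plus random) of the
# page-wise batching pattern above: batches preserve within-page order, but
# the order of the batches themselves is shuffled. page_indices is a toy
# stand-in for one page from the page sampler.
import random
from torch.utils.data import BatchSampler, SequentialSampler

page_indices = [10, 11, 12, 13, 14]
batches = list(BatchSampler(SequentialSampler(page_indices), 2, False))
random.shuffle(batches)
print([[page_indices[i] for i in batch] for batch in batches])
# e.g. [[14], [10, 11], [12, 13]]
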
def feed_forward_generator(
    self,
    advantages: Optional[torch.Tensor],
    num_mini_batch: Optional[int] = None,
    mini_batch_size: Optional[int] = None,
) -> Generator[Tuple[torch.Tensor, ...], None, None]:
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    if mini_batch_size is None:
        assert batch_size >= num_mini_batch, (
            "PPO requires the number of processes ({}) "
            "* number of steps ({}) = {} "
            "to be greater than or equal to the number of PPO mini batches ({})."
            "".format(num_processes, num_steps, num_processes * num_steps,
                      num_mini_batch))
        mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size, drop_last=True)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        if advantages is None:
            adv_targ = None
        else:
            adv_targ = advantages.view(-1, 1)[indices]
        batch = (
            obs_batch,
            recurrent_hidden_states_batch,
            actions_batch,
            value_preds_batch,
            return_batch,
            masks_batch,
            old_action_log_probs_batch,
            adv_targ,
        )
        yield batch

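# Self-contained check (plain torch.utils.data, nothing project-specific) of
# the minibatch arithmetic used above: with drop_last=True and
# mini_batch_size = batch_size // num_mini_batch, the sampler yields exactly
# num_mini_batch index lists, silently dropping any remainder.
from torch.utils.data import BatchSampler, SubsetRandomSampler

num_processes, num_steps, num_mini_batch = 8, 5, 4
batch_size = num_processes * num_steps           # 40
mini_batch_size = batch_size // num_mini_batch   # 10
sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                       mini_batch_size, drop_last=True)
assert len(list(sampler)) == num_mini_batch
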
def trainmodel(self):
    s = torch.tensor(self.memory.buffer['s'], dtype=torch.double).to(device)
    a = torch.tensor(self.memory.buffer['a'], dtype=torch.double).to(device)
    r = torch.tensor(self.memory.buffer['r'],
                     dtype=torch.double).to(device).view(-1, 1)
    s_ = torch.tensor(self.memory.buffer['s_'], dtype=torch.double).to(device)
    r = (r - r.mean()) / (r.std() + 1e-5)
    old_a_logp = torch.tensor(self.memory.buffer['a_logp'],
                              dtype=torch.double).to(device).view(-1, 1)
    with torch.no_grad():
        target_v = r + self.gamma * self.net(s_)[1]
        adv = target_v - self.net(s)[1]
    for _ in range(self.PPOepoch):
        for index in BatchSampler(
                SubsetRandomSampler(range(self.memory.buffer_capacity)),
                self.memory.batch_size, False):
            alpha, beta = self.net(s[index])[0]
            dist = Beta(alpha, beta)
            a_logp = dist.log_prob(a[index]).sum(dim=1)
            ratio = torch.exp(a_logp - old_a_logp[index])
            with torch.no_grad():
                entrop = dist.entropy()
            surr1 = ratio * adv[index]
            surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
                                1.0 + self.clip_param) * adv[index]
            action_loss = -torch.min(surr1, surr2).mean()
            value_loss = F.smooth_l1_loss(self.net(s[index])[1],
                                          target_v[index])
            self.storeloss(action_loss, value_loss)
            action_loss = torch.clamp(action_loss, 0, 10)
            value_loss = torch.clamp(value_loss, 0, 10)
            loss = action_loss + 2. * value_loss - args.bound * entrop.mean()
            self.optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(self.net.parameters(), self.max_grad_norm)
            self.optimizer.step()
    torch.save(self.net.state_dict(), self.path_t7)

def update(self):
    if self.memory_count >= self.capacity:
        # Convert inputs to torch tensors.
        state = torch.Tensor([t.old_state for t in self.memory]).float()
        action = torch.LongTensor([t.action for t in self.memory]).view(-1, 1).long()
        reward = torch.Tensor([t.reward for t in self.memory]).float()
        next_state = torch.Tensor([t.new_state for t in self.memory]).float()
        # Move to device.
        state = state.to(self.device)
        action = action.to(self.device)
        reward = reward.to(self.device)
        next_state = next_state.to(self.device)
        # Normalize rewards.
        reward = (reward - reward.mean()) / (reward.std() + 1e-7)
        # Update Q value.
        with torch.no_grad():
            target_v = reward + self.gamma * self.target_net(next_state).max(1)[0]
        batch_loss = 0
        # Sample from the replay buffer and update the actor network.
        for index in BatchSampler(SubsetRandomSampler(range(len(self.memory))),
                                  batch_size=self.batch_size, drop_last=False):
            v = (self.act_net(state).gather(1, action))[index]
            loss = self.loss_func(target_v[index].unsqueeze(1), v)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            batch_loss += loss.item()
        # Update the target Q network once sufficient iterations have passed.
        self.update_count += 1
        self.update_target_network_weights()
        self.losses.append(batch_loss / self.batch_size)
        self.rewards.append(reward.mean().item())

def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
             batch_sampler=None, pad_idx=0, num_workers=None,
             pin_memory=False, drop_last=False, pre_pad=True, half=False,
             transpose=False, transpose_y=False):
    self.dataset, self.batch_size, self.num_workers = (
        dataset, batch_size, num_workers)
    self.pin_memory, self.drop_last, self.pre_pad = (
        pin_memory, drop_last, pre_pad)
    self.transpose, self.transpose_y, self.pad_idx, self.half = (
        transpose, transpose_y, pad_idx, half)
    if batch_sampler is not None:
        if batch_size > 1 or shuffle or sampler is not None or drop_last:
            raise ValueError("batch_sampler is mutually exclusive with "
                             "batch_size, shuffle, sampler, and drop_last")
    if sampler is not None and shuffle:
        raise ValueError("sampler is mutually exclusive with shuffle")
    if batch_sampler is None:
        if sampler is None:
            sampler = (RandomSampler(dataset) if shuffle
                       else SequentialSampler(dataset))
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)
    if num_workers is None:
        self.num_workers = num_cpus()
    self.sampler = sampler
    self.batch_sampler = batch_sampler

def magent_feed_forward_generator(rollouts_list, advantages_list,
                                  num_mini_batch):
    num_steps, num_processes = rollouts_list[0].rewards.size()[0:2]
    batch_size = num_processes * num_steps
    # Size of the minibatch drawn for each agent.
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size, drop_last=False)
    for indices in sampler:
        obs_batch = torch.cat([
            rollout.obs[:-1].view(-1, *rollout.obs.size()[2:])[indices]
            for rollout in rollouts_list
        ], 0)
        recurrent_hidden_states_batch = torch.cat([
            rollout.recurrent_hidden_states[:-1].view(
                -1, rollout.recurrent_hidden_states.size(-1))[indices]
            for rollout in rollouts_list
        ], 0)
        actions_batch = torch.cat([
            rollout.actions.view(-1, rollout.actions.size(-1))[indices]
            for rollout in rollouts_list
        ], 0)
        value_preds_batch = torch.cat([
            rollout.value_preds[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        return_batch = torch.cat([
            rollout.returns[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        masks_batch = torch.cat([
            rollout.masks[:-1].view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        old_action_log_probs_batch = torch.cat([
            rollout.action_log_probs.view(-1, 1)[indices]
            for rollout in rollouts_list
        ], 0)
        adv_targ = torch.cat([
            advantages.view(-1, 1)[indices]
            for advantages in advantages_list
        ], 0)
        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            value_preds_batch, return_batch, masks_batch, \
            old_action_log_probs_batch, adv_targ

def feed_forward_generator(self, fetch_normalized, advantages,
                           num_mini_batch=None, mini_batch_size=None):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    if mini_batch_size is None:
        assert batch_size >= num_mini_batch, (
            "PPO requires the number of processes ({}) "
            "* number of steps ({}) = {} "
            "to be greater than or equal to the number of PPO mini batches ({})."
            "".format(num_processes, num_steps, num_processes * num_steps,
                      num_mini_batch))
        mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size, drop_last=True)
    for indices in sampler:
        if fetch_normalized:
            obs_batch = self.normalized_obs[:-1].view(
                -1, *self.normalized_obs.size()[2:])[indices]
        else:
            obs_batch = self.raw_obs[:-1].view(
                -1, *self.raw_obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        pretanh_actions_batch = self.pretanh_actions.view(
            -1, self.pretanh_actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        if advantages is None:
            adv_targ = None
        else:
            adv_targ = advantages.view(-1, 1)[indices]
        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            pretanh_actions_batch, value_preds_batch, return_batch, \
            masks_batch, old_action_log_probs_batch, adv_targ

def update(self):
    print(self.memory_count)
    for index in BatchSampler(SubsetRandomSampler(range(len(self.memory))),
                              batch_size=self.batch_size, drop_last=False):
        losses = torch.tensor(0).float()
        for i, j in enumerate(index):
            # state = self.target_net.analysis_state(mem.state)
            mem = self.memory[j]
            reward = torch.tensor(mem.reward).float()
            action = mem.action
            if action == -1:
                target_v = reward
            else:
                with torch.no_grad():
                    # Double DQN: select the next action with the online
                    # network, evaluate it with the target network.
                    next_state = self.act_net.analysis_state(mem.next_state)
                    action_candidate, Qsa_values = self.act_net(next_state)
                    action2 = action_candidate[Qsa_values.max(1)[1]]
                    next_state = self.target_net.analysis_state(mem.next_state)
                    action_candidate2, Qsa_values2 = self.target_net(next_state)
                    Qsa2 = Qsa_values2[0][action_candidate2.index(action2)]
                    target_v = reward + self.gamma * Qsa2
                    # Nature DQN alternative:
                    # target_v = reward + self.gamma * self.target_net(next_state)[1].max()
            state = self.act_net.analysis_state(mem.state)
            action_candidate, Qsa_values = self.act_net(state)
            Qsa = Qsa_values[0][action_candidate.index(action)]
            loss = self.loss_func(target_v, Qsa)
            losses += loss
        self.optimizer.zero_grad()
        losses.backward()
        self.optimizer.step()
        self.writer.add_scalar('loss/value_loss', losses / self.batch_size,
                               self.update_count)
        self.update_count += 1
        if self.update_count % 1500 == 0:
            self.target_net.load_state_dict(self.act_net.state_dict())
            torch.save(self.act_net.state_dict(),
                       config.act_net_model_dir + str(self.update_count) + ".model")

def train(self):
    epochs = 10
    state = torch.FloatTensor([t.state for t in self.buffer]).to(self.device)
    action = torch.LongTensor([t.action for t in self.buffer]).view(-1, 1).to(self.device)
    reward = [t.reward for t in self.buffer]
    old_action_log_prob = torch.FloatTensor(
        [t.a_log_prob for t in self.buffer]).view(-1, 1).to(self.device)
    # Monte Carlo estimate of the discounted returns, computed backwards.
    R = 0
    Gt = []
    for r in reward[::-1]:
        R = r + 0.99 * R
        Gt.insert(0, R)
    Gt = torch.FloatTensor(Gt).to(self.device)
    for n in range(epochs):
        for index in BatchSampler(
                SubsetRandomSampler(range(len(self.buffer))), 32, False):
            Gt_index = Gt[index].view(-1, 1)
            V = self.Critic(state[index])
            delta = Gt_index - V
            advantage = delta.detach()
            action_prob = self.Actor(state[index]).gather(1, action[index])
            ratio = action_prob / old_action_log_prob[index]
            surr1 = ratio * advantage
            surr2 = torch.clamp(ratio, 1 - 0.2, 1 + 0.2) * advantage
            actor_loss = -torch.min(surr1, surr2).mean()
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.Actor.parameters(), 0.5)
            self.actor_optimizer.step()
            critic_loss = torch.nn.functional.mse_loss(Gt_index, V)
            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.Critic.parameters(), 0.5)
            self.critic_optimizer.step()
    del self.buffer[:]

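# Quick sanity check, in plain Python, of the reversed discounted-return loop
# used in train() above: Gt[t] = r[t] + gamma * Gt[t+1].
reward, gamma = [1.0, 0.0, 2.0], 0.99
R, Gt = 0.0, []
for r in reward[::-1]:
    R = r + gamma * R
    Gt.insert(0, R)
print(Gt)  # -> approximately [2.9602, 1.98, 2.0]
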
def _setup_train(self, dataset, batch_size, epoch_size, alpha, exclude_idx):
    all_idx = np.arange(len(dataset))
    train_idx = [i for i in all_idx if i not in exclude_idx]
    if alpha is None:
        self.logger.info('No sample weighting selected.')
        subset = Subset(dataset, train_idx)
        sampler = BatchSampler(SequentialSampler(subset), batch_size, False)
        return sampler, len(train_idx)
    factory = SamplerFactory(self.verbose)
    sampler = factory.get(dataset.df, train_idx, batch_size, epoch_size, alpha)
    return sampler, len(sampler) * batch_size

def _generate_batches(self, *tensors: torch.Tensor):
    num_envs = tensors[0].shape[1]
    sampler = BatchSampler(SubsetRandomSampler(range(num_envs)),
                           self.batch_size, drop_last=True)
    for indices in sampler:
        batch = []
        for t in tensors:
            # Select whole environments, then flatten the time dimension.
            batch.append(t[:, indices].view(-1, *t.shape[2:]))
        yield batch

def generate_batch_data(data, batch_size, mini_batch_size):
    obs, act, ret, val, adv, logp = (data['obs'], data['act'], data['ret'],
                                     data['val'], data['adv'], data['logp'])
    # Generate minibatches by shuffling indices over the whole batch.
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size, drop_last=True)
    for indices in sampler:
        obs_batch = obs[indices]
        act_batch = act[indices]
        ret_batch = ret[indices]
        val_batch = val[indices]
        adv_batch = adv[indices]
        logp_batch = logp[indices]
        yield obs_batch, act_batch, ret_batch, val_batch, adv_batch, logp_batch

def sample(self, advantages, num_mini_batch):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    assert batch_size >= num_mini_batch
    mini_batch_size = batch_size // num_mini_batch
    sampler = BatchSampler(SubsetRandomSampler(range(batch_size)),
                           mini_batch_size, drop_last=False)
    for indices in sampler:
        observations_batch = self.observations[:-1].view(
            -1, *self.observations.size()[2:])[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        return_batch = self.returns[:-1].view(
            -1, self.actions.size(-1))[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(
            -1, self.actions.size(-1))[indices]
        adv = advantages.view(-1, self.actions.size(-1))[indices]
        yield (observations_batch, actions_batch, return_batch, masks_batch,
               old_action_log_probs_batch, adv)

def get_generator(self, minibatch_size):
    minibatch_size = min(self.sample_num, minibatch_size)
    sampler = BatchSampler(SubsetRandomSampler(range(self.sample_num)),
                           minibatch_size, drop_last=True)
    for ind in sampler:
        obs_fov_batch = self.obs_fovs[ind]
        actions_batch = self.actions[ind]
        tids_batch = self.tids[ind]
        return_batch = self.returns[ind]
        log_probs_batch = self.lprobs[ind]
        advantage_batch = self.advs[ind]
        yield ((obs_fov_batch, tids_batch), actions_batch, return_batch,
               log_probs_batch, advantage_batch)

def feed_forward_generator(self, advantages, num_mini_batch=None,
                           mini_batch_size=None, include_mask=None):
    num_steps, num_processes = self.rewards.size()[0:2]
    batch_size = num_processes * num_steps
    if mini_batch_size is None:
        assert batch_size >= num_mini_batch, (
            "PPO requires the number of processes ({}) "
            "* number of steps ({}) = {} "
            "to be greater than or equal to the number of PPO mini batches ({})."
            "".format(num_processes, num_steps, num_processes * num_steps,
                      num_mini_batch))
        mini_batch_size = batch_size // num_mini_batch
    # Only include examples which are valid.
    valid_indices = list(range(batch_size))
    if include_mask is not None:
        valid_indices = [i for i in valid_indices if include_mask[i] == 1]
    sampler = BatchSampler(SubsetRandomSampler(valid_indices),
                           mini_batch_size, drop_last=True)
    for indices in sampler:
        obs_batch = self.obs[:-1].view(-1, *self.obs.size()[2:])[indices]
        recurrent_hidden_states_batch = self.recurrent_hidden_states[:-1].view(
            -1, self.recurrent_hidden_states.size(-1))[indices]
        actions_batch = self.actions.view(-1, self.actions.size(-1))[indices]
        value_preds_batch = self.value_preds[:-1].view(-1, 1)[indices]
        return_batch = self.returns[:-1].view(-1, 1)[indices]
        masks_batch = self.masks[:-1].view(-1, 1)[indices]
        old_action_log_probs_batch = self.action_log_probs.view(-1, 1)[indices]
        if advantages is None:
            adv_targ = None
        else:
            adv_targ = advantages.view(-1, 1)[indices]
        yield obs_batch, recurrent_hidden_states_batch, actions_batch, \
            value_preds_batch, return_batch, masks_batch, \
            old_action_log_probs_batch, adv_targ

def download_landmark_for_classification(data_folder, uniform_sampling=False):
    transform_train, transform_test = create_transformations_for_test_and_train()
    new_test_dataset, new_train_dataset = create_new_train_and_test_datasets(
        transform_train, transform_test, data_folder)
    if uniform_sampling:
        # Integer count of same-label samples per batch (not a float).
        number_of_samples_with_the_same_label_in_the_batch = (batch_size + 1) // 2
        train_loader = data.DataLoader(
            new_train_dataset,
            batch_sampler=BatchSampler(
                sampler=UniformSampler(
                    new_train_dataset,
                    batch_size=batch_size,
                    number_of_samples_with_the_same_label_in_the_batch=(
                        number_of_samples_with_the_same_label_in_the_batch)),
                batch_size=batch_size,
                drop_last=False),
            num_workers=8)
    else:
        train_loader = data.DataLoader(new_train_dataset,
                                       batch_size=batch_size,
                                       drop_last=False,
                                       shuffle=False,
                                       num_workers=8)
    print('train_loader.batch_size = ', train_loader.batch_size,
          ' train_loader.batch_sampler.batch_size =',
          train_loader.batch_sampler.batch_size,
          ' train_loader.dataset ', train_loader.dataset)
    test_loader = None
    # test_loader = data.DataLoader(new_test_dataset,
    #                               batch_size=batch_size,
    #                               drop_last=False,
    #                               shuffle=False,
    #                               num_workers=8)
    return train_loader, test_loader

def get_sequential_batch_generator(self, batch_size, num_steps):
    sampler = BatchSampler(
        SubsetRandomSampler(range(self.size - num_steps)),
        batch_size // self.num_envs, drop_last=True)
    for indices in sampler:
        indices = np.array(indices)
        states = torch.zeros(batch_size, num_steps, *self.states.shape[2:],
                             device=self.device)
        next_states = torch.zeros(batch_size, num_steps,
                                  *self.next_states.shape[2:],
                                  device=self.device)
        actions = torch.zeros([batch_size, num_steps, self.actions.shape[-1]],
                              device=self.device)
        rewards = torch.zeros([batch_size, num_steps, 1], device=self.device)
        masks = torch.zeros([batch_size, num_steps, 1], device=self.device)
        bad_masks = torch.zeros([batch_size, num_steps, 1], device=self.device)
        # Copy num_steps consecutive transitions starting at each sampled index.
        for step in range(num_steps):
            states[:, step, :].copy_(
                self.states[indices + step].view(-1, *self.states.shape[2:]))
            next_states[:, step, :].copy_(
                self.next_states[indices + step].view(
                    -1, *self.next_states.shape[2:]))
            actions[:, step, :].copy_(
                self.actions[indices + step].view(-1, self.actions.shape[-1]))
            rewards[:, step, :].copy_(
                self.rewards[indices + step].view(-1, 1))
            masks[:, step, :].copy_(self.masks[indices + step].view(-1, 1))
            bad_masks[:, step, :].copy_(
                self.bad_masks[indices + step].view(-1, 1))
        yield {'states': states, 'actions': actions, 'masks': masks,
               'next_states': next_states, 'rewards': rewards,
               'bad_masks': bad_masks}

def update(self):
    self.training_step += 1
    state = torch.tensor([t.state for t in self.buffer], dtype=torch.float)
    action = torch.tensor([t.action for t in self.buffer],
                          dtype=torch.float).view(-1, 1)
    reward = torch.tensor([t.reward for t in self.buffer],
                          dtype=torch.float).view(-1, 1)
    next_state = torch.tensor([t.next_state for t in self.buffer],
                              dtype=torch.float)
    old_action_log_prob = torch.tensor([t.a_log_prob for t in self.buffer],
                                       dtype=torch.float).view(-1, 1)
    reward = (reward - reward.mean()) / (reward.std() + 1e-10)
    with torch.no_grad():
        target_v = reward + args.gamma * self.critic_net(next_state)
        advantage = (target_v - self.critic_net(state)).detach()
    for _ in range(self.ppo_epoch):  # iterate ppo_epoch times
        for index in BatchSampler(
                SubsetRandomSampler(range(self.buffer_capacity)),
                self.batch_size, True):
            # Epoch iteration: the PPO core.
            mu, sigma = self.actor_net(state[index])
            n = Normal(mu, sigma)
            action_log_prob = n.log_prob(action[index])
            ratio = torch.exp(action_log_prob - old_action_log_prob[index])
            L1 = ratio * advantage[index]
            L2 = torch.clamp(ratio, 1 - self.clip_param,
                             1 + self.clip_param) * advantage[index]
            action_loss = -torch.min(L1, L2).mean()  # max objective -> min descent
            self.actor_optimizer.zero_grad()
            action_loss.backward()
            nn.utils.clip_grad_norm_(self.actor_net.parameters(),
                                     self.max_grad_norm)
            self.actor_optimizer.step()
            value_loss = F.smooth_l1_loss(self.critic_net(state[index]),
                                          target_v[index])
            self.critic_net_optimizer.zero_grad()
            value_loss.backward()
            nn.utils.clip_grad_norm_(self.critic_net.parameters(),
                                     self.max_grad_norm)
            self.critic_net_optimizer.step()
    del self.buffer[:]

def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
             batch_sampler=None, num_workers=0, collate_fn=default_collate,
             pin_memory=False, drop_last=False, timeout=0,
             worker_init_fn=None):
    self.dataset = dataset
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.collate_fn = collate_fn
    self.pin_memory = pin_memory
    self.drop_last = drop_last
    self.timeout = timeout
    self.worker_init_fn = worker_init_fn
    if timeout < 0:
        raise ValueError('timeout option should be non-negative')
    if batch_sampler is not None:
        if batch_size > 1 or shuffle or sampler is not None or drop_last:
            raise ValueError('batch_sampler is mutually exclusive with '
                             'batch_size, shuffle, sampler, and drop_last')
    if sampler is not None and shuffle:
        raise ValueError('sampler is mutually exclusive with shuffle')
    if self.num_workers < 0:
        raise ValueError('num_workers cannot be negative; '
                         'use num_workers=0 to disable multiprocessing.')
    if batch_sampler is None:
        if sampler is None:
            if shuffle:
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)
    self.sampler = sampler
    self.batch_sampler = batch_sampler
    self._init_workers()

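# Hedged illustration of the defaulting logic in the constructor above, using
# only public torch.utils.data components: shuffle picks the sampler, which is
# then wrapped in a BatchSampler with the given batch_size and drop_last.
from torch.utils.data import BatchSampler, RandomSampler, SequentialSampler

dataset = list(range(10))
for shuffle in (False, True):
    sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
    batch_sampler = BatchSampler(sampler, batch_size=4, drop_last=False)
    print(shuffle, [len(b) for b in batch_sampler])  # -> [4, 4, 2] either way
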
def train() -> int:
    data_directory = os.environ['SM_CHANNEL_TRAINING']
    ratings_data = RatingsData(data_directory)
    sampler = BatchSampler(RandomSampler(ratings_data), batch_size=100,
                           drop_last=False)
    # Pass the BatchSampler through the batch_sampler argument so the loader
    # yields one prepared batch per iteration.
    train_loader = DataLoader(ratings_data, batch_sampler=sampler)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Recommender(ratings_data.get_dim()).to(device)
    model.train()
    model.init_params()
    optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)
    loss_fcn = nn.MSELoss()
    for batch in train_loader:
        # 'batch' is a tensor with shape (batch_size, 1, n_items).
        batch = batch.to(device)
        optimizer.zero_grad()
        output = model(batch)
        idx = batch != 0
        loss = loss_fcn(output[idx], batch[idx])
        # Add a penalty to enforce orthonormal factors; the identity must
        # live on the same device as the model parameters.
        params = next(model.parameters())
        Id = torch.eye(model.get_dim(), device=device)
        penalty = 0.1 * ((params.T @ params - Id) ** 2).sum() / model.get_dim() ** 2
        print(f"Penalty: {penalty}")
        loss_with_penalty = loss + penalty
        loss_with_penalty.backward()
        print(f"Loss: {loss.item()}")
        optimizer.step()
    print(next(model.parameters()))
    return 0

def _get_sampler(self, cursor, is_test, limit=None, use_fast_sampler=False):
    if self.use_conll:
        return BatchSampler(RandomSampler(self._dataset),
                            self.train_params.batch_size, False)
    else:
        page_ids = (self.page_id_order_test if is_test
                    else self.page_id_order_train)
        return MentionContextBatchSampler(cursor, page_ids,
                                          self.train_params.batch_size,
                                          self.train_params.min_mentions,
                                          limit=limit,
                                          use_fast_sampler=use_fast_sampler)

def __init__(self, dataset, batch_size=1, shuffle=False, num_workers=0,
             pin_memory=False, drop_last=False, batch_container=tuple):
    if num_workers != 0:
        print("warning: num_workers != 0 is not supported; "
              "num_workers=0 is used instead")
    sampler = RandomSampler if shuffle else SequentialSampler
    self.pin_memory = pin_memory
    self.batch_sampler = BatchSampler(sampler=sampler(range(len(dataset))),
                                      batch_size=batch_size,
                                      drop_last=drop_last)
    self.dataset = dataset
    self.batch_container = batch_container

def sample_batch(self, batch_size=512):
    if not self.buffer_ready:
        self._finish_buffer()
    if self.is_recurrent:
        raise NotImplementedError("This is not supported yet")
    else:
        random_indices = SubsetRandomSampler(range(self.size))
        sampler = BatchSampler(random_indices, batch_size, drop_last=True)
        for indices in sampler:
            states = self.states[indices]
            actions = self.actions[indices]
            returns = self.returns[indices]
            log_probs = self.log_probs[indices]
            advantages = self.advantages[indices]
            yield states, actions, returns, log_probs, advantages

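# A minimal, self-contained analogue of sample_batch above (assumptions: flat
# per-step tensors and standard torch.utils.data samplers), showing the
# pattern: each call makes one shuffled pass over the buffer in minibatches.
import torch
from torch.utils.data import BatchSampler, SubsetRandomSampler

def minibatches(*tensors, batch_size=4):
    n = tensors[0].size(0)
    for idx in BatchSampler(SubsetRandomSampler(range(n)), batch_size,
                            drop_last=True):
        yield tuple(t[idx] for t in tensors)

states, actions = torch.randn(10, 3), torch.randn(10, 1)
for s, a in minibatches(states, actions):
    assert s.shape == (4, 3) and a.shape == (4, 1)
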
def update(self):
    s = torch.tensor(self.buffer['s'], dtype=torch.double).to(device)
    for _ in range(args.train_epochs):
        print('New epoch\n')
        for index in BatchSampler(
                SubsetRandomSampler(range(args.buffer_capacity)),
                args.batch_size, False):
            s_in = s[index]
            z, s_hat = self.AE(s_in)
            loss = self.criterion(s_hat, s_in)
            print("Loss:\t", loss.item())
            if args.tensorboard:
                writer.add_scalar('Loss', loss.item(), self.step)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.step += 1

def get_predictions(self, dataset):
    batch_sampler = BatchSampler(
        sampler=self.get_base_sampler(len(dataset), shuffle=False),
        batch_size=256, drop_last=False)
    loader = torch.utils.data.DataLoader(dataset, batch_sampler=batch_sampler)
    preds = []
    with torch.no_grad():
        for (data, _) in tqdm.tqdm(loader, disable=self.logger.level > 15):
            if self.cuda_available:
                data = data.cuda()
            prediction = self.model(data)
            if self.cuda_available:
                prediction = prediction.detach().cpu()
            preds.append(prediction.data)
    return torch.cat(preds).numpy()

def __init__(self, dataset, batch_size=1, shuffle=False, batch_sampler=None,
             sampler=None, pad_idx=0, drop_last=False):
    self.dataset = dataset
    self.pad_idx, self.batch_size, self.shuffle = pad_idx, batch_size, shuffle
    if batch_sampler is None:
        if sampler is None:
            sampler = (RandomSampler(dataset) if shuffle
                       else SequentialSampler(dataset))
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)
    self.sampler = sampler
    self.batch_sampler = batch_sampler

def data_loader(corpus, vocab, batch_size, sort_batches, num_data_workers=0,
                verbose=True):
    if sort_batches:
        if verbose:
            logging.info(
                'Using sort batch sampler. '
                'WARNING: While being more efficient than the standard one, '
                'the loss could be higher (as the data is not completely '
                'random)')
        source_lengths, target_lengths = corpus.get_lengths()
        batch_sampler = SortedBatchSampler(
            source_lengths=source_lengths,
            target_lengths=target_lengths,
            batch_size=batch_size,
            verbose=verbose,
        )
    else:
        if verbose:
            logging.info(
                'Using standard random batch sampler. '
                'WARNING: That might be inefficient as sentences in the '
                'batch might be of drastically different length')
        batch_sampler = BatchSampler(
            sampler=RandomSampler(corpus),
            batch_size=batch_size,
            drop_last=True,
        )
    return torch.utils.data.DataLoader(
        corpus,
        batch_sampler=batch_sampler,
        num_workers=num_data_workers,
        collate_fn=lambda samples: prepare_batch_from_parallel_samples(
            parallel_samples=samples,
            pad_token_id=vocab.pad_idx,
            eos_token_id=vocab.eos_idx,
            go_token_id=vocab.go_idx,
        ),
        pin_memory=True,
    )