def random_crop(image, boxes, labels):
    """Randomly crops the image, SSD-style: a minimum-overlap threshold is
    sampled per attempt, and only boxes whose centers fall inside the crop
    are kept, shifted into the crop's coordinate frame. Boxes are expected
    in boundary (corner) coordinates.
    """
    original_h = image.size(1)
    original_w = image.size(2)
    while True:
        # Sample a minimum Jaccard overlap; None means "keep the original image"
        min_overlap = rand_choice([0., .1, .3, .5, .7, .9, None])
        if min_overlap is None:
            return image, boxes, labels
        max_trials = 50
        for _ in range(max_trials):
            min_scale = 0.3
            scale_h = rand_uniform(min_scale, 1)
            scale_w = rand_uniform(min_scale, 1)
            new_h = int(scale_h * original_h)
            new_w = int(scale_w * original_w)
            # Reject crops with an extreme aspect ratio
            aspect_ratio = new_h / new_w
            if not 0.5 < aspect_ratio < 2:
                continue
            left = randint(0, original_w - new_w)
            right = left + new_w
            top = randint(0, original_h - new_h)
            bottom = top + new_h
            crop = FloatTensor([left, top, right, bottom])
            # Require at least one box to clear the sampled overlap threshold
            overlap = find_jaccard_overlap(crop.unsqueeze(0), boxes)
            overlap = overlap.squeeze(0)
            if overlap.max().item() < min_overlap:
                continue
            new_image = image[:, top:bottom, left:right]
            # Keep only boxes whose centers lie inside the crop
            bb_centers = (boxes[:, :2] + boxes[:, 2:]) / 2.
            centers_in_crop = (bb_centers[:, 0] > left) * (bb_centers[:, 0] < right) \
                * (bb_centers[:, 1] > top) * (bb_centers[:, 1] < bottom)
            if not centers_in_crop.any():
                continue
            new_boxes = boxes[centers_in_crop, :]
            new_labels = labels[centers_in_crop]
            # Clip surviving boxes to the crop, then shift to crop coordinates
            new_boxes[:, :2] = torch_max(new_boxes[:, :2], crop[:2])
            new_boxes[:, :2] -= crop[:2]
            new_boxes[:, 2:] = torch_min(new_boxes[:, 2:], crop[2:])
            new_boxes[:, 2:] -= crop[:2]
            return new_image, new_boxes, new_labels
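# --- Usage sketch for random_crop (illustration only, not from the original
# source). It assumes the helper aliases used above resolve roughly as:
#   from random import choice as rand_choice, uniform as rand_uniform, randint
#   from torch import FloatTensor, max as torch_max, min as torch_min
# and that find_jaccard_overlap is an IoU helper (assumed available).
import torch

image = torch.rand(3, 300, 300)                      # C x H x W image
boxes = torch.FloatTensor([[50., 60., 200., 220.]])  # one box, corner coords
labels = torch.LongTensor([1])

new_image, new_boxes, new_labels = random_crop(image, boxes, labels)
# new_boxes are clipped to the crop and shifted into its coordinate frame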
def _bbox_ious(box1, box2, is_corner_coordinates=True):
    """Calculation of the intersection over union **for many predictions and
    ground truths** as used in a YOLO rewrite in PyTorch.
    This is implemented as shown in https://github.com/CharlesPikachu/YOLO.
    Modifications are made for variable names only. All credits to @CharlesPikachu.
    """
    # Extent of the union box along each axis; boxes may be given either in
    # corner coordinates (x1, y1, x2, y2) or center coordinates (cx, cy, w, h)
    x_left = torch_min(box1[0], box2[0]) if is_corner_coordinates else torch_min(
        box1[0] - box1[2] / 2.0, box2[0] - box2[2] / 2.0)
    x_right = torch_max(box1[2], box2[2]) if is_corner_coordinates else torch_max(
        box1[0] + box1[2] / 2.0, box2[0] + box2[2] / 2.0)
    y_top = torch_min(box1[1], box2[1]) if is_corner_coordinates else torch_min(
        box1[1] - box1[3] / 2.0, box2[1] - box2[3] / 2.0)
    y_bottom = torch_max(box1[3], box2[3]) if is_corner_coordinates else torch_max(
        box1[1] + box1[3] / 2.0, box2[1] + box2[3] / 2.0)
    box1_width = box1[2] - box1[0] if is_corner_coordinates else box1[2]
    box1_height = box1[3] - box1[1] if is_corner_coordinates else box1[3]
    box2_width = box2[2] - box2[0] if is_corner_coordinates else box2[2]
    box2_height = box2[3] - box2[1] if is_corner_coordinates else box2[3]
    raw_union_width = x_right - x_left
    raw_union_height = y_bottom - y_top
    # Intersection dimensions; non-positive values mean the boxes do not overlap
    intersection_width = box1_width + box2_width - raw_union_width
    intersection_height = box1_height + box2_height - raw_union_height
    mask = ((intersection_width <= 0) + (intersection_height <= 0) > 0)
    box1_area = box1_width * box1_height
    box2_area = box2_width * box2_height
    intersection = intersection_width * intersection_height
    intersection[mask] = 0
    union = box1_area + box2_area - intersection
    return intersection / union
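# --- Usage sketch for _bbox_ious (illustration only) ---
# Boxes are laid out along dim 0 as [x1, y1, x2, y2] (corner form) or
# [cx, cy, w, h] (center form); IoUs are computed element-wise per column.
# Assumes torch_min/torch_max alias torch.min/torch.max.
import torch

preds = torch.tensor([[0., 10.],
                      [0., 10.],
                      [10., 20.],
                      [10., 20.]])    # two boxes in corner coordinates
truths = torch.tensor([[5., 10.],
                       [5., 10.],
                       [15., 20.],
                       [15., 20.]])

ious = _bbox_ious(preds, truths)  # approximately tensor([0.1429, 1.0000])
# Center-coordinate inputs would instead pass is_corner_coordinates=False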
def _find_intersection(set1, set2):
    """Calculation of the intersection of every box combination between two
    sets of boxes that are in boundary coordinates, as used in an SSD rewrite
    in PyTorch.
    This is implemented as shown in
    https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection.
    Some modifications are made. All credits to @sgrvinod.
    """
    # Broadcast to (n1, n2, 2): pairwise max of top-left corners and pairwise
    # min of bottom-right corners
    lower_bounds = torch_max(set1[:, :2].unsqueeze(1), set2[:, :2].unsqueeze(0))
    upper_bounds = torch_min(set1[:, 2:].unsqueeze(1), set2[:, 2:].unsqueeze(0))
    # Clamp at zero so non-overlapping pairs contribute no area
    intersection_dims = torch_clamp(upper_bounds - lower_bounds, min=0)
    return intersection_dims[:, :, 0] * intersection_dims[:, :, 1]
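# --- Companion sketch (illustration only): full pairwise Jaccard overlap
# built on _find_intersection, in the same broadcast style. The function name
# here is hypothetical, not from the original source.
def _find_jaccard_overlap(set1, set2):
    # Pairwise intersection areas, shape (n1, n2)
    intersection = _find_intersection(set1, set2)
    # Box areas for each set, in boundary coordinates
    areas1 = (set1[:, 2] - set1[:, 0]) * (set1[:, 3] - set1[:, 1])  # (n1,)
    areas2 = (set2[:, 2] - set2[:, 0]) * (set2[:, 3] - set2[:, 1])  # (n2,)
    # Union = area1 + area2 - intersection, broadcast to (n1, n2)
    union = areas1.unsqueeze(1) + areas2.unsqueeze(0) - intersection
    return intersection / union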
def trainStep(self, batchSize=None):
    """Performs a training step (update) for the actor and critic models,
    based on the transitions gathered in the buffer, then resets the buffer.

    :param batchSize: Overrides the agent's default self.batch_size,
        defaults to None
    :type batchSize: int, optional
    :return: None
    """
    # Default behaviour waits for the buffer to collect at least one
    # batch_size of transitions
    if batchSize is None:
        if len(self.buffer) < self.batch_size:
            return
        batchSize = self.batch_size

    # Extract states, actions, rewards and action probabilities from the
    # transitions in the buffer
    state = tensor([t.state for t in self.buffer], dtype=torch_float)
    action = tensor([t.action for t in self.buffer], dtype=torch_long).view(-1, 1)
    reward = [t.reward for t in self.buffer]
    old_action_log_prob = tensor([t.a_log_prob for t in self.buffer],
                                 dtype=torch_float).view(-1, 1)

    # Unroll rewards into discounted returns
    R = 0
    Gt = []
    for r in reward[::-1]:
        R = r + self.gamma * R
        Gt.insert(0, R)
    Gt = tensor(Gt, dtype=torch_float)

    # Send everything to CUDA if used
    if self.use_cuda:
        state, action, old_action_log_prob = \
            state.cuda(), action.cuda(), old_action_log_prob.cuda()
        Gt = Gt.cuda()

    # Repeat the update procedure for ppo_update_iters
    for _ in range(self.ppo_update_iters):
        # Create randomly ordered batches of size batchSize from the buffer
        for index in BatchSampler(SubsetRandomSampler(range(len(self.buffer))),
                                  batchSize, False):
            # Calculate the advantage at each step
            Gt_index = Gt[index].view(-1, 1)
            V = self.critic_net(state[index])
            delta = Gt_index - V
            advantage = delta.detach()

            # Get the current policy's probabilities for the stored actions,
            # selected with .gather()
            action_prob = self.actor_net(state[index]).gather(1, action[index])  # new policy

            # PPO ratio between current and old policy probabilities
            ratio = (action_prob / old_action_log_prob[index])
            surr1 = ratio * advantage
            surr2 = clamp(ratio, 1 - self.clip_param,
                          1 + self.clip_param) * advantage

            # Update the actor network
            action_loss = -torch_min(surr1, surr2).mean()  # MAX->MIN descent
            self.actor_optimizer.zero_grad()  # Delete old gradients
            action_loss.backward()  # Compute new gradients
            nn.utils.clip_grad_norm_(self.actor_net.parameters(),
                                     self.max_grad_norm)  # Clip gradients
            self.actor_optimizer.step()  # Training step based on gradients

            # Update the critic network
            value_loss = F.mse_loss(Gt_index, V)
            self.critic_net_optimizer.zero_grad()
            value_loss.backward()
            nn.utils.clip_grad_norm_(self.critic_net.parameters(),
                                     self.max_grad_norm)
            self.critic_net_optimizer.step()

    # After each training step, the buffer is cleared
    del self.buffer[:]
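# --- Buffer-format sketch (illustration only) ---
# trainStep reads t.state, t.action, t.a_log_prob and t.reward from each
# buffered transition; a record like the following namedtuple (hypothetical
# field order) satisfies that. Note that a_log_prob is divided directly in
# the PPO ratio above, so it is expected to store the sampled action's raw
# probability, despite its name.
from collections import namedtuple

Transition = namedtuple('Transition', ['state', 'action', 'a_log_prob', 'reward'])

# Hypothetical rollout usage:
# agent.buffer.append(Transition(state, action, sampled_action_prob, reward))
# ...
# agent.trainStep()  # consumes the buffer, updates actor/critic, clears it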