def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
        inter_rect_y2 - inter_rect_y1 + 1, min=0
    )

    # Union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou
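# A minimal usage sketch for the bbox_iou above (toy tensors assumed for illustration,
# not taken from the original source). With x1y1x2y2=True the rows are corner-encoded
# boxes; with x1y1x2y2=False they are (cx, cy, w, h) and get converted internally.
import torch

boxes_a = torch.tensor([[0.0, 0.0, 9.0, 9.0]])
boxes_b = torch.tensor([[5.0, 5.0, 14.0, 14.0]])
print(bbox_iou(boxes_a, boxes_b))  # elementwise IoU, one value per row pair
print(bbox_iou(torch.tensor([[4.5, 4.5, 10.0, 10.0]]),
               torch.tensor([[9.5, 9.5, 10.0, 10.0]]), x1y1x2y2=False))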
def bbox_iou(box1, box2):
    """
    Returns the IoU of two bounding boxes
    """
    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \
        torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)

    # Union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area)

    return iou
def bbox_overlaps(boxes, query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray or tensor or variable
    query_boxes: (K, 4) ndarray or tensor or variable
    Returns
    -------
    overlaps: (N, K) overlap between boxes and query_boxes
    """
    if isinstance(boxes, np.ndarray):
        boxes = torch.from_numpy(boxes)
        query_boxes = torch.from_numpy(query_boxes)
        out_fn = lambda x: x.numpy()  # if the input is an ndarray, convert the overlaps back to an ndarray on return
    else:
        out_fn = lambda x: x

    box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
                (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)

    iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) -
          torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0)
    ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) -
          torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0)
    ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih
    overlaps = iw * ih / ua
    return out_fn(overlaps)
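# Minimal sketch (assumed toy data) of the N x K overlap matrix returned by bbox_overlaps
# above: every box in `boxes` is compared against every box in `query_boxes`, and ndarray
# inputs come back as an ndarray.
import numpy as np

boxes = np.array([[0, 0, 9, 9], [10, 10, 19, 19]], dtype=np.float32)                  # N = 2
query = np.array([[0, 0, 9, 9], [5, 5, 14, 14], [20, 20, 29, 29]], dtype=np.float32)  # K = 3
print(bbox_overlaps(boxes, query).shape)  # (2, 3)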
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    if x1y1x2y2:
        mx = torch.min(boxes1[0], boxes2[0])
        Mx = torch.max(boxes1[2], boxes2[2])
        my = torch.min(boxes1[1], boxes2[1])
        My = torch.max(boxes1[3], boxes2[3])
        w1 = boxes1[2] - boxes1[0]
        h1 = boxes1[3] - boxes1[1]
        w2 = boxes2[2] - boxes2[0]
        h2 = boxes2[3] - boxes2[1]
    else:
        mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
        Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
        my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
        My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
        w1 = boxes1[2]
        h1 = boxes1[3]
        w2 = boxes2[2]
        h2 = boxes2[3]
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    mask = ((cw <= 0) + (ch <= 0) > 0)

    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    carea[mask] = 0
    uarea = area1 + area2 - carea
    return carea / uarea
def updateOutput(self, input):
    self._lazyInit()
    dimension = self._getPositiveDimension(input)
    torch.min(input, dimension, out=(self._output, self._indices), keepdim=True)
    if input.dim() > 1:
        self.output.set_(self._output.select(dimension, 0))
    else:
        self.output.set_(self._output)
    return self.output
def forward(self, v, u, d):
    """
    @param v [batch_size, embedding_size] matrix to push
    @param u [batch_size,] vector of pop signals in (0, 1)
    @param d [batch_size,] vector of push signals in (0, 1)
    @return [batch_size, embedding_size] or [batch_size, self.k, embedding_size] read matrix
    """
    # update V, which is of size [t, batch_size, embedding_size]
    v = v.view(1, self.batch_size, self.embedding_size)
    self.V = torch.cat([self.V, v], 0) if len(self.V.data) != 0 else v

    # TODO append to self.s so we can terminate lower loop early?
    # TODO initialize stack to fixed size

    # update s, which is of size [t, batch_size]
    old_t = self.s.data.shape[0] if self.s.data.shape else 0
    s = Variable(torch.FloatTensor(old_t + 1, self.batch_size))
    w = u
    for i in reversed(xrange(old_t)):
        s_ = F.relu(self.s[i, :] - w)
        w = F.relu(w - self.s[i, :])
        s[i, :] = s_
    s[old_t, :] = d
    self.s = s

    if self.k is None:
        # calculate r, which is of size [batch_size, embedding_size]
        r = Variable(torch.zeros([self.batch_size, self.embedding_size]))
        for i in reversed(xrange(old_t + 1)):
            used = torch.sum(self.s[i + 1:old_t + 1, :], 0) if i < old_t else self.zero
            coeffs = torch.min(self.s[i, :], F.relu(1 - used))
            # reformatting coeffs into a matrix that can be multiplied element-wise
            r += coeffs.view(self.batch_size, 1).repeat(1, self.embedding_size) * self.V[i, :, :]
        return r
    else:
        # calculate k read vectors
        # TODO can probably make this more efficient
        r = Variable(torch.zeros([self.batch_size, self.k, self.embedding_size]))
        for k in xrange(self.k):
            for i in reversed(xrange(old_t + 1)):
                used = torch.sum(self.s[i + 1:old_t + 1, :], 0) if i < old_t else self.zero
                coeffs = torch.min(self.s[i, :], F.relu(1 + k - used))
                r[:, k, :] = r[:, k, :] + coeffs.view(self.batch_size, 1).repeat(1, self.embedding_size) * self.V[i, :, :]
        for k in reversed(xrange(1, self.k)):
            r[:, k, :] = r[:, k, :] - r[:, k - 1, :]
        return r
def bbox_transform(self, boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0), clip_value=4.135166556742356):
    """Forward transform that maps proposal boxes to predicted ground-truth
    boxes using bounding-box regression deltas. See bbox_transform_inv for a
    description of the weights argument.
    """
    if boxes.size(0) == 0:
        return None
        # return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    # get boxes dimensions and centers
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    wx, wy, ww, wh = weights
    dx = deltas[:, 0::4] / wx
    dy = deltas[:, 1::4] / wy
    dw = deltas[:, 2::4] / ww
    dh = deltas[:, 3::4] / wh

    clip_value = Variable(torch.FloatTensor([clip_value]))
    if boxes.is_cuda:
        clip_value = clip_value.cuda()

    # Prevent sending too large values into torch.exp()
    dw = torch.min(dw, clip_value)
    dh = torch.min(dh, clip_value)

    pred_ctr_x = dx * widths.unsqueeze(1) + ctr_x.unsqueeze(1)
    pred_ctr_y = dy * heights.unsqueeze(1) + ctr_y.unsqueeze(1)
    pred_w = torch.exp(dw) * widths.unsqueeze(1)
    pred_h = torch.exp(dh) * heights.unsqueeze(1)

    # pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    # x1
    pred_boxes_x1 = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes_y1 = pred_ctr_y - 0.5 * pred_h
    # x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes_x2 = pred_ctr_x + 0.5 * pred_w - 1
    # y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
    pred_boxes_y2 = pred_ctr_y + 0.5 * pred_h - 1

    pred_boxes = torch.cat((pred_boxes_x1, pred_boxes_y1, pred_boxes_x2, pred_boxes_y2), 1)

    return pred_boxes
def forward(self, v, u, d):
    """
    @param v [batch_size, embedding_size] matrix to push
    @param u [batch_size,] vector of pop signals in (0, 1)
    @param d [batch_size,] vector of push signals in (0, 1)
    @return [batch_size, embedding_size] read matrix
    """
    # update V, which is of size [t, batch_size, embedding_size]
    v = v.view(1, self.batch_size, self.embedding_size)
    self.V = torch.cat([self.V, v], 0) if len(self.V.data) != 0 else v

    # TODO initialize queue to fixed size

    # update s, which is of size [t, batch_size]
    old_t = self.s.size(0) if self.s.size() else 0
    s = Variable(torch.FloatTensor(old_t + 1, self.batch_size))
    w = u
    for i in xrange(old_t):
        s_ = F.relu(self.s[i, :] - w)
        w = F.relu(w - self.s[i, :])
        s[i, :] = s_
        # if len(torch.nonzero(w.data)) == 0: break  # TODO does this if work properly now?
    s[old_t, :] = d
    self.s = s

    # calculate r, which is of size [batch_size, embedding_size]
    r = Variable(torch.zeros([self.batch_size, self.embedding_size]))
    for i in xrange(old_t + 1):
        used = torch.sum(self.s[:i, :], 0) if i > 0 else self.zero
        coeffs = torch.min(self.s[i, :], F.relu(1 - used))
        # reformatting coeffs into a matrix that can be multiplied element-wise
        r += coeffs.view(self.batch_size, 1).repeat(1, self.embedding_size) * self.V[i, :, :]
    return r
def my_loss_function(reconstructed_x, z_patches, prototypes, padding_idx, x, lambda_=0.01):
    """
    reconstructed_x : batch_size, channels, height, width
    z_patches : batch_size, num_patches, embedding_dim
    prototypes : batch_size, num_prototypes, embedding_dim
    padding_idx : batch_size
    x : batch_size, channels, height, width
    """
    assert not x.requires_grad

    batch_size = x.size(0)
    loss = F.mse_loss(reconstructed_x, x, size_average=False)
    for i in range(batch_size):
        image_patches = z_patches[i]
        image_prototypes = prototypes[i][:padding_idx[i]]

        dists = pairwise_squared_euclidean_distances(
            image_prototypes, image_patches)
        min_dists = torch.min(dists, dim=1)[0]
        prototype_loss = torch.sum(min_dists)
        loss = loss + (lambda_ * prototype_loss)

    loss = loss / batch_size
    return loss
def bisect_demo():
    """ Bisect the LB/UB on specified columns.
        The key is to use scatter_() to convert indices into one-hot encodings.
    """
    t1t2 = torch.stack((torch.randn(5, 4), torch.randn(5, 4)), dim=-1)
    lb, _ = torch.min(t1t2, dim=-1)
    ub, _ = torch.max(t1t2, dim=-1)
    print('LB:', lb)
    print('UB:', ub)

    # random idxs for testing
    idxs = torch.randn_like(lb)
    _, idxs = idxs.max(dim=-1)  # <Batch>
    print('Split idxs:', idxs)

    idxs = idxs.unsqueeze(dim=-1)  # Batch x 1
    idxs = torch.zeros_like(lb).byte().scatter_(-1, idxs, 1)  # convert into one-hot encoding
    print('Reorg idxs:', idxs)

    mid = (lb + ub) / 2.0
    lefts_lb = lb
    lefts_ub = torch.where(idxs, mid, ub)   # use the one-hot encoding to call torch.where()
    rights_lb = torch.where(idxs, mid, lb)  # definitely faster than element-wise reassignment
    rights_ub = ub

    print('LEFT LB:', lefts_lb)
    print('LEFT UB:', lefts_ub)
    print('RIGHT LB:', rights_lb)
    print('RIGHT UB:', rights_ub)

    newlb = torch.cat((lefts_lb, rights_lb), dim=0)
    newub = torch.cat((lefts_ub, rights_ub), dim=0)
    return newlb, newub
def eval_one_batch(self, batch):
    enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
        get_input_from_batch(batch, use_cuda)
    dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
        get_output_from_batch(batch, use_cuda)

    encoder_outputs, encoder_hidden, max_encoder_output = self.model.encoder(enc_batch, enc_lens)
    s_t_1 = self.model.reduce_state(encoder_hidden)

    if config.use_maxpool_init_ctx:
        c_t_1 = max_encoder_output

    step_losses = []
    for di in range(min(max_dec_len, config.max_dec_steps)):
        y_t_1 = dec_batch[:, di]  # Teacher forcing
        final_dist, s_t_1, c_t_1, attn_dist, p_gen, coverage = self.model.decoder(
            y_t_1, s_t_1, encoder_outputs, enc_padding_mask, c_t_1,
            extra_zeros, enc_batch_extend_vocab, coverage)
        target = target_batch[:, di]
        gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
        step_loss = -torch.log(gold_probs + config.eps)
        if config.is_coverage:
            step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
            step_loss = step_loss + config.cov_loss_wt * step_coverage_loss

        step_mask = dec_padding_mask[:, di]
        step_loss = step_loss * step_mask
        step_losses.append(step_loss)

    sum_step_losses = torch.sum(torch.stack(step_losses, 1), 1)
    batch_avg_loss = sum_step_losses / dec_lens_var
    loss = torch.mean(batch_avg_loss)

    return loss.data[0]
def coverage_wu(self, beam, cov, beta=0.):
    """
    NMT coverage re-ranking score from
    "Google's Neural Machine Translation System" :cite:`wu2016google`.
    """
    penalty = -torch.min(cov, cov.clone().fill_(1.0)).log().sum(1)
    return beta * penalty
def forward(self, y_pred, y_true, eps=1e-6):
    raise NotImplementedError  # the soft span-F1 loss below is currently disabled

    torch.nn.modules.loss._assert_no_grad(y_true)
    assert y_pred.shape[1] == 2

    same_left = torch.stack([y_true[:, 0], y_pred[:, 0]], dim=1)
    same_left, _ = torch.max(same_left, dim=1)

    same_right = torch.stack([y_true[:, 1], y_pred[:, 1]], dim=1)
    same_right, _ = torch.min(same_right, dim=1)

    same_len = same_right - same_left + 1  # (batch_size,)
    same_len = torch.stack([same_len, torch.zeros_like(same_len)], dim=1)
    same_len, _ = torch.max(same_len, dim=1)

    same_len = same_len.type(torch.float)
    pred_len = (y_pred[:, 1] - y_pred[:, 0] + 1).type(torch.float)
    true_len = (y_true[:, 1] - y_true[:, 0] + 1).type(torch.float)

    pre = same_len / (pred_len + eps)
    rec = same_len / (true_len + eps)

    f1 = 2 * pre * rec / (pre + rec + eps)

    return -torch.mean(f1)
def shortest_dist(dist_mat):
    """Parallel version.
    Args:
      dist_mat: pytorch Variable, available shape:
        1) [m, n]
        2) [m, n, N], N is batch size
        3) [m, n, *], * can be arbitrary additional dimensions
    Returns:
      dist: three cases corresponding to `dist_mat`:
        1) scalar
        2) pytorch Variable, with shape [N]
        3) pytorch Variable, with shape [*]
    """
    m, n = dist_mat.size()[:2]
    # Just offering some reference for accessing intermediate distance.
    dist = [[0 for _ in range(n)] for _ in range(m)]
    for i in range(m):
        for j in range(n):
            if (i == 0) and (j == 0):
                dist[i][j] = dist_mat[i, j]
            elif (i == 0) and (j > 0):
                dist[i][j] = dist[i][j - 1] + dist_mat[i, j]
            elif (i > 0) and (j == 0):
                dist[i][j] = dist[i - 1][j] + dist_mat[i, j]
            else:
                dist[i][j] = torch.min(dist[i - 1][j], dist[i][j - 1]) + dist_mat[i, j]
    dist = dist[-1][-1]
    return dist
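# Tiny worked example (assumed toy input) for shortest_dist above: on a 2 x 2 cost matrix
# the monotone path can only move right or down, so the result is
# dist_mat[0, 0] + min(dist_mat[0, 1], dist_mat[1, 0]) + dist_mat[1, 1].
import torch

dist_mat = torch.tensor([[1.0, 5.0],
                         [2.0, 1.0]])
print(shortest_dist(dist_mat))  # tensor(4.) = 1 + min(5, 2) + 1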
def __call__(self, reconstructed_x, z_patches, prototypes, padding_idx, x):
    """
    reconstructed_x : batch_size, channels, height, width
    z_patches : batch_size, num_patches, embedding_dim
    prototypes : batch_size, num_prototypes, embedding_dim
    padding_idx : batch_size
    x : batch_size, channels, height, width
    """
    assert not x.requires_grad

    lambda_val = self._lambda_val
    batch_size = x.size(0)
    loss = lambda_val * self._reconstruction_loss(
        reconstructed_x, x, size_average=False)

    for i in range(batch_size):
        image_patches = z_patches[i]
        associated_prototypes = prototypes[i][:padding_idx[i]]

        dists = pairwise_squared_euclidean_distances(
            associated_prototypes, image_patches)
        nearest_patch_idxs = torch.min(dists, dim=1)[1]
        loss += self._xentropy_loss(-dists, nearest_patch_idxs)

    loss = loss / batch_size
    return loss
def generate_smooth_grad(Backprop, prep_img, target_class, param_n, param_sigma_multiplier):
    """
        Generates smooth gradients of given Backprop type. You can use this with both vanilla
        and guided backprop
    Args:
        Backprop (class): Backprop type
        prep_img (torch Variable): preprocessed image
        target_class (int): target class of imagenet
        param_n (int): Amount of images used to smooth gradient
        param_sigma_multiplier (int): Sigma multiplier when calculating std of noise
    """
    # Generate an empty image/matrix
    smooth_grad = np.zeros(prep_img.size()[1:])

    mean = 0
    sigma = param_sigma_multiplier / (torch.max(prep_img) - torch.min(prep_img)).data[0]
    for x in range(param_n):
        # Generate noise
        noise = Variable(prep_img.data.new(prep_img.size()).normal_(mean, sigma**2))
        # Add noise to the image
        noisy_img = prep_img + noise
        # Calculate gradients
        vanilla_grads = Backprop.generate_gradients(noisy_img, target_class)
        # Add gradients to smooth_grad
        smooth_grad = smooth_grad + vanilla_grads
    # Average it out
    smooth_grad = smooth_grad / param_n
    return smooth_grad
def box_iou(box1, box2):
    """
    Compute the IoU between two sets of boxes, where box1 holds the default boxes
    (default_box_xyxy, in xyxy format) and box2 holds the ground-truth bounding boxes.
    :param box1: default_boxes, [#default_boxes, 4]
    :param box2: bounding_boxes, [#bounding_boxes, 4]
    :return: iou, sized [#default_boxes, #bounding_boxes]
    """
    # print('box1.size():{}'.format(box1.size()))
    # print('box2.size():{}'.format(box2.size()))
    lt = torch.max(box1[:, None, :2], box2[:, :2])  # [#default_boxes, #bounding_boxes, 2]
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [#default_boxes, #bounding_boxes, 2]
    # print('lt:{}'.format(lt))
    # print('rb:{}'.format(rb))
    wh = (rb - lt).clamp(min=0)  # [#default_boxes, #bounding_boxes, 2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [#default_boxes, #bounding_boxes]
    # print('inter:{}'.format(inter))
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [#default_boxes]
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [#bounding_boxes]
    # print('area1:{}'.format(area1))
    # print('area2:{}'.format(area2))
    iou = inter / (area1[:, None] + area2 - inter)
    # print('iou:{}'.format(iou))
    return iou
def read(self, strength):
    """
    The read operation looks at the first few items on the stack, in the
    order determined by self._read_indices, such that the total strength of
    these items is equal to the value of the strength parameter. If
    necessary, the strength of the last vector is reduced so that the total
    strength of the items read is exactly equal to the strength parameter.
    The output of the read operation is computed by taking the sum of all
    the vectors looked at, weighted by their strengths.

    :type strength: float
    :param strength: The total amount of vectors to look at, measured by
        their strengths

    :rtype: Variable
    :return: The output of the read operation, described above
    """
    r = Variable(torch.zeros([self.batch_size, self.embedding_size]))
    str_used = Variable(torch.zeros(self.batch_size))
    for i in self._read_indices():
        str_i = self.strengths[i, :]
        str_weights = torch.min(str_i, relu(1 - str_used))
        str_weights = str_weights.view(self.batch_size, 1)
        str_weights = str_weights.repeat(1, self.embedding_size)
        r += str_weights * self.contents[i, :, :]
        str_used = str_used + str_i

    return r
def box_iou(box1, box2, order='xyxy'):
    '''Compute the intersection over union of two sets of boxes.

    The default box order is (xmin, ymin, xmax, ymax).

    Args:
      box1: (tensor) bounding boxes, sized [N,4].
      box2: (tensor) bounding boxes, sized [M,4].
      order: (str) box order, either 'xyxy' or 'xywh'.

    Return:
      (tensor) iou, sized [N,M].

    Reference:
      https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py
    '''
    if order == 'xywh':
        box1 = change_box_order(box1, 'xywh2xyxy')
        box2 = change_box_order(box2, 'xywh2xyxy')

    N = box1.size(0)
    M = box2.size(0)

    lt = torch.max(box1[:, None, :2], box2[:, :2])  # [N,M,2]
    rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [N,M,2]

    wh = (rb - lt + 1).clamp(min=0)    # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    area1 = (box1[:, 2] - box1[:, 0] + 1) * (box1[:, 3] - box1[:, 1] + 1)  # [N,]
    area2 = (box2[:, 2] - box2[:, 0] + 1) * (box2[:, 3] - box2[:, 1] + 1)  # [M,]
    iou = inter / (area1[:, None] + area2 - inter)
    return iou
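# Minimal sketch (assumed toy boxes) of the pairwise broadcast used by box_iou above:
# box1[:, None, :2] against box2[:, :2] builds an [N, M, 2] grid of top-left corners,
# so one torch.max / torch.min pair yields every intersection rectangle at once.
import torch

anchors = torch.tensor([[0., 0., 9., 9.], [10., 10., 19., 19.]])  # N = 2
gt_boxes = torch.tensor([[5., 5., 14., 14.]])                     # M = 1
print(box_iou(anchors, gt_boxes))  # [2, 1] IoU matrix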
def forward(self, batch):
    X_data, X_padding_mask, X_lens, X_batch_extend_vocab, X_extra_zeros, context, coverage = self.get_input_from_batch(batch)
    y_data, y_padding_mask, y_max_len, y_lens_var, target_data = self.get_output_from_batch(batch)

    encoder_outputs, encoder_hidden, max_encoder_output = self.encoder(X_data, X_lens)
    s_t_1 = self.reduce_state(encoder_hidden)

    if config.use_maxpool_init_ctx:
        context = max_encoder_output

    step_losses = []
    for di in range(min(y_max_len, self.args.max_decoder_steps)):
        y_t_1 = y_data[:, di]  # Teacher forcing
        final_dist, s_t_1, context, attn_dist, p_gen, coverage = self.decoder(
            y_t_1, s_t_1, encoder_outputs, X_padding_mask, context,
            X_extra_zeros, X_batch_extend_vocab, coverage)
        target = target_data[:, di]
        gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
        step_loss = -torch.log(gold_probs + self.args.eps)
        if self.args.is_coverage:
            step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
            step_loss = step_loss + config.cov_loss_wt * step_coverage_loss

        step_mask = y_padding_mask[:, di]
        step_loss = step_loss * step_mask
        step_losses.append(step_loss)

    sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
    batch_avg_loss = sum_losses / y_lens_var
    loss = torch.mean(batch_avg_loss)

    return loss
def clip_boxes_graph(boxes, window):
    """
    boxes: [N, 4] each row is y1, x1, y2, x2
    window: [4] in the form y1, x1, y2, x2
    """
    # Split corners
    wy1, wx1, wy2, wx2 = window
    y1, x1, y2, x2 = boxes
    # Clip
    y1 = torch.max(torch.min(y1, wy2), wy1)
    x1 = torch.max(torch.min(x1, wx2), wx1)
    y2 = torch.max(torch.min(y2, wy2), wy1)
    x2 = torch.max(torch.min(x2, wx2), wx1)
    clipped = torch.stack([x1, y1, x2, y2], dim=2)
    return clipped
def train_actor_critic(actor, critic, memory, actor_optim, critic_optim, args):
    memory = np.array(memory)
    states = np.vstack(memory[:, 0])
    actions = list(memory[:, 1])
    rewards = list(memory[:, 2])
    masks = list(memory[:, 3])

    old_values = critic(torch.Tensor(states))
    returns, advants = get_gae(rewards, masks, old_values, args)

    mu, std = actor(torch.Tensor(states))
    old_policy = log_prob_density(torch.Tensor(actions), mu, std)

    criterion = torch.nn.MSELoss()
    n = len(states)
    arr = np.arange(n)

    for _ in range(args.ppo_update_num):
        np.random.shuffle(arr)

        for i in range(n // args.batch_size):
            batch_index = arr[args.batch_size * i: args.batch_size * (i + 1)]
            batch_index = torch.LongTensor(batch_index)

            inputs = torch.Tensor(states)[batch_index]
            actions_samples = torch.Tensor(actions)[batch_index]
            returns_samples = returns.unsqueeze(1)[batch_index]
            advants_samples = advants.unsqueeze(1)[batch_index]
            oldvalue_samples = old_values[batch_index].detach()

            values = critic(inputs)
            clipped_values = oldvalue_samples + \
                torch.clamp(values - oldvalue_samples,
                            -args.clip_param,
                            args.clip_param)
            critic_loss1 = criterion(clipped_values, returns_samples)
            critic_loss2 = criterion(values, returns_samples)
            critic_loss = torch.max(critic_loss1, critic_loss2).mean()

            loss, ratio, entropy = surrogate_loss(actor, advants_samples, inputs,
                                                  old_policy.detach(), actions_samples,
                                                  batch_index)
            clipped_ratio = torch.clamp(ratio,
                                        1.0 - args.clip_param,
                                        1.0 + args.clip_param)
            clipped_loss = clipped_ratio * advants_samples
            actor_loss = -torch.min(loss, clipped_loss).mean()

            loss = actor_loss + 0.5 * critic_loss - 0.001 * entropy

            critic_optim.zero_grad()
            loss.backward(retain_graph=True)
            critic_optim.step()

            actor_optim.zero_grad()
            loss.backward()
            actor_optim.step()
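# Minimal sketch of the clipped surrogate objective used in the PPO update above (toy
# values assumed, not the original training loop): torch.min of the unclipped and clipped
# terms keeps the update pessimistic whenever the probability ratio leaves [1 - eps, 1 + eps].
import torch

ratio = torch.tensor([0.5, 1.0, 1.8])  # pi_new / pi_old per sample
advantage = torch.tensor([1.0, -2.0, 0.5])
clip_param = 0.2
clipped_ratio = torch.clamp(ratio, 1.0 - clip_param, 1.0 + clip_param)
actor_loss = -torch.min(ratio * advantage, clipped_ratio * advantage).mean()
print(actor_loss)  # tensor(0.3000)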
def _action_history_match(predicted: List[int], targets: torch.LongTensor) -> int:
    # TODO(mattg): this could probably be moved into a FullSequenceMatch metric, or something.
    # Check if target is big enough to cover prediction (including start/end symbols)
    if len(predicted) > targets.size(1):
        return 0
    predicted_tensor = targets.new_tensor(predicted)
    targets_trimmed = targets[:, :len(predicted)]
    # Return 1 if the predicted sequence is anywhere in the list of targets.
    return torch.max(torch.min(targets_trimmed.eq(predicted_tensor), dim=1)[0]).item()
def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-6, swap=False):
    r"""Creates a criterion that measures the triplet loss given input tensors
    x1, x2, x3 and a margin with a value greater than 0. This is used for
    measuring a relative similarity between samples. A triplet is composed of
    `a`, `p` and `n`: anchor, positive example and negative example
    respectively. The shape of all input variables should be :math:`(N, D)`.

    The distance swap is described in detail in the paper `Learning shallow
    convolutional feature descriptors with triplet losses`_ by V. Balntas,
    E. Riba et al.

    .. math::
        L(a, p, n) = \frac{1}{N} \left( \sum_{i=1}^N \max \{d(a_i, p_i) - d(a_i, n_i) + {\rm margin}, 0\} \right)

    where :math:`d(x_i, y_i) = \| {\bf x}_i - {\bf y}_i \|_2^2`.

    Args:
        anchor: anchor input tensor
        positive: positive input tensor
        negative: negative input tensor
        margin: the margin value. Default: 1
        p: the norm degree. Default: 2
        eps: small epsilon value to avoid numerical issues. Default: 1e-6
        swap: compute distance swap. Default: False

    Shape:
        - Input: :math:`(N, D)` where `D = vector dimension`
        - Output: :math:`(N, 1)`

    Example::

        >>> input1 = autograd.Variable(torch.randn(100, 128))
        >>> input2 = autograd.Variable(torch.randn(100, 128))
        >>> input3 = autograd.Variable(torch.randn(100, 128))
        >>> output = F.triplet_margin_loss(input1, input2, input3, p=2)
        >>> output.backward()

    .. _Learning shallow convolutional feature descriptors with triplet losses:
        http://www.iis.ee.ic.ac.uk/%7Evbalnt/shallow_descr/TFeat_paper.pdf
    """
    assert anchor.size() == positive.size(), "Input sizes between anchor and positive must be equal."
    assert anchor.size() == negative.size(), "Input sizes between anchor and negative must be equal."
    assert positive.size() == negative.size(), "Input sizes between positive and negative must be equal."
    assert anchor.dim() == 2, "Input must be a 2D matrix."
    assert margin > 0.0, 'Margin should be a positive value.'

    d_p = pairwise_distance(anchor, positive, p, eps)
    d_n = pairwise_distance(anchor, negative, p, eps)
    if swap:
        d_s = pairwise_distance(positive, negative, p, eps)
        d_n = torch.min(d_n, d_s)

    dist_hinge = torch.clamp(margin + d_p - d_n, min=0.0)
    loss = torch.mean(dist_hinge)
    return loss
def hard_example_mining(dist_mat, labels, return_inds=False):
    """For each anchor, find the hardest positive and negative sample.
    Args:
      dist_mat: pytorch Variable, pair wise distance between samples, shape [N, N]
      labels: pytorch LongTensor, with shape [N]
      return_inds: whether to return the indices. Save time if `False`(?)
    Returns:
      dist_ap: pytorch Variable, distance(anchor, positive); shape [N]
      dist_an: pytorch Variable, distance(anchor, negative); shape [N]
      p_inds: pytorch LongTensor, with shape [N];
        indices of selected hard positive samples; 0 <= p_inds[i] <= N - 1
      n_inds: pytorch LongTensor, with shape [N];
        indices of selected hard negative samples; 0 <= n_inds[i] <= N - 1
    NOTE: Only consider the case in which all labels have the same number of
      samples, thus we can cope with all anchors in parallel.
    """

    assert len(dist_mat.size()) == 2
    assert dist_mat.size(0) == dist_mat.size(1)
    N = dist_mat.size(0)

    # shape [N, N]
    is_pos = labels.expand(N, N).eq(labels.expand(N, N).t())
    is_neg = labels.expand(N, N).ne(labels.expand(N, N).t())

    # `dist_ap` means distance(anchor, positive)
    # both `dist_ap` and `relative_p_inds` with shape [N, 1]
    dist_ap, relative_p_inds = torch.max(
        dist_mat[is_pos].contiguous().view(N, -1), 1, keepdim=True)
    # `dist_an` means distance(anchor, negative)
    # both `dist_an` and `relative_n_inds` with shape [N, 1]
    dist_an, relative_n_inds = torch.min(
        dist_mat[is_neg].contiguous().view(N, -1), 1, keepdim=True)
    # shape [N]
    dist_ap = dist_ap.squeeze(1)
    dist_an = dist_an.squeeze(1)

    if return_inds:
        # shape [N, N]
        ind = (labels.new().resize_as_(labels)
               .copy_(torch.arange(0, N).long())
               .unsqueeze(0).expand(N, N))
        # shape [N, 1]
        p_inds = torch.gather(
            ind[is_pos].contiguous().view(N, -1), 1, relative_p_inds.data)
        n_inds = torch.gather(
            ind[is_neg].contiguous().view(N, -1), 1, relative_n_inds.data)
        # shape [N]
        p_inds = p_inds.squeeze(1)
        n_inds = n_inds.squeeze(1)
        return dist_ap, dist_an, p_inds, n_inds

    return dist_ap, dist_an
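# Minimal sketch (assumed toy distance matrix and labels) for hard_example_mining above:
# for each row, the hardest positive is the farthest same-label sample and the hardest
# negative is the closest different-label sample.
import torch

dist_mat = torch.tensor([[0.0, 0.3, 0.9, 0.7],
                         [0.3, 0.0, 0.8, 0.6],
                         [0.9, 0.8, 0.0, 0.4],
                         [0.7, 0.6, 0.4, 0.0]])
labels = torch.tensor([0, 0, 1, 1])
dist_ap, dist_an = hard_example_mining(dist_mat, labels)
print(dist_ap)  # tensor([0.3000, 0.3000, 0.4000, 0.4000])
print(dist_an)  # tensor([0.7000, 0.6000, 0.8000, 0.6000])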
def bbox_overlap(boxes, query_boxes):
    """
    :param boxes <torch.Tensor>: (N, 6)
    :param query_boxes <torch.Tensor>: (K, 6)
    :return: overlaps (N, K) overlap between boxes and query boxes
    """
    out_fn = lambda x: x
    box_areas = (boxes[:, 3] - boxes[:, 0]) * (boxes[:, 4] - boxes[:, 1]) * (boxes[:, 5] - boxes[:, 2])
    query_areas = (query_boxes[:, 3] - query_boxes[:, 0]) * (query_boxes[:, 4] - query_boxes[:, 1]) * \
                  (query_boxes[:, 5] - query_boxes[:, 2])

    iw = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) -
          torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t())).clamp(min=0)
    ih = (torch.min(boxes[:, 4:5], query_boxes[:, 4:5].t()) -
          torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t())).clamp(min=0)
    il = (torch.min(boxes[:, 5:6], query_boxes[:, 5:6].t()) -
          torch.max(boxes[:, 2:3], query_boxes[:, 2:3].t())).clamp(min=0)
    ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih * il
    overlaps = iw * ih * il / ua
    return out_fn(overlaps)
def coverage_wu(self, cov, beta=0.):
    """GNMT coverage re-ranking score.

    See "Google's Neural Machine Translation System" :cite:`wu2016google`.
    ``cov`` is expected to be sized ``(*, seq_len)``, where ``*`` is
    probably ``batch_size x beam_size`` but could be several
    dimensions like ``(batch_size, beam_size)``. If ``cov`` is attention,
    then the ``seq_len`` axis probably sums to (almost) 1.
    """
    penalty = -torch.min(cov, cov.clone().fill_(1.0)).log().sum(-1)
    return beta * penalty
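# Minimal numeric sketch (assumed toy coverage values) of the GNMT coverage penalty in
# coverage_wu above: positions whose accumulated attention stays below 1 contribute
# log(coverage), while positions at or above 1 contribute log(1) = 0.
import torch

cov = torch.tensor([[0.5, 1.3, 1.0, 0.2]])  # (batch x beam, seq_len)
penalty = -torch.min(cov, cov.clone().fill_(1.0)).log().sum(-1)
print(penalty)  # -(log 0.5 + log 1 + log 1 + log 0.2) ≈ 2.3026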
def intersection_area(yx_min1, yx_max1, yx_min2, yx_max2):
    """
    Calculates the intersection area of two lists of bounding boxes.

    :author 申瑞珉 (Ruimin Shen)
    :param yx_min1: The top left coordinates (y, x) of the first list (size [N1, 2]) of bounding boxes.
    :param yx_max1: The bottom right coordinates (y, x) of the first list (size [N1, 2]) of bounding boxes.
    :param yx_min2: The top left coordinates (y, x) of the second list (size [N2, 2]) of bounding boxes.
    :param yx_max2: The bottom right coordinates (y, x) of the second list (size [N2, 2]) of bounding boxes.
    :return: The matrix (size [N1, N2]) of the intersection area.
    """
    ymin1, xmin1 = torch.split(yx_min1, 1, -1)
    ymax1, xmax1 = torch.split(yx_max1, 1, -1)
    ymin2, xmin2 = torch.split(yx_min2, 1, -1)
    ymax2, xmax2 = torch.split(yx_max2, 1, -1)
    max_ymin = torch.max(ymin1.repeat(1, ymin2.size(0)), torch.transpose(ymin2, 0, 1).repeat(ymin1.size(0), 1))  # PyTorch's bug
    min_ymax = torch.min(ymax1.repeat(1, ymax2.size(0)), torch.transpose(ymax2, 0, 1).repeat(ymax1.size(0), 1))  # PyTorch's bug
    height = torch.clamp(min_ymax - max_ymin, min=0)
    max_xmin = torch.max(xmin1.repeat(1, xmin2.size(0)), torch.transpose(xmin2, 0, 1).repeat(xmin1.size(0), 1))  # PyTorch's bug
    min_xmax = torch.min(xmax1.repeat(1, xmax2.size(0)), torch.transpose(xmax2, 0, 1).repeat(xmax1.size(0), 1))  # PyTorch's bug
    width = torch.clamp(min_xmax - max_xmin, min=0)
    return height * width
def forward(self, sent_tuple):
    # sent_len: [max_len, ..., min_len] (batch)
    # sent: Variable(seqlen x batch x worddim)

    sent, sent_len = sent_tuple
    bsize = sent.size(1)

    self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
        Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

    # Sort by length (keep idx)
    sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
    sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))

    # Handling padding in Recurrent Networks
    sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
    sent_output = self.enc_lstm(sent_packed, (self.init_lstm, self.init_lstm))[0]
    # seqlen x batch x 2*nhid
    sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

    # Un-sort by length
    idx_unsort = np.argsort(idx_sort)
    sent_output = sent_output.index_select(1, Variable(torch.cuda.LongTensor(idx_unsort)))

    sent_output = sent_output.transpose(0, 1).contiguous()

    sent_output_proj = self.proj_lstm(sent_output.view(-1, 2 * self.enc_lstm_dim)).view(
        bsize, -1, 2 * self.enc_lstm_dim)

    sent_keys = self.proj_enc(sent_output.view(-1, 2 * self.enc_lstm_dim)).view(
        bsize, -1, 2 * self.enc_lstm_dim)

    sent_max = torch.max(sent_output, 1)[0].squeeze(1)  # (bsize, 2*nhid)
    sent_summary = self.proj_query(sent_max).unsqueeze(1).expand_as(sent_keys)  # (bsize, seqlen, 2*nhid)

    sent_M = torch.tanh(sent_keys + sent_summary)  # (bsize, seqlen, 2*nhid) YANG : M = tanh(Wh_i + Wh_avg)
    sent_w = self.query_embedding(Variable(torch.LongTensor(bsize * [0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)

    sent_alphas = self.softmax(sent_M.bmm(sent_w).squeeze(2)).unsqueeze(1)  # (bsize, 1, seqlen)

    if int(time.time()) % 200 == 0:
        print('w', torch.max(sent_w[0]), torch.min(sent_w[0]))
        print('alphas', sent_alphas[0][0][0:sent_len[0]])

    # Get attention vector
    emb = sent_alphas.bmm(sent_output_proj).squeeze(1)

    return emb
def forward(self, x):
    # feed through neural network
    h = F.relu(self.fc1(x))
    # h = F.relu(self.fc2(h))

    fudge_lower_bdd = torch.Tensor([-8])
    h = torch.max(self.fc3(h), fudge_lower_bdd)
    h = torch.min(h, -fudge_lower_bdd)

    log_class_weights = self.log_softmax(h)

    return log_class_weights
def update(self, replay_buffer, n_iter):

    for i in range(n_iter):
        state, action, reward, next_state, done = replay_buffer.sample(batch_size)

        state = torch.FloatTensor(state).to(device)
        action = torch.FloatTensor(action).to(device)
        reward = torch.FloatTensor(reward).to(device)
        next_state = torch.FloatTensor(next_state).to(device)
        done = torch.FloatTensor(done).to(device)

        # Select next action according to target policy:
        noise = torch.empty_like(action).data.normal_(0, policy_noise).to(device)
        noise = noise.clamp(-noise_clip, noise_clip)
        next_action = (self.actor_target(next_state) + noise)
        next_action = next_action.clamp(-self.max_action, self.max_action)

        # Compute target Q-value:
        target_Q1 = self.critic_1_target(next_state, next_action)
        target_Q2 = self.critic_2_target(next_state, next_action)
        target_Q = torch.min(target_Q1, target_Q2)
        target_Q = reward + ((1 - done) * gamma * target_Q).detach()

        # Optimize Critic 1:
        current_Q1 = self.critic_1(state, action)
        loss_Q1 = F.mse_loss(current_Q1, target_Q)
        self.critic_1_optimizer.zero_grad()
        loss_Q1.backward()
        self.critic_1_optimizer.step()

        # Optimize Critic 2:
        current_Q2 = self.critic_2(state, action)
        loss_Q2 = F.mse_loss(current_Q2, target_Q)
        self.critic_2_optimizer.zero_grad()
        loss_Q2.backward()
        self.critic_2_optimizer.step()

        # Delayed policy updates:
        if i % policy_delay == 0:
            # Compute actor loss:
            actor_loss = -self.critic_1(state, self.actor(state)).mean()

            # Optimize the actor
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # Polyak averaging update:
            for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
                target_param.data.copy_((polyak * target_param.data) + ((1 - polyak) * param.data))

            for param, target_param in zip(self.critic_1.parameters(), self.critic_1_target.parameters()):
                target_param.data.copy_((polyak * target_param.data) + ((1 - polyak) * param.data))

            for param, target_param in zip(self.critic_2.parameters(), self.critic_2_target.parameters()):
                target_param.data.copy_((polyak * target_param.data) + ((1 - polyak) * param.data))
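# Minimal sketch of the clipped double-Q target used in the TD3 update above (assumed toy
# critic outputs): taking torch.min over the two target critics reduces the overestimation
# bias of a single Q estimate.
import torch

target_Q1 = torch.tensor([[10.0], [3.0]])
target_Q2 = torch.tensor([[8.0], [4.0]])
reward = torch.tensor([[1.0], [0.0]])
done = torch.tensor([[0.0], [1.0]])
gamma = 0.99
target_Q = reward + (1 - done) * gamma * torch.min(target_Q1, target_Q2)
print(target_Q)  # [[1 + 0.99 * 8], [0.0]]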
def train(self, replay_buffer, iterations, batch_size=30, discount=0.99,
          tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):

    # self.critic_scheduler.step(critic_loss)
    # self.actor_scheduler.step(actor_loss)

    for it in range(iterations):

        # Step 4: We sample a batch of transitions (s, s', a, r) from the memory
        batch_states1, batch_states2, batch_next_states1, batch_next_states2, batch_actions, batch_rewards, batch_dones = replay_buffer.sample(batch_size)
        state1 = torch.Tensor(batch_states1).to(device)
        state2 = torch.Tensor(batch_states2).to(device)
        next_state1 = torch.Tensor(batch_next_states1).to(device)
        next_state2 = torch.Tensor(batch_next_states2).to(device)
        action = torch.Tensor(batch_actions).to(device).unsqueeze(1)
        reward = torch.Tensor(batch_rewards).to(device)
        dones = torch.Tensor(batch_dones).to(device)

        # Step 5: From the next state s', the Actor target plays the next action a'
        next_action = self.actor_target(next_state1, next_state2)

        # Step 6: We add Gaussian noise to this next action a' and we clamp it in a range of values supported by the environment
        noise = torch.Tensor(batch_actions).data.normal_(0, policy_noise).to(device)
        noise = noise.clamp(-noise_clip, noise_clip).unsqueeze(1)
        next_action = (next_action + noise).clamp(-self.max_action, self.max_action)

        # Step 7: The two Critic targets each take the couple (s', a') as input and return two Q-values Qt1(s',a') and Qt2(s',a') as outputs
        target_Q1, target_Q2 = self.critic_target(next_state1, next_state2, next_action)

        # Step 8: We keep the minimum of these two Q-values: min(Qt1, Qt2)
        target_Q = torch.min(target_Q1, target_Q2)

        # Step 9: We get the final target of the two Critic models, which is: Qt = r + γ * min(Qt1, Qt2), where γ is the discount factor
        target_Q = reward + ((1 - dones) * discount * target_Q).detach()

        # Step 10: The two Critic models each take the couple (s, a) as input and return two Q-values Q1(s,a) and Q2(s,a) as outputs
        current_Q1, current_Q2 = self.critic(state1, state2, action)

        # Step 11: We compute the loss coming from the two Critic models: Critic Loss = MSE_Loss(Q1(s,a), Qt) + MSE_Loss(Q2(s,a), Qt)
        critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q)
        if it % 20 == 0:
            print("_________________________")
            print("iteration number", it)
            print("critic loss", critic_loss)

        # Step 12: We backpropagate this Critic loss and update the parameters of the two Critic models with an SGD optimizer
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        # self.critic_scheduler.step()
        # self.critic_scheduler.step(critic_loss)
        self.critic_optimizer.step()

        # Step 13: Once every two iterations, we update our Actor model by performing gradient ascent on the output of the first Critic model
        if it % policy_freq == 0:
            actor_loss = -self.critic.Q1(state1, state2, self.actor(state1, state2)).mean()
            if it % 20 == 0:
                print("actor loss", actor_loss)
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            # self.actor_scheduler.step()
            # self.actor_scheduler.step(actor_loss)
            self.actor_optimizer.step()

            # if it % 2*policy_freq == 0:
            # Step 14: Still once every two iterations, we update the weights of the Actor target by polyak averaging
            for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
                target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

            # Step 15: Still once every two iterations, we update the weights of the Critic target by polyak averaging
            for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
}, "training": { "epochs": 10000, "batchsize": 64, "savefile": "../keras_models/fully_trained_stamp" }, "directory": "./test_data/", "imgtype": "png", "scale_integer": False, "shape": [201, 201], "noise": 0.05, "ext_noise" : 0.01, "train": {"nframes": 10}, "test": {"nframes": 10}, "eval": {"nframes": 10}, "overwrite": True, "max_overlaps": 2, "scale_integer": False, "delete_files_after_training": False } makedata(config) dl = EstimatorDataset(config) for i in range(len(dl)): print(torch.max(dl[i][0]),torch.min(dl[i][0])) print(dl[i][0].shape) print(type(dl[i][0]))
def m2min(x, y): return torch.min(x.unsqueeze(1), y)
def mmin2(x): return torch.min(x, dim=2)
def interpolate_dense_features(pos, dense_features, return_corners=False):
    device = pos.device

    ids = torch.arange(0, pos.size(1), device=device)

    _, h, w = dense_features.size()

    i = pos[0, :]
    j = pos[1, :]

    # Valid corners
    i_top_left = torch.floor(i).long()
    j_top_left = torch.floor(j).long()
    valid_top_left = torch.min(i_top_left >= 0, j_top_left >= 0)

    i_top_right = torch.floor(i).long()
    j_top_right = torch.ceil(j).long()
    valid_top_right = torch.min(i_top_right >= 0, j_top_right < w)

    i_bottom_left = torch.ceil(i).long()
    j_bottom_left = torch.floor(j).long()
    valid_bottom_left = torch.min(i_bottom_left < h, j_bottom_left >= 0)

    i_bottom_right = torch.ceil(i).long()
    j_bottom_right = torch.ceil(j).long()
    valid_bottom_right = torch.min(i_bottom_right < h, j_bottom_right < w)

    valid_corners = torch.min(torch.min(valid_top_left, valid_top_right),
                              torch.min(valid_bottom_left, valid_bottom_right))

    i_top_left = i_top_left[valid_corners]
    j_top_left = j_top_left[valid_corners]

    i_top_right = i_top_right[valid_corners]
    j_top_right = j_top_right[valid_corners]

    i_bottom_left = i_bottom_left[valid_corners]
    j_bottom_left = j_bottom_left[valid_corners]

    i_bottom_right = i_bottom_right[valid_corners]
    j_bottom_right = j_bottom_right[valid_corners]

    ids = ids[valid_corners]
    if ids.size(0) == 0:
        raise EmptyTensorError

    # Interpolation
    i = i[ids]
    j = j[ids]
    dist_i_top_left = i - i_top_left.float()
    dist_j_top_left = j - j_top_left.float()
    w_top_left = (1 - dist_i_top_left) * (1 - dist_j_top_left)
    w_top_right = (1 - dist_i_top_left) * dist_j_top_left
    w_bottom_left = dist_i_top_left * (1 - dist_j_top_left)
    w_bottom_right = dist_i_top_left * dist_j_top_left

    descriptors = (
        w_top_left * dense_features[:, i_top_left, j_top_left] +
        w_top_right * dense_features[:, i_top_right, j_top_right] +
        w_bottom_left * dense_features[:, i_bottom_left, j_bottom_left] +
        w_bottom_right * dense_features[:, i_bottom_right, j_bottom_right]
    )

    pos = torch.cat([i.view(1, -1), j.view(1, -1)], dim=0)

    if not return_corners:
        return [descriptors, pos, ids]
    else:
        corners = torch.stack([
            torch.stack([i_top_left, j_top_left], dim=0),
            torch.stack([i_top_right, j_top_right], dim=0),
            torch.stack([i_bottom_left, j_bottom_left], dim=0),
            torch.stack([i_bottom_right, j_bottom_right], dim=0)
        ], dim=0)
        return [descriptors, pos, ids, corners]
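# Small sketch (assumed toy coordinates) of the idiom used by interpolate_dense_features
# above: the validity tests are combined with torch.min, which on the 0/1 tensors produced
# by comparisons acts as an elementwise logical AND (cast to .byte() here to stay close to
# the uint8 comparison results the snippet was written against).
import torch

i = torch.tensor([0.5, 2.7, -1.0])
j = torch.tensor([1.2, -0.3, 3.4])
valid = torch.min((i >= 0).byte(), (j >= 0).byte())
print(valid)  # tensor([1, 0, 0], dtype=torch.uint8)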
def initialize(self, image, info: dict) -> dict:
    # Initialize some stuff
    self.frame_num = 1
    if not self.params.has('device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize network
    self.initialize_features()

    # The DiMP network
    # self.net = self.params.net
    self.net_rgb = self.params.net_rgb
    self.net_d = self.params.net_d

    # Time initialization
    tic = time.time()

    # Convert image
    im = image[:, :, :3]
    dp = image[:, :, 3:]
    im = numpy_to_torch(im)
    # dp = numpy_to_torch(dp)

    # Get target position and size
    state = info['init_bbox']
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Get object id
    self.object_id = info.get('object_ids', [None])[0]
    self.id_str = '' if self.object_id is None else ' {}'.format(self.object_id)

    # Set sizes
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    sz = self.params.image_sample_size
    sz = torch.Tensor([sz, sz] if isinstance(sz, int) else sz)
    if self.params.get('use_image_aspect_ratio', False):
        sz = self.image_sz * sz.prod().sqrt() / self.image_sz.prod().sqrt()
        stride = self.params.get('feature_stride', 32)
        sz = torch.round(sz / stride) * stride
    self.img_sample_sz = sz
    self.img_support_sz = self.img_sample_sz

    # Set search area
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    self.target_scale = math.sqrt(search_area) / self.img_sample_sz.prod().sqrt()

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Setup scale factors
    if not self.params.has('scale_factors'):
        self.params.scale_factors = torch.ones(1)
    elif isinstance(self.params.scale_factors, (list, tuple)):
        self.params.scale_factors = torch.Tensor(self.params.scale_factors)

    # Setup scale bounds
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    init_backbone_feat_rgb, init_backbone_feat_d = self.generate_init_samples(im, dp)

    # Initialize classifier
    self.init_classifier(init_backbone_feat_rgb, init_backbone_feat_d)

    # Initialize IoUNet
    if self.params.get('use_iou_net', True):
        self.init_iou_net(init_backbone_feat_rgb, init_backbone_feat_d)

    out = {'time': time.time() - tic}
    return out
def mmin(x): return torch.min(x, dim=1, keepdim=True)
def sgnmin(y, w): return torch.sign(y * w) * torch.min(torch.abs(y), torch.abs(w))
# coeffs = eval_model.rbfweights.detach().data.numpy()
mx, mn = -np.inf, np.inf
cnter = 0
for i in range(len(graph_list)):
    dat = data[i]
    G = graph_list[i]
    # dat['secondary_gram'] = torch.matmul(dat['secondary_gram'], principals)
    dat['secondary_gram'] = torch.from_numpy(u[cnter:cnter + len(G), indices])
    cnter += len(G)
    # hks = torch.flatten(torch.matmul(dat['secondary_gram'], torch.from_numpy(coeffs))).float().detach()
    hks = torch.flatten(torch.matmul(dat['secondary_gram'], winit)).float().detach()
    mx = max(float(torch.max(hks)), mx)
    mn = min(float(torch.min(hks)), mn)
    st = simplex_tree_constructor([list(e) for e in G.edges()])
    dat['simplex_tree'] = filtration_update(st, hks.numpy())
    dat['vectors'] = torch.zeros([1, 18])
    # recompute persistence
    pers = dat['simplex_tree'].persistence(homology_coeff_field=2)
    del pers

label = torch.tensor(label).float()
print(mn, mx)

torch.set_rng_state(rng_state)  # fix init state
# if fix wavelet
eval_model = ModelStatsRBF(pds, mn=mn, mx=mx, weightinit=winit)
eval_model.update = True
def _compute_coverage_loss(self, std_attn, coverage_attn):
    covloss = torch.min(std_attn, coverage_attn).sum()
    covloss *= self.lambda_coverage
    return covloss
def initialize(self, image, state, *args, **kwargs):
    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    self.time = 0
    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    if getattr(self.params, 'search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    elif self.params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')
    if self.params.feature_size_odd:
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
    else:
        self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')

    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate))**self.params.CG_forgetting_rate

    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

    # Initialize some learning things
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Setup scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize iounet
    if self.use_iou_net:
        self.init_iou_net()

    # Initialize projection matrix
    self.init_projection_matrix(x)

    # Transform to get the training sample
    train_x = self.preprocess_sample(x)

    # Generate label function
    init_y = self.init_label_function(train_x)

    # Init memory
    self.init_memory(train_x)

    # Init optimizer and do initial optimization
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    self.time += time.time() - tic
def _rot_loss_fn(pred, target):
    loss0 = _quat_loss_fn(pred, target[:, 0])
    loss1 = _quat_loss_fn(pred, target[:, 1])
    return torch.min(loss0, loss1)
def backward(ctx, grad_output):
    z, scales, locs, logits, pis = ctx.saved_tensors
    dim = scales.size(-1)
    K = logits.size(-1)
    g = grad_output  # l b i
    g = g.unsqueeze(-2)  # l b 1 i
    batch_dims = locs.dim() - 2

    locs_tilde = locs / scales  # b j i
    sigma_0 = torch.min(scales, -2, keepdim=True)[0]  # b 1 i
    z_shift = (z.unsqueeze(-2) - locs) / sigma_0  # l b j i
    z_tilde = z.unsqueeze(-2) / scales - locs_tilde  # l b j i

    mu_cd = locs.unsqueeze(-2) - locs.unsqueeze(-3)  # b c d i
    mu_cd_norm = torch.pow(mu_cd, 2.0).sum(-1).sqrt()  # b c d
    mu_cd /= mu_cd_norm.unsqueeze(-1)  # b c d i
    diagonals = z.new_empty((K,), dtype=torch.long)
    torch.arange(K, out=diagonals)
    mu_cd[..., diagonals, diagonals, :] = 0.0

    mu_ll_cd = (locs.unsqueeze(-2) * mu_cd).sum(-1)  # b c d
    z_ll_cd = (z.unsqueeze(-2).unsqueeze(-2) * mu_cd).sum(-1)  # l b c d
    z_perp_cd = z.unsqueeze(-2).unsqueeze(-2) - z_ll_cd.unsqueeze(-1) * mu_cd  # l b c d i
    z_perp_cd_sqr = torch.pow(z_perp_cd, 2.0).sum(-1)  # l b c d

    shift_indices = z.new_empty((dim,), dtype=torch.long)
    torch.arange(dim, out=shift_indices)
    shift_indices = shift_indices - 1
    shift_indices[0] = 0

    z_shift_cumsum = torch.pow(z_shift, 2.0)
    z_shift_cumsum = z_shift_cumsum.sum(-1, keepdim=True) - torch.cumsum(z_shift_cumsum, dim=-1)  # l b j i
    z_tilde_cumsum = torch.cumsum(torch.pow(z_tilde, 2.0), dim=-1)  # l b j i
    z_tilde_cumsum = torch.index_select(z_tilde_cumsum, -1, shift_indices)
    z_tilde_cumsum[..., 0] = 0.0
    r_sqr_ji = z_shift_cumsum + z_tilde_cumsum  # l b j i

    log_scales = torch.log(scales)  # b j i
    epsilons_sqr = torch.pow(z_tilde, 2.0)  # l b j i
    log_qs = -0.5 * epsilons_sqr - 0.5 * math.log(2.0 * math.pi) - log_scales  # l b j i
    log_q_j = log_qs.sum(-1, keepdim=True)  # l b j 1
    q_j = torch.exp(log_q_j)  # l b j 1
    q_tot = (pis * q_j.squeeze(-1)).sum(-1)  # l b
    q_tot = q_tot.unsqueeze(-1)  # l b 1

    root_two = math.sqrt(2.0)
    shift_log_scales = log_scales[..., shift_indices]
    shift_log_scales[..., 0] = 0.0
    sigma_products = torch.cumsum(shift_log_scales, dim=-1).exp()  # b j i

    reverse_indices = z.new_tensor(range(dim - 1, -1, -1), dtype=torch.long)
    reverse_log_sigma_0 = sigma_0.log()[..., reverse_indices]  # b 1 i
    sigma_0_products = torch.cumsum(reverse_log_sigma_0, dim=-1).exp()[..., reverse_indices - 1]  # b 1 i
    sigma_0_products[..., -1] = 1.0
    sigma_products *= sigma_0_products

    logits_grad = torch.erf(z_tilde / root_two) - torch.erf(z_shift / root_two)  # l b j i
    logits_grad *= torch.exp(-0.5 * r_sqr_ji)  # l b j i
    logits_grad = (logits_grad * g / sigma_products).sum(-1)  # l b j
    logits_grad = sum_leftmost(logits_grad / q_tot, -1 - batch_dims)  # b j
    logits_grad *= 0.5 * math.pow(2.0 * math.pi, -0.5 * (dim - 1))
    logits_grad = -pis * logits_grad
    logits_grad = logits_grad - logits_grad.sum(-1, keepdim=True) * pis

    mu_ll_dc = torch.transpose(mu_ll_cd, -1, -2)
    v_cd = torch.erf((z_ll_cd - mu_ll_cd) / root_two) - torch.erf((z_ll_cd + mu_ll_dc) / root_two)
    v_cd *= torch.exp(-0.5 * z_perp_cd_sqr)  # l b c d

    mu_cd_g = (g.unsqueeze(-2) * mu_cd).sum(-1)  # l b c d
    v_cd *= -mu_cd_g * pis.unsqueeze(-2) * 0.5 * math.pow(2.0 * math.pi, -0.5 * (dim - 1))  # l b c d
    v_cd = pis * sum_leftmost(v_cd.sum(-1) / q_tot, -1 - batch_dims)
    logits_grad += v_cd

    prefactor = pis.unsqueeze(-1) * q_j * g / q_tot.unsqueeze(-1)
    locs_grad = sum_leftmost(prefactor, -2 - batch_dims)
    scales_grad = sum_leftmost(prefactor * z_tilde, -2 - batch_dims)

    return locs_grad, scales_grad, logits_grad, None, None, None
def minimum(self, a, b):
    a_ = self.as_tensor(a)
    b_ = self.as_tensor(b).to(a_.dtype)
    return torch.min(a_, other=b_)
def oxy(x, y):
    xu = x.unsqueeze(1)
    return torch.min(torch.sqrt(xu) * y, xu * torch.sqrt(y))
def forward(self, state, time_step, args, reset_flag=False):
    if args.demo_type == 'uav':
        # SenAvo
        if args.variance and args.prior_decay:
            coefs = [args.variance, args.variance]
            prior_decay = args.prior_decay  # prior_decay: factor that shrinks prior_sigma so the prior policy's influence decays
        else:
            coefs = [0.09, 0.09]
            prior_decay = 0.005
        time_step = torch.Tensor([time_step])[0]

        perspective = torch.atan(state[12] / state[13])
        first_perspective = torch.where(
            state[13] > 0,  # cosine of the heading angle (in degrees, not radians)
            torch.where(state[12] > 0, perspective / np.pi * 180.0,
                        (perspective + 2 * np.pi) / np.pi * 180.0),
            (perspective + np.pi) / np.pi * 180.0)

        target = torch.atan(state[10] / state[11])  # angle of the line between the agent and the target
        position_target = torch.where(
            state[11] > 0,
            torch.where(state[10] > 0, target / np.pi * 180.0,
                        (target + 2 * np.pi) / np.pi * 180.0),
            (target + np.pi) / np.pi * 180.0)

        # compute the included angle; remainder(input, divisor) returns a new tensor with the
        # elementwise division remainder, which has the same sign as the divisor
        first_target = torch.remainder(first_perspective - position_target, 360.0)

        # normalize the included angle; torch.sign() returns 1.0 for positive entries and -1.0 for
        # negative ones: if the angle is at most 180 divide it by 180, otherwise use the complementary angle
        average_direction = torch.where(
            torch.sign(180.0 - first_target) + 1.0 > 0,
            -first_target / 180.0, (360.0 - first_target) / 180.0)
        variance_direction = 0.1 * average_direction + coefs[0]  # 0.1

        # argmin returns the index of the minimum along the given dimension; state[0:9] holds the
        # distances along the basic directions. If the closest-obstacle index falls on one side
        # (an imminent collision on the left) take the first branch (+45 degrees), otherwise the second
        turning_free = torch.where(
            torch.sign(4 - torch.argmin(state[0:9]).float()) + 1.0 > 0,
            45.0 + 0.1 * average_direction,
            -45.0 + 0.1 * average_direction)  # 0 0.1
        average_free = turning_free / 180.0  # direction to steer towards
        variance_free = 0.1 * average_free + coefs[0]  # 0.1

        # check whether the nearest distance exceeds the collision distance; if so no turn is
        # needed, otherwise adjust the heading
        average_steer = torch.where(
            torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
            average_direction, average_free)
        variance_steer = torch.where(
            torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
            variance_direction, variance_free)

        speed = state[14]
        average_throttle = torch.clamp(2.5 - 50 * (speed / 2 + 0.5), -0.5, 0.5)
        variance_throttle = 0.1 * average_throttle + coefs[1]  # 0.1

        decay = prior_decay * (time_step - 1) + 1
        covariance = torch.cat(  # concatenate along dimension 0
            (variance_steer.unsqueeze_(0), variance_throttle.unsqueeze_(0)), 0) * decay  # Eq. (25)
        average = torch.cat(
            (average_steer.unsqueeze_(0), average_throttle.unsqueeze_(0)), 0)

    elif args.demo_type == 'uav_wrong':
        # Naive
        if reset_flag:
            self.current_direct_wrong = 'north'
            self.min_distance_x = 50.0
            self.min_distance_y = 50.0
        if args:
            coefs = args.variance * 2
            prior_decay = args.prior_decay
        else:
            coefs = [0.09, 0.09]
            prior_decay = 0.005
        time_step = torch.Tensor([time_step])[0]

        perspective = torch.atan(state[12] / state[13])
        first_perspective = torch.where(
            state[13] > 0,
            torch.where(state[12] > 0, perspective / np.pi * 180.0,
                        (perspective + 2 * np.pi) / np.pi * 180.0),
            (perspective + np.pi) / np.pi * 180.0)

        target = torch.atan(state[10] / state[11])
        position_target = torch.where(
            state[11] > 0,
            torch.where(state[10] > 0, target / np.pi * 180.0,
                        (target + 2 * np.pi) / np.pi * 180.0),
            (target + np.pi) / np.pi * 180.0)

        distance = (state[9] / 2 + 0.5) * \
            (torch.sqrt(torch.Tensor([2])[0]) * 3000)
        distance_y = torch.abs(distance * torch.sin(
            2 * position_target / 360 * torch.Tensor([np.pi])[0]))
        distance_x = torch.abs(distance * torch.cos(
            2 * position_target / 360 * torch.Tensor([np.pi])[0]))

        if distance_y > self.min_distance_y:
            self.current_direct_wrong = 'north'
        elif distance_x > self.min_distance_x:
            if self.current_direct_wrong == 'north':
                self.min_distance_x -= 5
            self.current_direct_wrong = 'east'
        else:
            if self.current_direct_wrong == 'east':
                self.min_distance_y -= 5
            self.current_direct_wrong = 'north'

        if self.current_direct_wrong == 'north':
            if position_target > 0 and position_target < 180:
                position_target = 90
            else:
                position_target = 270
        else:
            if position_target < 90 or position_target > 270:
                position_target = 0
            else:
                position_target = 180

        first_target = torch.remainder(first_perspective - position_target, 360.0)
        average_direction = torch.where(
            torch.sign(180.0 - first_target) + 1.0 > 0,
            -first_target / 180.0, (360.0 - first_target) / 180.0)
        variance_direction = 0.0 * average_direction + coefs[0]  # 0.1

        turning_free = torch.where(
            torch.sign(4 - torch.argmin(state[0:9]).float()) + 1.0 > 0,
            45.0 + 0 * average_direction,
            -45.0 + 0 * average_direction)
        average_free = turning_free / 180.0
        variance_free = 0.0 * average_free + coefs[0]  # 0.1

        average_steer = torch.where(
            torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
            average_direction, average_free)
        variance_steer = torch.where(
            torch.sign(100 * torch.min(state[0:9]) - 15.0) + 1.0 > 0,
            variance_direction, variance_free)

        speed = state[14]
        average_throttle = torch.clamp(2.5 - 50 * (speed / 2 + 0.5), -0.5, 0.5)
        variance_throttle = 0.0 * average_throttle + coefs[1]

        decay = prior_decay * (time_step - 1) + 1
        covariance = torch.cat(
            (variance_steer.unsqueeze_(0), variance_throttle.unsqueeze_(0)), 0) * decay
        average = torch.cat(
            (average_steer.unsqueeze_(0), average_throttle.unsqueeze_(0)), 0)

    else:
        average = self.agent_ddpg.select_action(state)  # no prior policy, so just pick an action
        time_step = torch.Tensor([time_step])[0]
        decay = args.prior_decay * (time_step - 1) + 1
        covariance = torch.ones(average.shape) * 0.1 * decay

    return average, covariance
def c_luckas(x, y):
    xu = x.unsqueeze(1)
    m = torch.ones_like(y)
    return torch.min(m, xu + y)
if CUDA:
    torch.cuda.synchronize()

try:
    output
except NameError:
    print("No detections were made")
    exit()

output_recast = time.time()
# TODO: Uncomment
# im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())

scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)

output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

output[:, 1:5] /= scaling_factor

for i in range(output.shape[0]):
    output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[0, 0])
    output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[0, 1])

class_load = time.time()
colors = pkl.load(open("pallete.dms", "rb"))

draw = time.time()

def write(x, results):
def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]     # [1,M,2]
    inter = torch.min(wh1, wh2).prod(2)  # [N,M]
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = inter / (area1 + area2 - inter)
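# Minimal usage sketch (assumed toy anchors) for wh_iou above: with only widths and heights,
# every pair of boxes is implicitly anchored at the same corner, so the intersection is just
# the elementwise minimum of the two (w, h) pairs.
import torch

anchors = torch.tensor([[10., 14.], [33., 23.]])  # n x 2
gt_wh = torch.tensor([[12., 12.]])                # m x 2
print(wh_iou(anchors, gt_wh))  # n x m IoU matrix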