def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
    loss = 0

    # get the nonzero indices
    mask_a_indices_flat = torch.nonzero(mask_a)
    mask_b_indices_flat = torch.nonzero(mask_b)
    if len(mask_a_indices_flat) == 0:
        return Variable(torch.cuda.LongTensor([0]), requires_grad=True)
    if len(mask_b_indices_flat) == 0:
        return Variable(torch.cuda.LongTensor([0]), requires_grad=True)

    # take num_samples random pixel samples of the object, using the mask
    num_samples = 10000

    rand_numbers_a = (torch.rand(num_samples) * len(mask_a_indices_flat)).cuda()
    rand_indices_a = Variable(torch.floor(rand_numbers_a).type(torch.cuda.LongTensor), requires_grad=False)
    randomized_mask_a_indices_flat = torch.index_select(mask_a_indices_flat, 0, rand_indices_a).squeeze(1)

    rand_numbers_b = (torch.rand(num_samples) * len(mask_b_indices_flat)).cuda()
    rand_indices_b = Variable(torch.floor(rand_numbers_b).type(torch.cuda.LongTensor), requires_grad=False)
    randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)

    # index into the image and get descriptors
    M_margin = 0.5  # margin parameter
    random_img_a_object_descriptors = torch.index_select(image_a_pred, 1, randomized_mask_a_indices_flat)
    random_img_b_object_descriptors = torch.index_select(image_b_pred, 1, randomized_mask_b_indices_flat)
    pixel_wise_loss = (random_img_a_object_descriptors - random_img_b_object_descriptors).pow(2).sum(dim=2)
    pixel_wise_loss = torch.add(pixel_wise_loss, -2 * M_margin)
    zeros_vec = torch.zeros_like(pixel_wise_loss)
    loss += torch.max(zeros_vec, pixel_wise_loss).sum()
    return loss
def __call__(self, spec_f):

    spec_f, is_variable = _check_is_variable(spec_f)
    n_fft = spec_f.size(2)

    m_min = 0. if self.f_min == 0 else 2595 * np.log10(1. + (self.f_min / 700))
    m_max = 2595 * np.log10(1. + (self.f_max / 700))

    m_pts = torch.linspace(m_min, m_max, self.n_mels + 2)
    f_pts = (700 * (10 ** (m_pts / 2595) - 1))

    bins = torch.floor(((n_fft - 1) * 2) * f_pts / self.sr).long()

    fb = torch.zeros(n_fft, self.n_mels)
    for m in range(1, self.n_mels + 1):
        f_m_minus = bins[m - 1].item()
        f_m = bins[m].item()
        f_m_plus = bins[m + 1].item()

        if f_m_minus != f_m:
            fb[f_m_minus:f_m, m - 1] = (torch.arange(f_m_minus, f_m) - f_m_minus) / (f_m - f_m_minus)
        if f_m != f_m_plus:
            fb[f_m:f_m_plus, m - 1] = (f_m_plus - torch.arange(f_m, f_m_plus)) / (f_m_plus - f_m)

    fb = Variable(fb)
    spec_m = torch.matmul(spec_f, fb)  # (c, l, n_fft) dot (n_fft, n_mels) -> (c, l, n_mels)
    return spec_m if is_variable else spec_m.data
def pixelcnn_generate(self, z1, z2):
    # Sampling from PixelCNN
    x_zeros = torch.zeros(
        (z1.size(0), self.args.input_size[0], self.args.input_size[1], self.args.input_size[2]))
    if self.args.cuda:
        x_zeros = x_zeros.cuda()

    for i in range(self.args.input_size[1]):
        for j in range(self.args.input_size[2]):
            samples_mean, samples_logvar = self.p_x(Variable(x_zeros, volatile=True), z1, z2)
            samples_mean = samples_mean.view(samples_mean.size(0), self.args.input_size[0],
                                             self.args.input_size[1], self.args.input_size[2])

            if self.args.input_type == 'binary':
                probs = samples_mean[:, :, i, j].data
                x_zeros[:, :, i, j] = torch.bernoulli(probs).float()
                samples_gen = samples_mean

            elif self.args.input_type == 'gray' or self.args.input_type == 'continuous':
                binsize = 1. / 256.
                samples_logvar = samples_logvar.view(samples_mean.size(0), self.args.input_size[0],
                                                     self.args.input_size[1], self.args.input_size[2])
                means = samples_mean[:, :, i, j].data
                logvar = samples_logvar[:, :, i, j].data
                # sample from logistic distribution
                u = torch.rand(means.size()).cuda()
                y = torch.log(u) - torch.log(1. - u)
                sample = means + torch.exp(logvar) * y
                x_zeros[:, :, i, j] = torch.floor(sample / binsize) * binsize
                samples_gen = samples_mean

    return samples_gen
def random_sample_from_masked_image_torch(img_mask, num_samples):
    """
    :param img_mask: Numpy array [H,W] or torch.Tensor with shape [H,W]
    :type img_mask: numpy.ndarray or torch.Tensor
    :param num_samples: an integer
    :type num_samples: int
    :return: tuple of torch.LongTensor in (u,v) format. Each torch.LongTensor has shape [num_samples]
    :rtype: tuple of torch.LongTensor
    """

    image_height, image_width = img_mask.shape

    if isinstance(img_mask, np.ndarray):
        img_mask_torch = torch.from_numpy(img_mask).float()
    else:
        img_mask_torch = img_mask

    # randomly subsample from the mask
    mask = img_mask_torch.view(image_width * image_height, 1).squeeze(1)
    mask_indices_flat = torch.nonzero(mask)
    if len(mask_indices_flat) == 0:
        return (None, None)

    rand_numbers = torch.rand(num_samples) * len(mask_indices_flat)
    rand_indices = torch.floor(rand_numbers).long()
    uv_vec_flattened = torch.index_select(mask_indices_flat, 0, rand_indices).squeeze(1)
    uv_vec = utils.flattened_pixel_locations_to_u_v(uv_vec_flattened, image_width)
    return uv_vec
def _PyramidRoI_Feat(self, feat_maps, rois, im_info):
    ''' roi pool on pyramid feature maps'''
    # do roi pooling based on predicted rois
    img_area = im_info[0][0] * im_info[0][1]
    h = rois.data[:, 4] - rois.data[:, 2] + 1
    w = rois.data[:, 3] - rois.data[:, 1] + 1

    roi_level = torch.log(torch.sqrt(h * w) / 224.0) / np.log(2)
    roi_level = torch.floor(roi_level + 4)
    # --------
    # roi_level = torch.log(torch.sqrt(h * w) / 224.0)
    # roi_level = torch.round(roi_level + 4)
    # ------

    roi_level[roi_level < 2] = 2
    roi_level[roi_level > 5] = 5
    # roi_level.fill_(5)

    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        # NOTE: need to add pyramid
        grid_xy = _affine_grid_gen(rois, feat_maps.size()[2:], self.grid_size)  ##
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        roi_pool_feat = self.RCNN_roi_crop(feat_maps, Variable(grid_yx).detach())  ##
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            roi_pool_feat = F.max_pool2d(roi_pool_feat, 2, 2)

    elif cfg.POOLING_MODE == 'align':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero().squeeze()
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_align(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    elif cfg.POOLING_MODE == 'pool':
        roi_pool_feats = []
        box_to_levels = []
        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero().squeeze()
            box_to_levels.append(idx_l)
            scale = feat_maps[i].size(2) / im_info[0][0]
            feat = self.RCNN_roi_pool(feat_maps[i], rois[idx_l], scale)
            roi_pool_feats.append(feat)
        roi_pool_feat = torch.cat(roi_pool_feats, 0)
        box_to_level = torch.cat(box_to_levels, 0)
        idx_sorted, order = torch.sort(box_to_level)
        roi_pool_feat = roi_pool_feat[order]

    return roi_pool_feat
def _create_dummy_data(filename):
    data = torch.rand(num_examples * maxlen)
    data = 97 + torch.floor(26 * data).int()
    with open(os.path.join(data_dir, filename), 'w') as h:
        offset = 0
        for _ in range(num_examples):
            ex_len = random.randint(1, maxlen)
            ex_str = ' '.join(map(chr, data[offset:offset + ex_len]))
            print(ex_str, file=h)
            offset += ex_len
def th_random_choice(a, n_samples=1, replace=True, p=None):
    """
    Parameters
    -----------
    a : 1-D array-like
        If a th.Tensor, a random sample is generated from its elements.
        If an int, the random sample is generated as if a was th.range(n)
    n_samples : int, optional
        Number of samples to draw. Default is 1, in which case a single
        value is returned.
    replace : boolean, optional
        Whether the sample is with or without replacement
    p : 1-D array-like, optional
        The probabilities associated with each entry in a.
        If not given the sample assumes a uniform distribution over all
        entries in a.

    Returns
    --------
    samples : 1-D ndarray, shape (size,)
        The generated random samples
    """
    if isinstance(a, int):
        a = th.arange(0, a)

    if p is None:
        if replace:
            idx = th.floor(th.rand(n_samples) * a.size(0)).long()
        else:
            idx = th.randperm(len(a))[:n_samples]
    else:
        if abs(1.0 - sum(p)) > 1e-3:
            raise ValueError('p must sum to 1.0')
        if not replace:
            raise ValueError('replace must be True when probabilities are given')
        idx_vec = th.cat([th.zeros(round(p[i] * 1000)) + i for i in range(len(p))])
        idx = (th.floor(th.rand(n_samples) * 999)).long()
        idx = idx_vec[idx].long()
    selection = a[idx]
    if n_samples == 1:
        selection = selection[0]
    return selection
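# Hedged usage sketch for th_random_choice (the function itself assumes a
# module-level `import torch as th`). The example values below are
# illustrative only: draws with replacement, without replacement, and with an
# explicit probability vector.
import torch

vals = torch.arange(0, 10).float()
a = th_random_choice(vals, n_samples=3)                  # uniform, with replacement
b = th_random_choice(vals, n_samples=3, replace=False)   # without replacement
c = th_random_choice(vals, n_samples=3, p=[0.1] * 10)    # weighted, must sum to 1.0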
def tanh_quantize(input, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)
    input = torch.tanh(input)  # [-1, 1]
    input_rescale = (input + 1.0) / 2  # [0, 1]
    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n
    v = 2 * v - 1  # [-1, 1]

    v = 0.5 * torch.log((1 + v) / (1 - v))  # arctanh
    return v
def linear_quantize(input, sf, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    delta = math.pow(2.0, -sf)
    bound = math.pow(2.0, bits - 1)
    min_val = -bound
    max_val = bound - 1
    rounded = torch.floor(input / delta + 0.5)

    clipped_value = torch.clamp(rounded, min_val, max_val) * delta
    return clipped_value
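# Hedged usage sketch for linear_quantize: quantize a tensor to 8 bits with a
# scale factor derived from its dynamic range. The way sf is chosen here
# (fractional bits left over after covering the max magnitude) is one common
# heuristic, used purely as an illustrative assumption.
import math
import torch

x = torch.randn(4, 4) * 3.0
bits = 8
sf = bits - 1 - math.ceil(math.log2(x.abs().max().item() + 1e-12))  # fractional bits
x_q = linear_quantize(x, sf, bits)
print((x - x_q).abs().max())  # error is roughly bounded by 2 ** (-sf - 1) inside the clip range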
def __call__(self, boxlists):
    """
    Arguments:
        boxlists (list[BoxList])
    """
    # Compute level ids
    s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists]))

    # Eqn.(1) in FPN paper
    target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps))
    target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max)
    return target_lvls.to(torch.int64) - self.k_min
def min_max_quantize(input, bits):
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    min_val, max_val = input.min(), input.max()

    if isinstance(min_val, Variable):
        max_val = float(max_val.data.cpu().numpy()[0])
        min_val = float(min_val.data.cpu().numpy()[0])

    input_rescale = (input - min_val) / (max_val - min_val)

    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n

    v = v * (max_val - min_val) + min_val
    return v
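# Hedged usage sketch for min_max_quantize (and tanh_quantize above): both snap
# a float tensor onto a uniform grid of 2**bits levels, min_max_quantize over
# the tensor's own [min, max] range and tanh_quantize in tanh space before
# mapping back through arctanh. The example tensor is illustrative only.
import torch

w = torch.randn(3, 3)
w4 = min_max_quantize(w, bits=4)       # at most 16 distinct values spanning [w.min(), w.max()]
w4_tanh = tanh_quantize(w, bits=4)     # quantized after tanh squashing, then un-squashed
assert torch.unique(w4).numel() <= 16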
def log_Logistic_256(x, mean, logvar, average=False, reduce=True, dim=None):
    bin_size = 1. / 256.

    # implementation like https://github.com/openai/iaf/blob/master/tf_utils/distributions.py#L28
    scale = torch.exp(logvar)
    x = (torch.floor(x / bin_size) * bin_size - mean) / scale
    cdf_plus = torch.sigmoid(x + bin_size / scale)
    cdf_minus = torch.sigmoid(x)

    # calculate final log-likelihood for an image
    log_logist_256 = - torch.log(cdf_plus - cdf_minus + 1.e-7)

    if reduce:
        if average:
            return torch.mean(log_logist_256, dim)
        else:
            return torch.sum(log_logist_256, dim)
    else:
        return log_logist_256
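# Hedged usage sketch for log_Logistic_256: score a batch of [0, 1) images
# under a discretized logistic with 1/256-wide bins. The tensor shapes and the
# per-example reduction over dim=1 are assumptions for illustration.
import torch

x = torch.rand(8, 784)          # 8 images flattened to 784 pixels, values in [0, 1)
mean = torch.rand(8, 784)
logvar = torch.zeros(8, 784)    # unit scale
per_example = log_Logistic_256(x, mean, logvar, reduce=True, dim=1)  # shape (8,)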
def fit_positive(rows, cols, yx_min, yx_max, anchors):
    device_id = anchors.get_device() if torch.cuda.is_available() else None
    batch_size, num, _ = yx_min.size()
    num_anchors, _ = anchors.size()
    valid = torch.prod(yx_min < yx_max, -1)
    center = (yx_min + yx_max) / 2
    ij = torch.floor(center)
    i, j = torch.unbind(ij.long(), -1)
    index = i * cols + j
    anchors2 = anchors / 2
    iou_matrix = utils.iou.torch.iou_matrix((yx_min - center).view(-1, 2), (yx_max - center).view(-1, 2),
                                            -anchors2, anchors2).view(batch_size, -1, num_anchors)
    iou, index_anchor = iou_matrix.max(-1)
    _positive = []
    cells = rows * cols
    for valid, index, index_anchor in zip(torch.unbind(valid), torch.unbind(index), torch.unbind(index_anchor)):
        index, index_anchor = (t[valid] for t in (index, index_anchor))
        t = utils.ensure_device(torch.ByteTensor(cells, num_anchors).zero_(), device_id)
        t[index, index_anchor] = 1
        _positive.append(t)
    return torch.stack(_positive)
def compute_frustum_bounds(self, world_to_grid, camera_to_world):
    corner_points = camera_to_world.new(8, 4, 1).fill_(1)
    # depth min
    corner_points[0, :3, 0] = self.depth_to_skeleton(0, 0, self.depth_min)
    corner_points[1, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_min)
    corner_points[2, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_min)
    corner_points[3, :3, 0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_min)
    # depth max
    corner_points[4, :3, 0] = self.depth_to_skeleton(0, 0, self.depth_max)
    corner_points[5, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, 0, self.depth_max)
    corner_points[6, :3, 0] = self.depth_to_skeleton(self.image_dims[0] - 1, self.image_dims[1] - 1, self.depth_max)
    corner_points[7, :3, 0] = self.depth_to_skeleton(0, self.image_dims[1] - 1, self.depth_max)

    p = torch.bmm(camera_to_world.repeat(8, 1, 1), corner_points)
    pl = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.floor(p)))
    pu = torch.round(torch.bmm(world_to_grid.repeat(8, 1, 1), torch.ceil(p)))
    bbox_min0, _ = torch.min(pl[:, :3, 0], 0)
    bbox_min1, _ = torch.min(pu[:, :3, 0], 0)
    bbox_min = np.minimum(bbox_min0, bbox_min1)
    bbox_max0, _ = torch.max(pl[:, :3, 0], 0)
    bbox_max1, _ = torch.max(pu[:, :3, 0], 0)
    bbox_max = np.maximum(bbox_max0, bbox_max1)
    return bbox_min, bbox_max
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image, and generates
    non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Please see 'coordinate_conventions.md' documentation for an explanation of pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor)

    :param img_b_shape: tuple of (H,W) which is the shape of the image

    (optional)
    :param num_non_matches_per_match: int

    (optional)
    :param img_b_mask: torch.FloatTensor (can be cuda or not)
        - masked image, we will select from the non-zero entries
        - shape is H x W

    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the right element of the tuple is all "v" positions
        - Each torch.FloatTensor is of shape torch.Shape([num_matches, non_matches_per_match])
        - This shape makes it so that each row of the non-matches corresponds to the row for the match in uv_a
    """
    image_width = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width, height=image_height,
                                         num_samples=num_matches * num_non_matches_per_match)

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1, 1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            print("warning, empty mask b")
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            num_samples = num_matches * num_non_matches_per_match
            rand_numbers_b = torch.rand(num_samples) * len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            uv_b_non_matches = (randomized_mask_b_indices_flat % image_width,
                                randomized_mask_b_indices_flat / image_width)
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()

    # for each in uv_a, we want non-matches
    # first just randomly sample "non_matches"
    # we will later move random samples that were too close to being matches
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches, num_non_matches_per_match),
                        uv_b_non_matches[1].view(num_matches, num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close
    # to preserve tensor size, rather than pruning, we can perturb these in pixel space
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = diffs_0.view(-1, 1)
    diffs_1_flattened = diffs_1.view(-1, 1)

    diffs_0_flattened = torch.abs(diffs_0_flattened).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1_flattened).squeeze(1)

    need_to_be_perturbed = torch.zeros_like(diffs_0_flattened)
    ones = torch.ones_like(diffs_0_flattened)
    num_pixels_too_close = 1.0
    threshold = torch.ones_like(diffs_0_flattened) * num_pixels_too_close

    # determine which pixels are too close to being matches
    need_to_be_perturbed = where(diffs_0_flattened < threshold, ones, need_to_be_perturbed)
    need_to_be_perturbed = where(diffs_1_flattened < threshold, ones, need_to_be_perturbed)

    minimal_perturb = num_pixels_too_close / 2
    minimal_perturb_vector = (torch.rand(len(need_to_be_perturbed)) * 2).floor() * (minimal_perturb * 2) - minimal_perturb
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed)) * std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed * random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1, 1).type(dtype_float).squeeze(1)
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1, 1).type(dtype_float).squeeze(1)

    uv_b_non_matches_0_flat = uv_b_non_matches_0_flat + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches_1_flat + perturb_vector

    # now just need to wrap around any that went out of bounds

    # handle wrapping in width
    lower_bound = 0.0
    upper_bound = image_width * 1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * upper_bound

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat > upper_bound_vec,
                                    uv_b_non_matches_0_flat - upper_bound_vec,
                                    uv_b_non_matches_0_flat)

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat < lower_bound_vec,
                                    uv_b_non_matches_0_flat + upper_bound_vec,
                                    uv_b_non_matches_0_flat)

    # handle wrapping in height
    lower_bound = 0.0
    upper_bound = image_height * 1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * upper_bound

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat > upper_bound_vec,
                                    uv_b_non_matches_1_flat - upper_bound_vec,
                                    uv_b_non_matches_1_flat)

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat < lower_bound_vec,
                                    uv_b_non_matches_1_flat + upper_bound_vec,
                                    uv_b_non_matches_1_flat)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
            uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
def mod289(x):
    return x - torch.floor(x * (1.0 / 289.0)) * 289.0
def mask2d(batch_size, dim, keep_prob, device):
    mask = torch.floor(torch.rand(batch_size, dim, device=device) + keep_prob) / keep_prob
    return mask
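# Hedged usage sketch for mask2d: an inverted-dropout style mask. Each entry is
# 1/keep_prob with probability keep_prob and 0 otherwise, so multiplying by the
# mask keeps the expected activation scale unchanged. The shapes below are
# illustrative only.
import torch

h = torch.randn(32, 256)
mask = mask2d(batch_size=32, dim=256, keep_prob=0.8, device=h.device)
h_dropped = h * mask  # roughly 20% of entries are zeroed, the rest scaled by 1.25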
def fill_norm(yx_min, yx_max, anchors):
    center = (yx_min + yx_max) / 2
    ij = torch.floor(center)
    center_offset = center - ij
    size = yx_max - yx_min
    return center_offset, torch.log(size / anchors.view(1, -1, 2))
def generate_vcmr_predictions_from_res(eval_res, max_prop_per_query=300, query_bsz_in_sort=1000):
    """ This function is for Video Corpus Moment Retrieval (VCMR).
    Generate prediction file which could be evaluated using standalone_eval.eval.
    Args:
        eval_res: dict(
            query_meta=query_meta_list,  # N_q * dict(), each dict is {"desc_id": int, "desc": str}
            video_meta=video_meta_list,  # N_videos * dict(), {"vid_name": str, "duration": float, "proposals": ndarray}
            video2idx=eval_dataset.video2idx,  # dict {vid_name: index}
            query_prop_dist_vcmr=[],  # N_videos * (N_q, N_prop)
        )
        max_prop_per_query: int or None. If None, generate a ranking over all possible moments,
            else keep only the top max_prop_per_query moments per query.
        query_bsz_in_sort: int, only sort a subset of queries at a time; sorting all queries at once
            would use too much memory.
    return:
        list(dicts): each dict is dict(desc=str, desc_id=int, predictions=list(sublist)),
            each sublist is [vid_name (str), st (float), ed (float), score (float)], score is negative distance.
    """
    # video2idx
    video2idx = eval_res["video2idx"]

    # (N_videos, N_prop, N_q), (N_videos, N_prop)
    padded_dist, padded_mask = pad_sequences_1d([e.transpose(0, 1) for e in eval_res["query_prop_dist_vcmr"]],
                                                dtype=eval_res["query_prop_dist_vcmr"][0].dtype,
                                                device=eval_res["query_prop_dist_vcmr"][0].device)
    # put a very large distance into the invalid (padded) bits so they sort last
    padded_dist += (padded_mask.unsqueeze(2) == 0).float() * 1e10
    n_videos, n_prop, n_q = padded_dist.shape
    print("n_videos, n_prop, n_q {}".format((n_videos, n_prop, n_q)))
    padded_dist = padded_dist.view(n_videos * n_prop, n_q).transpose(0, 1).contiguous()  # (N_q, N_video*N_prop)
    print("padded_dist, {}".format(padded_dist.shape))
    sorted_distances, sorted_indices = torch.topk(padded_dist.to(torch.device("cuda:0"), non_blocking=True),
                                                  k=min(max_prop_per_query, n_videos * n_prop),
                                                  dim=1, largest=False, sorted=True)  # (N_q, max_prop_per_query) * 2
    sorted_distances = - sorted_distances.cpu().numpy()
    # (N_q, max_prop_per_query) * 2, prop_indices: inside-video indices.
    video_meta_indices = torch.floor(sorted_indices.float() / n_prop).long().cpu().numpy()
    prop_indices = torch.remainder(sorted_indices, n_prop).cpu().numpy()

    vr_res = []
    query_meta = eval_res["query_meta"]
    for i in trange(n_q, desc="[VR] Loop over queries to generate predictions"):
        row = video_meta_indices[i]
        score_row = - sorted_distances[i]
        cur_vr_predictions = []
        for j, meta_idx in enumerate(row):
            video_idx = video2idx[eval_res["video_meta"][meta_idx]["vid_name"]]
            cur_vr_predictions.append([video_idx, 0, 0, float(score_row[j])])
        cur_query_pred = dict(
            desc_id=query_meta[i]["desc_id"],
            desc=query_meta[i]["desc"],
            predictions=cur_vr_predictions
        )
        vr_res.append(cur_query_pred)

    vcmr_res = []
    logger.debug("sorted_indices {}".format(sorted_indices.shape))
    logger.debug("sorted_distances {}".format(sorted_distances.shape))
    for idx, (vm_row_indices, p_row_indices) in tqdm(enumerate(zip(video_meta_indices, prop_indices)),
                                                     desc="[VCMR] Loop over queries to generate predictions",
                                                     total=n_q):  # query
        sorted_distances_row = - sorted_distances[idx]  # converted to negative distance
        # [video_idx(int), st(float), ed(float), score(float)]
        cur_ranked_predictions = []
        for col_idx, (v_col_idx, p_col_idx) in enumerate(zip(vm_row_indices, p_row_indices)):
            cur_pred = []
            cur_pred += [video2idx[eval_res["video_meta"][v_col_idx]["vid_name"]], ]
            cur_pred += eval_res["video_meta"][v_col_idx]["proposals"][p_col_idx].tolist()
            cur_pred += [float(sorted_distances_row[col_idx])]
            cur_ranked_predictions.append(cur_pred)
        cur_query_pred = dict(
            desc_id=eval_res["query_meta"][idx]["desc_id"],
            desc=eval_res["query_meta"][idx]["desc"],
            predictions=cur_ranked_predictions
        )
        vcmr_res.append(cur_query_pred)
    return vcmr_res, vr_res
def floor_ste(x: torch.Tensor) -> torch.Tensor:
    y = torch.floor(x)
    return _ste(x, y)
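# Hedged sketch: floor_ste relies on a helper `_ste` that is not shown in this
# listing. A common straight-through-estimator formulation is sketched below as
# an assumption (named _ste_sketch to avoid confusion with the real helper):
# the forward pass returns the floored value while gradients flow through x.
import torch

def _ste_sketch(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # forward: y; backward: gradient of x (detach blocks grads through y - x)
    return x + (y - x).detach()

x = torch.tensor([1.2, 2.7], requires_grad=True)
out = _ste_sketch(x, torch.floor(x))
out.sum().backward()
print(x.grad)  # tensor([1., 1.]) even though floor has zero gradient almost everywhere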
def regularize(f):
    f_ = (f - PI) / TWO_PI
    return TWO_PI * (f_ - torch.floor(f_) - 0.5)
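# Hedged usage sketch for regularize: wraps angles into [-pi, pi). PI and TWO_PI
# are module-level constants in the original file; the values assumed here are
# math.pi and 2 * math.pi.
import math
import torch

PI = math.pi
TWO_PI = 2 * math.pi

angles = torch.tensor([0.0, 3 * math.pi / 2, -7.0, 10.0])
print(regularize(angles))  # every value lands in [-pi, pi)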
def train(self):
    if self.T - self.target_sync_T > self.args.target:
        self.sync_target_network()
        self.target_sync_T = self.T

    info = {}

    for _ in range(self.args.iters):
        self.dqn.eval()

        batch, indices, is_weights = self.replay.Sample_N(self.args.batch_size, self.args.n_step, self.args.gamma)
        columns = list(zip(*batch))

        states = Variable(torch.from_numpy(np.array(columns[0])).float().transpose_(1, 3))
        actions = Variable(torch.LongTensor(columns[1]))
        terminal_states = Variable(torch.FloatTensor(columns[5]))
        rewards = Variable(torch.FloatTensor(columns[2]))
        # Have to clip rewards for DQN
        rewards = torch.clamp(rewards, -1, 1)
        steps = Variable(torch.FloatTensor(columns[4]))
        new_states = Variable(torch.from_numpy(np.array(columns[3])).float().transpose_(1, 3))

        target_dqn_qvals = self.target_dqn(new_states).cpu()
        # Make a new variable with those values so that these are treated as constants
        target_dqn_qvals_data = Variable(target_dqn_qvals.data)

        q_value_gammas = (Variable(torch.ones(terminal_states.size()[0])) - terminal_states)
        inter = Variable(torch.ones(terminal_states.size()[0]) * self.args.gamma)
        # print(steps)
        q_value_gammas = q_value_gammas * torch.pow(inter, steps)

        values = torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms)
        values = Variable(values)
        values = values.view(1, 1, self.args.atoms)
        values = values.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(values)

        q_value_gammas = q_value_gammas.view(self.args.batch_size, 1, 1)
        q_value_gammas = q_value_gammas.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(q_value_gammas)
        gamma_values = q_value_gammas * values
        # print(gamma_values)
        rewards = rewards.view(self.args.batch_size, 1, 1)
        rewards = rewards.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(rewards)
        operator_q_values = rewards + gamma_values
        # print(operator_q_values)

        clipped_operator_q_values = torch.clamp(operator_q_values, self.args.v_min, self.args.v_max)

        delta_z = (self.args.v_max - self.args.v_min) / (self.args.atoms - 1)
        # Using the notation from the categorical paper
        b_j = (clipped_operator_q_values - self.args.v_min) / delta_z
        # print(b_j)
        lower_bounds = torch.floor(b_j)
        upper_bounds = torch.ceil(b_j)

        # Work out the max action
        atom_values = Variable(torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms))
        atom_values = atom_values.view(1, 1, self.args.atoms)
        atom_values = atom_values.expand(self.args.batch_size, self.args.actions, self.args.atoms)

        # Sum over the atoms dimension
        target_expected_qvalues = torch.sum(target_dqn_qvals_data * atom_values, dim=2)
        # Get the maximum actions index across the batch size
        max_actions = target_expected_qvalues.max(dim=1)[1].view(-1)

        # Project back onto the original support for the max actions
        q_value_distribution_targets = torch.zeros(self.args.batch_size, self.args.atoms)

        # Distributions for the max actions
        # print(target_dqn_qvals_data, max_actions)
        q_value_max_actions_distribs = target_dqn_qvals_data.index_select(dim=1, index=max_actions)[:, 0, :]
        # print(q_value_max_actions_distribs)

        # Lower_bounds_actions
        lower_bounds_actions = lower_bounds.index_select(dim=1, index=max_actions)[:, 0, :]
        upper_bounds_actions = upper_bounds.index_select(dim=1, index=max_actions)[:, 0, :]
        b_j_actions = b_j.index_select(dim=1, index=max_actions)[:, 0, :]

        lower_bound_values_to_add = q_value_max_actions_distribs * (upper_bounds_actions - b_j_actions)
        upper_bound_values_to_add = q_value_max_actions_distribs * (b_j_actions - lower_bounds_actions)
        # print(lower_bounds_actions)
        # print(lower_bound_values_to_add)

        # Naive looping
        for b in range(self.args.batch_size):
            for l, pj in zip(lower_bounds_actions.data.type(torch.LongTensor)[b], lower_bound_values_to_add[b].data):
                q_value_distribution_targets[b][l] += pj
            for u, pj in zip(upper_bounds_actions.data.type(torch.LongTensor)[b], upper_bound_values_to_add[b].data):
                q_value_distribution_targets[b][u] += pj

        self.dqn.train()
        if self.args.gpu:
            actions = actions.cuda()
            # q_value_targets = q_value_targets.cuda()
            q_value_distribution_targets = q_value_distribution_targets.cuda()
        model_predictions = self.dqn(states).index_select(1, actions.view(-1))[:, 0, :]
        q_value_distribution_targets = Variable(q_value_distribution_targets)
        # print(q_value_distribution_targets)
        # print(model_predictions)

        # Cross entropy loss
        ce_loss = -torch.sum(q_value_distribution_targets * torch.log(model_predictions), dim=1)
        ce_batch_loss = ce_loss.mean()

        info = {}

        self.log("DQN/X_Entropy_Loss", ce_batch_loss.data[0], step=self.T)

        # Update
        self.optimizer.zero_grad()
        ce_batch_loss.backward()

        # Taken from pytorch clip_grad_norm
        # Remove once the pip version is up to date with source
        gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
        if gradient_norm is not None:
            info["Norm"] = gradient_norm

        self.optimizer.step()

        if "States" in info:
            states_trained = info["States"]
            info["States"] = states_trained + columns[0]
        else:
            info["States"] = columns[0]

    # Pad out the states to be of size batch_size
    if len(info["States"]) < self.args.batch_size:
        old_states = info["States"]
        new_states = old_states[0] * (self.args.batch_size - len(old_states))
        info["States"] = new_states

    return info
def _get_random_inputs(device):
    return torch.floor(torch.rand((BATCH_SIZE, 2)) * EMB_SIZE).to(dtype=torch.long, device=device)
def _interpolate(im, x, y, out_size):
    # constants
    num_batch, height, width, channels = im.shape[0], im.shape[1], im.shape[2], im.shape[3]

    out_height = out_size[0]
    out_width = out_size[1]
    max_y = int(height - 1)
    max_x = int(width - 1)

    # scale indices from [-1, 1] to [0, width/height]
    x = (x + 1.0) * (width - 1.0) / 2.0
    y = (y + 1.0) * (height - 1.0) / 2.0

    # do sampling
    x0 = (torch.floor(x)).int()
    x1 = x0 + 1
    y0 = (torch.floor(y)).int()
    y1 = y0 + 1

    x0_c = torch.clamp(x0, 0, max_x)
    x1_c = torch.clamp(x1, 0, max_x)
    y0_c = torch.clamp(y0, 0, max_y)
    y1_c = torch.clamp(y1, 0, max_y)

    dim2 = width
    dim1 = width * height
    base = _repeat(torch.arange(0, num_batch) * dim1, out_height * out_width).to(im.get_device())

    base_y0 = base + y0_c * dim2
    base_y1 = base + y1_c * dim2
    idx_a = base_y0 + x0_c
    idx_b = base_y1 + x0_c
    idx_c = base_y0 + x1_c
    idx_d = base_y1 + x1_c

    # use indices to lookup pixels in the flat image and restore
    # channels dim
    im_flat = im.view([-1, channels])
    im_flat = im_flat.float()

    # and finally calculate interpolated values
    x0_f = x0.float()
    x1_f = x1.float()
    y0_f = y0.float()
    y1_f = y1.float()

    wa = ((x1_f - x) * (y1_f - y)).unsqueeze(1)
    wb = ((x1_f - x) * (y - y0_f)).unsqueeze(1)
    wc = ((x - x0_f) * (y1_f - y)).unsqueeze(1)
    wd = ((x - x0_f) * (y - y0_f)).unsqueeze(1)

    zerof = torch.zeros_like(wa)
    wa = torch.where((torch.eq(x0_c, x0) & torch.eq(y0_c, y0)).unsqueeze(1), wa, zerof)
    wb = torch.where((torch.eq(x0_c, x0) & torch.eq(y1_c, y1)).unsqueeze(1), wb, zerof)
    wc = torch.where((torch.eq(x1_c, x1) & torch.eq(y0_c, y0)).unsqueeze(1), wc, zerof)
    wd = torch.where((torch.eq(x1_c, x1) & torch.eq(y1_c, y1)).unsqueeze(1), wd, zerof)

    zeros = torch.zeros(size=[int(num_batch) * int(height) * int(width), int(channels)], dtype=torch.float)
    output = zeros.to(im.get_device())
    output = output.scatter_add(dim=0, index=idx_a.long().unsqueeze(1).repeat(1, channels), src=im_flat * wa)
    output = output.scatter_add(dim=0, index=idx_b.long().unsqueeze(1).repeat(1, channels), src=im_flat * wb)
    output = output.scatter_add(dim=0, index=idx_c.long().unsqueeze(1).repeat(1, channels), src=im_flat * wc)
    output = output.scatter_add(dim=0, index=idx_d.long().unsqueeze(1).repeat(1, channels), src=im_flat * wd)

    return output
def pytorch_rand_select_pixel(width, height, num_samples=1):
    two_rand_numbers = torch.rand(2, num_samples)
    two_rand_numbers[0, :] = two_rand_numbers[0, :] * width
    two_rand_numbers[1, :] = two_rand_numbers[1, :] * height
    two_rand_ints = torch.floor(two_rand_numbers).type(dtype_long)
    return (two_rand_ints[0], two_rand_ints[1])
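# Hedged usage sketch for pytorch_rand_select_pixel: draw 5 random (u, v) pixel
# positions inside a 640x480 image. dtype_long is a module-level dtype in the
# original file; torch.LongTensor (or its CUDA variant) is assumed here.
import torch

dtype_long = torch.LongTensor
u, v = pytorch_rand_select_pixel(width=640, height=480, num_samples=5)
# u in [0, 639], v in [0, 479], each a LongTensor of shape (5,)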
def random_int_tensor(seed, size, low=0, high=2 ** 32, a=22695477, c=1, m=2 ** 32):
    """ Same as random_float_tensor but integers between [low, high)
    """
    return (torch.floor(random_float_tensor(seed, size, a, c, m) * (high - low)) + low).to(torch.int64)
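# Hedged sketch: random_int_tensor depends on random_float_tensor, which is not
# shown in this listing. A deterministic linear-congruential-generator version
# consistent with the (seed, size, a, c, m) signature is sketched below as an
# assumption (named random_float_tensor_sketch), purely to make the dependency concrete.
import math
import torch

def random_float_tensor_sketch(seed, size, a=22695477, c=1, m=2 ** 32):
    # iterate x_{n+1} = (a * x_n + c) mod m and normalize each state to [0, 1)
    n = math.prod(size)
    out = torch.empty(n, dtype=torch.float64)
    state = seed
    for i in range(n):
        state = (a * state + c) % m
        out[i] = state / m
    return out.reshape(size).float()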