def forward(self, c_ref, p_ref, tf, tm, tx, gm, loss_weight):
    # tf: b,c,h,w // tb: b,4 (y,x,h,w)
    # if this is the first target frame, there is no previous mask: use a uniform 0.5 prior
    if tm is None:
        tm = ToCudaVariable([0.5 * torch.ones(gm.size())], requires_grad=False)[0]

    tb = self.masks2yxhw(tm, tx, scale=1.5)

    oh, ow = tf.size()[2], tf.size()[3]  # original size
    fw_grid, bw_grid, theta = self.get_ROI_grid(tb, src_size=(oh, ow), dst_size=(256, 256), scale=1.0)

    # sample the target frame, previous mask, and scribbles inside the ROI
    tf_roi = F.grid_sample(tf, fw_grid)
    tm_roi = F.grid_sample(torch.unsqueeze(tm, dim=1).float(), fw_grid)[:, 0]
    tx_roi = F.grid_sample(torch.unsqueeze(tx, dim=1).float(), fw_grid)[:, 0]

    # run Siamese encoder
    tr5, tr4, tr3, tr2 = self.Encoder(tf_roi, tm_roi, tx_roi)

    if p_ref is None:
        a_ref = c_ref.detach()
    else:
        a_ref = self.SEFA(c_ref.detach(), p_ref.detach())
    em_roi = self.Decoder(a_ref, tr5, tr4, tr3, tr2)

    ## losses are computed within the ROI
    # warp the ground-truth mask into the ROI
    gm_roi = F.grid_sample(torch.unsqueeze(gm, dim=1).float(), fw_grid)[:, 0]
    gm_roi = gm_roi.detach()

    # CE loss over the multi-scale decoder outputs
    CE = nn.CrossEntropyLoss(reduction='none')  # 'reduce=False' is deprecated
    batch_CE = ToCudaVariable([torch.zeros(gm_roi.size()[0])])[0]  # batch-sized loss container
    sizes = [(256, 256), (64, 64), (32, 32), (16, 16), (8, 8)]
    for s in range(5):
        if s == 0:
            CE_s = CE(em_roi[s], torch.round(gm_roi).long()).mean(-1).mean(-1)  # mean over h,w
            batch_CE += loss_weight[s] * CE_s
        else:
            if loss_weight[s]:
                # 'F.upsample' is deprecated in favor of 'F.interpolate'
                gm_roi_s = torch.round(F.interpolate(torch.unsqueeze(gm_roi, dim=1), size=sizes[s], mode='bilinear', align_corners=False)[:, 0]).long()
                CE_s = CE(em_roi[s], gm_roi_s).mean(-1).mean(-1)  # mean over h,w
                batch_CE += loss_weight[s] * CE_s

    # get final output via inverse warping back to the original frame size
    em = F.grid_sample(F.softmax(em_roi[0], dim=1), bw_grid)[:, 1]
    return em, batch_CE, a_ref
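# A minimal, self-contained sketch (not part of the model; shapes and weights are
# illustrative) of the multi-scale CE accumulation used in forward(): per-pixel CE
# with reduction='none', averaged over h,w, weighted per scale, and summed into a
# per-batch-element loss vector. A zero loss_weight skips a scale entirely.
def _multiscale_ce_demo():
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    b = 2
    loss_weight = [1.0, 0.5, 0.0]          # zero weight: that scale is skipped
    sizes = [(32, 32), (16, 16), (8, 8)]   # toy stand-ins for the real pyramid
    logits = [torch.randn(b, 2, *s) for s in sizes]
    gm_roi = (torch.rand(b, *sizes[0]) > 0.5).float()

    CE = nn.CrossEntropyLoss(reduction='none')
    batch_CE = torch.zeros(b)
    for s, (em_s, size) in enumerate(zip(logits, sizes)):
        if not loss_weight[s]:
            continue
        # downsample the ground-truth mask to this scale and re-binarize it
        target = torch.round(F.interpolate(gm_roi[:, None], size=size, mode='bilinear', align_corners=False)[:, 0]).long()
        batch_CE += loss_weight[s] * CE(em_s, target).mean(-1).mean(-1)
    return batch_CE  # one loss value per batch element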
def Encode_MS(val_F1, val_P1, scales):
    ref = {}
    for sc in scales:
        if sc != 1.0:
            # downsample frame and mask before encoding at this scale
            msv_F1, msv_P1 = downsample([val_F1, val_P1], sc)
        else:
            msv_F1, msv_P1 = val_F1, val_P1
        msv_F1, msv_P1 = ToCudaVariable([msv_F1, msv_P1], volatile=True)
        # keep only the deepest encoder feature (r5) as the reference
        ref[sc] = model.module.Encoder(msv_F1, msv_P1)[0]
    return ref
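# Hedged usage sketch for Encode_MS (assumes the surrounding script defines
# `model` and `downsample`; the scales below are illustrative):
#
#   ref = Encode_MS(frame0, given_mask0, scales=[0.5, 0.75, 1.0])
#
# `ref` then maps each scale to the deepest encoder feature, which
# Propagate_MS reuses for every subsequent frame at the matching scale.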
def is_there_scribble(self, p, n):
    # count positive/negative scribble pixels per batch element
    num_pixel_p = np.sum(p.data.cpu().numpy(), axis=(1, 2))
    num_pixel_n = np.sum(n.data.cpu().numpy(), axis=(1, 2))
    num_pixel = num_pixel_p + num_pixel_n
    yes = (num_pixel > 0).astype(np.float32)
    # re-weight so that batch elements without scribbles do not dilute the loss
    multiplier = 1 / (np.mean(yes) + 0.001)
    yes = yes * multiplier
    return ToCudaVariable([torch.from_numpy(yes.copy()).float()])[0]
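# Toy numpy illustration (standalone, not repo code) of the weighting in
# is_there_scribble(): with half the batch scribbled, each scribbled element
# gets weight ~2 and the empty ones are zeroed out.
def _scribble_weight_demo():
    import numpy as np
    yes = np.array([1., 0., 1., 0.], dtype=np.float32)
    mult = 1 / (np.mean(yes) + 0.001)  # ~= 2.0 when half the batch has scribbles
    return yes * mult                  # -> [~2, 0, ~2, 0]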
def all2yxhw(self, mask, pos, neg, scale=1.0):
    np_mask = mask.data.cpu().numpy()
    np_pos = pos.data.cpu().numpy()
    np_neg = neg.data.cpu().numpy()

    np_yxhw = np.zeros((np_mask.shape[0], 4), dtype=np.float32)
    for b in range(np_mask.shape[0]):
        # gather coordinates of all foreground mask and scribble pixels
        mys, mxs = np.where(np_mask[b] >= 0.49)
        pys, pxs = np.where(np_pos[b] >= 0.49)
        nys, nxs = np.where(np_neg[b] >= 0.49)
        all_ys = np.concatenate([mys, pys, nys])
        all_xs = np.concatenate([mxs, pxs, nxs])

        if all_ys.size == 0 or all_xs.size == 0:
            # no pixels at all: fall back to the whole frame
            ymin, ymax = 0, np_mask.shape[1]
            xmin, xmax = 0, np_mask.shape[2]
        else:
            ymin, ymax = np.min(all_ys), np.max(all_ys)
            xmin, xmax = np.min(all_xs), np.max(all_xs)

        # enforce a minimum box size of 128 at the original resolution
        if (ymax - ymin) < 128:
            res = 128. - (ymax - ymin)
            ymin -= int(res / 2)
            ymax += int(res / 2)
        if (xmax - xmin) < 128:
            res = 128. - (xmax - xmin)
            xmin -= int(res / 2)
            xmax += int(res / 2)

        # apply scale, allowing the box to exceed the frame by up to 5 pixels
        orig_h = ymax - ymin + 1
        orig_w = xmax - xmin + 1
        ymin = np.maximum(-5, ymin - (scale - 1) / 2. * orig_h)
        ymax = np.minimum(np_mask.shape[1] + 5, ymax + (scale - 1) / 2. * orig_h)
        xmin = np.maximum(-5, xmin - (scale - 1) / 2. * orig_w)
        xmax = np.minimum(np_mask.shape[2] + 5, xmax + (scale - 1) / 2. * orig_w)

        # final yxhw
        y = (ymax + ymin) / 2.
        x = (xmax + xmin) / 2.
        h = ymax - ymin + 1
        w = xmax - xmin + 1
        yxhw = np.array([y, x, h, w], dtype=np.float32)

        np_yxhw[b] = yxhw
    return ToCudaVariable([torch.from_numpy(np_yxhw.copy()).float()])[0]
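# Minimal numpy check (standalone, illustrative) of the yxhw box logic above:
# even a single foreground pixel yields at least a ~128x128 box centered on it.
def _yxhw_demo():
    import numpy as np
    mask = np.zeros((240, 320), dtype=np.float32)
    mask[100, 150] = 1.0
    ys, xs = np.where(mask >= 0.49)
    ymin, ymax = ys.min(), ys.max()
    xmin, xmax = xs.min(), xs.max()
    if (ymax - ymin) < 128:
        res = 128. - (ymax - ymin)
        ymin -= int(res / 2)
        ymax += int(res / 2)
    if (xmax - xmin) < 128:
        res = 128. - (xmax - xmin)
        xmin -= int(res / 2)
        xmax += int(res / 2)
    y, x = (ymax + ymin) / 2., (xmax + xmin) / 2.
    h, w = ymax - ymin + 1, xmax - xmin + 1
    return y, x, h, w  # ~ (100, 150, 129, 129)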
def init_variables(self, frames):
    self.all_F = torch.unsqueeze(torch.from_numpy(np.transpose(frames, (3, 0, 1, 2))).float() / 255., dim=0)  # 1,3,t,h,w
    self.all_E = torch.zeros(1, self.num_frames, self.height, self.width)   # 1,t,h,w
    self.prev_E = torch.zeros(1, self.num_frames, self.height, self.width)  # 1,t,h,w
    self.dummy_M = torch.zeros(1, self.height, self.width).long()
    # to cuda
    self.all_F, self.all_E, self.prev_E, self.dummy_M = ToCudaVariable(
        [self.all_F, self.all_E, self.prev_E, self.dummy_M], volatile=True)

    self.ref = None
    self.a_ref = None
    self.next_a_ref = None
    self.prev_targets = []
def Propagate_MS(ref, val_F2, val_P2, scales):
    h, w = val_F2.size()[2], val_F2.size()[3]
    msv_E2 = {}
    for sc in scales:
        if sc != 1.0:
            msv_F2, msv_P2 = downsample([val_F2, val_P2], sc)
            msv_F2, msv_P2 = ToCudaVariable([msv_F2, msv_P2], volatile=True)
            r5, r4, r3, r2 = model.module.Encoder(msv_F2, msv_P2)
            e2 = model.module.Decoder(r5, ref[sc], r4, r3, r2)
            # upsample the foreground probability back to the original size
            msv_E2[sc] = upsample(F.softmax(e2[0], dim=1)[:, 1].data.cpu(), (h, w))
        else:
            msv_F2, msv_P2 = ToCudaVariable([val_F2, val_P2], volatile=True)
            r5, r4, r3, r2 = model.module.Encoder(msv_F2, msv_P2)
            e2 = model.module.Decoder(r5, ref[sc], r4, r3, r2)
            msv_E2[sc] = F.softmax(e2[0], dim=1)[:, 1].data.cpu()

    # average the per-scale predictions
    val_E2 = torch.zeros(val_P2.size())
    for sc in scales:
        val_E2 += msv_E2[sc]
    val_E2 /= len(scales)
    return val_E2
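# Hedged usage sketch (assumes `Encode_MS` above and the `all_F`/`all_E`
# buffers from init_variables; the loop bounds are illustrative):
#
#   ref = Encode_MS(all_F[:, :, 0], given_mask, scales)
#   for t in range(1, num_frames):
#       all_E[:, t] = Propagate_MS(ref, all_F[:, :, t], all_E[:, t - 1], scales)
#
# i.e. each frame is segmented by matching its multi-scale features against
# the fixed reference features, warm-started from the previous frame's mask.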
def get_ROI_grid(self, roi, src_size, dst_size, scale=1.):
    # scale height and width
    ry, rx, rh, rw = roi[:, 0], roi[:, 1], scale * roi[:, 2], scale * roi[:, 3]

    # convert center/size to min/max
    ymin = ry - rh / 2.
    ymax = ry + rh / 2.
    xmin = rx - rw / 2.
    xmax = rx + rw / 2.
    h, w = src_size[0], src_size[1]

    # theta: affine matrix mapping the dst grid into the ROI (normalized coords)
    theta = ToCudaVariable([torch.zeros(roi.size()[0], 2, 3)])[0]
    theta[:, 0, 0] = (xmax - xmin) / (w - 1)
    theta[:, 0, 2] = (xmin + xmax - (w - 1)) / (w - 1)
    theta[:, 1, 1] = (ymax - ymin) / (h - 1)
    theta[:, 1, 2] = (ymin + ymax - (h - 1)) / (h - 1)

    # inverse of theta, via the adjugate of its 2x2 linear part
    inv_theta = ToCudaVariable([torch.zeros(roi.size()[0], 2, 3)])[0]
    det = theta[:, 0, 0] * theta[:, 1, 1]
    adj_x = -theta[:, 0, 2] * theta[:, 1, 1]
    adj_y = -theta[:, 0, 0] * theta[:, 1, 2]
    inv_theta[:, 0, 0] = (w - 1) / (xmax - xmin)  # was w / (xmax - xmin); (w - 1) is the exact inverse of theta[:, 0, 0]
    inv_theta[:, 1, 1] = (h - 1) / (ymax - ymin)
    inv_theta[:, 0, 2] = adj_x / det
    inv_theta[:, 1, 2] = adj_y / det

    # make affine grids for forward (crop) and backward (paste) warping
    fw_grid = F.affine_grid(theta, torch.Size((roi.size()[0], 1, dst_size[0], dst_size[1])))
    bw_grid = F.affine_grid(inv_theta, torch.Size((roi.size()[0], 1, src_size[0], src_size[1])))
    return fw_grid, bw_grid, theta
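# Standalone sanity check (illustrative, not repo code) for the theta/inv_theta
# pair built in get_ROI_grid(): composing the forward and inverse affine maps
# on the x-axis should recover the input coordinate.
def _theta_inverse_demo():
    import torch
    w = 854.
    xmin, xmax = torch.tensor([100.]), torch.tensor([400.])
    a = (xmax - xmin) / (w - 1)             # theta[:, 0, 0]
    tx = (xmin + xmax - (w - 1)) / (w - 1)  # theta[:, 0, 2]
    inv_a = (w - 1) / (xmax - xmin)         # inv_theta[:, 0, 0]
    inv_tx = -tx / a                        # inv_theta[:, 0, 2] (= adj_x / det)
    # composing: inv_a * (a * x + tx) + inv_tx == x
    x = torch.tensor([0.3])
    return inv_a * (a * x + tx) + inv_tx    # ~= x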