# Imports required by the snippets below.
import math
import os
import random

import cv2
import numpy as np
import scipy.misc as m
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.misc import imread
from torch.autograd import Variable


def graclus_out(pos, x, cluster):
    # cluster maps each input node to a cluster id; pool node positions and
    # features into one slot per unique cluster.
    uniques = cluster.unique()
    new_nr = uniques.size(0)
    new_pos = torch.zeros(new_nr, 3)
    new_x = torch.zeros(new_nr, x.size(1), x.size(2))
    for i in range(pos.size(0)):  # iterate over all nodes, not just new_nr
        cluster_id = cluster[i]
        new_pos[cluster_id] = pos[i]
        new_x[cluster_id] = x[i]
    return new_pos, new_x
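# Hedged usage sketch (values are illustrative, not from the original source):
# four nodes pooled into two clusters; node features are 5x5 per node.
demo_pos = torch.rand(4, 3)
demo_x = torch.rand(4, 5, 5)
demo_cluster = torch.tensor([0, 0, 1, 1])
pooled_pos, pooled_x = graclus_out(demo_pos, demo_x, demo_cluster)  # (2, 3), (2, 5, 5)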
def JointEmbeddingLoss(fea_txt, fea_img, labels):
    batchsize = fea_img.size(0)
    num_class = fea_txt.size(1)
    score = torch.zeros(batchsize, num_class)
    loss = 0
    acc_batch = 0
    for i in range(batchsize):
        for j in range(num_class):
            score[i][j] = torch.dot(fea_img[i], fea_txt[:, j])
        label_score = score[i, labels[i]]
        for j in range(num_class):
            if j != labels[i]:
                cur_score = score[i][j]
                thresh = cur_score - label_score + 1
                if thresh > 0:
                    loss += thresh
                    txt_diff = fea_txt[:, j] - fea_txt[:, labels[i]]  # computed but unused
        max_score, max_ix = score[i].max(0)
        if max_ix.item() == labels[i]:
            acc_batch += 1
    # acc_batch = 100 * (acc_batch / batchsize)
    denom = batchsize * num_class
    return loss / denom
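# Hedged usage sketch (names and sizes are illustrative): random 16-d image
# and text embeddings, batch of 4 over 4 classes. fea_txt is laid out as
# (dim, num_class) to match the fea_txt[:, j] indexing above.
demo_img = torch.randn(4, 16)
demo_txt = torch.randn(16, 4)
demo_labels = torch.tensor([0, 1, 2, 3])
demo_loss = JointEmbeddingLoss(demo_txt, demo_img, demo_labels)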
def calculate_l2(index, pair_list, output, labels, CUDA=True):
    # mean pairwise L2 distance from output[index] to same-label
    # and different-label samples
    same_label_loss = 0
    same_label_count = 0
    diff_label_loss = 0
    diff_label_count = 0
    for i in pair_list:
        if i == index:
            continue
        if labels[i] == labels[index]:
            same_label_loss += torch.dist(output[i], output[index], 2)
            same_label_count += 1
        else:
            diff_label_loss += torch.dist(output[i], output[index], 2)
            diff_label_count += 1
    if same_label_count == 0:
        if CUDA:
            return torch.Tensor([0]).cuda(0), diff_label_loss / diff_label_count
        else:
            return torch.Tensor([0]), diff_label_loss / diff_label_count
    elif diff_label_count == 0:
        if CUDA:
            return same_label_loss / same_label_count, torch.zeros(1).cuda(0)
        else:
            return same_label_loss / same_label_count, torch.Tensor([0])
    else:
        return same_label_loss / same_label_count, diff_label_loss / diff_label_count
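# Hedged usage sketch (illustrative data): five 8-d embeddings with two
# classes; compare sample 0 against all other pairs on CPU.
demo_out = torch.randn(5, 8)
demo_lbl = torch.tensor([0, 0, 1, 1, 0])
same_d, diff_d = calculate_l2(0, range(5), demo_out, demo_lbl, CUDA=False)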
def __getitem__(self, index):
    img_basename = self.list_sample[index]
    path_img = os.path.join(self.root_img, img_basename)
    path_seg = os.path.join(self.root_seg,
                            img_basename.replace('.jpg', '.png'))
    assert os.path.exists(path_img), '[{}] does not exist'.format(path_img)
    assert os.path.exists(path_seg), '[{}] does not exist'.format(path_seg)

    # load image and label
    try:
        img = imread(path_img, mode='RGB')
        seg = imread(path_seg)
        assert img.ndim == 3
        assert seg.ndim == 2
        assert img.shape[0] == seg.shape[0]
        assert img.shape[1] == seg.shape[1]

        # random scale, crop, flip
        if self.imgSize > 0:
            img, seg = self._scale_and_corp(img, seg, self.imgSize,
                                            self.is_train)
        if random.choice([-1, 1]) > 0:
            img, seg = self._flip(img, seg)

        # image to float
        img = img.astype(np.float32) / 255
        img = img.transpose(2, 0, 1)

        # labels to int, shifted from 0..150 to -1..149 (151 classes in total)
        seg = seg.astype(np.int64) - 1

        # to torch tensors
        image = torch.from_numpy(img)
        segmentation = torch.from_numpy(seg)
    except Exception as e:
        print('Failed loading image/segmentation [{}]: {}'.format(path_img, e))
        # dummy data
        image = torch.zeros(3, self.imgSize, self.imgSize)
        segmentation = -1 * torch.ones(self.segSize, self.segSize).long()
        return image, segmentation, img_basename

    # subtract mean and divide by std
    image = self.img_transform(image)
    return image, segmentation, img_basename
def matrix2angle(matrix):
    """
    Convert rotation matrices to Euler angles.
    ref: https://github.com/matthew-brett/transforms3d/blob/master/transforms3d/euler.py
    input size:  ... * 3 * 3
    output size: ... * 3
    """
    i, j, k = 0, 1, 2
    dims = list(matrix.shape)
    M = matrix.contiguous().view(-1, 3, 3)

    cy = torch.sqrt(M[:, i, i] * M[:, i, i] + M[:, j, i] * M[:, j, i])

    if torch.max(cy).item() > 1e-15 * 4:
        ax = torch.atan2(M[:, k, j], M[:, k, k])
        ay = torch.atan2(-M[:, k, i], cy)
        az = torch.atan2(M[:, j, i], M[:, i, i])
    else:
        ax = torch.atan2(-M[:, j, k], M[:, j, j])
        ay = torch.atan2(-M[:, k, i], cy)
        az = torch.zeros_like(ax)  # gimbal lock: the third angle is undetermined

    return torch.cat([torch.unsqueeze(ax, -1),
                      torch.unsqueeze(ay, -1),
                      torch.unsqueeze(az, -1)], -1).view(dims[:-1])
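# Hedged sanity check (illustrative only): identity rotations should map to
# zero Euler angles.
eye_batch = torch.eye(3).expand(2, 3, 3)
demo_angles = matrix2angle(eye_batch)  # shape (2, 3), all zeros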
def forward(self, rnn_inputs):
    # TODO careful here, check
    num_steps = rnn_inputs.shape[1]
    # TODO: beware, rnn_inputs needs to be a vector of
    # shape (seq_len, batch_input_size)
    output, hidden = self.rnn(rnn_inputs, (self.h0, self.c0))

    # add softmax layer
    # TODO check in corresponding tf code following line
    # also possible: self.softmax = nn.Softmax(dim=0); out = self.softmax(output)
    out = F.softmax(output, dim=0)

    if not D_DIFF and G_DIFF:  # depends on D_DIFF
        W = torch.randn((self.state_size, 1))
        b = torch.zeros([1])
        # logits_t = torch.matmul(output, W) + b
        logits_t = torch.mm(output, W) + b
        logits_t = F.elu(logits_t) + 1
        logits_t = torch.cumsum(logits_t, dim=1)
        out = logits_t

    if MARK:
        W = torch.randn((self.state_size, 1))
        b = torch.zeros([1])
        logits_t = torch.mm(output, W) + b
        # redeclare W, b for the mark distribution
        W = torch.randn((self.state_size, DIM_SIZE))
        b = torch.zeros([DIM_SIZE])
        logits_prob = torch.mm(output, W) + b
        logits_prob = F.softmax(logits_prob, dim=1)
        logits = torch.cat([logits_t, logits_prob], dim=1)
        logits = logits.view(self.batch_size, num_steps, DIM_SIZE + 1)  # original used resize_
        out = logits
    else:
        out = out.view(self.batch_size, num_steps, 1)  # original used deprecated resize
    return out, hidden
def __init__(self, d_model, max_len=512):
    """
    d_model: a scalar; the model dimension (512 by default in the paper).
    max_len: a scalar; the maximum length of a text sequence.
    Positional encodings and word embeddings share the same dimension (512)
    so that the two can be summed. Each dimension of the Transformer
    positional encoding corresponds to a sinusoid, with wavelengths forming
    a geometric progression from 2*pi to 10000*2*pi.
    Note: BERT/GPT use absolute positional encodings, while the Transformer
    uses relative positional encoding.
    """
    super().__init__()
    pe = torch.zeros(max_len, d_model).float()
    pe.requires_grad = False
    position = torch.arange(0, max_len).float().unsqueeze(1)
    div_term = (torch.arange(0, d_model, 2).float()
                * -(math.log(10000.0) / d_model)).exp()
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    pe = pe.unsqueeze(0)
    self.register_buffer('pe', pe)
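# Hedged usage sketch: the snippet above is the __init__ of a typical
# PositionalEncoding nn.Module. The class wrapper and forward below are
# assumptions for illustration, not taken from the original source.
class PositionalEncodingDemo(nn.Module):
    def __init__(self, d_model, max_len=512):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float()
                    * -(math.log(10000.0) / d_model)).exp()
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):  # x: (batch, seq_len, d_model)
        # add the stored sinusoid table, truncated to the sequence length
        return x + self.pe[:, :x.size(1)]

demo_pe = PositionalEncodingDemo(d_model=16, max_len=32)
demo_encoded = demo_pe(torch.zeros(2, 10, 16))  # (2, 10, 16)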
def extract(self, root_dir: str, vocabs):
    path = "{}/output_images_features".format(root_dir)
    if not os.path.exists(path):
        os.makedirs(path)
    for i in range(len(vocabs)):
        vocab = vocabs[i]
        temp = torch.zeros(1, 2048)
        vocab_images_path = "{}/output_vocab_images".format(root_dir)
        vocab_features = "{}/{}".format(path, vocab)
        vocab_images = [
            "{}/{}".format(vocab_images_path, p)
            for p in os.listdir(vocab_images_path)
        ]
        for j in range(len(vocab_images)):
            vocab_image = vocab_images[j]
            image = cv2.imread(vocab_image)
            image = cv2.resize(image, (224, 224))  # resnet101 input size
            image = np.swapaxes(image, 0, 2)
            image = np.swapaxes(image, 1, 2)  # HWC -> CHW
            image = Variable(torch.from_numpy(image).float().cuda().unsqueeze(0))
            temp += model(image).squeeze(0).view(1, 2048)
        temp /= len(vocab_images)  # average the features (original hard-coded 10)
        torch.save(temp, vocab_features)
def create_new_state(self, batch_size: int):
    # one zeroed state row of width self.N per batch element
    return torch.zeros(batch_size, self.N)
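# Hedged usage sketch: a minimal holder class (illustrative, not from the
# original source) with N = 8 shows the returned shape.
class _StateDemo:
    N = 8
    create_new_state = create_new_state

demo_state = _StateDemo().create_new_state(4)  # torch.zeros(4, 8)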
def forward(self, loc, conf, dbox_list):
    """
    Run the forward pass.
    input:
      loc:       offset predictions
      conf:      detection confidences
      dbox_list: list of DBoxes [8732, 4]
    output:
      torch.Size([batch_num, 21, top_k, BBox info])
    """
    # sizes
    num_batch = loc.size(0)    # batch size
    num_dbox = loc.size(1)     # number of DBoxes (8732)
    num_classes = loc.size(2)  # number of classes

    # normalize conf with softmax
    conf = self.softmax(conf)

    # output tensor [batch_num, 21, top_k, BBox info]
    output = torch.zeros(num_batch, num_classes, self.top_k, 5)

    # reorder the conf columns
    conf_pred = conf.transpose(2, 1)

    # loop over the mini-batch
    for i in range(num_batch):
        # decode loc and DBox info into BBoxes
        decoded_boxes = decode(loc[i], dbox_list)

        # copy of conf
        conf_score = conf_pred[i].clone()

        # loop over classes (start at 1 to skip the background class, idx 0)
        for cl in range(1, num_classes):
            # conf_score: [21, 8732] -> per-DBox score for each class
            cs = conf_score[cl]
            # 1 where the confidence exceeds the threshold, else 0 (gt: greater_than)
            c_mask = cs.gt(self.conf_threshold)
            scores = cs[c_mask]  # BBoxes above the threshold
            # nelement counts the elements; skip when scores is empty
            if scores.nelement() == 0:
                continue
            # resize c_mask ([8732]) so it also applies to decoded_boxes
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)  # tensor([8732, 4])
            # decoded_boxes[l_mask] is flattened to 1-D, so view it back to
            # tensor([num BBoxes above threshold, 4])
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # non-maximum suppression removes overlapping BBoxes
            # ids:   indices of surviving BBoxes, in descending conf order
            # count: number of surviving BBoxes
            ids, count = non_maximun_supression(boxes, scores,
                                                self.nms_threshold, self.top_k)
            # store the BBox results in output
            output[i, cl, :count] = torch.cat(
                (scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)

    return output  # tensor([1, 21, 200, 5]): per-image BBox info (after NMS)
def __getitem__(self, index):
    """__getitem__

    :param index:

    rstrip(): strips the given trailing characters (whitespace by default).
    os.path.basename(): returns the last component of a path; returns an
    empty string if the path ends with / or \\.
    """
    img_path = self.files[self.split][index].rstrip()
    label_path = os.path.join(self.annotations_base,
                              os.path.basename(img_path)[:-4] + '.png')
    #print(img_path)
    #print(label_path)

    # assert that the paths exist
    assert os.path.exists(img_path), '[{}] does not exist'.format(img_path)
    assert os.path.exists(label_path), '[{}] does not exist'.format(label_path)

    # load image and label
    try:
        img = m.imread(img_path, mode='RGB')
        #img = np.array(img, dtype=np.uint8)
        #img = img[:, :, ::-1]  # RGB --> BGR !!!
        #print(img.shape)  # eg. (512, 512, 3)
        label = m.imread(label_path)
        #label = np.array(label, dtype=np.uint8)
        #print(label.shape)  # eg. (512, 512)
        assert img.ndim == 3
        assert label.ndim == 2
        assert img.shape[0] == label.shape[0]
        assert img.shape[1] == label.shape[1]

        # flip
        if self.split == "training":
            random_flip = np.random.choice([-1, 0, 1, 2])
            # print(random_flip)
            if random_flip == 1:
                #img, label = self._flip(img, label)
                img = cv2.flip(img, 1)
                label = cv2.flip(label, 1)
            elif random_flip == 0:
                img = cv2.flip(img, 0)
                label = cv2.flip(label, 0)
            else:
                img = img.copy()
                label = label.copy()
            """
            elif random_flip == -1:
                img = cv2.flip(img, -1)
                label = cv2.flip(label, -1)
            """

        # scale and crop
        if self.img_size[0] > 0 and self.img_size[1] > 0:
            img, label = self._scale_and_corp(img, label, self.img_size,
                                              self.split)
        # order defaults to 1 (bilinear); after resize the image range is [0, 1] again
        # img = transform.resize(img, (self.img_size[0], self.img_size[1]), order=1)
        # img = m.imresize(img, (self.img_size[0], self.img_size[1]), interp='bilinear')  # uint8 with RGB mode
        # img = cv2.resize(img.copy(), (self.img_size[0], self.img_size[1]))

        # image to float
        # image_ori = img.copy()  # original image
        # img = img.astype(np.float32)  # 64
        img = img.astype(np.float32)[:, :, ::-1]  # RGB --> BGR !!!
        # label = label.astype(np.float32)

        if self.img_norm:
            # resizing scales images from 0 to 255, so divide by 255.0
            img = img.astype(np.float32) / 255.0
        img = img.transpose((2, 0, 1))  # NHWC --> NCHW

        # classes = np.unique(label)
        # np.unique(): removes duplicate elements from a 1-D array or list and
        # returns a new sorted array without duplicates
        # label = label.astype(np.float64)
        # label = m.imresize(label, (self.img_size[0], self.img_size[1]), interp='nearest', mode='F')
        # print(classes)
        # print('label', np.unique(label))
        # label = m.imresize(label, (self.img_size[0], self.img_size[1]), interp='nearest')

        # labels to int, shifted from 0..150 to -1..149 (151 classes in total)
        label = label.astype(np.int64) - 1
        # label = label.astype(np.int)
        """
        if not np.all(classes == np.unique(label)):
            print("WARN: resizing labels yielded fewer classes")
        """
        if not np.all(np.unique(label) < self.n_classes):
            raise ValueError("Segmentation map contained invalid class values")

        if self.augmentations is not None:
            img, label = self.augmentations(img, label)
        if self.is_transform:
            img, label = self.transform(img, label)

        # to torch tensors
        image = torch.from_numpy(img)
        # segmentation = torch.from_numpy(label)
        segmentation = torch.from_numpy(label).long()
        # segmentation = segmentation - 1
    except Exception as e:
        print('Failed loading image/label [{}]: {}'.format(img_path, e))
        # dummy data
        image = torch.zeros(3, self.img_size[0], self.img_size[1])  # (C, H, W)
        segmentation = -1 * torch.ones(self.img_size[0], self.img_size[1]).long()
        return image, segmentation, img_path

    # subtract mean and divide by std
    image = self.img_transfrom(image)
    return image, segmentation, img_path
def initHidden(self):
    return torch.zeros(1, 1, self.hidden_size, device=device)
def __getitem__(self, index):
    img_path = self.files[self.split][index].rstrip()
    label_path = os.path.join(self.annotations_base,
                              os.path.basename(img_path)[:-4] + '.png')

    # assert that the paths exist
    assert os.path.exists(img_path), '[{}] does not exist'.format(img_path)
    assert os.path.exists(label_path), '[{}] does not exist'.format(label_path)

    # load image and label
    try:
        img = m.imread(img_path, mode='RGB')
        #img = np.array(img, dtype=np.uint8)
        img = img[:, :, ::-1]  # RGB --> BGR !!!
        #print(img.shape)  # eg. (512, 512, 3)
        label = m.imread(label_path)
        #label = np.array(label, dtype=np.uint8)
        #print(label.shape)  # eg. (512, 512)
        assert img.ndim == 3
        assert label.ndim == 2
        assert img.shape[0] == label.shape[0]
        assert img.shape[1] == label.shape[1]

        """
        # random scale, crop, flip
        if self.img_size[0] > 0 and self.img_size[1] > 0:
            img, seg = self._scale_and_corp(img, label, self.img_size, self.split)
        """
        # flip
        if self.split == "training":
            random_flip = np.random.choice([0, 1])
            if random_flip == 1:
                img, label = self._flip(img, label)
                #img = cv2.flip(img, 1)
                #label = cv2.flip(label, 1)

        img = m.imresize(img, (self.img_size[0], self.img_size[1]),
                         interp='bilinear')  # uint8 with RGB mode
        #img = cv2.resize(img.copy(), (self.img_size[0], self.img_size[1]))

        # image to float
        # resizing scales images from 0 to 255, so divide by 255.0
        img = img.astype(np.float32) / 255.0
        img = img.transpose((2, 0, 1))  # NHWC --> NCHW

        label = m.imresize(label, (self.img_size[0], self.img_size[1]),
                           interp='nearest', mode='F')
        # labels to int, shifted from 0..150 to -1..149 (151 classes in total)
        label = label.astype(np.int64) - 1

        # to torch tensors
        image = torch.from_numpy(img)
        segmentation = torch.from_numpy(label).long()
    except Exception as e:
        print('Failed loading image/label [{}]: {}'.format(img_path, e))
        # dummy data
        image = torch.zeros(3, self.img_size[0], self.img_size[1])  # (C, H, W)
        segmentation = -1 * torch.ones(self.img_size[0], self.img_size[1]).long()
        return image, segmentation, img_path

    # subtract mean and divide by std
    image = self.img_transfrom(image)
    return image, segmentation, img_path
def update_recency_map(self, nn_indices):
    # mark each retrieved index as fresh (+1), then age every slot by one step
    mask = Variable(torch.ones(nn_indices.numel()).cuda())
    self.recency_map.scatter_add_(0, nn_indices.view(-1), mask)
    self.recency_map.add_(-1).clamp_(0, 100)
    _, self.stale_ind = self.recency_map.min(0)