def _get_single_item(self, index):
    start_ind, end_ind, pid, label, camid = self.seqset[index]
    imgseq = []
    flowseq = []
    for ind in range(start_ind, end_ind):
        fname = self.identities[pid][camid][ind]
        fpath_img = osp.join(self.root[0], fname)
        imgrgb = Image.open(fpath_img).convert('RGB')
        fpath_flow = osp.join(self.root[1], fname)
        flowrgb = Image.open(fpath_flow).convert('RGB')
        imgseq.append(imgrgb)
        flowseq.append(flowrgb)
    # pad short sequences by repeating the last frame
    while len(imgseq) < self.seq_len:
        imgseq.append(imgrgb)
        flowseq.append(flowrgb)
    seq = [imgseq, flowseq]
    if self.transform is not None:
        seq = self.transform(seq)
    img_tensor = torch.stack(seq[0], 0)
    if len(self.root) == 2:
        flow_tensor = torch.stack(seq[1], 0)
    else:
        flow_tensor = None
    return img_tensor, flow_tensor, pid, camid
def lk_forward_backward_batch(features, locations, window, steps):
    sequence, C, H, W = list(features.size())
    seq, num_pts, _ = list(locations.size())
    assert seq == sequence, '{:} vs {:}'.format(features.size(), locations.size())
    # forward tracking: frame 0 -> frame T-1
    previous_pts = [locations[0]]
    for iseq in range(1, sequence):
        feature_old = features.narrow(0, iseq - 1, 1)
        feature_new = features.narrow(0, iseq, 1)
        nextPts = lk_tensor_track_batch(feature_old, feature_new,
                                        previous_pts[iseq - 1], window, steps, None)
        previous_pts.append(nextPts)
    # forward-backward check: track the forward result back to frame 0
    fback_pts = [None] * (sequence - 1) + [previous_pts[-1]]
    for iseq in range(sequence - 2, -1, -1):
        feature_old = features.narrow(0, iseq + 1, 1)
        feature_new = features.narrow(0, iseq, 1)
        backPts = lk_tensor_track_batch(feature_old, feature_new,
                                        fback_pts[iseq + 1], window, steps, None)
        fback_pts[iseq] = backPts
    # backward tracking: frame T-1 -> frame 0, starting from the given locations
    back_pts = [None] * (sequence - 1) + [locations[-1]]
    for iseq in range(sequence - 2, -1, -1):
        feature_old = features.narrow(0, iseq + 1, 1)
        feature_new = features.narrow(0, iseq, 1)
        backPts = lk_tensor_track_batch(feature_old, feature_new,
                                        back_pts[iseq + 1], window, steps, None)
        back_pts[iseq] = backPts
    return torch.stack(previous_pts), torch.stack(fback_pts), torch.stack(back_pts)
def forward(self, hidden, encoder_outputs, attn_mask):
    # hidden: (batch, hidden_dim), the top layer of the decoder
    # encoder_outputs: (batch, seq_len, hidden_dim)
    hidden = hidden.squeeze(0)
    batch_size = hidden.size()[0]
    attn_energies = []
    for i in range(batch_size):
        attn_energies.append(self.score(hidden[i], encoder_outputs[i]))
    attn_energies = torch.stack(attn_energies).squeeze(0)  # (batch, seq_len)
    if attn_mask is not None:
        # mask out padded positions before the softmax
        attn_energies = attn_mask * attn_energies
        attn_energies[attn_energies == 0] = -1e10
    if attn_mask is None:
        attn_energies = attn_energies.view(1, -1)
    attn_energies = self.softmax(attn_energies)
    # weighted sum of encoder outputs per batch element
    context_vectors = []
    for i in range(batch_size):
        context_vectors.append(torch.matmul(attn_energies[i], encoder_outputs[i]))
    context_vectors = torch.stack(context_vectors)
    return context_vectors
def process_batch_for_length(self, sequences, c_sequences):
    """ Assemble and pad data. """
    assert len(sequences) == len(c_sequences)
    lengths = Variable(self.tensor_type([len(seq) for seq in sequences]))
    max_length = max(len(seq) for seq in sequences)
    max_c_length = max(max(len(chars) for chars in seq) for seq in c_sequences)

    def _padded(seq, max_length):
        _padded_seq = self.tensor_type(max_length).zero_()
        _padded_seq[:len(seq)] = self.tensor_type(seq)
        return _padded_seq

    sequences = Variable(torch.stack(
        [_padded(seq, max_length) for seq in sequences]))

    def _padded_char(seq, max_length, max_c_length):
        _padded = self.tensor_type(max_length, max_c_length).zero_()
        for ind, tok in enumerate(seq):
            _padded[ind, :len(tok)] = self.tensor_type(tok)
        return _padded

    c_sequences = Variable(torch.stack([
        _padded_char(seq, max_length, max_c_length) for seq in c_sequences]))

    return (sequences, c_sequences, lengths)
def __getitem__(self, index):
    if self.mode == 'test':
        img_path, img_name = self.imgs[index]
        img = Image.open(os.path.join(img_path, img_name + '.jpg')).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img_name, img

    img_path, mask_path = self.imgs[index]
    img = Image.open(img_path).convert('RGB')
    if self.mode == 'train':
        mask = sio.loadmat(mask_path)['GTcls']['Segmentation'][0][0]
        mask = Image.fromarray(mask.astype(np.uint8))
    else:
        mask = Image.open(mask_path)

    if self.joint_transform is not None:
        img, mask = self.joint_transform(img, mask)

    if self.sliding_crop is not None:
        img_slices, mask_slices, slices_info = self.sliding_crop(img, mask)
        if self.transform is not None:
            img_slices = [self.transform(e) for e in img_slices]
        if self.target_transform is not None:
            mask_slices = [self.target_transform(e) for e in mask_slices]
        img, mask = torch.stack(img_slices, 0), torch.stack(mask_slices, 0)
        return img, mask, torch.LongTensor(slices_info)
    else:
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask
def _construct_previous(self, layer, direction, inputs, tree, idx):
    if direction == 'up':
        oidx = tree.children_idx(idx)
    else:
        oidx = tree.parents_idx(idx)

    if oidx:
        # recurse into the neighbours and stack their states
        h_prev, c_prev = [], []
        for i in oidx:
            h_prev_i, c_prev_i = self._upward_downward(layer, direction,
                                                       inputs, tree, i)
            h_prev.append(h_prev_i)
            c_prev.append(c_prev_i)
        h_prev = torch.stack(h_prev, 1)
        c_prev = torch.stack(c_prev, 1)
    elif inputs.is_cuda:
        h_prev = torch.zeros(self.hidden_size, 1).cuda()
        c_prev = torch.zeros(self.hidden_size, 1).cuda()
    else:
        h_prev = torch.zeros(self.hidden_size, 1)
        c_prev = torch.zeros(self.hidden_size, 1)
    return oidx, (h_prev, c_prev)
def singleTagLoss(pred_tag, keypoints):
    """Associative embedding loss for one image."""
    # note: indexing such as [:, 0] after mean/sum relies on the keepdim
    # semantics of reductions in early PyTorch versions
    eps = 1e-6
    tags = []
    pull = 0
    for i in keypoints:
        tmp = []
        for j in i:
            if j[1] > 0:
                tmp.append(pred_tag[j[0]])
        if len(tmp) == 0:
            continue
        tmp = torch.stack(tmp)
        tags.append(torch.mean(tmp, dim=0))
        pull = pull + torch.mean((tmp - tags[-1].expand_as(tmp)) ** 2)

    if len(tags) == 0:
        return make_input(torch.zeros([1]).float()), make_input(torch.zeros([1]).float())

    tags = torch.stack(tags)[:, 0]
    num = tags.size()[0]
    size = (num, num, tags.size()[1])
    A = tags.unsqueeze(dim=1).expand(*size)
    B = A.permute(1, 0, 2)
    diff = A - B
    diff = torch.pow(diff, 2).sum(dim=2)[:, :, 0]
    push = torch.exp(-diff)
    push = (torch.sum(push) - num)
    return push / ((num - 1) * num + eps) * 0.5, pull / (num + eps)
def predict(self, x_de, x_en):
    bs = x_de.size(0)
    emb_de = self.embedding_de(x_de)  # bs, n_de, word_dim
    emb_en = self.embedding_en(x_en)  # bs, n_en, word_dim
    h = Variable(torch.zeros(self.n_layers * self.directions, bs, self.hidden_dim).cuda())
    c = Variable(torch.zeros(self.n_layers * self.directions, bs, self.hidden_dim).cuda())
    # enc_h is bs, n_de, hiddensz * n_directions; h and c are n_layers * n_directions, bs, hiddensz
    enc_h, _ = self.encoder(emb_de, (h, c))
    dec_h, _ = self.decoder(emb_en, (h, c))
    if self.directions == 2:
        enc_h = self.dim_reduce(enc_h)  # bs, n_de, hiddensz
    # (bs, n_de, hiddensz) x (bs, hiddensz, n_en) = (bs, n_de, n_en)
    scores = torch.bmm(enc_h, dec_h.transpose(1, 2))
    y = [Variable(torch.cuda.LongTensor([sos_token] * bs))]  # bs
    self.attn = []
    for t in range(x_en.size(1) - 1):  # iterate over English words, with teacher forcing
        attn_dist = F.softmax(scores[:, :, t], dim=1)  # bs, n_de
        self.attn.append(attn_dist.data)
        if self.attn_type == "hard":
            # for each batch element, select the most likely German word to attend to
            _, argmax = attn_dist.max(1)
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(
                -1, argmax.data.unsqueeze(1), 1).cuda())
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        # hard and soft attention differ only in using a one-hot vs. a distribution
        # context is bs, hiddensz
        pred = self.vocab_layer(torch.cat([dec_h[:, t, :], context], 1))  # bs, len(EN.vocab)
        _, next_token = pred.max(1)  # bs
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)  # bs, n_en, n_de (for visualization)
    y = torch.stack(y, 0).transpose(0, 1)  # bs, n_en
    return y, self.attn
def random_sample(batch):
    imgids, sentids, imgfeats, textfeats = batch
    ### image as anchor
    anchor_img, positive_text, negative_text = [], [], []
    for i, iid in enumerate(imgids):
        for j, iid2 in enumerate(imgids):
            if iid != iid2:
                anchor_img.append(imgfeats[i])
                positive_text.append(textfeats[i])
                negative_text.append(textfeats[j])
    anchor_img = torch.stack(anchor_img)
    positive_text = torch.stack(positive_text)
    negative_text = torch.stack(negative_text)
    ### text as anchor
    anchor_text, positive_img, negative_img = [], [], []
    for i, iid in enumerate(imgids):
        for j, iid2 in enumerate(imgids):
            if iid != iid2:
                anchor_text.append(textfeats[i])
                positive_img.append(imgfeats[i])
                negative_img.append(imgfeats[j])
    anchor_text = torch.stack(anchor_text)
    positive_img = torch.stack(positive_img)
    negative_img = torch.stack(negative_img)
    positive_text = positive_text.type(torch.FloatTensor)
    negative_text = negative_text.type(torch.FloatTensor)
    return anchor_img, positive_text, negative_text, anchor_text, positive_img, negative_img
def __getitem__(self, index):
    img_path, mask_path = self.imgs[index]
    img, mask = Image.open(img_path).convert('RGB'), Image.open(mask_path)
    mask = np.array(mask)
    mask_copy = mask.copy()
    # remap raw label ids to training ids
    for k, v in self.id_to_trainid.items():
        mask_copy[mask == k] = v
    mask = Image.fromarray(mask_copy.astype(np.uint8))

    if self.joint_transform is not None:
        img, mask = self.joint_transform(img, mask)
    if self.sliding_crop is not None:
        img_slices, mask_slices, slices_info = self.sliding_crop(img, mask)
        if self.transform is not None:
            img_slices = [self.transform(e) for e in img_slices]
        if self.target_transform is not None:
            mask_slices = [self.target_transform(e) for e in mask_slices]
        img, mask = torch.stack(img_slices, 0), torch.stack(mask_slices, 0)
        return img, mask, torch.LongTensor(slices_info)
    else:
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            mask = self.target_transform(mask)
        return img, mask
def forward(self, z_seq, a_seq, term_seq):
    h = torch.zeros(1, self.h_size).cuda()
    z_losses = []
    term_losses = []
    for t in range(len(term_seq) - 1):
        inter = self.encode_az(a_seq[t], z_seq[t])
        h = self.update_h(h, inter)
        z_pred, term_pred = self.predict_output(h, inter)
        z_loss = torch.mean((z_seq[t + 1] - z_pred) ** 2)
        term_loss = F.binary_cross_entropy_with_logits(input=term_pred,
                                                       target=term_seq[t + 1])
        z_losses.append(z_loss)
        term_losses.append(term_loss)
    z_loss = torch.mean(torch.stack(z_losses))
    term_loss = torch.mean(torch.stack(term_losses))
    loss = z_loss + term_loss
    return loss, z_loss, term_loss
def default_collate(batch):
    "Puts each data field into a tensor with outer dimension batch size"
    if torch.is_tensor(batch[0]):
        out = None
        if _use_shared_memory:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = batch[0].storage()._new_shared(numel)
            out = batch[0].new(storage)
        return torch.stack(batch, 0, out=out)
    elif type(batch[0]).__module__ == 'numpy':
        elem = batch[0]
        if type(elem).__name__ == 'ndarray':
            return torch.stack([torch.from_numpy(b) for b in batch], 0)
        if elem.shape == ():  # scalars
            py_type = float if elem.dtype.name.startswith('float') else int
            return numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
    elif isinstance(batch[0], int):
        return torch.LongTensor(batch)
    elif isinstance(batch[0], float):
        return torch.DoubleTensor(batch)
    elif isinstance(batch[0], string_classes):
        return batch
    elif isinstance(batch[0], collections.Mapping):
        return {key: default_collate([d[key] for d in batch]) for key in batch[0]}
    elif isinstance(batch[0], collections.Sequence):
        transposed = zip(*batch)
        return [default_collate(samples) for samples in transposed]
    raise TypeError(("batch must contain tensors, numbers, dicts or lists; found {}"
                     .format(type(batch[0]))))
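# Usage sketch for default_collate (illustrative, not part of the original
# source; assumes the module-level globals this excerpt relies on, such as
# _use_shared_memory and string_classes). It shows how the function recurses
# through mappings and stacks tensor leaves along a new batch dimension.
import torch

batch = [
    {"x": torch.randn(4), "y": 1},
    {"x": torch.randn(4), "y": 0},
    {"x": torch.randn(4), "y": 1},
]
out = default_collate(batch)
print(out["x"].shape)  # torch.Size([3, 4]): tensors stacked on a new dim 0
print(out["y"])        # the three int labels collated into a LongTensor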
def predict(self, x, attn_type="hard"):
    # predict with greedy decoding
    emb = self.embedding(x)
    h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
    enc_h, _ = self.encoder(emb, (h, c))
    y = [Variable(torch.zeros(x.size(0)).long())]
    self.attn = []
    for t in range(x.size(1)):
        emb_t = self.embedding(y[-1])
        dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
        scores = torch.bmm(enc_h, dec_h.transpose(1, 2)).squeeze(2)
        attn_dist = F.softmax(scores, dim=1)
        self.attn.append(attn_dist.data)
        if attn_type == "hard":
            _, argmax = attn_dist.max(1)
            one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(
                -1, argmax.data.unsqueeze(1), 1))
            context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)
        else:
            context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
        pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
        _, next_token = pred.max(1)
        y.append(next_token)
    self.attn = torch.stack(self.attn, 0).transpose(0, 1)
    return torch.stack(y, 0).transpose(0, 1)
def forward(self, input_):
    # init hidden state with xavier
    vert_state = torch.zeros(input_[0].size(1), self.vert_state_dim).cuda()
    edge_state = torch.zeros(input_[1].size(1), self.edge_state_dim).cuda()
    batch_size = input_[0].size(0)
    vert_input = input_[0]
    edge_input = input_[1]
    vert_state_list = []
    edge_state_list = []
    # todo: can this be parallelized?
    for i in range(batch_size):
        torch.nn.init.xavier_uniform(vert_state)
        torch.nn.init.xavier_uniform(edge_state)
        vert_state = self.vert_gru(vert_input[i], vert_state)
        edge_state = self.edge_gru(edge_input[i], edge_state)
        # todo: check whether this is correct; the TF code keeps the hidden
        # state in a separate global variable
        for step in range(self.num_steps):
            edge_context = self.get_edge_context(edge_state, vert_state)
            vert_context = self.get_vert_context(vert_state, edge_state)
            edge_state = self.edge_gru(edge_context, edge_state)
            vert_state = self.vert_gru(vert_context, vert_state)
        vert_state_list.append(vert_state)
        edge_state_list.append(edge_state)
    return torch.stack(vert_state_list), torch.stack(edge_state_list)
def rollouts_batch(self, batch):
    batch_size = batch.size()[0]
    batch_rest = batch.size()[1:]
    if batch_size == 1:
        obs_batch_v = batch.expand(batch_size * self.n_actions, *batch_rest)
    else:
        obs_batch_v = batch.unsqueeze(1)
        obs_batch_v = obs_batch_v.expand(batch_size, self.n_actions, *batch_rest)
        obs_batch_v = obs_batch_v.contiguous().view(-1, *batch_rest)
    actions = np.tile(np.arange(0, self.n_actions, dtype=np.int64), batch_size)
    step_obs, step_rewards = [], []

    for step_idx in range(self.rollout_steps):
        actions_t = torch.tensor(actions).to(batch.device)
        obs_next_v, reward_v = self.net_em(obs_batch_v, actions_t)
        step_obs.append(obs_next_v.detach())
        step_rewards.append(reward_v.detach())
        # don't need actions for the last step
        if step_idx == self.rollout_steps - 1:
            break
        # combine the delta from EM into new observation
        cur_plane_v = obs_batch_v[:, 1:2]
        new_plane_v = cur_plane_v + obs_next_v
        obs_batch_v = torch.cat((cur_plane_v, new_plane_v), dim=1)
        # select actions
        logits_v, _ = self.net_policy(obs_batch_v)
        probs_v = F.softmax(logits_v, dim=1)
        probs = probs_v.data.cpu().numpy()
        actions = self.action_selector(probs)

    step_obs_v = torch.stack(step_obs)
    step_rewards_v = torch.stack(step_rewards)
    flat_enc_v = self.encoder(step_obs_v, step_rewards_v)
    return flat_enc_v.view(batch_size, -1)
def encode(self, article, art_lens=None):
    size = (
        self._init_enc_h.size(0),
        len(art_lens) if art_lens else 1,
        self._init_enc_h.size(1)
    )
    init_enc_states = (
        self._init_enc_h.unsqueeze(1).expand(*size),
        self._init_enc_c.unsqueeze(1).expand(*size)
    )
    enc_art, final_states = lstm_encoder(
        article, self._enc_lstm, art_lens, init_enc_states, self._embedding
    )
    if self._enc_lstm.bidirectional:
        h, c = final_states
        final_states = (
            torch.cat(h.chunk(2, dim=0), dim=2),
            torch.cat(c.chunk(2, dim=0), dim=2)
        )
    init_h = torch.stack([self._dec_h(s) for s in final_states[0]], dim=0)
    init_c = torch.stack([self._dec_c(s) for s in final_states[1]], dim=0)
    init_dec_states = (init_h, init_c)
    attention = torch.matmul(enc_art, self._attn_wm).transpose(0, 1)
    init_attn_out = self._projection(torch.cat(
        [init_h[-1], sequence_mean(attention, art_lens, dim=1)], dim=1
    ))
    return attention, (init_dec_states, init_attn_out)
def forward(self, y_pred, y_true, eps=1e-6):
    # NOTE: this loss is disabled; the early return below short-circuits the
    # soft-F1 computation that follows (and returns the exception class
    # rather than raising it).
    return NotImplementedError
    torch.nn.modules.loss._assert_no_grad(y_true)
    assert y_pred.shape[1] == 2
    # overlap between the predicted and true spans
    same_left = torch.stack([y_true[:, 0], y_pred[:, 0]], dim=1)
    same_left, _ = torch.max(same_left, dim=1)
    same_right = torch.stack([y_true[:, 1], y_pred[:, 1]], dim=1)
    same_right, _ = torch.min(same_right, dim=1)
    same_len = same_right - same_left + 1  # (batch_size,)
    # clamp the overlap at zero
    same_len = torch.stack([same_len, torch.zeros_like(same_len)], dim=1)
    same_len, _ = torch.max(same_len, dim=1)
    same_len = same_len.type(torch.float)
    pred_len = (y_pred[:, 1] - y_pred[:, 0] + 1).type(torch.float)
    true_len = (y_true[:, 1] - y_true[:, 0] + 1).type(torch.float)
    pre = same_len / (pred_len + eps)
    rec = same_len / (true_len + eps)
    f1 = 2 * pre * rec / (pre + rec + eps)
    return -torch.mean(f1)
def setUp(self, size=(2, 5), batch=3, dtype=torch.float64, device=None,
          seed=None, mu=None, cov=None, A=None, b=None):
    '''Test the correctness of batch implementation of mean().

    This function will stack `[1 * mu, 2 * mu, ..., batch * mu]`.
    Then, it will see whether the batch output is accurate or not.

    Args:
        size: Tuple size of matrix A.
        batch: The batch size > 0.
        dtype: data type.
        device: In which device.
        seed: Seed for the random number generator.
        mu: To test a specific mean mu.
        cov: To test a specific covariance matrix.
        A: To test a specific A matrix.
        b: To test a specific bias b.
    '''
    if seed is not None:
        torch.manual_seed(seed)
    if A is None:
        A = torch.rand(size, dtype=dtype, device=device)
    if b is None:
        b = torch.rand(size[0], dtype=dtype, device=device)
    if mu is None:
        mu = torch.rand(size[1], dtype=dtype, device=device)
    if cov is None:
        cov = rand.definite(size[1], dtype=dtype, device=device,
                            positive=True, semi=False, norm=10**2)
    self.A = A
    self.b = b
    var = torch.diag(cov)
    self.batch_mean = torch.stack([(i + 1) * mu for i in range(batch)])
    self.batch_cov = torch.stack([(i + 1) * cov for i in range(batch)])
    self.batch_var = torch.stack([(i + 1) * var for i in range(batch)])
def plot_rec(x, netEC, netEP, netD):
    x_c = x[0]
    x_p = x[np.random.randint(1, opt.max_step)]
    h_c = netEC(x_c)
    h_p = netEP(x_p)
    rec = netD([h_c, h_p])
    x_c, x_p, rec = x_c.data, x_p.data, rec.data
    fname = '%s/rec/rec_test.png' % (opt.log_dir)
    # build rows of [content frame, pose frame, reconstruction]
    comparison = None
    for i in range(len(x_c)):
        if comparison is None:
            comparison = torch.stack([x_c[i], x_p[i], rec[i]])
        else:
            new_comparison = torch.stack([x_c[i], x_p[i], rec[i]])
            comparison = torch.cat([comparison, new_comparison])
    print('comparison: ', comparison.shape)
    if not os.path.exists(os.path.dirname(fname)):
        os.makedirs(os.path.dirname(fname))
    save_image(comparison.cpu(), fname, nrow=3)
def collate_fn(self, batch):
    '''Pad images and encode targets.

    As images are of different sizes, we need to pad them to the same size.

    Args:
        batch: (list) of images, cls_targets, loc_targets.

    Returns:
        padded images, stacked cls_targets, stacked loc_targets.
    '''
    imgs = [x[0] for x in batch]
    boxes = [x[1] for x in batch]
    labels = [x[2] for x in batch]

    h = w = self.input_size
    num_imgs = len(imgs)
    inputs = torch.zeros(num_imgs, 3, h, w)

    loc_targets = []
    cls_targets = []
    for i in range(num_imgs):
        inputs[i] = imgs[i]
        loc_target, cls_target = self.encoder.encode(boxes[i], labels[i],
                                                     input_size=(w, h))
        loc_targets.append(loc_target)
        cls_targets.append(cls_target)
    return inputs, torch.stack(loc_targets), torch.stack(cls_targets)
def adpW(self, x):
    '''
    Calculate the pairwise_att of every pair of inputs.
    output_size: (x.size(0), x.size(1) / 2)
    '''
    x = x.detach()
    x = self.adp_metric_embedding1(x)
    x = self.adp_metric_embedding1_bn(x)
    x = F.relu(x)
    x = self.adp_metric_embedding2(x)
    x = self.adp_metric_embedding2_bn(x)
    x = F.relu(x)
    x = self.adp_metric_embedding3(x)
    x = self.adp_metric_embedding3_bn(x)
    x = F.relu(x)
    pairwise_att = F.sigmoid(self.adp_metric_embedding4(x))
    half = x.size(1) // 2  # integer division: tensor indices must be ints
    diag_matrix1 = []
    diag_matrix2 = []
    for i in range(x.size(0)):
        diag_matrix1.append(torch.diag(pairwise_att[i, :half]))
    for i in range(x.size(0)):
        diag_matrix2.append(torch.diag(pairwise_att[i, half:]))
    pairwise_att1 = torch.stack(diag_matrix1)
    pairwise_att2 = torch.stack(diag_matrix2)  # was diag_matrix1: copy-paste bug
    return pairwise_att1, pairwise_att2
def forward(self, inputs=None, encoder_hidden=None, encoder_outputs=None,
            pg_encoder_states=None, function=F.log_softmax,
            teacher_forcing_ratio=0, context_embedding=None):
    ret_dict = dict()
    ret_dict[DecoderRNNFB.KEY_ATTN_SCORE] = list()

    inputs, batch_size, max_length = self._validate_args(
        inputs, encoder_hidden, encoder_outputs, function, teacher_forcing_ratio)
    decoder_hidden = self._init_state(encoder_hidden)
    use_teacher_forcing = teacher_forcing_ratio == 1

    if use_teacher_forcing:
        decoder_input = inputs[:, :-1]
        decoder_outputs, decoder_output_states, decoder_hidden, attn = \
            self.forward_step(decoder_input, pg_encoder_states, decoder_hidden,
                              encoder_outputs, context_embedding)
    else:
        decoder_outputs = []
        decoder_output_states = []
        sequence_symbols = []
        lengths = np.array([max_length] * batch_size)

        def decode(step, step_output, step_output_state=None, step_attn=None):
            if step_output_state is not None:
                decoder_outputs.append(step_output)
                decoder_output_states.append(step_output_state)
            ret_dict[DecoderRNNFB.KEY_ATTN_SCORE].append(step_attn)
            symbols = step_output.topk(1)[1]
            sequence_symbols.append(symbols)
            eos_batches = symbols.data.eq(self.eos_id)
            if eos_batches.dim() > 0:
                eos_batches = eos_batches.cpu().view(-1).numpy()
                update_idx = ((lengths > step) & eos_batches) != 0
                lengths[update_idx] = len(sequence_symbols)
            return symbols

        decoder_input = inputs[:, 0].unsqueeze(1)
        for di in range(max_length):
            decoder_output, decoder_output_state, decoder_hidden, step_attn = \
                self.forward_step(decoder_input, pg_encoder_states, decoder_hidden,
                                  encoder_outputs, context_embedding)
            # do not allow the decoder to output UNK
            decoder_output[:, :, 3] = -float('inf')
            step_output = decoder_output.squeeze(1)
            step_output_state = decoder_output_state.squeeze(1)
            symbols = decode(di, step_output, step_output_state, step_attn)
            decoder_input = symbols

        decoder_outputs = torch.stack(decoder_outputs, dim=1)
        decoder_output_states = torch.stack(decoder_output_states, dim=1)
        ret_dict[DecoderRNNFB.KEY_SEQUENCE] = sequence_symbols
        ret_dict[DecoderRNNFB.KEY_LENGTH] = lengths.tolist()

    return decoder_outputs, decoder_output_states, ret_dict
def backward(ctx, grad_output):
    input, = ctx.saved_tensors
    # lift the real-valued gradient to a complex one (imaginary part zero)
    grad_input = torch.stack((grad_output, torch.zeros_like(grad_output)),
                             dim=len(grad_output.shape))
    # multiply by the unit phase e^{i * phase(input)}
    phase_input = angle(input)
    phase_input = torch.stack((torch.cos(phase_input), torch.sin(phase_input)),
                              dim=len(grad_output.shape))
    grad_input = multiply_complex(phase_input, grad_input)
    return 0.5 * grad_input
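# The backward above relies on angle() and multiply_complex(), which are not
# defined in this excerpt. A minimal sketch under the assumption that complex
# tensors carry a trailing dimension of size 2 holding (real, imag):
import torch

def angle(z):
    # element-wise phase of z = re + i*im
    return torch.atan2(z[..., 1], z[..., 0])

def multiply_complex(a, b):
    # (ar + i*ai) * (br + i*bi) = (ar*br - ai*bi) + i*(ar*bi + ai*br)
    ar, ai = a[..., 0], a[..., 1]
    br, bi = b[..., 0], b[..., 1]
    return torch.stack((ar * br - ai * bi, ar * bi + ai * br), dim=-1)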
def collate_fn(self, data):
    x, y, lens = zip(*data)
    max_len = max(lens)
    x = torch.stack(x)[:, :max_len]
    y = torch.stack(y)[:, :max_len]
    lens = torch.tensor(lens)
    return x, y, lens
def post_process_latents(latents):
    z_where, z_pres = latents
    z_where = [z.cpu() for z in z_where]
    z_pres = [z.cpu() for z in z_pres]
    # (steps, batch, ...) -> (batch, steps, ...)
    z_where_t = torch.stack(z_where).transpose(0, 1)
    z_pres_t = torch.stack(z_pres).transpose(0, 1)
    out = []
    for z_where_i, z_pres_i in zip(z_where_t, z_pres_t):
        out.append([z_obj._make(torch.cat([zw.data, zp.data]))
                    for zw, zp in zip(z_where_i, z_pres_i)])
    return out
def predict_batchwise(model, dataloader):
    with torch.no_grad():
        X, Y = zip(*[
            [x, y] for X, Y in dataloader
            for x, y in zip(model(X.cuda()).cpu(), Y)
        ])
    return torch.stack(X), torch.stack(Y)
def collate_fn(self, data):
    # sort the batch by descending length
    x, y, lens = zip(*sorted(data, key=lambda x: x[-1], reverse=True))
    max_len = lens[0]
    x = torch.stack(x)[:, :max_len]
    y = torch.stack(y)[:, :max_len]
    lens = torch.tensor(lens)
    return x, y, lens
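# Usage sketch for the two collate_fn variants above (illustrative; `dataset`
# is a hypothetical object defining the sorted collate_fn, and each item is
# assumed to be an (x, y, length) triple pre-padded to a common width of 10):
import torch

batch = [(torch.randn(10), torch.zeros(10, dtype=torch.long), 7),
         (torch.randn(10), torch.zeros(10, dtype=torch.long), 4),
         (torch.randn(10), torch.zeros(10, dtype=torch.long), 9)]
x, y, lens = dataset.collate_fn(batch)
print(x.shape, lens)  # torch.Size([3, 9]) tensor([9, 7, 4]): truncated to batch max
# hooked into a DataLoader the usual way:
# loader = torch.utils.data.DataLoader(dataset, batch_size=3,
#                                      collate_fn=dataset.collate_fn)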
def update(self, done):
    if done:  # terminal state: no bootstrap
        R_rev = [Variable(T.from_numpy(np.zeros(1, dtype=np.float32)))]
        A_rev = [Variable(T.from_numpy(np.zeros(1, dtype=np.float32)))]
        self.V_preds.append(0)
    else:  # bootstrap from the last value prediction
        R_rev = [self.V_preds[-1]]
        A_rev = [Variable(T.from_numpy(np.zeros(1, dtype=np.float32)))]
        self.R_preds = self.R_preds[:-1]  # delete bootstrap element
        self.rewards = self.rewards[:-1]

    # accumulated (discounted) rewards, computed in reverse
    r_rev = self.rewards[::-1]
    for r in r_rev:
        R_rev.append(r + GAMMA * R_rev[-1])
    R = T.stack(R_rev[1:][::-1])  # (TRAIN_INTERVAL, 1)

    # advantages (GAE-style recursion)
    N = len(r_rev)
    assert len(self.V_preds) == N + 1
    for i in range(N):
        delta = r_rev[i] + GAMMA * self.V_preds[N - i] - self.V_preds[N - i - 1]
        A_rev.append(delta + GAMMA * LAMBDA * A_rev[-1])
    A = T.stack(A_rev[1:][::-1])

    # MBP loss
    V_preds = T.stack(self.V_preds[:-1])
    R_preds = T.stack(self.R_preds)
    R_loss = (T.sum((V_preds - R) * (V_preds - R))
              + T.sum((R_preds - R) * (R_preds - R))) / 2.
    self.mbp_loss = self.mbp_loss + (ALPHA_RETURN * R_loss)
    self.mbp_loss = self.mbp_loss * ETA_MBP

    # Policy gradient
    A_ = 0
    H = 0
    self.actions = self.actions[1:]  # delete initial action
    for i in range(N):
        log_pi = self.log_pies[i]
        _t = T.sum(log_pi * T.from_numpy(
            np.array(self.actions[i] == 1).astype("float32"))).view(1)
        A_ = A_ + (A[i] * _t)
        H = H - T.matmul(T.exp(log_pi), log_pi)  # entropy bonus
    self.policy_loss = self.policy_loss + (A_[0] + ALPHA_ENTROPY * H)  # gradient ascent
    self.policy_loss = self.policy_loss * ETA_POLICY

    # update logs
    self.mbp_loss_log.append(self.mbp_loss.data)
    self.policy_loss_log.append(self.policy_loss.data)
    print("(mbp loss, policy loss): ", self.mbp_loss, self.policy_loss)
    return self.mbp_loss + self.policy_loss
def tagLoss(tags, keypoints):
    """Accumulate the tag loss for each image in the batch."""
    pushes, pulls = [], []
    keypoints = keypoints.cpu().data.numpy()
    for i in range(tags.size()[0]):
        push, pull = singleTagLoss(tags[i], keypoints[i % len(keypoints)])
        pushes.append(push)
        pulls.append(pull)
    return torch.stack(pushes), torch.stack(pulls)
def custom_collate_fn(batch):
    batch = zip(*batch)  # transpose
    image, label, attributes, num_nonzero_attributes = batch
    image = torch.stack(image)
    label = torch.LongTensor(label)
    attributes = torch.stack([torch.LongTensor(a) for a in attributes])
    padding_idx = torch.LongTensor(num_nonzero_attributes)
    return image, label, attributes, padding_idx
def forward(self, inputs):
    x = inputs  # input shape: b, c, h, 2w
    batch_size, c, h, w = x.size(0), x.size(1), x.size(2), x.size(3) // 2
    block_size = h // self.scale
    value = self.f_value(x)
    query = self.f_query(x)
    key = self.f_key(x)
    # split the two horizontal halves into a trailing dimension: B*N*H*W*2
    value = torch.stack([value[:, :, :, :w], value[:, :, :, w:]], 4)
    query = torch.stack([query[:, :, :, :w], query[:, :, :, w:]], 4)
    key = torch.stack([key[:, :, :, :w], key[:, :, :, w:]], 4)

    # fold local blocks into the batch dimension
    v_list = torch.split(value, block_size, dim=2)
    v_locals = torch.cat(v_list, 0)
    v_list = torch.split(v_locals, block_size, dim=3)
    v_locals = torch.cat(v_list)
    q_list = torch.split(query, block_size, dim=2)
    q_locals = torch.cat(q_list, 0)
    q_list = torch.split(q_locals, block_size, dim=3)
    q_locals = torch.cat(q_list)
    k_list = torch.split(key, block_size, dim=2)
    k_locals = torch.cat(k_list, 0)
    k_list = torch.split(k_locals, block_size, dim=3)
    k_locals = torch.cat(k_list)

    # self-attention over each local block
    def func(value_local, query_local, key_local):
        batch_size_new = value_local.size(0)
        h_local, w_local = value_local.size(2), value_local.size(3)
        value_local = value_local.contiguous().view(batch_size_new, self.in_dim, -1)
        query_local = query_local.contiguous().view(batch_size_new, self.in_dim, -1)
        query_local = query_local.permute(0, 2, 1)
        key_local = key_local.contiguous().view(batch_size_new, self.in_dim, -1)
        sim_map = torch.bmm(query_local, key_local)
        sim_map = self.softmax(sim_map)
        context_local = torch.bmm(value_local, sim_map.permute(0, 2, 1))
        context_local = context_local.view(batch_size_new, self.in_dim,
                                           h_local, w_local, 2)
        return context_local

    context_locals = func(v_locals, q_locals, k_locals)

    # unfold the blocks back into the spatial dimensions
    b, c, h, w, _ = context_locals.shape
    context_list = torch.split(context_locals, b // self.scale, 0)
    context = torch.cat(context_list, dim=3)
    context_list = torch.split(context, b // self.scale // self.scale, 0)
    context = torch.cat(context_list, dim=2)
    # merge the two halves back side by side
    context = torch.cat([context[:, :, :, :, 0], context[:, :, :, :, 1]], 3)
    return context + x
def main():
    ################ load ###################
    actor_path = os.path.abspath(os.curdir) + '/PPO_Mixedinput_Navigation_Model/weight/AC_TD3_actor.pkl'
    critic_path = os.path.abspath(os.curdir) + '/PPO_Mixedinput_Navigation_Model/weight/AC_TD3_critic.pkl'
    if os.path.exists(actor_path):
        actor = Actor(state_size, action_size).to(device)
        actor.load_state_dict(torch.load(actor_path))
        print('Actor Model loaded')
    else:
        actor = Actor(state_size, action_size).to(device)
    if os.path.exists(critic_path):
        critic = Critic(state_size, action_size).to(device)
        critic.load_state_dict(torch.load(critic_path))
        print('Critic Model loaded')
    else:
        critic = Critic(state_size, action_size).to(device)
    critic_next = Critic(state_size, action_size).to(device)
    critic_next.load_state_dict(critic.state_dict())
    print("Waiting for GAMA...")

    ################### initialization ########################
    reset()
    episode = 2548
    training_stage = 80
    Decay = training_stage * 18
    lr = 0.0001
    sample_lr = [
        0.0001, 0.00009, 0.00008, 0.00007, 0.00006, 0.00005, 0.00004,
        0.00003, 0.00002, 0.00001, 0.000009, 0.000008, 0.000007,
        0.000006, 0.000005, 0.000004, 0.000003, 0.000002, 0.000001
    ]
    if episode >= training_stage:
        try:
            lr = sample_lr[int(episode // training_stage)]
        except IndexError:
            lr = 0.000001 * (0.9 ** (episode - Decay // training_stage))

    optimizerA = optim.Adam(actor.parameters(), lr, betas=(0.95, 0.999))
    optimizerC = optim.Adam(critic.parameters(), lr, betas=(0.95, 0.999))

    test = "GAMA"
    state, reward, done, time_pass, over, _ = GAMA_connect(test)  # connect
    print("done:", done, "timepass:", time_pass)

    # NOTE: a span of this function was redacted ("******") in the source; it
    # evidently opened the message loop and the branch taken when an episode
    # finishes. The dispatch below is a best-effort reconstruction.
    while over != 1:
        if done == 1:  # episode finished (reconstructed condition)
            # (training/update code lost in the redaction)
            print("----------------------------------Net_Trained---------------------------------------")
            print('--------------------------Iteration:', episode, 'over--------------------------------')
            episode += 1
        else:  # first step of an episode
            print('Iteration:', episode, "lr:", lr)
            state = np.reshape(state, (1, len(state)))
            state_img = generate_img()
            tensor_cv = torch.from_numpy(
                np.transpose(state_img, (2, 0, 1))).double().to(device) / 255
            state = torch.DoubleTensor(state).reshape(1, state_size).to(device)
            for _ in range(Memory_size):
                memory.states.append(state)
                memory.states_img.append(tensor_cv)
            state = torch.stack(memory.states).to(device).detach()
            tensor_cv = torch.stack(memory.states_img).to(device).detach()
            value, h_state_cv_c, h_state_n_c, h_state_3_c = critic(state, tensor_cv)
            action, log_prob, entropy = actor(state, tensor_cv)
            print("acceleration: ", action.cpu().numpy())
            send_to_GAMA([[1, float(action.cpu().numpy() * 10)]])
            log_prob = log_prob.unsqueeze(0)
        state, reward, done, time_pass, over, average_speed_NPC = GAMA_connect(test)
    return None
# We can also use numpy-style advanced indexing
x = torch.arange(9).view(3, 3)
indices = torch.LongTensor([0, 2])
print(x[indices])
print("-" * 20)
print(x[indices, :])
print("-" * 20)
print(x[:, indices])

# We can combine tensors by concatenating them. First, concatenating on the rows
x = torch.arange(6).view(2, 3)
describe(x)
describe(torch.cat([x, x], dim=0))
describe(torch.cat([x, x], dim=1))
describe(torch.stack([x, x]))

# We can concatenate along dimension 1 (the column direction)
x = torch.arange(9).view(3, 3)
print(x)
print("-" * 20)
new_x = torch.cat([x, x, x], dim=1)
print(new_x.shape)
print(new_x)

# We can also concatenate on a new 0th dimension to "stack" the tensors
x = torch.arange(9).view(3, 3)
print(x)
print("-" * 20)
new_x = torch.stack([x, x, x])
print(new_x.shape)
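# The describe() helper used above is not defined in this excerpt; a minimal
# stand-in, assuming it prints a tensor's type, shape, and values:
def describe(x):
    print("Type: {}".format(x.type()))
    print("Shape/size: {}".format(x.shape))
    print("Values: \n{}".format(x))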
def dnee_ee_features(rels, model, config, pred2idx, argw2idx, max_event_len,
                     rel2idx, device=None):
    x1_idx = 0
    x2_idx = 0
    gold2e1xs = {}
    gold2e2xs = {}
    x1s, x2s = [], []
    arg_lens = [config['arg0_max_len'], config['arg1_max_len']]
    for i_rel, rel in enumerate(rels):
        s = rel['Sense'][0]
        if len(rel['Arg1']['Events']) == 0:
            continue
        e1s = unique_event_dict(rel['Arg1']['Events'], pred2idx).values()
        for e1 in e1s:
            e1r = get_raw_event_repr(e1, config, pred2idx, argw2idx, device)
            x1s.append(e1r)
            if i_rel in gold2e1xs:
                gold2e1xs[i_rel].append(x1_idx)
            else:
                gold2e1xs[i_rel] = [x1_idx]
            x1_idx += 1
        e2s = unique_event_dict(rel['Arg2']['Events'], pred2idx).values()
        for e2 in e2s:
            e2r = get_raw_event_repr(e2, config, pred2idx, argw2idx, device)
            x2s.append(e2r)
            if i_rel in gold2e2xs:
                gold2e2xs[i_rel].append(x2_idx)
            else:
                gold2e2xs[i_rel] = [x2_idx]
            x2_idx += 1

    x1s = torch.stack(x1s, dim=0).squeeze()
    x2s = torch.stack(x2s, dim=0).squeeze()
    if device:
        x1s = x1s.to(device)
        x2s = x2s.to(device)
    with torch.no_grad():
        x1ee = model.embed_event(x1s)
        x2ee = model.embed_event(x2s)

    x1_out = torch.zeros((len(rels), max_event_len, x1ee.shape[1]), dtype=torch.float32)
    x2_out = torch.zeros((len(rels), max_event_len, x2ee.shape[1]), dtype=torch.float32)
    y = torch.LongTensor(len(rels))
    if device:
        x1_out = x1_out.to(device)
        x2_out = x2_out.to(device)
        y = y.to(device)
    for i_rel, rel in enumerate(rels):
        s = rel['Sense'][0]
        y[i_rel] = rel2idx[s]
        # combine scores for multiple event pairs
        if i_rel in gold2e1xs:
            idxs = gold2e1xs[i_rel]
            fs = x1ee[idxs, :]
            if fs.shape[0] > max_event_len:
                fs = fs[:max_event_len, :]
            x1_out[i_rel, :fs.shape[0]] = fs
        if i_rel in gold2e2xs:
            idxs = gold2e2xs[i_rel]
            fs = x2ee[idxs, :]
            if fs.shape[0] > max_event_len:
                fs = fs[:max_event_len, :]
            x2_out[i_rel, :fs.shape[0]] = fs
    return x1_out, x2_out, y
def spherical_to_cartesian(rtp):
    # rtp columns: radius r, inclination theta, azimuth phi
    x = rtp[:, 0] * torch.sin(rtp[:, 1]) * torch.cos(rtp[:, 2])
    y = rtp[:, 0] * torch.sin(rtp[:, 1]) * torch.sin(rtp[:, 2])
    z = rtp[:, 0] * torch.cos(rtp[:, 1])
    return torch.stack((x, y, z), 1)
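# Quick sanity check for spherical_to_cartesian (illustrative; assumes the
# physics convention rtp = (radius, inclination theta, azimuth phi)):
import math
import torch

rtp = torch.tensor([[1.0, math.pi / 2, 0.0],   # on the equator, phi=0 -> +x axis
                    [2.0, 0.0, 0.0]])          # along the pole -> (0, 0, 2)
print(spherical_to_cartesian(rtp))
# ~ [[1, 0, 0], [0, 0, 2]] up to float rounding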
def forward(self, xs_pad, ilens, ys_pad):
    """E2E forward.

    :param torch.Tensor xs_pad: batch of padded input sequences (B, Tmax, idim)
    :param torch.Tensor ilens: batch of lengths of input sequences (B)
    :param torch.Tensor ys_pad: batch of padded character id sequence tensor (B, num_spkrs, Lmax)
    :return: ctc loss value
    :rtype: torch.Tensor
    :return: attention loss value
    :rtype: torch.Tensor
    :return: accuracy in attention decoder
    :rtype: float
    """
    # 0. Frontend
    if self.frontend is not None:
        hs_pad, hlens, mask = self.frontend(to_torch_tensor(xs_pad), ilens)
        if isinstance(hs_pad, list):
            hlens_n = [None] * self.num_spkrs
            for i in range(self.num_spkrs):
                hs_pad[i], hlens_n[i] = self.feature_transform(hs_pad[i], hlens)
            hlens = hlens_n
        else:
            hs_pad, hlens = self.feature_transform(hs_pad, hlens)
    else:
        hs_pad, hlens = xs_pad, ilens

    # 1. Encoder
    if not isinstance(hs_pad, list):
        # single-channel input xs_pad (single- or multi-speaker)
        hs_pad, hlens, _ = self.enc(hs_pad, hlens)
    else:
        # multi-channel multi-speaker input xs_pad
        for i in range(self.num_spkrs):
            hs_pad[i], hlens[i], _ = self.enc(hs_pad[i], hlens[i])

    # 2. CTC loss
    if self.mtlalpha == 0:
        loss_ctc, min_perm = None, None
    else:
        if not isinstance(hs_pad, list):  # single-speaker input xs_pad
            loss_ctc = torch.mean(self.ctc(hs_pad, hlens, ys_pad))
        else:  # multi-speaker input xs_pad
            ys_pad = ys_pad.transpose(0, 1)  # (num_spkrs, B, Lmax)
            loss_ctc_perm = torch.stack(
                [
                    self.ctc(
                        hs_pad[i // self.num_spkrs],
                        hlens[i // self.num_spkrs],
                        ys_pad[i % self.num_spkrs],
                    )
                    for i in range(self.num_spkrs**2)
                ],
                dim=1,
            )  # (B, num_spkrs^2)
            loss_ctc, min_perm = self.pit.pit_process(loss_ctc_perm)
        logging.info("ctc loss:" + str(float(loss_ctc)))

    # 3. attention loss
    if self.mtlalpha == 1:
        loss_att = None
        acc = None
    else:
        if not isinstance(hs_pad, list):  # single-speaker input xs_pad
            loss_att, acc, _ = self.dec(hs_pad, hlens, ys_pad)
        else:
            for i in range(ys_pad.size(1)):  # B
                ys_pad[:, i] = ys_pad[min_perm[i], i]
            rslt = [
                self.dec(hs_pad[i], hlens[i], ys_pad[i], strm_idx=i)
                for i in range(self.num_spkrs)
            ]
            loss_att = sum([r[0] for r in rslt]) / float(len(rslt))
            acc = sum([r[1] for r in rslt]) / float(len(rslt))
    self.acc = acc

    # 5. compute cer/wer
    if (self.training or not (self.report_cer or self.report_wer)
            or not isinstance(hs_pad, list)):
        cer, wer = 0.0, 0.0
    else:
        if self.recog_args.ctc_weight > 0.0:
            lpz = [
                self.ctc.log_softmax(hs_pad[i]).data
                for i in range(self.num_spkrs)
            ]
        else:
            lpz = None

        word_eds, char_eds, word_ref_lens, char_ref_lens = [], [], [], []
        nbest_hyps = [
            self.dec.recognize_beam_batch(
                hs_pad[i],
                torch.tensor(hlens[i]),
                lpz[i],
                self.recog_args,
                self.char_list,
                self.rnnlm,
                strm_idx=i,
            )
            for i in range(self.num_spkrs)
        ]
        # remove <sos> and <eos>
        y_hats = [
            [nbest_hyp[0]["yseq"][1:-1] for nbest_hyp in nbest_hyps[i]]
            for i in range(self.num_spkrs)
        ]
        for i in range(len(y_hats[0])):
            hyp_words = []
            hyp_chars = []
            ref_words = []
            ref_chars = []
            for ns in range(self.num_spkrs):
                y_hat = y_hats[ns][i]
                y_true = ys_pad[ns][i]
                seq_hat = [
                    self.char_list[int(idx)] for idx in y_hat if int(idx) != -1
                ]
                seq_true = [
                    self.char_list[int(idx)] for idx in y_true if int(idx) != -1
                ]
                seq_hat_text = "".join(seq_hat).replace(self.recog_args.space, " ")
                seq_hat_text = seq_hat_text.replace(self.recog_args.blank, "")
                seq_true_text = "".join(seq_true).replace(self.recog_args.space, " ")
                hyp_words.append(seq_hat_text.split())
                ref_words.append(seq_true_text.split())
                hyp_chars.append(seq_hat_text.replace(" ", ""))
                ref_chars.append(seq_true_text.replace(" ", ""))
            tmp_word_ed = [
                editdistance.eval(hyp_words[ns // self.num_spkrs],
                                  ref_words[ns % self.num_spkrs])
                for ns in range(self.num_spkrs**2)
            ]  # h1r1, h1r2, h2r1, h2r2
            tmp_char_ed = [
                editdistance.eval(hyp_chars[ns // self.num_spkrs],
                                  ref_chars[ns % self.num_spkrs])
                for ns in range(self.num_spkrs**2)
            ]  # h1r1, h1r2, h2r1, h2r2
            word_eds.append(self.pit.min_pit_sample(torch.tensor(tmp_word_ed))[0])
            word_ref_lens.append(len(sum(ref_words, [])))
            char_eds.append(self.pit.min_pit_sample(torch.tensor(tmp_char_ed))[0])
            char_ref_lens.append(len("".join(ref_chars)))
        wer = (0.0 if not self.report_wer
               else float(sum(word_eds)) / sum(word_ref_lens))
        cer = (0.0 if not self.report_cer
               else float(sum(char_eds)) / sum(char_ref_lens))

    alpha = self.mtlalpha
    if alpha == 0:
        self.loss = loss_att
        loss_att_data = float(loss_att)
        loss_ctc_data = None
    elif alpha == 1:
        self.loss = loss_ctc
        loss_att_data = None
        loss_ctc_data = float(loss_ctc)
    else:
        self.loss = alpha * loss_ctc + (1 - alpha) * loss_att
        loss_att_data = float(loss_att)
        loss_ctc_data = float(loss_ctc)

    loss_data = float(self.loss)
    if loss_data < CTC_LOSS_THRESHOLD and not math.isnan(loss_data):
        self.reporter.report(loss_ctc_data, loss_att_data, acc, cer, wer, loss_data)
    else:
        logging.warning("loss (=%f) is not correct", loss_data)
    return self.loss
def run_mdnet(**opts): img_list = opts['img_list'] gt = opts['gt'] # init bounding box target_bb = np.array(opts['init_bb']) # a bounding box per image result = np.zeros((len(img_list), 4)) result_bb = np.zeros((len(img_list), 4)) # first image result[0] = np.copy(target_bb) result_bb[0] = np.copy(target_bb) iou_result = np.zeros((len(img_list), 1)) # init model model = MDNet(opts['model_path']) model_g = NetG() if opts['adaptive_align']: align_h = model.roi_align_model.aligned_height align_w = model.roi_align_model.aligned_width spatial_s = model.roi_align_model.spatial_scale model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s) if opts['use_gpu']: model = model.cuda() model_g = model_g.cuda() model.set_learnable_params(opts['ft_layers']) model_g.set_learnable_params(opts['ft_layers']) # init image crop model img_crop_model = ImgCropper(1.) if opts['use_gpu']: img_crop_model.gpu_enable() # init criterion and optimizer criterion = BinaryLoss() #criterion_g = nn.MSELoss(reduction='sum') criterion_g = nn.MSELoss(reduction='mean') init_optimizer = set_optimizer(model, opts['lr_init'], lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']) update_optimizer = set_optimizer(model, opts['lr_update'], lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']) tic = time.time() # Load first image cur_image = Image.open(img_list[0]).convert('RGB') cur_image = np.asarray(cur_image) # Draw pos/neg samples img_shape = cur_image.shape pos_examples = gen_samples(SampleGenerator('gaussian', (img_shape[1], img_shape[0]), 0.1, 1.2), target_bb, opts['n_pos_init'], opts['overlap_pos_init']) neg_examples = gen_samples(SampleGenerator('uniform', (img_shape[1], img_shape[0]), 1, 2, 1.1), target_bb, opts['n_neg_init'], opts['overlap_neg_init']) neg_examples = np.random.permutation(neg_examples) cur_bbreg_examples = gen_samples(SampleGenerator('uniform', (img_shape[1], img_shape[0]), 0.3, 1.5, 1.1), target_bb, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg']) # compute padded sample padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min() padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min() padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max() padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max() padded_scene_box = np.reshape(np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)), (1, 4)) scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4)) if opts['jitter']: # horizontal shift jittered_scene_box_horizon = np.copy(padded_scene_box) jittered_scene_box_horizon[0, 0] -= 4. jitter_scale_horizon = 1. # vertical shift jittered_scene_box_vertical = np.copy(padded_scene_box) jittered_scene_box_vertical[0, 1] -= 4. jitter_scale_vertical = 1. 
jittered_scene_box_reduce1 = np.copy(padded_scene_box) jitter_scale_reduce1 = 1.1 ** (-1) # vertical shift jittered_scene_box_enlarge1 = np.copy(padded_scene_box) jitter_scale_enlarge1 = 1.1 ** (1) # scale reduction jittered_scene_box_reduce2 = np.copy(padded_scene_box) jitter_scale_reduce2 = 1.1 ** (-2) # scale enlarge jittered_scene_box_enlarge2 = np.copy(padded_scene_box) jitter_scale_enlarge2 = 1.1 ** (2) scene_boxes = np.concatenate( [scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical, jittered_scene_box_reduce1, jittered_scene_box_enlarge1, jittered_scene_box_reduce2, jittered_scene_box_enlarge2], axis=0) jitter_scale = [1., jitter_scale_horizon, jitter_scale_vertical, jitter_scale_reduce1, jitter_scale_enlarge1, jitter_scale_reduce2, jitter_scale_enlarge2] else: jitter_scale = [1.] model.eval() for bidx in range(0, scene_boxes.shape[0]): crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'], opts['img_size']) / target_bb[2:4])).astype( 'int64') * jitter_scale[bidx] cropped_image, cur_image_var = img_crop_model.crop_image(cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size) cropped_image = cropped_image - 128. feat_map = model(cropped_image, out_layer='conv3') rel_target_bbox = np.copy(target_bb) rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2] batch_num = np.zeros((pos_examples.shape[0], 1)) cur_pos_rois = np.copy(pos_examples) cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_pos_rois.shape[0], axis=0) scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx] cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bb[2:4], opts['padding']) cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1) cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda() cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois) cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone() batch_num = np.zeros((neg_examples.shape[0], 1)) cur_neg_rois = np.copy(neg_examples) cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_neg_rois.shape[0], axis=0) cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bb[2:4], opts['padding']) cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1) cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda() cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois) cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone() # bbreg rois batch_num = np.zeros((cur_bbreg_examples.shape[0], 1)) cur_bbreg_rois = np.copy(cur_bbreg_examples) cur_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_bbreg_rois.shape[0], axis=0) scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx] cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bb[2:4], opts['padding']) cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1) cur_bbreg_rois = Variable(torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda() cur_bbreg_feats = model.roi_align_model(feat_map, cur_bbreg_rois) cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone() feat_dim = cur_pos_feats.size(-1) if bidx == 0: pos_feats = cur_pos_feats neg_feats = cur_neg_feats # bbreg feature bbreg_feats = cur_bbreg_feats bbreg_examples = cur_bbreg_examples else: pos_feats = torch.cat((pos_feats, 
cur_pos_feats), dim=0) neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0) # bbreg feature bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0) bbreg_examples = np.concatenate((bbreg_examples, cur_bbreg_examples), axis=0) if pos_feats.size(0) > opts['n_pos_init']: pos_idx = np.asarray(list(range(pos_feats.size(0)))) np.random.shuffle(pos_idx) pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']], :] if neg_feats.size(0) > opts['n_neg_init']: neg_idx = np.asarray(list(range(neg_feats.size(0)))) np.random.shuffle(neg_idx) neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :] # bbreg if bbreg_feats.size(0) > opts['n_bbreg']: bbreg_idx = np.asarray(list(range(bbreg_feats.size(0)))) np.random.shuffle(bbreg_idx) bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :] # print bbreg_examples.shape # open images and crop patch from obj extra_obj_size = np.array((opts['img_size'], opts['img_size'])) extra_crop_img_size = extra_obj_size * (opts['padding'] + 0.6) replicateNum = 100 for iidx in range(replicateNum): extra_target_bbox = np.copy(target_bb) extra_scene_box = np.copy(extra_target_bbox) extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2. extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6) extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2. extra_scene_box[2:4] = extra_scene_box_size extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4) cur_extra_scale = 1.1 ** np.clip(np.random.randn(1), -2, 2) extra_scene_box[0] += extra_shift_offset[0] extra_scene_box[1] += extra_shift_offset[1] extra_scene_box[2:4] *= cur_extra_scale[0] scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0] cur_extra_cropped_image, _ = img_crop_model.crop_image(cur_image, np.reshape(extra_scene_box, (1, 4)), extra_crop_img_size) cur_extra_cropped_image = cur_extra_cropped_image.detach() cur_extra_pos_examples = gen_samples(SampleGenerator('gaussian', (img_shape[1], img_shape[0]), 0.1, 1.2), extra_target_bbox, opts['n_pos_init'] // replicateNum, opts['overlap_pos_init']) cur_extra_neg_examples = gen_samples(SampleGenerator('uniform', (img_shape[1], img_shape[0]), 0.3, 2, 1.1), extra_target_bbox, opts['n_neg_init'] // replicateNum // 4, opts['overlap_neg_init']) # bbreg sample cur_extra_bbreg_examples = gen_samples(SampleGenerator('uniform', (img_shape[1], img_shape[0]), 0.3, 1.5, 1.1), extra_target_bbox, opts['n_bbreg'] // replicateNum // 4, opts['overlap_bbreg'], opts['scale_bbreg']) batch_num = iidx * np.ones((cur_extra_pos_examples.shape[0], 1)) cur_extra_pos_rois = np.copy(cur_extra_pos_examples) cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)), cur_extra_pos_rois.shape[0], axis=0) cur_extra_pos_rois = samples2maskroi(cur_extra_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1) batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1)) cur_extra_neg_rois = np.copy(cur_extra_neg_examples) cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)), cur_extra_neg_rois.shape[0], axis=0) cur_extra_neg_rois = samples2maskroi(cur_extra_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1) # bbreg rois batch_num = iidx * 
np.ones((cur_extra_bbreg_examples.shape[0], 1)) cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples) cur_extra_bbreg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)), cur_extra_bbreg_rois.shape[0], axis=0) cur_extra_bbreg_rois = samples2maskroi(cur_extra_bbreg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) cur_extra_bbreg_rois = np.concatenate((batch_num, cur_extra_bbreg_rois), axis=1) if iidx == 0: extra_cropped_image = cur_extra_cropped_image extra_pos_rois = np.copy(cur_extra_pos_rois) extra_neg_rois = np.copy(cur_extra_neg_rois) # bbreg rois extra_bbreg_rois = np.copy(cur_extra_bbreg_rois) extra_bbreg_examples = np.copy(cur_extra_bbreg_examples) else: extra_cropped_image = torch.cat((extra_cropped_image, cur_extra_cropped_image), dim=0) extra_pos_rois = np.concatenate((extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0) extra_neg_rois = np.concatenate((extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0) # bbreg rois extra_bbreg_rois = np.concatenate((extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0) extra_bbreg_examples = np.concatenate((extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0) extra_pos_rois = Variable(torch.from_numpy(extra_pos_rois.astype('float32'))).cuda() extra_neg_rois = Variable(torch.from_numpy(extra_neg_rois.astype('float32'))).cuda() # bbreg rois extra_bbreg_rois = Variable(torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda() extra_cropped_image -= 128. extra_feat_maps = model(extra_cropped_image, out_layer='conv3') # Draw pos/neg samples img_shape = cur_image.shape extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois) extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone() extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois) extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone() # bbreg feat extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois) extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone() # concatenate extra features to original_features pos_feats = torch.cat((pos_feats, extra_pos_feats), dim=0) neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0) # concatenate extra bbreg feats to original_bbreg_feats bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0) bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0) torch.cuda.empty_cache() model.zero_grad() # Initial training train(model, None, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init'], **opts) #del init_optimizer, neg_feats if opts['use_gpu']: torch.cuda.empty_cache() g_pretrain(model, model_g, criterion_g, pos_feats, **opts) if opts['use_gpu']: torch.cuda.empty_cache() # bbreg train if bbreg_feats.size(0) > opts['n_bbreg']: bbreg_idx = np.asarray(list(range(bbreg_feats.size(0)))) np.random.shuffle(bbreg_idx) bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :] bbreg = BBRegressor((img_shape[1], img_shape[0])) bbreg.train(bbreg_feats, bbreg_examples, target_bb) if pos_feats.size(0) > opts['n_pos_update']: pos_idx = np.asarray(list(range(pos_feats.size(0)))) np.random.shuffle(pos_idx) pos_feats_all = [pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())] if neg_feats.size(0) > opts['n_neg_update']: neg_idx = np.asarray(list(range(neg_feats.size(0)))) np.random.shuffle(neg_idx) 
neg_feats_all = [neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())] spf_total = time.time() - tic # spf_total = 0. # no first frame # Visualize savefig = opts['savefig_dir'] != '' if opts['visualize'] or savefig: dpi = 80.0 figsize = (cur_image.shape[1] / dpi, cur_image.shape[0] / dpi) fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi) ax = plt.Axes(fig, [0., 0., 1., 1.]) ax.set_axis_off() fig.add_axes(ax) im = ax.imshow(cur_image, aspect='normal') if gt is not None: gt_rect = plt.Rectangle(tuple(gt[0, :2]), gt[0, 2], gt[0, 3], linewidth=3, edgecolor="#00ff00", zorder=1, fill=False) ax.add_patch(gt_rect) rect = plt.Rectangle(tuple(result_bb[0, :2]), result_bb[0, 2], result_bb[0, 3], linewidth=3, edgecolor="#ff0000", zorder=1, fill=False) ax.add_patch(rect) if opts['visualize']: plt.pause(.01) plt.draw() if savefig: fig.savefig(os.path.join(opts['savefig_dir'], '0000.jpg'), dpi=dpi) # Main loop trans_f = opts['trans_f'] for i in range(1, len(img_list)): tic = time.time() # Load image cur_image = Image.open(img_list[i]).convert('RGB') cur_image = np.asarray(cur_image) # Estimate target bbox img_shape = cur_image.shape samples = gen_samples( SampleGenerator('gaussian', (img_shape[1], img_shape[0]), trans_f, opts['scale_f'], valid=True), target_bb, opts['n_samples']) padded_x1 = (samples[:, 0] - samples[:, 2] * (opts['padding'] - 1.) / 2.).min() padded_y1 = (samples[:, 1] - samples[:, 3] * (opts['padding'] - 1.) / 2.).min() padded_x2 = (samples[:, 0] + samples[:, 2] * (opts['padding'] + 1.) / 2.).max() padded_y2 = (samples[:, 1] + samples[:, 3] * (opts['padding'] + 1.) / 2.).max() padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)) if padded_scene_box[0] > cur_image.shape[1]: padded_scene_box[0] = cur_image.shape[1] - 1 if padded_scene_box[1] > cur_image.shape[0]: padded_scene_box[1] = cur_image.shape[0] - 1 if padded_scene_box[0] + padded_scene_box[2] < 0: padded_scene_box[2] = -padded_scene_box[0] + 1 if padded_scene_box[1] + padded_scene_box[3] < 0: padded_scene_box[3] = -padded_scene_box[1] + 1 crop_img_size = (padded_scene_box[2:4] * ((opts['img_size'], opts['img_size']) / target_bb[2:4])).astype( 'int64') cropped_image, cur_image_var = img_crop_model.crop_image(cur_image, np.reshape(padded_scene_box, (1, 4)), crop_img_size) cropped_image = cropped_image - 128. 
model.eval() feat_map = model(cropped_image, out_layer='conv3') # relative target bbox with padded_scene_box rel_target_bbox = np.copy(target_bb) rel_target_bbox[0:2] -= padded_scene_box[0:2] # Extract sample features and get target location batch_num = np.zeros((samples.shape[0], 1)) sample_rois = np.copy(samples) sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), sample_rois.shape[0], axis=0) sample_rois = samples2maskroi(sample_rois, model.receptive_field, (opts['img_size'], opts['img_size']), target_bb[2:4], opts['padding']) sample_rois = np.concatenate((batch_num, sample_rois), axis=1) sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))).cuda() sample_feats = model.roi_align_model(feat_map, sample_rois) sample_feats = sample_feats.view(sample_feats.size(0), -1).clone() sample_scores = model(sample_feats, in_layer='fc4') top_scores, top_idx = sample_scores[:, 1].topk(5) top_idx = top_idx.data.cpu().numpy() target_score = top_scores.data.mean() target_bb = samples[top_idx].mean(axis=0) success = target_score > opts['success_thr'] # Expand search area at failure if success: trans_f = opts['trans_f'] else: trans_f = opts['trans_f_expand'] # bb regression if success: bbreg_feats = sample_feats[top_idx, :] bbreg_samples = samples[top_idx] bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples) bbreg_bbox = bbreg_samples.mean(axis=0) else: bbreg_bbox = target_bb # Save result result[i] = target_bb result_bb[i] = bbreg_bbox iou_result[i] = 1. # Data collect if success: # Draw pos/neg samples pos_examples = gen_samples( SampleGenerator('gaussian', (img_shape[1], img_shape[0]), 0.1, 1.2), target_bb, opts['n_pos_update'], opts['overlap_pos_update']) neg_examples = gen_samples( SampleGenerator('uniform', (img_shape[1], img_shape[0]), 1.5, 1.2), target_bb, opts['n_neg_update'], opts['overlap_neg_update']) padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min() padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min() padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max() padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max() padded_scene_box = np.reshape( np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)), (1, 4)) scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4)) jitter_scale = [1.] for bidx in range(0, scene_boxes.shape[0]): crop_img_size = (scene_boxes[bidx, 2:4] * ( (opts['img_size'], opts['img_size']) / target_bb[2:4])).astype('int64') * jitter_scale[ bidx] cropped_image, cur_image_var = img_crop_model.crop_image(cur_image, np.reshape(scene_boxes[bidx], (1, 4)), crop_img_size) cropped_image = cropped_image - 128. 
                feat_map = model(cropped_image, out_layer='conv3')

                rel_target_bbox = np.copy(target_bb)
                rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]

                batch_num = np.zeros((pos_examples.shape[0], 1))
                cur_pos_rois = np.copy(pos_examples)
                cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_pos_rois.shape[0], axis=0)
                scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
                cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),
                                               target_bb[2:4], opts['padding'])
                cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
                cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
                cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
                cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()

                batch_num = np.zeros((neg_examples.shape[0], 1))
                cur_neg_rois = np.copy(neg_examples)
                cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)),
                                                  cur_neg_rois.shape[0], axis=0)
                cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),
                                               target_bb[2:4], opts['padding'])
                cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
                cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
                cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
                cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()

                feat_dim = cur_pos_feats.size(-1)

                if bidx == 0:
                    pos_feats = cur_pos_feats  ## index select
                    neg_feats = cur_neg_feats
                else:
                    pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
                    neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)

            if pos_feats.size(0) > opts['n_pos_update']:
                pos_idx = np.asarray(list(range(pos_feats.size(0))))
                np.random.shuffle(pos_idx)
                pos_feats = pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
            if neg_feats.size(0) > opts['n_neg_update']:
                neg_idx = np.asarray(list(range(neg_feats.size(0))))
                np.random.shuffle(neg_idx)
                neg_feats = neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())

            pos_feats_all.append(pos_feats)
            neg_feats_all.append(neg_feats)

            if len(pos_feats_all) > opts['n_frames_long']:
                del pos_feats_all[0]
            if len(neg_feats_all) > opts['n_frames_short']:
                del neg_feats_all[0]

        # Short term update
        if not success:
            nframes = min(opts['n_frames_short'], len(pos_feats_all))
            pos_data = torch.stack(pos_feats_all[-nframes:], 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, None, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'], **opts)

        # Long term update
        elif i % opts['long_interval'] == 0:
            pos_data = torch.stack(pos_feats_all, 0).view(-1, feat_dim)
            neg_data = torch.stack(neg_feats_all, 0).view(-1, feat_dim)
            train(model, model_g, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'], **opts)

        spf = time.time() - tic
        spf_total += spf

        # Visualize
        if opts['visualize'] or savefig:
            im.set_data(cur_image)
            if gt is not None:
                gt_rect.set_xy(gt[i, :2])
                gt_rect.set_width(gt[i, 2])
                gt_rect.set_height(gt[i, 3])
            rect.set_xy(result_bb[i, :2])
            rect.set_width(result_bb[i, 2])
            rect.set_height(result_bb[i, 3])
            if opts['visualize']:
                plt.pause(.01)
                plt.draw()
            if savefig:
                fig.savefig(os.path.join(opts['savefig_dir'], '%04d.jpg' % i), dpi=dpi)

        if opts['visual_log']:
            if gt is None:
                print("Frame %d/%d, Score %.3f, Time %.3f" % (i, len(img_list), target_score, spf))
            else:
                print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" %
                      (i, len(img_list), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
                iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]

    fps = len(img_list) / spf_total
    # fps = (len(img_list) - 1) / spf_total  # no first frame
    return iou_result, result_bb, fps, result
def forward( self, # type: ignore question: Dict[str, torch.LongTensor], passage: Dict[str, torch.LongTensor], answer: Dict[str, torch.LongTensor], dialog: Dict[str, torch.LongTensor], previous_answer_appended: Dict[str, torch.LongTensor], span_start: torch.IntTensor = None, span_end: torch.IntTensor = None, p1_answer_marker: torch.IntTensor = None, p2_answer_marker: torch.IntTensor = None, p3_answer_marker: torch.IntTensor = None, yesno_list: torch.IntTensor = None, followup_list: torch.IntTensor = None, metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]: # pylint: disable=arguments-differ """ Parameters ---------- question : Dict[str, torch.LongTensor] From a ``TextField``. passage : Dict[str, torch.LongTensor] From a ``TextField``. The model assumes that this passage contains the answer to the question, and predicts the beginning and ending positions of the answer within the passage. span_start : ``torch.IntTensor``, optional From an ``IndexField``. This is one of the things we are trying to predict - the beginning position of the answer with the passage. This is an `inclusive` token index. If this is given, we will compute a loss that gets included in the output dictionary. span_end : ``torch.IntTensor``, optional From an ``IndexField``. This is one of the things we are trying to predict - the ending position of the answer with the passage. This is an `inclusive` token index. If this is given, we will compute a loss that gets included in the output dictionary. p1_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 0. This is a tensor that has a shape [batch_size, max_qa_count, max_passage_length]. Most passage token will have assigned 'O', except the passage tokens belongs to the previous answer in the dialog, which will be assigned labels such as <1_start>, <1_in>, <1_end>. For more details, look into dataset_readers/util/make_reading_comprehension_instance_quac p2_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 1. It is similar to p1_answer_marker, but marking previous previous answer in passage. p3_answer_marker : ``torch.IntTensor``, optional This is one of the inputs, but only when num_context_answers > 2. It is similar to p1_answer_marker, but marking previous previous previous answer in passage. yesno_list : ``torch.IntTensor``, optional This is one of the outputs that we are trying to predict. Three way classification (the yes/no/not a yes no question). followup_list : ``torch.IntTensor``, optional This is one of the outputs that we are trying to predict. Three way classification (followup / maybe followup / don't followup). metadata : ``List[Dict[str, Any]]``, optional If present, this should contain the question ID, original passage text, and token offsets into the passage for each instance in the batch. We use this for computing official metrics using the official SQuAD evaluation script. The length of this list should be the batch size, and each dictionary should have the keys ``id``, ``original_passage``, and ``token_offsets``. If you only want the best span string and don't care about official metrics, you can omit the ``id`` key. Returns ------- An output dictionary consisting of the followings. Each of the followings is a nested list because first iterates over dialog, then questions in dialog. qid : List[List[str]] A list of list, consisting of question ids. 
followup : List[List[int]] A list of list, consisting of continuation marker prediction index. (y :yes, m: maybe follow up, n: don't follow up) yesno : List[List[int]] A list of list, consisting of affirmation marker prediction index. (y :yes, x: not a yes/no question, n: np) best_span_str : List[List[str]] If sufficient metadata was provided for the instances in the batch, we also return the string from the original passage that the model thinks is the best answer to the question. loss : torch.FloatTensor, optional A scalar loss to be optimised. """ #question = previous_answer_appended batch_size, max_qa_count, max_q_len, _ = question[ 'token_characters'].size() #logger.info("dialog shape token charcaters is %s %s", dialog['token_characters'].size(), dialog['elmo'].size()) #logger.info("question shape token charcaters is %s %s", question['token_characters'].size(), question['elmo'].size()) batch_size, max_dia_count, max_dia_len, _ = dialog[ 'token_characters'].size() total_qa_count = batch_size * max_qa_count qa_mask = torch.ge(followup_list, 0).view(total_qa_count) embedded_question = self._text_field_embedder(question, num_wrapping_dims=1) #logger.info("11111111111 dialog is %s", dialog['token_characters'].shape) #logger.info("11111111111 dialog is %s", dialog['elmo'].shape) embedded_dialog = self._text_field_embedder(dialog, num_wrapping_dims=1) embedded_question = embedded_question.reshape( total_qa_count, max_q_len, self._text_field_embedder.get_output_dim()) embedded_dialog = embedded_dialog.reshape( total_qa_count, max_dia_len, self._text_field_embedder.get_output_dim()) embedded_question = self._variational_dropout(embedded_question) embedded_dialog = self._variational_dropout(embedded_dialog) embedded_passage = self._variational_dropout( self._text_field_embedder(passage)) passage_length = embedded_passage.size(1) #logger.info("embedded question has shape %s", embedded_question.shape) #logger.info("embedded dialog has shape %s", embedded_dialog.shape) question_mask = util.get_text_field_mask(question, num_wrapping_dims=1).float() question_mask = question_mask.reshape(total_qa_count, max_q_len) dialog_mask = util.get_text_field_mask(dialog, num_wrapping_dims=1).float() dialog_mask = dialog_mask.reshape(total_qa_count, max_dia_len) passage_mask = util.get_text_field_mask(passage).float() #logger.info("dialog shape token charcaters is %s %s", dialog['token_characters'].size(), dialog['elmo'].size()) #logger.info("answer shape token charcaters is %s %s", answer['token_characters'].size(), answer['elmo'].size()) #logger.info("quesion shape token charcaters is %s %s", question['token_characters'].size(), question['elmo'].size()) #logger.info("previous answer shape token charcaters is %s %s", previous_answer_appended['token_characters'].size(), previous_answer_appended['elmo'].size()) repeated_passage_mask = passage_mask.unsqueeze(1).repeat( 1, max_qa_count, 1) repeated_passage_mask = repeated_passage_mask.view( total_qa_count, passage_length) if self._num_context_answers > 0: # Encode question turn number inside the dialog into question embedding. 
question_num_ind = util.get_range_vector( max_qa_count, util.get_device_of(embedded_question)) question_num_ind = question_num_ind.unsqueeze(-1).repeat( 1, max_q_len) question_num_ind = question_num_ind.unsqueeze(0).repeat( batch_size, 1, 1) question_num_ind = question_num_ind.reshape( total_qa_count, max_q_len) question_num_marker_emb = self._question_num_marker( question_num_ind) embedded_question = torch.cat( [embedded_question, question_num_marker_emb], dim=-1) # Append dialog number for dialog question_num_ind = util.get_range_vector( max_dia_count, util.get_device_of(embedded_dialog)) question_num_ind = question_num_ind.unsqueeze(-1).repeat( 1, max_dia_len) question_num_ind = question_num_ind.unsqueeze(0).repeat( batch_size, 1, 1) question_num_ind = question_num_ind.reshape( total_qa_count, max_dia_len) question_num_marker_emb = self._question_num_marker( question_num_ind) embedded_dialog = torch.cat( [embedded_dialog, question_num_marker_emb], dim=-1) # Encode the previous answers in passage embedding. repeated_embedded_passage = embedded_passage.unsqueeze(1).repeat(1, max_qa_count, 1, 1). \ view(total_qa_count, passage_length, self._text_field_embedder.get_output_dim()) # batch_size * max_qa_count, passage_length, word_embed_dim p1_answer_marker = p1_answer_marker.view(total_qa_count, passage_length) p1_answer_marker_emb = self._prev_ans_marker(p1_answer_marker) repeated_embedded_passage = torch.cat( [repeated_embedded_passage, p1_answer_marker_emb], dim=-1) if self._num_context_answers > 1: p2_answer_marker = p2_answer_marker.view( total_qa_count, passage_length) p2_answer_marker_emb = self._prev_ans_marker(p2_answer_marker) repeated_embedded_passage = torch.cat( [repeated_embedded_passage, p2_answer_marker_emb], dim=-1) if self._num_context_answers > 2: p3_answer_marker = p3_answer_marker.view( total_qa_count, passage_length) p3_answer_marker_emb = self._prev_ans_marker( p3_answer_marker) repeated_embedded_passage = torch.cat( [repeated_embedded_passage, p3_answer_marker_emb], dim=-1) repeated_encoded_passage = self._variational_dropout( self._phrase_layer(repeated_embedded_passage, repeated_passage_mask)) else: encoded_passage = self._variational_dropout( self._phrase_layer(embedded_passage, passage_mask)) repeated_encoded_passage = encoded_passage.unsqueeze(1).repeat( 1, max_qa_count, 1, 1) repeated_encoded_passage = repeated_encoded_passage.view( total_qa_count, passage_length, self._encoding_dim) #logger.info("repeated encoded passage has shape %s", repeated_encoded_passage.shape) #logger.info("embedded question has shape %s", embedded_question.shape) #logger.info("question mask has shape %s", question_mask.shape) #logger.info("embedded dialog has shape %s", embedded_dialog.shape) #logger.info("dialog mask has shape %s", dialog_mask.shape) encoded_question = self._variational_dropout( self._phrase_layer(embedded_question, question_mask)) encoded_dialog = self._variational_dropout( self._phrase_layer(embedded_dialog, dialog_mask)) #logger.info("encoded_question is %s", encoded_question.shape) #logger.info("encoded_dialog is %s", encoded_dialog.shape) #logger.info("encoded_passage is %s", repeated_encoded_passage.shape) # Shape: (batch_size * max_qa_count, passage_length, question_length) passage_question_similarity = self._matrix_attention( repeated_encoded_passage, encoded_question) #logger.info("passage_question_similarity is %s", passage_question_similarity.shape) # Shape: (batch_size * max_qa_count, passage_length, question_length) #logger.info("question_mask is %s", 
question_mask.shape) passage_question_attention = util.masked_softmax( passage_question_similarity, question_mask) # Shape: (batch_size * max_qa_count, passage_length, encoding_dim) passage_question_vectors = util.weighted_sum( encoded_question, passage_question_attention) #logger.info("passage question vectors is %s", passage_question_vectors.shape) ############################# DIALOG SIMILARITY STUFF ################################################################ dialog_question_similarity = self._matrix_attention( encoded_question, encoded_dialog) #logger.info("dialog question similarity is %s", dialog_question_similarity.shape) #logger.info("dialog_mask is %s", dialog_mask.shape) # Shape: (batch_size * max_qa_count, passage_length, question_length) dialog_question_attention = util.masked_softmax( dialog_question_similarity, dialog_mask) #logger.info("dialog question attention is %s", dialog_question_attention.shape) # Shape: (batch_size * max_qa_count, passage_length, encoding_dim) question_dialog_vectors = util.weighted_sum(encoded_dialog, dialog_question_attention) #logger.info("question_dialog_vectors is %s", question_dialog_vectors.shape) #logger.info("encoded_question 111 %s", encoded_question.shape) #logger.info("encoded_question 2222 %s", encoded_question.shape) #logger.info("self._encoding_dim %s", self._encoding_dim) encoded_question = torch.cat( [encoded_question, question_dialog_vectors], dim=-1) encoded_question = F.relu(self.t(encoded_question)) #logger.info("encoded_question 3333333 %s", encoded_question.shape) ###################################################################################################################### # We replace masked values with something really negative here, so they don't affect the # max below. #if max_qa_count == 7 and batch_size == 21: # sys.exit() masked_similarity = util.replace_masked_values( passage_question_similarity, question_mask.unsqueeze(1), -1e7) question_passage_similarity = masked_similarity.max( dim=-1)[0].squeeze(-1) question_passage_attention = util.masked_softmax( question_passage_similarity, repeated_passage_mask) # Shape: (batch_size * max_qa_count, encoding_dim) question_passage_vector = util.weighted_sum( repeated_encoded_passage, question_passage_attention) tiled_question_passage_vector = question_passage_vector.unsqueeze( 1).expand(total_qa_count, passage_length, self._encoding_dim) # Shape: (batch_size * max_qa_count, passage_length, encoding_dim * 4) final_merged_passage = torch.cat([ repeated_encoded_passage, passage_question_vectors, repeated_encoded_passage * passage_question_vectors, repeated_encoded_passage * tiled_question_passage_vector ], dim=-1) final_merged_passage = F.relu(self._merge_atten(final_merged_passage)) residual_layer = self._variational_dropout( self._residual_encoder(final_merged_passage, repeated_passage_mask)) self_attention_matrix = self._self_attention(residual_layer, residual_layer) mask = repeated_passage_mask.reshape(total_qa_count, passage_length, 1) \ * repeated_passage_mask.reshape(total_qa_count, 1, passage_length) self_mask = torch.eye(passage_length, passage_length, device=self_attention_matrix.device) self_mask = self_mask.reshape(1, passage_length, passage_length) mask = mask * (1 - self_mask) self_attention_probs = util.masked_softmax(self_attention_matrix, mask) # (batch, passage_len, passage_len) * (batch, passage_len, dim) -> (batch, passage_len, dim) self_attention_vecs = torch.matmul(self_attention_probs, residual_layer) self_attention_vecs = torch.cat([ 
self_attention_vecs, residual_layer, residual_layer * self_attention_vecs ], dim=-1) residual_layer = F.relu( self._merge_self_attention(self_attention_vecs)) final_merged_passage = final_merged_passage + residual_layer # batch_size * maxqa_pair_len * max_passage_len * 200 final_merged_passage = self._variational_dropout(final_merged_passage) start_rep = self._span_start_encoder(final_merged_passage, repeated_passage_mask) span_start_logits = self._span_start_predictor(start_rep).squeeze(-1) end_rep = self._span_end_encoder( torch.cat([final_merged_passage, start_rep], dim=-1), repeated_passage_mask) span_end_logits = self._span_end_predictor(end_rep).squeeze(-1) span_yesno_logits = self._span_yesno_predictor(end_rep).squeeze(-1) span_followup_logits = self._span_followup_predictor(end_rep).squeeze( -1) span_start_logits = util.replace_masked_values(span_start_logits, repeated_passage_mask, -1e7) # batch_size * maxqa_len_pair, max_document_len span_end_logits = util.replace_masked_values(span_end_logits, repeated_passage_mask, -1e7) best_span = self._get_best_span_yesno_followup(span_start_logits, span_end_logits, span_yesno_logits, span_followup_logits, self._max_span_length) output_dict: Dict[str, Any] = {} # Compute the loss. if span_start is not None: loss = nll_loss(util.masked_log_softmax(span_start_logits, repeated_passage_mask), span_start.view(-1), ignore_index=-1) self._span_start_accuracy(span_start_logits, span_start.view(-1), mask=qa_mask) loss += nll_loss(util.masked_log_softmax(span_end_logits, repeated_passage_mask), span_end.view(-1), ignore_index=-1) self._span_end_accuracy(span_end_logits, span_end.view(-1), mask=qa_mask) self._span_accuracy(best_span[:, 0:2], torch.stack([span_start, span_end], -1).view(total_qa_count, 2), mask=qa_mask.unsqueeze(1).expand(-1, 2).long()) # add a select for the right span to compute loss gold_span_end_loc = [] span_end = span_end.view( total_qa_count).squeeze().data.cpu().numpy() for i in range(0, total_qa_count): gold_span_end_loc.append( max(span_end[i] * 3 + i * passage_length * 3, 0)) gold_span_end_loc.append( max(span_end[i] * 3 + i * passage_length * 3 + 1, 0)) gold_span_end_loc.append( max(span_end[i] * 3 + i * passage_length * 3 + 2, 0)) gold_span_end_loc = span_start.new(gold_span_end_loc) pred_span_end_loc = [] for i in range(0, total_qa_count): pred_span_end_loc.append( max(best_span[i][1] * 3 + i * passage_length * 3, 0)) pred_span_end_loc.append( max(best_span[i][1] * 3 + i * passage_length * 3 + 1, 0)) pred_span_end_loc.append( max(best_span[i][1] * 3 + i * passage_length * 3 + 2, 0)) predicted_end = span_start.new(pred_span_end_loc) _yesno = span_yesno_logits.view(-1).index_select( 0, gold_span_end_loc).view(-1, 3) _followup = span_followup_logits.view(-1).index_select( 0, gold_span_end_loc).view(-1, 3) loss += nll_loss(F.log_softmax(_yesno, dim=-1), yesno_list.view(-1), ignore_index=-1) loss += nll_loss(F.log_softmax(_followup, dim=-1), followup_list.view(-1), ignore_index=-1) _yesno = span_yesno_logits.view(-1).index_select( 0, predicted_end).view(-1, 3) _followup = span_followup_logits.view(-1).index_select( 0, predicted_end).view(-1, 3) self._span_yesno_accuracy(_yesno, yesno_list.view(-1), mask=qa_mask) self._span_followup_accuracy(_followup, followup_list.view(-1), mask=qa_mask) output_dict["loss"] = loss # Compute F1 and preparing the output dictionary. 
output_dict['best_span_str'] = [] output_dict['qid'] = [] output_dict['followup'] = [] output_dict['yesno'] = [] best_span_cpu = best_span.detach().cpu().numpy() for i in range(batch_size): passage_str = metadata[i]['original_passage'] offsets = metadata[i]['token_offsets'] f1_score = 0.0 per_dialog_best_span_list = [] per_dialog_yesno_list = [] per_dialog_followup_list = [] per_dialog_query_id_list = [] for per_dialog_query_index, (iid, answer_texts) in enumerate( zip(metadata[i]["instance_id"], metadata[i]["answer_texts_list"])): predicted_span = tuple(best_span_cpu[i * max_qa_count + per_dialog_query_index]) start_offset = offsets[predicted_span[0]][0] end_offset = offsets[predicted_span[1]][1] yesno_pred = predicted_span[2] followup_pred = predicted_span[3] per_dialog_yesno_list.append(yesno_pred) per_dialog_followup_list.append(followup_pred) per_dialog_query_id_list.append(iid) best_span_string = passage_str[start_offset:end_offset] per_dialog_best_span_list.append(best_span_string) if answer_texts: if len(answer_texts) > 1: t_f1 = [] # Compute F1 over N-1 human references and averages the scores. for answer_index in range(len(answer_texts)): idxes = list(range(len(answer_texts))) idxes.pop(answer_index) refs = [answer_texts[z] for z in idxes] t_f1.append( squad_eval.metric_max_over_ground_truths( squad_eval.f1_score, best_span_string, refs)) f1_score = 1.0 * sum(t_f1) / len(t_f1) else: f1_score = squad_eval.metric_max_over_ground_truths( squad_eval.f1_score, best_span_string, answer_texts) self._official_f1(100 * f1_score) output_dict['qid'].append(per_dialog_query_id_list) output_dict['best_span_str'].append(per_dialog_best_span_list) output_dict['yesno'].append(per_dialog_yesno_list) output_dict['followup'].append(per_dialog_followup_list) return output_dict
def forward(self, batch): """ :param batch: (p_index, q_index, zhengli_index, fuli_index, wfqd_index) :return: """ passage = batch[0:3] query = batch[3:6] zhengli = batch[6:9] # (batch_size, zhengli_len) fuli = batch[9:12] wfqd = batch[12:15] # mask passage_mask = utils.get_mask(passage[0]) query_mask = utils.get_mask(query[0]) zhengli_mask = utils.get_mask(zhengli[0]) fuli_mask = utils.get_mask(fuli[0]) wfqd_mask = utils.get_mask(wfqd[0]) # embedding passage_vec = self.embedding(passage) query_vec = self.embedding(query) zhengli_vec = self.embedding(zhengli) fuli_vec = self.embedding(fuli) wfqd_vec = self.embedding(wfqd) # encoder: p, q passage_vec = self.encoder( passage_vec, passage_mask) # (p_len, batch_size. hidden_size*2) passage_vec = self.dropout(passage_vec) query_vec = self.encoder(query_vec, query_mask) query_vec = self.dropout(query_vec) # encoder: zhengli,fuli,wfqd zhengli_vec = self.encoder(zhengli_vec, zhengli_mask) zhengli_vec = self.dropout(zhengli_vec) fuli_vec = self.encoder(fuli_vec, fuli_mask) fuli_vec = self.dropout(fuli_vec) wfqd_vec = self.encoder(wfqd_vec, wfqd_mask) wfqd_vec = self.dropout(wfqd_vec) # answer build zhengli_vec = self.mean_a(zhengli_vec, zhengli_mask) # (batch_size, hidden_size*2) fuli_vec = self.mean_a(fuli_vec, fuli_mask) wfqd_vec = self.mean_a(wfqd_vec, wfqd_mask) answer = torch.stack([zhengli_vec, fuli_vec, wfqd_vec ]).transpose(0, 1) # (batch_size, 3, hidden_size) # merge q into p, get p prep align_ct = passage_vec for i in range(self.num_align_hops): qt_align_ct = self.aligner[i](align_ct, query_vec, query_mask) bar_ct = self.aligner_sfu[i]( align_ct, torch.cat([ qt_align_ct, align_ct * qt_align_ct, align_ct - qt_align_ct ], dim=2)) ct_align_ct = self.self_aligner[i](bar_ct, passage_mask) hat_ct = self.self_aligner_sfu[i]( bar_ct, torch.cat( [ct_align_ct, bar_ct * ct_align_ct, bar_ct - ct_align_ct], dim=2)) align_ct = self.choose_agg[i](hat_ct, passage_mask) p_prep = align_ct # (p_len, batch_size, hidden_size*2) q_prep = self.mean_q(query_vec, query_mask).unsqueeze( 0) # (1, batch_size, hidden_size*2) sj = self.vp(torch.tanh(self.wp1(p_prep) + self.wp2(q_prep))).transpose( 0, 1) # (batch_size, p_len, 1) mask = passage_mask.eq(0).unsqueeze(2) sj.masked_fill_(mask, -float('inf')) sj = f.softmax(sj, dim=1).transpose(1, 2) # (batch_size, 1, p_len) p_prep = torch.bmm(sj, p_prep.transpose(0, 1)).squeeze( 1) # (batch_size, hidden_size*2) # choosing p_prep = self.bi_linear(p_prep) # (batch_size, hidden_size) outputs = torch.bmm(answer, p_prep.unsqueeze(2)).squeeze(2) # (batch_size, 3) return outputs
def generate(self, mels, save_path, batched, target, overlap, mu_law, trg_mel=None): device = next(self.parameters()).device # use same device as parameters mu_law = mu_law if self.mode == 'RAW' else False self.eval() output = [] start = time.time() rnn1 = self.get_gru_cell(self.rnn1) rnn2 = self.get_gru_cell(self.rnn2) with torch.no_grad(): mels = torch.as_tensor(mels, device=device) wave_len = (mels.size(-1) - 1) * self.hop_length mels = self.pad_tensor(mels.transpose(1, 2), pad=self.pad, side='both') mels = mels.transpose(1, 2) if trg_mel is not None and hasattr(self, 'adaptnet'): mels = self.adaptnet(mels, trg_mel) mels, aux = self.upsample(mels) if batched: mels = self.fold_with_overlap(mels, target, overlap) aux = self.fold_with_overlap(aux, target, overlap) b_size, seq_len, _ = mels.size() h1 = torch.zeros(b_size, self.rnn_dims, device=device) h2 = torch.zeros(b_size, self.rnn_dims, device=device) x = torch.zeros(b_size, 1, device=device) d = self.aux_dims aux_split = [aux[:, :, d * i:d * (i + 1)] for i in range(4)] for i in range(seq_len): m_t = mels[:, i, :] a1_t, a2_t, a3_t, a4_t = \ (a[:, i, :] for a in aux_split) x = torch.cat([x, m_t, a1_t], dim=1) x = self.I(x) h1 = rnn1(x, h1) x = x + h1 inp = torch.cat([x, a2_t], dim=1) h2 = rnn2(inp, h2) x = x + h2 x = torch.cat([x, a3_t], dim=1) x = F.relu(self.fc1(x)) x = torch.cat([x, a4_t], dim=1) x = F.relu(self.fc2(x)) logits = self.fc3(x) if self.mode == 'MOL': sample = sample_from_discretized_mix_logistic(logits.unsqueeze(0).transpose(1, 2)) output.append(sample.view(-1)) # x = torch.FloatTensor([[sample]]).cuda() x = sample.transpose(0, 1) elif self.mode == 'RAW': posterior = F.softmax(logits, dim=1) distrib = torch.distributions.Categorical(posterior) sample = 2 * distrib.sample().float() / (self.n_classes - 1.) - 1. output.append(sample) x = sample.unsqueeze(-1) else: raise RuntimeError("Unknown model mode value - ", self.mode) #if i % 100 == 0: self.gen_display(i, seq_len, b_size, start) output = torch.stack(output).transpose(0, 1) output = output.cpu().numpy() output = output.astype(np.float64) if batched: output = self.xfade_and_unfold(output, target, overlap) else: output = output[0] if mu_law: output = decode_mu_law(output, self.n_classes, False) # Fade-out at the end to avoid signal cutting out suddenly fade_out = np.linspace(1, 0, 20 * self.hop_length) output = output[:wave_len] output[-20 * self.hop_length:] *= fade_out save_wav(output, save_path) self.train() return output
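# Hedged usage sketch for generate() above. The mel file path and the
# target / overlap values below are illustrative assumptions, not taken from
# this codebase: `target` is roughly the number of samples decoded per folded
# batch entry and `overlap` is the crossfade region between entries, so a
# larger target is faster but folds less aggressively.
# mel = torch.from_numpy(np.load('mel_sample.npy'))[None]  # hypothetical file
# wav = model.generate(mel, 'generated.wav', batched=True,
#                      target=11000, overlap=550, mu_law=True)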
def stack(self, keys):
    # Gather the first `self.size` stored entries for each requested key and
    # stack each list into a single tensor. Note this returns a lazy `map`
    # object; callers must iterate or unpack it to materialize the tensors.
    data = [getattr(self, k)[:self.size] for k in keys]
    return map(lambda x: torch.stack(x, dim=0), data)
def normalize(self, keys):
    for key in keys:
        k = torch.stack(getattr(self, key))
        k = (k - k.mean()) / (k.std() + 1e-10)
        setattr(self, key, [i for i in k])
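# A minimal sketch of how helpers like stack() / normalize() above might be
# used on a rollout-style buffer. The Buffer class and its field names are
# hypothetical; the helpers only rely on getattr/setattr and `self.size`, so
# they can be exercised as free functions here.
class Buffer:
    def __init__(self, size):
        self.size = size
        self.rewards, self.values = [], []

buf = Buffer(size=3)
buf.rewards = [torch.tensor(r) for r in [1., 2., 3.]]
buf.values = [torch.randn(4) for _ in range(3)]
rewards, values = stack(buf, ['rewards', 'values'])  # shapes (3,) and (3, 4)
normalize(buf, ['rewards'])  # rewards now have ~zero mean / unit std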
def get_tp_fp_fn_tn(net_output, gt, axes=None, mask=None, square=False): """ net_output must be (b, c, x, y(, z))) gt must be a label map (shape (b, 1, x, y(, z)) OR shape (b, x, y(, z))) or one hot encoding (b, c, x, y(, z)) if mask is provided it must have shape (b, 1, x, y(, z))) :param net_output: :param gt: :param axes: can be (, ) = no summation :param mask: mask must be 1 for valid pixels and 0 for invalid pixels :param square: if True then fp, tp and fn will be squared before summation :return: """ if axes is None: axes = tuple(range(2, len(net_output.size()))) shp_x = net_output.shape shp_y = gt.shape with torch.no_grad(): if len(shp_x) != len(shp_y): gt = gt.view((shp_y[0], 1, *shp_y[1:])) if all([i == j for i, j in zip(net_output.shape, gt.shape)]): # if this is the case then gt is probably already a one hot encoding y_onehot = gt else: gt = gt.long() y_onehot = torch.zeros(shp_x) if net_output.device.type == "cuda": y_onehot = y_onehot.cuda(net_output.device.index) y_onehot.scatter_(1, gt, 1) tp = net_output * y_onehot fp = net_output * (1 - y_onehot) fn = (1 - net_output) * y_onehot tn = (1 - net_output) * (1 - y_onehot) if mask is not None: tp = torch.stack(tuple(x_i * mask[:, 0] for x_i in torch.unbind(tp, dim=1)), dim=1) fp = torch.stack(tuple(x_i * mask[:, 0] for x_i in torch.unbind(fp, dim=1)), dim=1) fn = torch.stack(tuple(x_i * mask[:, 0] for x_i in torch.unbind(fn, dim=1)), dim=1) tn = torch.stack(tuple(x_i * mask[:, 0] for x_i in torch.unbind(tn, dim=1)), dim=1) if square: tp = tp**2 fp = fp**2 fn = fn**2 tn = tn**2 if len(axes) > 0: tp = sum_tensor(tp, axes, keepdim=False) fp = sum_tensor(fp, axes, keepdim=False) fn = sum_tensor(fn, axes, keepdim=False) tn = sum_tensor(tn, axes, keepdim=False) return tp, fp, fn, tn
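# A small hedged example of how the counts above are typically consumed, here
# forming a soft Dice score per class; `smooth` is an assumed stabilizer, not
# a value taken from this codebase.
def soft_dice(net_output, gt, smooth=1e-5):
    # With axes=None the counts are summed over all spatial dims -> shape (b, c)
    tp, fp, fn, _ = get_tp_fp_fn_tn(net_output, gt, axes=None, mask=None, square=False)
    dice = (2 * tp + smooth) / (2 * tp + fp + fn + smooth)
    return dice.mean()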
def forward(self, support_x, support_y, query_x, query_y, train=True, n_way=-1, curr_shot=-1): batch_sz, support_sz, _, h, w = support_x.size() query_sz = query_x.size(1) # FEATURE EXTRACTION support_x = self.repnet(support_x.view(batch_sz * support_sz, -1, h, w)) query_x = self.repnet(query_x.view(batch_sz * query_sz, -1, h, w)) # output [b, support_sz / query_sz, c, d, d] support_xf = support_x.view(batch_sz, support_sz, self.c, self.d, self.d) query_xf = query_x.view(batch_sz, query_sz, self.c, self.d, self.d) # SWAP if self.swap and train: support_xfs, query_xfs, support_ys, query_ys = \ self._generate_multiple(support_xf, query_xf, support_y, query_y, n_way) else: # also for test support_xfs, query_xfs, support_ys, query_ys = \ [support_xf], [query_xf], [support_y], [query_y] # SCORE expand_sz = n_way if self.opts.model.sum_supp_sample else support_sz score = torch.zeros(len(support_xfs), batch_sz, query_sz, expand_sz).to(self.opts.ctrl.device) for i in range(len(support_xfs)): # expand both to [b, query_sz, support_sz/n_way, c, d, d] if self.opts.model.sum_supp_sample: support_xf = torch.sum( support_xfs[i].view(batch_sz, n_way, -1, self.c, self.d, self.d), 2).squeeze(2) else: support_xf = support_xfs[i] support_xf = support_xf.unsqueeze(1).expand( -1, query_sz, -1, -1, -1, -1) query_xf = query_xfs[i].unsqueeze(2).expand( -1, -1, expand_sz, -1, -1, -1) # cat => [b, query_sz, support_sz/n_way, 2c, d, d] comb = torch.cat([support_xf, query_xf], dim=3) comb = comb.view(batch_sz * query_sz * expand_sz, 2 * self.c, self.d, self.d) comb = self.relation2(self.relation1(comb)) comb = F.avg_pool2d(comb, self.pool_size) # [b*query_sz*support_sz/n_way, 256] => [b, query_sz, support_sz/n_way, 1] # score: [b, query_sz, support_sz/n_way] score[i] = self.fc(comb.view(batch_sz * query_sz * expand_sz, -1)).view(batch_sz, query_sz, expand_sz, 1).squeeze(3) # LOSS OR ACCURACY if train: loss = torch.zeros(len(support_xfs)).to(self.opts.ctrl.device) for i in range(len(support_xfs)): if self.CE_loss: # reformat score output: N, n_way (being the number of classes) curr_score = score[i].view(batch_sz * query_sz, n_way, -1).mean(dim=-1) support_y_neat = support_ys[i][:, ::curr_shot] # b, n_way target = torch.stack([ torch.nonzero( torch.eq(support_y_neat[b], query_ys[i][b, j])) for b, query in enumerate(query_ys[i]) for j, _, in enumerate(query) ]) target = target.view(-1) # shape: N loss[i] = F.cross_entropy(curr_score, target) else: # build the label if self.opts.model.sum_supp_sample: support_y_neat = support_ys[i][:, :: curr_shot] # b, n_way support_y_expand = support_y_neat.unsqueeze(1).expand( batch_sz, query_sz, n_way) query_y_expand = query_ys[i].unsqueeze(2).expand( batch_sz, query_sz, n_way) else: # [b, support_sz] => [b, 1, support_sz] => [b, query_sz, support_sz] support_y_expand = support_ys[i].unsqueeze(1).expand( batch_sz, query_sz, support_sz) # [b, query_sz] => [b, query_sz, 1] => [b, query_sz, support_sz] query_y_expand = query_ys[i].unsqueeze(2).expand( batch_sz, query_sz, support_sz) # convert byte tensor to float tensor label = torch.eq(support_y_expand, query_y_expand).float() loss[i] = F.mse_loss(score[i], label) loss = (loss.sum() / len(support_xfs)).unsqueeze(0) return loss.unsqueeze( 0) # output size: 1 x 1 (or the number of losses) else: # TEST if self.opts.model.sum_supp_sample: score = score[0].unsqueeze(-1) else: # score shape: b, query_sz, n_way, k_shot score = score[0].view(batch_sz, query_sz, n_way, curr_shot) # pred_ind shape: b, query_sz if self.CE_loss: pred_ind = 
score.mean(dim=-1).argmax(dim=-1) else: pred_ind = score.sum(dim=-1).argmax(dim=-1) support_y_neat = support_ys[0][:, ::curr_shot] # b, n_way pred = torch.stack([ support_y_neat[b, ind] for b, query in enumerate(pred_ind) for ind in query ]) pred = pred.view(batch_sz, -1) correct = torch.eq(pred, query_ys[0]).sum() correct = correct.unsqueeze(0) return pred, correct
        print('completed % of epoch:', inter)
        inter += .10
        data = []
        result = []
        for sm in sample:
            imag, dat, res = sm
            data = dat
            result.append(res)
            for img in imag:
                preds = model.predict(img)
                preds = preds.astype('float').reshape(-1)
                preds = preds[0]
        target = torch.stack(result)
        target = target.view(-1)
        # wrap the absolute prediction error as a (1, 1, 1) input tensor
        final_vars = torch.FloatTensor([[[abs(data - preds)]]])
        x = net(*final_vars)
        values, indices = x.max(1)
        loss = criterion(x, Variable(target))
        loss.backward()
        optimizer.step()
        net.zero_grad()
        if i_batch > 10:
            break

# &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
def forward(self, x, temp=1): bs, N, T, ch, h, w = x.shape x = x.flatten(0, 1) if self.feature_dropout_prob > 0: # Note: Apply to q,k? x_att = self.feat_dropout(x) else: x_att = x # TODO: Check if normalization should also be done for the Value features. Maybe it doesn't matter. x_att = F.normalize(x_att, p=2, dim=-3) # Note: Layernorm # Option 1 # qs = self.to_q(x_att[:, 1:].flatten(0, 1)).flatten(-2, -1) # [bsNT, ch, hw] # ks = self.to_k(x_att[:, :-1].flatten(0, 1)).flatten(-2, -1) # Option 2 qs, ks = x_att[:, 1:].flatten(0, 1).flatten(-2, -1), x_att[:, :-1].flatten(0, 1).flatten(-2, -1) # Feature linear transformation + stacked positional encoding vs_pe = self.to_v(x.flatten(0, 1)).reshape(bs*N, T, -1, h*w) # As = self.affinity(ks, qs) # We track backwards! energy = torch.einsum('bcn,bcm->bnm', qs, ks).reshape(bs*N, T-1, h*w, h*w) * self.scale As = [self.stoch_mat(energy[:, t], temp=temp, do_dropout=True) for t in range(T-1)] # vs_list = torch.split(vs_pe, 1, dim=1) acc_state = vs_pe[:, :-self.n_timesteps+1, self.s_sta_dim:] # Note: Make sure it keeps the temporal encoding attn_vec = torch.stack(As, dim=1) for t in range(self.n_timesteps-1): if t + 2 < self.n_timesteps: curr_state, curr_attn = vs_pe[:, t+1:-self.n_timesteps+2+t, self.s_sta_dim:], attn_vec[:, t:-self.n_timesteps+2+t] else: curr_state, curr_attn = vs_pe[:, t+1:], attn_vec[:, t:] # attn_vec has one less timestep, so the range is slightly different. acc_state = torch.cat([curr_state, torch.einsum('btcm,btnm->btcn', acc_state, curr_attn)], dim=2) # Note: IGNORE. For testing purposes: reconstruct t without (t) features # if t + 2 < self.n_timesteps: # acc_state = torch.cat([curr_state, torch.einsum('btcm,btnm->btcn', acc_state, curr_attn)], dim=2) # else: # acc_state = torch.einsum('btcm,btnm->btcn', acc_state, curr_attn) # # Note: IGNORE. For testing purposes: Simple version of the function # acc_state = vs_list[0] # for t in range(T-1): # # if t < T-3: # Note: Test attention. If we reverse the dimensionality it works poorly. Difficult to know why # acc_state = torch.cat([vs_list[t+1], torch.einsum('bcm,bnm->bcn', acc_state.squeeze(1), As[t]).unsqueeze(1)], dim=2) # else: # acc_state = torch.einsum('bcm,bnm->bcn', acc_state.squeeze(1), As[t]).unsqueeze(1) # Option: Self-attention from SAGAN # m_batchsize,C,width ,height = x.size() # proj_query = self.query_conv(x).view(m_batchsize,-1,width*height).permute(0,2,1) # B X CX(N) # proj_key = self.key_conv(x).view(m_batchsize,-1,width*height) # B X C x (*W*H) # energy = torch.bmm(proj_query,proj_key) # transpose check # attention = self.softmax(energy) # BX (N) X (N) # proj_value = self.value_conv(x).view(m_batchsize,-1,width*height) # B X C X N # # out = torch.bmm(proj_value,attention.permute(0,2,1) ) # out = out.view(m_batchsize,C,width,height) # out = self.gamma*out + x return acc_state
def validation_end(self, outputs):
    avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
    # tensorboard_logs = {"Validation/Loss": avg_loss}
    return {"avg_val_loss": avg_loss}  # "log": tensorboard_logs
def forward(self, xh, xp_list):
    # Per-part attention maps, fused by an element-wise max across parts,
    # then one message per part conditioned on the common attention.
    xp_att_list = [self.node_att(xp) for xp in xp_list]
    com_att = torch.max(torch.stack(xp_att_list, dim=1), dim=1, keepdim=False)[0]
    xph_message = sum([self.conv_ch(torch.cat([xh, xp * com_att], dim=1)) for xp in xp_list])
    return xph_message
cut_size = 44
total_epoch = 250

path = os.path.join(opt.dataset + '_' + opt.model)

# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.RandomCrop(44),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])

trainset = FER2013(split='Training', transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.bs, shuffle=True, num_workers=1)
PublicTestset = FER2013(split='PublicTest', transform=transform_test)
PublicTestloader = torch.utils.data.DataLoader(PublicTestset, batch_size=opt.bs, shuffle=False, num_workers=1)
PrivateTestset = FER2013(split='PrivateTest', transform=transform_test)
PrivateTestloader = torch.utils.data.DataLoader(PrivateTestset, batch_size=opt.bs, shuffle=False, num_workers=1)

# Model
if opt.model == 'VGG19':
    net = VGG('VGG19')
elif opt.model == 'Resnet18':
    net = ResNet18()
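# Because TenCrop makes each test sample 5D, a test batch has shape
# (bs, ncrops, c, h, w). A common evaluation pattern (a sketch, assuming `net`
# returns class logits) is to flatten the crop axis for the forward pass and
# average the ten predictions per image afterwards:
def tencrop_forward(net, inputs):
    bs, ncrops, c, h, w = inputs.size()
    outputs = net(inputs.view(-1, c, h, w))      # (bs * ncrops, n_classes)
    return outputs.view(bs, ncrops, -1).mean(1)  # (bs, n_classes)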
def initProb(sData, nTrain, nVal, var0, alph, cvt): """ initialize the OC problem that we want to solve :param sData: str, name of the problem :param nTrain: int, number of samples in a batch, drawn from rho_0 :param nVal: int, number of validation samples to draw from rho_0 :param var0: float, variance of rho_0 :param alph: list, 6-value list of parameters/hyperparameters :param cvt: func, conversion function for typing and device :return: prob: the problem Object x0: nTrain -by- d tensor, training batch x0v: nVal -by- d tensor, training batch xInit: 1 -by- d tensor, center of rho_0 """ if sData == 'softcorridor': d = 4 xtarget = cvt(torch.tensor([[2, 2, -2, 2]])) xInit = cvt(torch.tensor([[-2, -2, 2, -2]])) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nTrain, d)) prob = Cross2D(xtarget, obstacle='softcorridor', alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swarm': nAgents = 32 d = nAgents*3 xtarget = cvt(torch.tensor([-2., 2., 8., -1., 2., 8., 0., 2., 8., 1., 2., 8., 2., 2., 8., -2.5, 3., 8., -1.5, 3., 8., -0.5, 3., 8., 0.5, 3., 8., 1.5, 3., 8., 2.5, 3., 8., -2., 4., 8., -1., 4., 8., 0., 4., 8., 1., 4., 8., 2., 4., 8.])) xtarget = torch.cat((xtarget.view(-1, 3), cvt(torch.tensor([0, -0.5, -3])) + xtarget.view(-1, 3)), dim=0).view(-1) halfTrain = nTrain // 2 xInit = cvt(torch.tensor([1,-1,-1])) * xtarget.view(-1,3) + cvt(torch.tensor([0,0,10])) xInit = xInit.view(1,-1) x0 = xInit + cvt( var0 * torch.randn(halfTrain, d)) xmore = xtarget + cvt(var0 * torch.randn(halfTrain, d)) x0 = torch.cat((x0, xmore), dim=0) # validation samples from rho_0 x0v = xInit + cvt( var0 * torch.randn(halfTrain, d)) for j in range(nAgents): x0[ :,3*j+3:3*(j+1)] = 0. * x0[ :,3*j+3:3*(j+1)] x0v[:,3*j+3:3*(j+1)] = 0. * x0v[:,3*j+3:3*(j+1)] prob = SwarmTraj(xtarget, obstacle='blocks', alph_Q=alph[1], alph_W=alph[2], r= 0.2) elif sData == 'swarm50': nAgents = 50 d = nAgents*3 xtarget = cvt(torch.tensor([-2., 2., 6., -1., 2., 6., 0., 2., 6., 1., 2., 6., 2., 2., 6., 3., 2., 6., 4., 2., 6., -2.5, 3., 7., -1.5, 3., 7., -0.5, 3., 7., 0.5, 3., 7., 1.5, 3., 7., 2.5, 3., 7., 3.5, 3., 7., -2., 4., 8., -1., 4., 8., 0., 4., 8., 1., 4., 8., 2., 4., 8., 3., 4., 8., 4., 4., 8., -2., 3., 5., -1., 3., 5., 1., 3., 5., 2., 3., 5.])) xtarget = torch.cat((xtarget.view(-1, 3), cvt(torch.tensor([0, -0.5, -3])) + xtarget.view(-1, 3)), dim=0).view(-1) halfTrain = nTrain // 2 xInit = cvt(torch.tensor([1,-1,-1])) * xtarget.view(-1,3) + cvt(torch.tensor([0,0,10])) xInit = xInit.view(1,-1) x0 = xInit + cvt( var0 * torch.randn(halfTrain, d)) xmore = xtarget + cvt(var0 * torch.randn(halfTrain, d)) x0 = torch.cat((x0, xmore), dim=0) # validation samples from rho_0 x0v = xInit + cvt( var0 * torch.randn(halfTrain, d)) for j in range(nAgents): x0[ :,3*j+3:3*(j+1)] = 0. * x0[ :,3*j+3:3*(j+1)] x0v[:,3*j+3:3*(j+1)] = 0. 
* x0v[:,3*j+3:3*(j+1)] prob = SwarmTraj(xtarget, obstacle='blocks', alph_Q=alph[1], alph_W=alph[2], r= 0.1) elif sData == 'singlequad': d = 12 xtarget = cvt(torch.tensor([2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0.])) init = -1.5 xInit = cvt(torch.tensor([init, init, init])) x0 = xInit + cvt(var0 * torch.randn(nTrain, 3)) x0 = pad(x0, [0, d - 3, 0, 0], value=0) xInit = pad(xInit.view(1,-1), [0, d - 3, 0, 0], value=0) # validation samples from rho_0 x0v = cvt(torch.tensor([init, init, init]) + var0 * torch.randn(nVal, 3)) x0v = pad(x0v, [0, d - 3, 0, 0], value=0) prob = Quadcopter(xtarget,obstacle=None, alph_Q = 0.0, alph_W = 0.0) elif sData=='midcross2': nAgents = 2 d = 2 * nAgents xtarget = cvt(torch.tensor([2,2,-2,2])) xInit = cvt(torch.tensor([-2,-2,2,-2])).view(1,-1) x0 = cvt(torch.tensor([-2,-2,2,-2]) + var0 * torch.randn(nTrain, d)) x0v = cvt(torch.tensor([-2,-2,2,-2]) + var0 * torch.randn(nTrain, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2]) elif sData == 'midcross4': nAgents = 4 d = 2 * nAgents xx = torch.linspace(-2, 2, nAgents) xtarget = cvt(torch.stack((xx.flip(dims=[0]), 2 * torch.ones(nAgents)), dim=1).reshape(1,-1)) xInit = cvt(torch.stack((xx, -2 * torch.ones(nAgents)), dim=1).reshape(1,-1)) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.4) elif sData == 'midcross20': nAgents = 20 d = 2 * nAgents xx = torch.linspace(-6, 6, nAgents) xtarget = cvt(torch.stack((xx.flip(dims=[0]), 6 * torch.ones(nAgents)), dim=1).reshape(1, -1)) xInit = cvt(torch.stack((xx, -6 * torch.ones(nAgents)), dim=1).reshape(1, -1)) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.15) elif sData == 'midcross30': nAgents = 30 d = 2 * nAgents xx = torch.linspace(-6, 6, nAgents) tmp = torch.tensor([6,4,2]) tmp = tmp.view(-1,1).repeat(nAgents//3,1).view(-1) xtarget = cvt(torch.stack((xx.flip(dims=[0]), tmp), dim=1).reshape(1, -1)) tmp = torch.tensor([-6,-4,-2]) tmp = tmp.view(-1,1).repeat(nAgents//3,1).view(-1) xInit = cvt(torch.stack((xx, tmp), dim=1).reshape(1, -1)) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.2) elif sData == 'swap2': nAgents = 2 d = 2 * nAgents xtarget = cvt(torch.tensor([10., 0., -10., 0.])) xInit = cvt(torch.tensor([-10., 0., 10., 0.])).reshape(1, -1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle='hardcorridor', alph_Q=alph[1], alph_W=alph[2], r=1.0) elif sData == 'swap12': nAgents = 12 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0, 10,0, -10,0, 5,5, -5,-5, -4,2, -6,-1, 5,-5, -5,5, 2,-2, -2,-2 ])) xInit = cvt(torch.tensor([0,0, 2,2, -10,0, 10,0, -5,-5, 5,5, -6,-1, -4,2, -5,5, 5,-5, -2,-2, 2,-2 ])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swap12_5pair': nAgents = 10 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0, 10,0, -10,0, 5,5, -5,-5, -4,2, -6,-1, 5,-5, -5,5 ])) xInit = cvt(torch.tensor([0,0, 2,2, -10,0, 10,0, -5,-5, 5,5, -6,-1, -4,2, -5,5, 5,-5 ])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * 
torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swap12_4pair': nAgents = 8 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0, 10,0, -10,0, 5,5, -5,-5, -4,2, -6,-1 ])) xInit = cvt(torch.tensor([0,0, 2,2, -10,0, 10,0, -5,-5, 5,5, -6,-1, -4,2])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swap12_3pair': nAgents = 6 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0, 10,0, -10,0, 5,5, -5,-5 ])) xInit = cvt(torch.tensor([0,0, 2,2, -10,0, 10,0, -5,-5, 5,5 ])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swap12_2pair': nAgents = 4 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0, 10,0, -10,0 ])) xInit = cvt(torch.tensor([0,0, 2,2, -10,0, 10,0 ])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) elif sData == 'swap12_1pair': nAgents = 2 d = 2 * nAgents xtarget = cvt(torch.tensor([ 2,2, 0,0 ])) xInit = cvt(torch.tensor([0,0, 2,2 ])).reshape(1,-1) x0 = xInit + cvt(var0 * torch.randn(nTrain, d)) x0v = xInit + cvt(var0 * torch.randn(nVal, d)) prob = Cross2D(xtarget, obstacle=None, alph_Q=alph[1], alph_W=alph[2], r=0.5) else: print("incorrect value passed to --data") exit(1) return prob, x0, x0v, xInit
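# Hedged usage sketch for initProb above: `cvt` is assumed to cast/move
# tensors (its real definition lives elsewhere in this codebase), and the
# alph values below are illustrative placeholders, not tuned hyperparameters.
cvt = lambda x: x.float().to('cuda' if torch.cuda.is_available() else 'cpu')
alph = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]  # 6-value list, per the docstring
prob, x0, x0v, xInit = initProb('midcross4', nTrain=1024, nVal=2048, var0=0.5, alph=alph, cvt=cvt)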
def compute_JacobianM(self, pts2d1, pts2d2, pts2d1_normed, pts2d2_normed, fixM1, fixM2, intrinsic, t, ang, lagr): R = self.rot_from_axisangle(ang) T = self.t2T(t) derT0, derT1, derT2 = self.derivative_translate(intrinsic.device) rotxd, rotyd, rotzd = self.derivative_angle(ang) samplenum = pts2d1.shape[1] r_bias = (torch.norm(t) - 1) J_t0_bias = 2 * lagr * r_bias / torch.norm(t) * t[0] J_t1_bias = 2 * lagr * r_bias / torch.norm(t) * t[1] J_t2_bias = 2 * lagr * r_bias / torch.norm(t) * t[2] ## ============Compute DerivM2============ ## planeparam2 = torch.inverse(fixM2 @ intrinsic).T @ T @ R @ torch.inverse(intrinsic) @ pts2d1 planeparam2_norm = torch.norm(planeparam2, dim=0, keepdim=True) planeparam2_normed = planeparam2 / planeparam2_norm rdist_2 = torch.sum(planeparam2_normed * pts2d2_normed, dim=0, keepdim=True) deriv_tonorm2 = 2 * rdist_2 * pts2d2_normed dtonx2, dtony2, dtonz2 = torch.split(deriv_tonorm2, 1, dim=0) px2, py2, pz2 = torch.split(planeparam2, 1, dim=0) deriv_px2 = dtonx2 / planeparam2_norm - torch.sum(px2 * planeparam2_normed * deriv_tonorm2, dim=0, keepdim=True) / (planeparam2_norm ** 2) deriv_py2 = dtony2 / planeparam2_norm - torch.sum(py2 * planeparam2_normed * deriv_tonorm2, dim=0, keepdim=True) / (planeparam2_norm ** 2) deriv_pz2 = dtonz2 / planeparam2_norm - torch.sum(pz2 * planeparam2_normed * deriv_tonorm2, dim=0, keepdim=True) / (planeparam2_norm ** 2) deriv_norm2 = torch.cat([deriv_px2, deriv_py2, deriv_pz2], dim=0) deriv_M2 = (deriv_norm2.T).unsqueeze(2) @ (pts2d1.T).unsqueeze(1) @ torch.inverse(intrinsic).T deriv_M2 = (torch.inverse(fixM2 @ intrinsic)).unsqueeze(0).expand([samplenum, -1, -1]) @ deriv_M2 ## ============Compute DerivM1============ ## planeparam1 = (pts2d2.T @ torch.inverse(intrinsic).T @ T @ R @ torch.inverse(fixM1 @ intrinsic)).T planeparam1_norm = torch.norm(planeparam1, dim=0, keepdim=True) planeparam1_normed = planeparam1 / planeparam1_norm rdist_1 = torch.sum(planeparam1_normed * pts2d1_normed, dim=0, keepdim=True) deriv_tonorm1 = 2 * rdist_1 * pts2d1_normed dtonx1, dtony1, dtonz1 = torch.split(deriv_tonorm1, 1, dim=0) px1, py1, pz1 = torch.split(planeparam1, 1, dim=0) deriv_px1 = dtonx1 / planeparam1_norm - torch.sum(px1 * planeparam1_normed * deriv_tonorm1, dim=0, keepdim=True) / (planeparam1_norm ** 2) deriv_py1 = dtony1 / planeparam1_norm - torch.sum(py1 * planeparam1_normed * deriv_tonorm1, dim=0, keepdim=True) / (planeparam1_norm ** 2) deriv_pz1 = dtonz1 / planeparam1_norm - torch.sum(pz1 * planeparam1_normed * deriv_tonorm1, dim=0, keepdim=True) / (planeparam1_norm ** 2) deriv_norm1 = torch.cat([deriv_px1, deriv_py1, deriv_pz1], dim=0) deriv_M1 = (pts2d2.T).unsqueeze(2) @ (deriv_norm1.T).unsqueeze(1) deriv_M1 = deriv_M1 @ torch.inverse(fixM1 @ intrinsic).T deriv_M1 = (torch.inverse(intrinsic)).unsqueeze(0).expand([samplenum, -1, -1]) @ deriv_M1 ## ============== ## J_t0 = torch.sum((deriv_M2 + deriv_M1) * (derT0 @ R), dim=[1, 2]) / samplenum + J_t0_bias / samplenum J_t1 = torch.sum((deriv_M2 + deriv_M1) * (derT1 @ R), dim=[1, 2]) / samplenum + J_t1_bias / samplenum J_t2 = torch.sum((deriv_M2 + deriv_M1) * (derT2 @ R), dim=[1, 2]) / samplenum + J_t2_bias / samplenum J_ang0 = torch.sum((deriv_M2 + deriv_M1) * (T @ rotxd), dim=[1, 2]) / samplenum J_ang1 = torch.sum((deriv_M2 + deriv_M1) * (T @ rotyd), dim=[1, 2]) / samplenum J_ang2 = torch.sum((deriv_M2 + deriv_M1) * (T @ rotzd), dim=[1, 2]) / samplenum JacobM = torch.stack([J_ang0, J_ang1, J_ang2, J_t0, J_t1, J_t2], dim=1) residual = (rdist_2 ** 2 / samplenum + rdist_1 ** 2 / samplenum + 
lagr * r_bias ** 2 / samplenum) return JacobM, residual.T
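# Context sketch (an assumption about downstream use, not code from this
# repo): a Jacobian/residual pair like the one returned above is the raw
# material for a damped Gauss-Newton (Levenberg-Marquardt) update of the six
# pose parameters [ang0, ang1, ang2, t0, t1, t2].
def lm_step(JacobM, residual, damping=1e-3):
    # JacobM: (N, 6), residual: (N, 1)
    JtJ = JacobM.T @ JacobM + damping * torch.eye(6, device=JacobM.device)
    delta = torch.inverse(JtJ) @ (-JacobM.T @ residual)
    return delta.squeeze(1)  # (6,) additive update for [ang, t]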
def fit_tau(X_t,X_tp1, tau, tau_t, vee, opt_tau, opt_vee, device, epoch): """ Meant to be called on a batch of states to estimate the batch gradient wrt tau and v. """ alpha_t = 1. / np.sqrt(epoch+1) lam = 10 tau_xt = tau(X_t) tau_xtp1 = tau(X_tp1) tau_t_xt = tau_t(X_t) tau_t_xtp1 = tau_t(X_tp1) v = vee grad_theta_tau_xt, grad_theta_tau_xtp1 = defaultdict(list),defaultdict(list) for i in range(len(X_t)): opt_tau.zero_grad() tau_xt.backward([torch.FloatTensor([[1] if i==j else [0] for j in range(len(tau_xt))]).to(device)],retain_graph=True) for param in tau.named_parameters(): grad_theta_tau_xt[param[0]].append(param[1].grad.clone()) opt_tau.zero_grad() tau_xtp1.backward([torch.FloatTensor([[1] if i==j else [0] for j in range(len(tau_xtp1))]).to(device)],retain_graph=True) for param in tau.named_parameters(): grad_theta_tau_xtp1[param[0]].append(param[1].grad.clone()) opt_tau.zero_grad() opt_vee.zero_grad() avg_grad_J_tau = [] avg_grad_J_v = [] for param in tau.named_parameters(): """ grad_theta: n_batch x n_out x n_in (matrix) n_batch x n_out (bias) """ grad_theta_tau_xt_MAT = torch.stack(grad_theta_tau_xt[param[0]]) grad_theta_tau_xtp1_MAT = torch.stack(grad_theta_tau_xtp1[param[0]]) """ Defined both gradients as in Eq.17 """ if len(grad_theta_tau_xt_MAT.shape) == 3: # Matrix tiled_tau_xt = tau_xt.repeat(grad_theta_tau_xt_MAT.shape[1],1,grad_theta_tau_xt_MAT.shape[2]).permute(1,0,2) tiled_tau_xtp1 = tau_xtp1.repeat(grad_theta_tau_xtp1_MAT.shape[1],1,grad_theta_tau_xtp1_MAT.shape[2]).permute(1,0,2) tiled_tau_t_xt = tau_t_xt.repeat(grad_theta_tau_xt_MAT.shape[1],1,grad_theta_tau_xt_MAT.shape[2]).permute(1,0,2) tiled_tau_t_xtp1 = tau_t_xtp1.repeat(grad_theta_tau_xtp1_MAT.shape[1],1,grad_theta_tau_xtp1_MAT.shape[2]).permute(1,0,2) else: # Bias tiled_tau_xt = tau_xt.repeat(1,grad_theta_tau_xt_MAT.shape[1]) tiled_tau_xtp1 = tau_xtp1.repeat(1,grad_theta_tau_xtp1_MAT.shape[1]) tiled_tau_t_xt = tau_t_xt.repeat(1,grad_theta_tau_xt_MAT.shape[1]) tiled_tau_t_xtp1 = tau_t_xtp1.repeat(1,grad_theta_tau_xtp1_MAT.shape[1]) grad_J_tau = (tiled_tau_xt * grad_theta_tau_xt_MAT).mean(0) - (1 - alpha_t) * (tiled_tau_t_xt * grad_theta_tau_xt_MAT).mean(0) - alpha_t * (tiled_tau_t_xtp1 * grad_theta_tau_xtp1_MAT).mean(0) + 2*lam*v*grad_theta_tau_xt_MAT.mean(0) grad_J_v = - (2 * lam * (tau_xt.mean() - 1 - v)) param[1].grad = grad_J_tau vee.grad = grad_J_v avg_grad_J_tau.append( grad_J_tau.mean().item() ) avg_grad_J_v.append( grad_J_v.mean().item() ) opt_tau.step() opt_vee.step() tau_t.load_state_dict(tau.state_dict()) return np.mean(avg_grad_J_tau), np.mean(avg_grad_J_v)
def validation_epoch_end(self, outputs):
    acc = torch.stack([x['acc'] for x in outputs]).mean()
    val_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    tensorboard_logs = {'val_ce_loss': val_loss, 'val_acc': acc}
    progress_bar_metrics = tensorboard_logs
    return {'val_loss': val_loss, 'log': tensorboard_logs, 'progress_bar': progress_bar_metrics}
def test_epoch_end(self, outputs):
    acc = torch.stack([x['acc'] for x in outputs]).mean()
    test_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
    tensorboard_logs = {'test_ce_loss': test_loss, 'test_acc': acc}
    progress_bar_metrics = tensorboard_logs
    return {'test_loss': test_loss, 'log': tensorboard_logs, 'progress_bar': progress_bar_metrics}
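# For reference (a hedged sketch of the older Lightning step API, not
# necessarily this model's exact code): the `outputs` lists aggregated above
# are the per-batch dicts returned by step hooks shaped like this one.
def validation_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.cross_entropy(logits, y)
    acc = (logits.argmax(dim=-1) == y).float().mean()
    return {'val_loss': loss, 'acc': acc}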
def forward(self, img, save_path=None, return_prob=False): """Run MTCNN face detection on a PIL image. This method performs both detection and extraction of faces, returning tensors representing detected faces rather than the bounding boxes. To access bounding boxes, see the MTCNN.detect() method below. Arguments: img {PIL.Image} -- A PIL image. Keyword Arguments: save_path {str} -- An optional save path for the cropped image. Note that when self.prewhiten=True, although the returned tensor is prewhitened, the saved face image is not, so it is a true representation of the face in the input image. (default: {None}) return_prob {bool} -- Whether or not to return the detection probability. (default: {False}) Returns: Union[torch.Tensor, tuple(torch.tensor, float)] -- If detected, cropped image of a face with dimensions 3 x image_size x image_size. Optionally, the probability that a face was detected. If self.keep_all is True, n detected faces are returned in an n x 3 x image_size x image_size tensor with an optional list of detection probabilities. Example: >>> from facenet_pytorch import MTCNN >>> mtcnn = MTCNN() >>> face_tensor, prob = mtcnn(img, save_path='face.png', return_prob=True) """ with torch.no_grad(): boxes, probs = self.detect(img) if boxes is None: if return_prob: return None, [None] if self.keep_all else None else: return None if not self.keep_all: boxes = boxes[[0]] faces = [] for i, box in enumerate(boxes): face_path = save_path if save_path is not None and i > 0: save_name, ext = os.path.splitext(save_path) face_path = save_name + '_' + str(i + 1) + ext face = extract_face(img, box, self.image_size, self.margin, face_path) if self.prewhiten: face = prewhiten(face) faces.append(face) if self.keep_all: faces = torch.stack(faces) else: faces = faces[0] probs = probs[0] if return_prob: return faces, probs else: return faces
def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["labels"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}
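# Minimal usage sketch for the collate function above: each dataset item is
# assumed to be a dict with a "pixel_values" tensor and an integer "labels".
examples = [{"pixel_values": torch.randn(3, 224, 224), "labels": i % 2} for i in range(4)]
batch = collate_fn(examples)  # pixel_values: (4, 3, 224, 224), labels: (4,)
# loader = torch.utils.data.DataLoader(dataset, batch_size=32, collate_fn=collate_fn)  # `dataset` assumed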
def get_protein_foreground_probability(
    self,
    adata: Optional[AnnData] = None,
    indices: Optional[Sequence[int]] = None,
    transform_batch: Optional[Sequence[Union[Number, str]]] = None,
    protein_list: Optional[Sequence[str]] = None,
    n_samples: int = 1,
    batch_size: Optional[int] = None,
    return_mean: bool = True,
    return_numpy: Optional[bool] = None,
):
    r"""
    Returns the foreground probability for proteins.

    This is denoted as :math:`(1 - \pi_{nt})` in the totalVI paper.

    Parameters
    ----------
    adata
        AnnData object with equivalent structure to initial AnnData. If `None`,
        defaults to the AnnData object used to initialize the model.
    indices
        Indices of cells in adata to use. If `None`, all cells are used.
    transform_batch
        Batch to condition on. If transform_batch is:

        - None, then real observed batch is used
        - int, then batch transform_batch is used
        - List[int], then average over batches in list
    protein_list
        Return foreground probability for a subset of proteins. This can save
        memory when working with large datasets and only a few proteins are
        of interest.
    n_samples
        Number of posterior samples to use for estimation.
    batch_size
        Minibatch size for data loading into model. Defaults to
        `scvi.settings.batch_size`.
    return_mean
        Whether to return the mean of the samples.
    return_numpy
        Return a :class:`~numpy.ndarray` instead of a
        :class:`~pandas.DataFrame`. DataFrame includes protein names as
        columns. If either `n_samples=1` or `return_mean=True`, defaults to
        `False`. Otherwise, it defaults to `True`.

    Returns
    -------
    - **foreground_probability** - probability of foreground for each protein

    If `n_samples` > 1 and `return_mean` is False, then the shape is
    `(samples, cells, proteins)`. Otherwise, shape is `(cells, proteins)`.
    In this case, return type is :class:`~pandas.DataFrame` unless
    `return_numpy` is True.
    """
    adata = self._validate_anndata(adata)
    post = self._make_scvi_dl(adata=adata, indices=indices, batch_size=batch_size)

    if protein_list is None:
        protein_mask = slice(None)
    else:
        all_proteins = adata.uns["scvi_protein_names"]
        protein_mask = [p in protein_list for p in all_proteins]

    if n_samples > 1 and return_mean is False:
        if return_numpy is False:
            logger.warning(
                "return_numpy must be True if n_samples > 1 and return_mean "
                "is False, returning np.ndarray"
            )
        return_numpy = True
    if indices is None:
        indices = np.arange(adata.n_obs)

    py_mixings = []
    if not isinstance(transform_batch, IterableClass):
        transform_batch = [transform_batch]
    transform_batch = _get_batch_code_from_category(adata, transform_batch)
    for tensors in post:
        x = tensors[_CONSTANTS.X_KEY]
        y = tensors[_CONSTANTS.PROTEIN_EXP_KEY]
        batch_index = tensors[_CONSTANTS.BATCH_KEY]
        label = tensors[_CONSTANTS.LABELS_KEY]
        py_mixing = torch.zeros_like(y[..., protein_mask])
        if n_samples > 1:
            py_mixing = torch.stack(n_samples * [py_mixing])
        # Average the mixing probability over the batches conditioned on.
        for b in transform_batch:
            outputs = self.model.inference(
                x,
                y,
                batch_index=batch_index,
                label=label,
                n_samples=n_samples,
                transform_batch=b,
            )
            py_mixing += torch.sigmoid(outputs["py_"]["mixing"])[..., protein_mask]
        py_mixing /= len(transform_batch)
        py_mixings += [py_mixing.cpu()]
    if n_samples > 1:
        # concatenate along the cell dimension -> (samples, cells, proteins)
        py_mixings = torch.cat(py_mixings, dim=1)
        # -> (cells, proteins, samples)
        py_mixings = py_mixings.permute(1, 2, 0)
    else:
        py_mixings = torch.cat(py_mixings, dim=0)

    if return_mean is True and n_samples > 1:
        py_mixings = torch.mean(py_mixings, dim=-1)

    py_mixings = py_mixings.cpu().numpy()

    if return_numpy is True:
        return 1 - py_mixings
    else:
        pro_names = self.adata.uns["scvi_protein_names"]
        foreground_prob = pd.DataFrame(
            1 - py_mixings,
            columns=pro_names[protein_mask],
            index=adata.obs_names[indices],
        )
        return foreground_prob
def forward(self, in_modalities):
    umask = in_modalities[-1]
    in_modalities = in_modalities[:-2]
    batch_size = in_modalities[0].shape[0]
    time_stamps = in_modalities[0].shape[1]

    # Unimodal
    all_h = []
    for modality, dim, lstm, dropout, fc in zip(in_modalities,
                                                self.hidden_dims,
                                                self.lstms,
                                                self.drop_outs,
                                                self.fcs):
        self.h = torch.zeros(batch_size, dim).to(self.device)
        self.c = torch.zeros(batch_size, dim).to(self.device)
        h = []
        for t in range(time_stamps):
            # Apply the mask directly on the data
            input_u = modality[:, t, :] * umask[:, t].unsqueeze(dim=-1)
            self.h, self.c = lstm(input_u, (self.h, self.c))
            self.h = torch.tanh(self.h)
            self.h = dropout(self.h)
            h.append(torch.tanh(fc(self.h)))
        all_h.append(h)

    # Multimodal
    utterance_features = [torch.stack(h, dim=-2) for h in all_h]
    dialogue_utterance_feature = torch.cat(utterance_features, dim=-1)

    self.h_dialogue = torch.zeros(batch_size, self.dialogue_hidden_dim).to(self.device)
    self.c_dialogue = torch.zeros(batch_size, self.dialogue_hidden_dim).to(self.device)
    all_h_dialogue = []
    for t in range(time_stamps):
        input_m = dialogue_utterance_feature[:, t, :] * umask[:, t].unsqueeze(dim=-1)
        self.h_dialogue, self.c_dialogue = self.dialogue_lstm(
            input_m, (self.h_dialogue, self.c_dialogue))
        self.h_dialogue = self.drop_out(self.h_dialogue)
        all_h_dialogue.append(torch.tanh(self.fc_out(self.h_dialogue)))

    output_emo = [self.smax_fc_emo(_h) for _h in all_h_dialogue]
    output_act = [self.smax_fc_act(_h) for _h in all_h_dialogue]

    # Stack per-step logits into (batch, seq_len, n_classes)
    output_emo = torch.stack(output_emo, dim=-2)
    output_act = torch.stack(output_act, dim=-2)

    log_prob_emo = F.log_softmax(output_emo, 2)  # batch, seq_len, n_classes
    log_prob_act = F.log_softmax(output_act, 2)  # batch, seq_len, n_classes
    return log_prob_emo, log_prob_act
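A small self-contained check of the stacking pattern used above: torch.stack(h, dim=-2) turns a list of T per-timestep tensors of shape (batch, dim) into a single (batch, T, dim) tensor. The shapes below are arbitrary.

import torch

batch, T, dim = 4, 7, 16
per_step = [torch.randn(batch, dim) for _ in range(T)]  # one entry per timestep
seq = torch.stack(per_step, dim=-2)                     # insert time axis at -2
assert seq.shape == (batch, T, dim)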
# Add to buffer.
instruction_data_cuda = [
    torch.tensor(t, dtype=torch.float, device=device)
    for t in instruction_data
]
replay_buffer.append(instruction_data_cuda)

# Check for minimum replay size.
if len(replay_buffer) < REPLAY_MIN:
    print('Waiting for minimum buffer size ... {}/{}'.format(
        len(replay_buffer), REPLAY_MIN))
    continue

# Sample training mini-batch and batch each tuple element.
sampled_evaluations = replay_buffer.sample(REPLAY_SAMPLE_SIZE)
sampled_contexts = torch.stack([t[0] for t in sampled_evaluations])
sampled_states = torch.stack([t[1] for t in sampled_evaluations])
sampled_params = torch.stack([t[2] for t in sampled_evaluations])
sampled_values = torch.stack([t[3] for t in sampled_evaluations])

# Update critic: maximize the log-likelihood of the observed values under
# the Normal distribution predicted by the critic (minimize the NLL).
critic_loss = torch.distributions.Normal(
    *critic_model(sampled_contexts, sampled_states, sampled_params)) \
    .log_prob(sampled_values).mean(dim=-1)
critic_model_optimizer.zero_grad()
gen_model_optimizer.zero_grad()
(-critic_loss).backward()
torch.nn.utils.clip_grad_norm_(critic_model.parameters(), 1.0)
critic_model_optimizer.step()

# Update params model.
(macro_actions, macro_actions_entropy) = gen_model.rsample(
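The buffer's append/sample interface is not shown in this fragment; below is a minimal sketch of one plausible implementation (a bounded deque with uniform sampling without replacement). The class name and the max_size default are assumptions, not part of the original code.

import random
from collections import deque

class ReplayBuffer:
    """Assumed interface: append(item), sample(n), len() -- a sketch only."""

    def __init__(self, max_size=100000):
        self._storage = deque(maxlen=max_size)  # oldest entries are evicted

    def append(self, item):
        self._storage.append(item)

    def sample(self, n):
        # Uniform sampling without replacement.
        return random.sample(list(self._storage), n)

    def __len__(self):
        return len(self._storage)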
def __init__(self, opts):
    super(CTMNet, self).__init__()
    self.opts = opts
    if self.opts.fsl.ctm:
        # uses the forward_CTM method
        self.epsilon = .0001
        self.L = 5
        self.no_bp_P_L = False
        self.deactivate_CE = self.opts.ctmnet.deactivate_CE
        self.use_OT_net = self.opts.ctmnet.use_OT
        self.pred_source = self.opts.ctmnet.pred_source  # 'score', 'dist', or 'both'
        self.use_relation_net = self.opts.ctmnet.CE_use_relation
        self.dnet = self.opts.ctmnet.dnet                # dnet or baseline
        self.dnet_out_c = self.opts.ctmnet.dnet_out_c    # defines the reshaper
        try:
            self.dnet_supp_manner = self.opts.ctmnet.dnet_supp_manner
            self.mp_mean = self.opts.ctmnet.dnet_mp_mean
            self.delete_mp = self.opts.ctmnet.dnet_delete_mp
            self.use_discri_loss = self.opts.ctmnet.use_discri_loss
            self.discri_random_target = self.opts.ctmnet.discri_random_target
            self.discri_random_weight = self.opts.ctmnet.discri_random_weight
            self.discri_test_update = self.opts.ctmnet.discri_test_update
            self.discri_test_update_fac = self.opts.ctmnet.discri_test_update_fac
            self.discri_see_weights = self.opts.ctmnet.discri_see_weights
            self.discri_zz = self.opts.ctmnet.zz
        except AttributeError:
            self.use_discri_loss = False
        try:
            self.baseline_manner = self.opts.ctmnet.baseline_manner
        except AttributeError:
            self.baseline_manner = ''
    else:
        self.CE_loss = opts.fsl.CE_loss
        self.swap = opts.fsl.swap
        if self.swap:
            self.swap_num = opts.fsl.swap_num

    _logger = opts.logger
    _logger('Building up models ...')

    # feature extractor
    in_c = 1 if opts.dataset.name == 'omniglot' else 3
    print("-----------------CNN ENCODER-----------------")
    self.repnet = feat_extract(self.opts.model.resnet_pretrain,
                               opts=opts,
                               structure=opts.model.structure,
                               in_c=in_c)
    input_bs = opts.fsl.n_way[0] * opts.fsl.k_shot[0]
    random_input = torch.rand(input_bs, in_c,
                              opts.data.im_size, opts.data.im_size)
    repnet_out = self.repnet(random_input)
    repnet_sz = repnet_out.size()
    assert repnet_sz[2] == repnet_sz[3]
    _logger('\trepnet output sz: {} (assume bs=n_way*k_shot)'.format(repnet_sz))
    self.c = repnet_sz[1]   # supposed to be 64
    self.d = repnet_sz[2]

    if self.opts.fsl.ctm:
        if self.use_OT_net:
            self.inplanes = 4 * self.c
            self.critic_sup = nn.Sequential(
                self._make_layer(Bottleneck, 128, 4, stride=1),
                self._make_layer(Bottleneck, 64, 3, stride=1))
            self.inplanes = 4 * self.c
            self.critic_que = nn.Sequential(
                self._make_layer(Bottleneck, 128, 4, stride=1),
                self._make_layer(Bottleneck, 64, 3, stride=1))

        _embedding = repnet_out
        if self.baseline_manner == 'sample_wise_similar':
            assert self.opts.model.structure == 'shallow'
            input_c = _embedding.size(1)
            self.additional_repnet = nn.Sequential(
                nn.Conv2d(input_c, input_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(input_c, momentum=1, affine=True),
                nn.ReLU())

        # RESHAPER
        if not (not self.dnet and self.baseline_manner == 'no_reshaper'):
            assert np.mod(self.dnet_out_c, 4) == 0
            out_size = int(self.dnet_out_c / 4)
            self.inplanes = _embedding.size(1)
            if self.opts.model.structure.startswith('resnet'):
                self.reshaper = nn.Sequential(
                    self._make_layer(Bottleneck, out_size * 2, 3, stride=1),
                    self._make_layer(Bottleneck, out_size, 2, stride=1))
            else:
                print("-----------------RESHAPER-----------------")
                self.reshaper = self._make_layer(
                    Bottleneck, out_size, 4, stride=1, name="reshaper")
            _out_downsample = self.reshaper(_embedding)

        # CONCENTRATOR AND PROJECTOR
        if self.dnet:
            if self.mp_mean:
                # mp = main_component
                self.inplanes = _embedding.size(1)
            else:
                # concatenate along the channel for all samples in each class
                self.inplanes = self.opts.fsl.k_shot[0] * _embedding.size(1)
            if self.opts.model.structure.startswith('resnet'):
                self.main_component = nn.Sequential(
                    self._make_layer(Bottleneck, out_size * 2, 3, stride=1),
                    self._make_layer(Bottleneck, out_size, 2, stride=1))
            else:
                print("-----------------CONCENTRATOR-----------------")
                tmp_inplanes = self.inplanes
                self.main_component1 = self._make_layer(
                    Bottleneck, out_size, 4, stride=1,
                    name="concentrator2", change_inplanes=True)
                self.inplanes = tmp_inplanes
                self.main_component2 = self._make_layer(
                    Bottleneck_k5, out_size, 4, stride=1,
                    name="concentrator1", change_inplanes=True)

            # projector
            if self.delete_mp:
                # mp = main_component; for k_shot=1 (one-shot learning) the
                # concentrator is not needed
                assert self.opts.fsl.k_shot[0] == 1
                del self.main_component
                # input_c for the projector, without mp
                self.inplanes = self.opts.fsl.n_way[0] * _embedding.size(1)
            else:
                # input_c for the projector, with mp
                self.inplanes = self.opts.fsl.n_way[0] * out_size * 4
            if self.opts.model.structure.startswith('resnet'):
                self.projection = nn.Sequential(
                    self._make_layer(Bottleneck, out_size * 2, 3, stride=1),
                    self._make_layer(Bottleneck, out_size, 2, stride=1))
            else:
                print("-----------------PROJECTOR-----------------")
                self.projection = self._make_layer(
                    Bottleneck, out_size, 4, stride=1, name="projector")

        # deprecated; kept for legacy
        if self.use_discri_loss:
            # e.g. 40 x 19 x 19 = 14440
            input_c = (_out_downsample.size(1)
                       * _out_downsample.size(2) * _out_downsample.size(2))
            if self.discri_zz:
                self.disc_fc = nn.ModuleList([
                    nn.Sequential(
                        nn.Linear(input_c, int(input_c / 8)),
                        nn.BatchNorm1d(int(input_c / 8)),
                        nn.ReLU(),
                    ),
                    MyLinear(int(input_c / 8), 256, True,
                             reset_each_iter=self.discri_random_weight)
                ])
            else:
                self.disc_fc = nn.ModuleList([
                    nn.Sequential(
                        nn.Linear(input_c, int(input_c / 8)),
                        nn.BatchNorm1d(int(input_c / 8)),
                        nn.ReLU(),
                        nn.Linear(int(input_c / 8), 256),
                        nn.BatchNorm1d(256),
                        nn.ReLU()),
                    # nn.Linear(int(input_c/8), self.opts.fsl.n_way[0]),
                    MyLinear(256, self.opts.fsl.n_way[0],
                             bias=(not self.discri_random_weight),
                             reset_each_iter=self.discri_random_weight)
                ])

        # RELATION METRIC
        if self.use_relation_net:
            # relation sub_net
            if hasattr(self, 'reshaper'):
                _input = _out_downsample
            else:
                _input = _embedding
            if self.opts.model.relation_net == 'res_block':
                # (256); it is "2" because two embeddings are combined
                self.inplanes = 2 * _input.size(1)
                self.relation1 = self._make_layer(Bottleneck, 32, 2, stride=2)
                self.relation2 = self._make_layer(Bottleneck, 16, 2, stride=1)
                _combine = torch.stack([_input, _input], dim=1).view(
                    _input.size(0), -1, _input.size(2), _input.size(3))
                _out = self.relation2(self.relation1(_combine))
                self.fc_input_c = _out.size(1) * _out.size(2) * _out.size(3)
                _half = int(self.fc_input_c / 2)
                self.fc = nn.Sequential(nn.Linear(self.fc_input_c, _half),
                                        nn.BatchNorm1d(_half),
                                        nn.ReLU(inplace=True),
                                        nn.Linear(_half, 1))
            elif self.opts.model.relation_net == 'simple':
                input_c = 2 * _input.size(1)
                self.relation1 = nn.Sequential(
                    nn.Conv2d(input_c, 64, kernel_size=3, padding=1),
                    nn.BatchNorm2d(64, momentum=1, affine=True),
                    nn.ReLU(),
                    nn.MaxPool2d(2))
                self.relation2 = nn.Sequential(
                    nn.Conv2d(64, 64, kernel_size=3, padding=1),
                    nn.BatchNorm2d(64, momentum=1, affine=True),
                    nn.ReLU(),
                    # nn.MaxPool2d(2)
                )
                _combine = torch.stack([_input, _input], dim=1).view(
                    _input.size(0), -1, _input.size(2), _input.size(3))
                _out = self.relation2(self.relation1(_combine))
                self.fc_input_c = _out.size(1) * _out.size(2) * _out.size(3)
                _half = int(self.fc_input_c / 2)
                self.fc = nn.Sequential(
                    nn.Linear(self.fc_input_c, _half),
                    nn.ReLU(),
                    nn.Linear(_half, 1),
                    # no sigmoid here since we use CE
                )
            else:
                # the original relation network
                self.inplanes = 2 * self.c
                # the original network in the relation net,
                # after the relation module (three layers)
                self.relation1 = self._make_layer(Bottleneck, 128, 4, stride=2)
                self.relation2 = self._make_layer(Bottleneck, 64, 3, stride=2)
                if self.CE_loss:
                    self.fc = nn.Sequential(nn.Linear(256, 64),
                                            nn.BatchNorm1d(64),
                                            nn.ReLU(inplace=True),
                                            nn.Linear(64, 1))
                else:
                    self.fc = nn.Sequential(
                        nn.Linear(256, 64),
                        nn.BatchNorm1d(64),
                        nn.ReLU(inplace=True),
                        nn.Linear(64, 1),
                        nn.Sigmoid()  # the only difference
                    )
                combine = torch.stack([repnet_out, repnet_out], dim=1).view(
                    repnet_out.size(0), -1,
                    repnet_out.size(2), repnet_out.size(3))
                out = self.relation2(self.relation1(combine))
                _logger('\tafter layer5 sz: {} (assume bs=2)\n'.format(out.size()))
                self.pool_size = out.size(2)

    self._initialize_weights()
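A small self-contained check of the combine pattern used in the relation branches above: stacking two (B, C, H, W) feature maps along dim=1 gives (B, 2, C, H, W), and the view folds the pair into the channel axis, yielding (B, 2*C, H, W). The sizes below are arbitrary.

import torch

B, C, H, W = 2, 64, 19, 19
a = torch.randn(B, C, H, W)
b = torch.randn(B, C, H, W)
combine = torch.stack([a, b], dim=1).view(B, -1, H, W)
assert combine.shape == (B, 2 * C, H, W)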