def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel,
           device: Union[str, torch.device], HLG: Fsa, symbols: SymbolTable):
    tot_num_cuts = len(dataloader.dataset.cuts)
    num_cuts = 0
    results = []  # a list of pair (ref_words, hyp_words)
    for batch_idx, batch in enumerate(dataloader):
        feature = batch['inputs']
        supervisions = batch['supervisions']
        supervision_segments = torch.stack(
            (supervisions['sequence_idx'],
             torch.floor_divide(supervisions['start_frame'],
                                model.subsampling_factor),
             torch.floor_divide(supervisions['num_frames'],
                                model.subsampling_factor)), 1).to(torch.int32)
        indices = torch.argsort(supervision_segments[:, 2], descending=True)
        supervision_segments = supervision_segments[indices]
        texts = supervisions['text']
        assert feature.ndim == 3

        feature = feature.to(device)
        # at entry, feature is [N, T, C]
        feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
        with torch.no_grad():
            nnet_output = model(feature)
        # nnet_output is [N, C, T]
        nnet_output = nnet_output.permute(0, 2, 1)
        # now nnet_output is [N, T, C]

        # blank_bias = -3.0
        # nnet_output[:, :, 0] += blank_bias

        dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
        # assert HLG.is_cuda()
        assert HLG.device == nnet_output.device, \
            f"Check failed: HLG.device ({HLG.device}) == nnet_output.device ({nnet_output.device})"
        # TODO(haowen): with a small `beam`, we may get empty `target_graph`,
        # thus `tot_scores` will be `inf`. Definitely we need to handle this
        # later.
        lattices = k2.intersect_dense_pruned(HLG, dense_fsa_vec, 20.0, 7.0, 30,
                                             10000)
        # lattices = k2.intersect_dense(HLG, dense_fsa_vec, 10.0)
        best_paths = k2.shortest_path(lattices, use_double_scores=True)
        assert best_paths.shape[0] == len(texts)
        hyps = get_texts(best_paths, indices)
        assert len(hyps) == len(texts)

        for i in range(len(texts)):
            hyp_words = [symbols.get(x) for x in hyps[i]]
            ref_words = texts[i].split(' ')
            results.append((ref_words, hyp_words))

        if batch_idx % 10 == 0:
            logging.info(
                'batch {}, cuts processed until now is {}/{} ({:.6f}%)'.format(
                    batch_idx, num_cuts, tot_num_cuts,
                    float(num_cuts) / tot_num_cuts * 100))
        num_cuts += len(texts)

    return results
def beam_search(self, h, final_sequence, i, k, max_len, mask, probas):
    if i < max_len:
        i += 1
        y, h = self.forward(final_sequence[-1].unsqueeze(0), h)
        y = y.squeeze()
        h = h.squeeze()
        y = torch.nn.functional.softmax(y, 1)
        y_flat = (probas * y.permute(1, 0)).permute(1, 0).flatten(0)
        values, indexes = torch.topk(y_flat, k)
        probas = probas * values / sum(probas * values)
        probas[torch.nonzero(mask == 0)] = 1000000
        words = torch.fmod(indexes, self.vocab_dim) * mask
        mask = mask * (words != 1)  # 1 is eos
        h_new = h[torch.floor_divide(indexes, self.vocab_dim)]
        final_sequence = final_sequence.permute(1, 0)[torch.floor_divide(
            indexes, self.vocab_dim)].permute(1, 0)
        final_sequence = torch.cat((final_sequence, words.unsqueeze(0)))
        return self.beam_search(h_new.unsqueeze(0), final_sequence, i, k,
                                max_len, mask, probas)
    else:
        return final_sequence
def encode_supervisions(supervisions: Dict[str, torch.Tensor],
                        subsampling_factor) -> Tuple[torch.Tensor, List[str]]:
    """
    Encodes Lhotse's ``batch["supervisions"]`` dict into a pair of torch
    Tensor, and a list of transcription strings.

    The supervision tensor has shape ``(batch_size, 3)``. Its second dimension
    contains information about sequence index [0], start frames [1] and num
    frames [2].

    The batch items might become re-ordered during this operation -- the
    returned tensor and list of strings are guaranteed to be consistent with
    each other.

    This mimics subsampling by a factor of 4 with a Conv1D layer with no
    padding.
    """
    supervision_segments = torch.stack(
        (supervisions['sequence_idx'],
         torch.floor_divide(supervisions['start_frame'], subsampling_factor),
         torch.floor_divide(supervisions['num_frames'], subsampling_factor)),
        1).to(torch.int32)
    supervision_segments = torch.clamp(supervision_segments, min=0)
    indices = torch.argsort(supervision_segments[:, 2], descending=True)
    supervision_segments = supervision_segments[indices]
    texts = supervisions['text']
    texts = [texts[idx] for idx in indices]
    return supervision_segments, texts
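# Hedged usage sketch (not from the original source): a toy supervisions dict
# run through encode_supervisions, showing the reorder by descending
# subsampled num_frames. All values below are invented for illustration.
import torch

def _demo_encode_supervisions():
    supervisions = {
        'sequence_idx': torch.tensor([0, 1]),
        'start_frame': torch.tensor([0, 0]),
        'num_frames': torch.tensor([40, 100]),
        'text': ['short utterance', 'a much longer utterance'],
    }
    segments, texts = encode_supervisions(supervisions, subsampling_factor=4)
    print(segments)  # tensor([[ 1,  0, 25], [ 0,  0, 10]], dtype=torch.int32)
    print(texts)     # ['a much longer utterance', 'short utterance']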
def step(self, step, lprobs, scores):
    super()._init_buffers(lprobs)
    bsz, beam_size, vocab_size = lprobs.size()

    if step == 0:
        # at the first step all hypotheses are equally likely, so use
        # only the first beam
        lprobs = lprobs[:, ::beam_size, :].contiguous()
    else:
        # make probs contain cumulative scores for each hypothesis
        lprobs.add_(scores[:, :, step - 1].unsqueeze(-1))

    torch.topk(
        lprobs.view(bsz, -1),
        k=min(
            # Take the best 2 x beam_size predictions. We'll choose the first
            # beam_size of these which don't predict eos to continue with.
            beam_size * 2,
            lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
        ),
        out=(self.scores_buf, self.indices_buf),
    )
    torch.floor_divide(self.indices_buf, vocab_size, out=self.beams_buf)
    self.indices_buf.fmod_(vocab_size)
    return self.scores_buf, self.indices_buf, self.beams_buf
def create_mesh(decoder, filename, N=256, max_batch=64 ** 3, offset=None,
                scale=None):
    start = time.time()
    ply_filename = filename

    decoder.eval()

    # NOTE: the voxel_origin is actually the (bottom, left, down) corner,
    # not the middle
    voxel_origin = [-1, -1, -1]
    voxel_size = 2.0 / (N - 1)

    overall_index = torch.arange(0, N ** 3, 1, out=torch.LongTensor())
    samples = torch.zeros(N ** 3, 4)

    # transform first 3 columns to be the x, y, z index
    samples[:, 2] = overall_index % N
    samples[:, 1] = torch.floor_divide(overall_index.long(), N) % N
    samples[:, 0] = torch.floor_divide(
        torch.floor_divide(overall_index.long(), N), N) % N

    # transform first 3 columns to be the x, y, z coordinate
    samples[:, 0] = (samples[:, 0] * voxel_size) + voxel_origin[2]
    samples[:, 1] = (samples[:, 1] * voxel_size) + voxel_origin[1]
    samples[:, 2] = (samples[:, 2] * voxel_size) + voxel_origin[0]

    num_samples = N ** 3
    samples.requires_grad = False

    head = 0
    while head < num_samples:
        print(head)
        sample_subset = samples[head:min(head + max_batch, num_samples),
                                0:3].cuda()
        samples[head:min(head + max_batch, num_samples), 3] = (
            decoder(sample_subset).squeeze().detach().cpu())
        head += max_batch

    sdf_values = samples[:, 3]
    sdf_values = sdf_values.reshape(N, N, N)

    end = time.time()
    print("sampling takes: %f" % (end - start))

    convert_sdf_samples_to_ply(
        sdf_values.data.cpu(),
        voxel_origin,
        voxel_size,
        ply_filename + ".ply",
        offset,
        scale,
    )
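# Hedged sketch (not from the original source): the nested floor_divide calls
# above unravel a flat voxel index into (x, y, z) with z the fastest-varying
# axis. A tiny N makes the ordering easy to inspect.
import torch

N = 2
idx = torch.arange(N ** 3)
z = idx % N
y = torch.floor_divide(idx, N) % N
x = torch.floor_divide(torch.floor_divide(idx, N), N) % N
# idx 0..7 -> (x, y, z): (0,0,0) (0,0,1) (0,1,0) (0,1,1) (1,0,0) ...
print(torch.stack([x, y, z], dim=1))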
def forward(self, x, nframes=None):
    """
    If nframes is provided, remove padded parts from quant_losses,
    flat_inputs and flat_onehots. This is useful for training, when EMA
    only requires pre-quantized inputs and assigned indices.

    Note that jitter() is only applied after VQ-{2,3}.

    Args:
        x (torch.Tensor): Spectral feature batch of shape (B, C, F, T)
            or (B, F, T).
        nframes (torch.Tensor): Number of frames for each utterance.
            Shape is (B,)
    """
    quant_losses = [None] * 5  # quantization losses by layer
    flat_inputs = [None] * 5   # flattened pre-quantized inputs by layer
    flat_onehots = [None] * 5  # flattened one-hot codes by layer

    if x.dim() == 3:
        x = x.unsqueeze(1)
    L = x.size(-1)
    cur_nframes = None

    x = self.relu(self.bn1(self.conv1(x)))
    if nframes is not None:
        cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
    (quant_losses[0], x, flat_inputs[0],
     flat_onehots[0]) = self.maybe_quantize(x, 0, cur_nframes)
    x = self.maybe_jitter(x)

    x = self.layer1(x)
    if nframes is not None:
        cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
    (quant_losses[1], x, flat_inputs[1],
     flat_onehots[1]) = self.maybe_quantize(x, 1, cur_nframes)
    x = self.maybe_jitter(x)

    x = self.layer2(x)
    if nframes is not None:
        cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
    (quant_losses[2], x, flat_inputs[2],
     flat_onehots[2]) = self.maybe_quantize(x, 2, cur_nframes)

    x = self.layer3(x)
    if nframes is not None:
        cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
    (quant_losses[3], x, flat_inputs[3],
     flat_onehots[3]) = self.maybe_quantize(x, 3, cur_nframes)

    x = self.layer4(x)
    if nframes is not None:
        cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
    (quant_losses[4], x, flat_inputs[4],
     flat_onehots[4]) = self.maybe_quantize(x, 4, cur_nframes)

    return x, quant_losses, flat_inputs, flat_onehots
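# Hedged sketch (illustrative, with invented numbers): cur_nframes above
# rescales per-utterance valid-frame counts by the block's temporal
# downsampling ratio, so padding can still be excluded after each stride.
import torch

L, L_out = 100, 25                     # time axis before/after a block
nframes = torch.tensor([100, 60, 37])  # valid frames per utterance
cur_nframes = torch.floor_divide(nframes, round(L / L_out))
print(cur_nframes)  # tensor([25, 15,  9])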
def get_conv_output_lengths(self, input_lengths, axis=1):
    seq_len = input_lengths
    for m in self.conv.modules():
        if type(m) == nn.Conv2d:
            seq_len = torch.floor_divide(
                (seq_len + 2 * m.padding[axis] - m.dilation[axis] *
                 (m.kernel_size[axis] - 1) - 1), m.stride[axis]) + 1
        elif type(m) == nn.MaxPool2d:
            seq_len = torch.floor_divide(
                (seq_len + 2 * m.padding - m.dilation *
                 (m.kernel_size - 1) - 1), m.stride) + 1
    return seq_len
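# Hedged sketch (illustrative): the expression above is the standard Conv2d
# output-length relation L_out = floor((L_in + 2p - d*(k-1) - 1) / s) + 1,
# checked here against an actual layer along the width axis. The layer
# hyperparameters are arbitrary.
import torch
import torch.nn as nn

conv = nn.Conv2d(1, 1, kernel_size=(3, 11), stride=(1, 2), padding=(1, 5))
L_in = torch.tensor([100])
k, s = conv.kernel_size[1], conv.stride[1]
p, d = conv.padding[1], conv.dilation[1]
L_out = torch.floor_divide(L_in + 2 * p - d * (k - 1) - 1, s) + 1
x = torch.randn(1, 1, 8, 100)
assert conv(x).shape[-1] == L_out.item()  # both give 50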
def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel,
           device: Union[str, torch.device], LG: Fsa, symbols: SymbolTable):
    results = []  # a list of pair (ref_words, hyp_words)
    for batch_idx, batch in enumerate(dataloader):
        feature = batch['features']
        supervisions = batch['supervisions']
        supervision_segments = torch.stack(
            (supervisions['sequence_idx'],
             torch.floor_divide(supervisions['start_frame'],
                                model.subsampling_factor),
             torch.floor_divide(supervisions['num_frames'],
                                model.subsampling_factor)), 1).to(torch.int32)
        texts = supervisions['text']
        assert feature.ndim == 3

        feature = feature.to(device)
        # at entry, feature is [N, T, C]
        feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
        with torch.no_grad():
            nnet_output = model(feature)
        # nnet_output is [N, C, T]
        nnet_output = nnet_output.permute(0, 2, 1)
        # now nnet_output is [N, T, C]

        dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
        assert LG.is_cuda()
        assert LG.device == nnet_output.device, \
            f"Check failed: LG.device ({LG.device}) == nnet_output.device ({nnet_output.device})"
        # TODO(haowen): with a small `beam`, we may get empty `target_graph`,
        # thus `tot_scores` will be `inf`. Definitely we need to handle this
        # later.
        lattices = k2.intersect_dense_pruned(LG, dense_fsa_vec, 2000.0, 20.0,
                                             30, 300)
        best_paths = k2.shortest_path(lattices, use_float_scores=True)
        best_paths = best_paths.to('cpu')
        assert best_paths.shape[0] == len(texts)

        for i in range(len(texts)):
            hyp_words = [
                symbols.get(x) for x in best_paths[i].aux_labels if x > 0
            ]
            results.append((texts[i].split(' '), hyp_words))

        if batch_idx % 10 == 0:
            logging.info('Processed batch {}/{} ({:.6f}%)'.format(
                batch_idx, len(dataloader),
                float(batch_idx) / len(dataloader) * 100))
    return results
def create_coord_from_det(c, f):
    coord = zeros((f.shape[0], 3), dtype=int32)
    seg = zeros(c.shape, dtype=c.dtype)
    floor_divide(c, 2, out=seg)
    coord[:, 0] = seg % 14
    coord[:, 1] = floor_divide(seg, 14)
    features = zeros((f.shape[0], f.shape[1] * 2), dtype=f.dtype)
    n_samp = f.shape[1]
    for i in range(coord.shape[0]):
        coord[i, 2] = i
        if c[i] % 2 == 0:
            features[i, 0:n_samp] = f[i]
        else:
            features[i, n_samp:] = f[i]
    return coord, features
def forward(self, x):
    C = torch.floor_divide(x.shape[1], 4)
    filters = torch.cat([self.weight] * C, dim=0)
    y = F.conv_transpose2d(x, filters, groups=C, stride=2)
    return y
def forward(self, input_ids, attention_mask, token_type_ids, labels,
            eval_type="train"):
    batch_size = input_ids.size(0)
    num_slots = self.num_slots

    # encoder, a pretrained model, output is a tuple
    sequence_output = self.encoder(input_ids, attention_mask,
                                   token_type_ids)[0]

    # decoder
    loss, loss_slot, pred_slot = self.decoder(sequence_output, attention_mask,
                                              labels, self.slot_lookup,
                                              self.value_lookup, eval_type)

    # calculate accuracy
    accuracy = pred_slot == labels
    # slot accuracy
    acc_slot = torch.true_divide(torch.sum(accuracy, 0).float(),
                                 batch_size).cpu().detach().numpy()
    # joint accuracy
    acc = torch.sum(
        torch.floor_divide(torch.sum(accuracy, 1),
                           num_slots)).float().item() / batch_size
    return loss, loss_slot, acc, acc_slot, pred_slot
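# Hedged sketch (illustrative): floor_divide(num correct slots, num_slots)
# is 1 only when every slot is correct, so its sum counts jointly-correct
# examples. Toy values below are invented.
import torch

num_slots = 3
accuracy = torch.tensor([[1, 1, 1],   # all slots right -> counts
                         [1, 0, 1]])  # one slot wrong  -> does not
joint = torch.floor_divide(accuracy.sum(1), num_slots)
print(joint)                                  # tensor([1, 0])
print(joint.sum().item() / accuracy.size(0))  # 0.5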
def advance(self, word_prob):
    "Update beam status and check if finished or not."
    num_words = word_prob.size(1)

    # Sum the previous scores.
    if len(self.prev_ks) > 0:
        beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob)
    else:
        beam_lk = word_prob[0]

    flat_beam_lk = beam_lk.view(-1)
    best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True)

    self.all_scores.append(self.scores)
    self.scores = best_scores

    # bestScoresId is flattened as a (beam x word) array,
    # so we need to calculate which word and beam each score came from
    prev_k = torch.floor_divide(best_scores_id, num_words)
    self.prev_ks.append(prev_k)
    self.next_ys.append(best_scores_id - prev_k * num_words)

    # End condition is when top-of-beam is EOS.
    if self.next_ys[-1][0].item() == config.EOS_idx:
        self._done = True
        self.all_scores.append(self.scores)

    return self._done
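# Hedged sketch (illustrative): how a flattened topk over a (beam x vocab)
# score matrix is split back into beam ids and word ids, as advance() does.
import torch

beam, vocab = 2, 5
scores = torch.arange(beam * vocab, dtype=torch.float).view(beam, vocab)
best, flat_id = scores.view(-1).topk(3)
prev_k = torch.floor_divide(flat_id, vocab)  # beam of origin: tensor([1, 1, 1])
next_y = flat_id - prev_k * vocab            # word id:        tensor([4, 3, 2])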
def get_kp_torch_batch(self, pred, conf, topk=100):
    b, c, h, w = pred.shape
    pred = pred.contiguous().view(-1)
    pred[pred < conf] = 0
    score, topk_idx = torch.topk(pred, k=topk)

    batch = torch.floor_divide(topk_idx, (h * w * c))
    cls = torch.floor_divide((topk_idx - batch * h * w * c), (h * w))
    channel = (topk_idx - batch * h * w * c) - (cls * h * w)
    x = channel % w
    y = torch.floor_divide(channel, w)
    return x.view(-1), y.view(-1), cls.view(-1), batch.view(-1)
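# Hedged sketch (illustrative): unravelling a flat index over a (b, c, h, w)
# tensor into batch / class / y / x with the same arithmetic as above.
import torch

b, c, h, w = 2, 3, 4, 5
flat = torch.tensor([b * c * h * w - 1])     # last element of the tensor
batch = torch.floor_divide(flat, c * h * w)  # tensor([1])
rem = flat - batch * c * h * w
cls = torch.floor_divide(rem, h * w)         # tensor([2])
channel = rem - cls * h * w
x, y = channel % w, torch.floor_divide(channel, w)  # x=4, y=3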
def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    model.train()
    for batch in tqdm(iterator):
        bayes_loss = []
        optimizer.zero_grad()
        # loop 50 times, maximum-likelihood loss
        for i in range(N_CIRCLE):
            predictions = model(batch.text, batch.gate)
            bayes_loss.append(
                torch.gather(predictions, 1,
                             batch.label.long().unsqueeze(-1)))
        possible = torch.cat(tuple([x for x in bayes_loss]), 1)
        possible_max = torch.max(possible, dim=1).values
        loss = sum(1 - possible_max)
        acc_num = sum(torch.gt(possible_max, 0.5))
        acc = torch.floor_divide(acc_num, len(possible_max))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def hann2d_clipped(sz: torch.Tensor, effective_sz: torch.Tensor,
                   centered=True) -> torch.Tensor:
    """2D clipped cosine (Hann) window."""
    # Ensure that the difference is even
    effective_sz += (effective_sz - sz) % 2
    effective_window = hann1d(effective_sz[0].item(), True).reshape(
        1, 1, -1, 1) * hann1d(effective_sz[1].item(), True).reshape(
            1, 1, 1, -1)

    pad = torch.floor_divide(sz - effective_sz, 2)
    window = F.pad(
        effective_window,
        (pad[1].item(), pad[1].item(), pad[0].item(), pad[0].item()),
        'replicate')

    if centered:
        return window
    else:
        mid = (sz / 2).int()
        window_shift_lr = torch.cat(
            (window[:, :, :, mid[1]:], window[:, :, :, :mid[1]]), 3)
        return torch.cat((window_shift_lr[:, :, mid[0]:, :],
                          window_shift_lr[:, :, :mid[0], :]), 2)
def select_actions(self, policy, sum_log_probs, mask,
                   infer_type='stochastic'):
    beam_size, seq_size = policy.size()
    nzn = torch.nonzero(mask, as_tuple=False).shape[0]
    sample_size = min(nzn, self.beam_size)
    ourlogzero = sys.float_info.min
    lpolicy = policy.masked_fill(mask == 0, ourlogzero).log()
    npolicy = sum_log_probs.unsqueeze(1) + lpolicy

    if infer_type == 'stochastic':
        nnpolicy = npolicy.exp().masked_fill(mask == 0, 0).view(1, -1)
        m = Categorical(nnpolicy)
        gact_ind = torch.multinomial(nnpolicy, sample_size)
        log_select = m.log_prob(gact_ind)
    elif infer_type == 'greedy':
        nnpolicy = npolicy.exp().masked_fill(mask == 0, 0).view(1, -1)
        _, gact_ind = nnpolicy.topk(sample_size, dim=1)
        prob = policy.view(-1)[gact_ind]
        log_select = prob.log()

    beam_id = torch.floor_divide(gact_ind, seq_size).squeeze(0)
    act_ind = torch.fmod(gact_ind, seq_size)
    return act_ind, log_select, beam_id
def step(self, step: int, lprobs, scores: Optional[Tensor]):
    bsz, beam_size, vocab_size = lprobs.size()

    if step == 0:
        # at the first step all hypotheses are equally likely, so use
        # only the first beam
        lprobs = lprobs[:, ::beam_size, :].contiguous()
    else:
        # make probs contain cumulative scores for each hypothesis
        assert scores is not None
        lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1)

    top_prediction = torch.topk(
        lprobs.view(bsz, -1),
        k=min(
            # Take the best 2 x beam_size predictions. We'll choose the first
            # beam_size of these which don't predict eos to continue with.
            beam_size * 2,
            lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
        ),
    )
    scores_buf = top_prediction[0]
    indices_buf = top_prediction[1]
    if torch.__version__ < '1.6.0':
        beams_buf = torch.div(indices_buf, vocab_size)
    else:
        beams_buf = torch.floor_divide(indices_buf, vocab_size)
    indices_buf = indices_buf.fmod(vocab_size)
    return scores_buf, indices_buf, beams_buf
def advance(self, workd_lk):
    """Advance the beam."""
    num_words = workd_lk.size(1)

    # Sum the previous scores.
    if len(self.prevKs) > 0:
        beam_lk = workd_lk + self.scores.unsqueeze(1).expand_as(workd_lk)
    else:
        beam_lk = workd_lk[0]

    flat_beam_lk = beam_lk.view(-1)
    bestScores, bestScoresId = flat_beam_lk.topk(self.size, 0, True, True)
    self.scores = bestScores

    # bestScoresId is flattened beam x word array, so calculate which
    # word and beam each score came from
    prev_k = torch.floor_divide(bestScoresId, num_words)  # note: double check here
    self.prevKs.append(prev_k)
    self.nextYs.append(bestScoresId - prev_k * num_words)

    # End condition is when top-of-beam is EOS.
    if self.nextYs[-1][0] == self.eos:
        self.done = True

    return self.done
def backward(ctx, grad_output):
    """
    In the backward pass we receive a Tensor containing the gradient of the
    loss with respect to the output, and we need to compute the gradient of
    the loss with respect to the input.
    """
    x, y = ctx.saved_variables
    return grad_output * 1, grad_output * torch.neg(torch.floor_divide(x, y))
def _topk(scores, K=40):
    batch, cat, height, width = scores.size()

    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)

    topk_inds = topk_inds % (height * width)
    topk_ys = (torch.floor_divide(topk_inds, width)).float()
    topk_xs = (topk_inds % width).int().float()

    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (torch.floor_divide(topk_ind, K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1),
                             topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1),
                           topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1),
                           topk_ind).view(batch, K)

    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def getmaxn(tensor, n):
    tlen = tensor.shape[-1]
    tensor = tensor.reshape(-1)
    idx = tensor.argsort(descending=True)[:n]
    value = tensor[idx]
    maxn = torch.cat(
        (torch.true_divide(idx, tlen).float().unsqueeze(0),
         torch.floor_divide(idx, tlen).float().unsqueeze(0),
         value.unsqueeze(0)),
        dim=0)
    return maxn
def get_seg_target(self, seg_scores, gt_bboxes, device):
    strides = [4, 8, 16, 32, 64]
    batch_size = len(gt_bboxes)
    feat_sizes = [each.size()[-2:] for each in seg_scores]
    seg_labels_list = []
    seg_weights_list = []
    for si, stride in enumerate(strides):
        seg_labels = torch.ones(
            (batch_size, feat_sizes[si][0], feat_sizes[si][1]),
            dtype=torch.int64, device=device)
        seg_weights = torch.zeros(
            (batch_size, feat_sizes[si][0], feat_sizes[si][1]),
            dtype=torch.float32, device=device)
        for pi, bbox_per in enumerate(gt_bboxes):
            bbox_per_down = (bbox_per / stride).type(torch.int32)
            for bi in range(bbox_per.size()[0]):
                x_min, y_min, x_max, y_max = bbox_per_down[bi]
                x_mid = torch.floor_divide((x_max + x_min), 2).type(torch.int32)
                y_mid = torch.floor_divide((y_max + y_min), 2).type(torch.int32)
                choice = random.randint(1, 8)
                if choice == 1:
                    seg_weights[pi, y_mid:y_max, x_min:x_max] = 1
                elif choice == 2:
                    seg_weights[pi, y_min:y_mid, x_min:x_max] = 1
                elif choice == 3:
                    seg_weights[pi, y_min:y_max, x_mid:x_max] = 1
                elif choice == 4:
                    seg_weights[pi, y_min:y_max, x_min:x_mid] = 1
                elif choice == 5:
                    x_mid_l = torch.floor_divide((x_min + x_mid), 2).type(torch.int32)
                    x_mid_r = torch.floor_divide((x_mid + x_max), 2).type(torch.int32)
                    y_mid_t = torch.floor_divide((y_min + y_mid), 2).type(torch.int32)
                    y_mid_b = torch.floor_divide((y_mid + y_max), 2).type(torch.int32)
                    seg_weights[pi, y_min:y_max, x_min:x_max] = 1
                    seg_weights[pi, y_mid_t:y_mid_b, x_mid_l:x_mid_r] = 0
                elif choice == 6:
                    x_mid_l = torch.floor_divide((x_min + x_mid), 2).type(torch.int32)
                    x_mid_r = torch.floor_divide((x_mid + x_max), 2).type(torch.int32)
                    y_mid_t = torch.floor_divide((y_min + y_mid), 2).type(torch.int32)
                    y_mid_b = torch.floor_divide((y_mid + y_max), 2).type(torch.int32)
                    seg_weights[pi, y_mid_t:y_mid_b, x_mid_l:x_mid_r] = 1
                seg_labels[pi, y_min:y_max, x_min:x_max] = 0
        seg_labels_list.append(seg_labels.reshape(batch_size, -1))
        seg_weights_list.append(seg_weights.reshape(batch_size, -1))
    return seg_labels_list, seg_weights_list
def floor_divide(input_, other):
    """Wrapper of `torch.floor_divide`.

    Parameters
    ----------
    input_ : DTensor
        The first operand.
    other : DTensor
        The second operand.
    """
    return torch.floor_divide(input_._data, other._data)
def hm2box(heatmap, offset, wh, scale_factor=4, topk=10, conf_th=0.3,
           normalized=False):
    height, width = heatmap.shape[-2:]

    max_pool = torch.nn.MaxPool2d(3, stride=1, padding=3 // 2)
    isPeak = max_pool(heatmap) == heatmap
    peakmap = heatmap * isPeak

    scores, indices = peakmap.flatten().topk(topk)
    clss = torch.floor_divide(indices, (height * width))
    inds = torch.fmod(indices, (height * width))
    yinds = torch.floor_divide(inds, width)
    xinds = torch.fmod(inds, width)

    xoffs = offset[0, yinds, xinds]
    xsizs = wh[0, yinds, xinds]
    yoffs = offset[1, yinds, xinds]
    ysizs = wh[1, yinds, xinds]

    if normalized:
        xoffs = xoffs * scale_factor
        yoffs = yoffs * scale_factor
        xsizs = xsizs * width
        ysizs = ysizs * height

    xmin = (xinds + xoffs - xsizs / 2) * scale_factor
    ymin = (yinds + yoffs - ysizs / 2) * scale_factor
    xmax = (xinds + xoffs + xsizs / 2) * scale_factor
    ymax = (yinds + yoffs + ysizs / 2) * scale_factor
    boxes = torch.stack([xmin, ymin, xmax, ymax], dim=1)  # Tensor: topk x 4

    # confidence thresholding
    over_threshold = scores >= conf_th
    return boxes[over_threshold], clss[over_threshold], scores[over_threshold]
def get_seq_lens(self, input_length):
    """
    Given a 1D Tensor or Variable containing integer sequence lengths,
    return a 1D tensor or variable containing the size sequences that will
    be output by the network.

    :param input_length: 1D Tensor
    :return: 1D Tensor scaled by model
    """
    seq_len = input_length
    for m in self.conv.modules():
        if type(m) == nn.modules.conv.Conv2d:
            seq_len = torch.floor_divide(
                (seq_len + 2 * m.padding[1] - m.dilation[1] *
                 (m.kernel_size[1] - 1) - 1), m.stride[1]) + 1
    return seq_len.int()
def ind2sub(ind, shape, out=None):
    """Convert linear indices into sub indices (i, j, k).

    The rightmost dimension is the most rapidly changing one
    -> if shape == [D, H, W], the strides are therefore [H*W, W, 1]

    Parameters
    ----------
    ind : tensor_like
        Linear indices
    shape : (D,) vector_like
        Size of each dimension.
    out : tensor, optional
        Output placeholder

    Returns
    -------
    subs : (D, ...) tensor
        Sub-indices.
    """
    ind = torch.as_tensor(ind)
    bck = backend(ind)
    stride = py.cumprod(shape, reverse=True, exclusive=True)
    stride = torch.as_tensor(stride, **bck)
    if out is None:
        sub = ind.new_empty([len(shape), *ind.shape])
    else:
        sub = out.reshape([len(shape), *ind.shape])
    sub[:, ...] = ind
    for d in range(len(shape)):
        if d > 0:
            torch.remainder(sub[d], torch.as_tensor(stride[d - 1], **bck),
                            out=sub[d])
        torch.floor_divide(sub[d], stride[d], out=sub[d])
    return sub
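# Hedged sketch (not using the helpers above): the same stride walk by hand.
# For shape [D, H, W] = [2, 3, 4], linear index 17 = 1*12 + 1*4 + 1 should
# map to sub-indices (1, 1, 1).
import torch

shape, strides = [2, 3, 4], [12, 4, 1]
ind = torch.tensor(17)
sub = []
for s in strides:
    sub.append(torch.floor_divide(ind, s))
    ind = torch.remainder(ind, s)
print(sub)  # [tensor(1), tensor(1), tensor(1)]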
def score(self, test_set, classifier=None, wrapper=None):
    w = self.train()
    test_features = self.preprocess(test_set)
    test_labels = test_set["clase"].values

    if wrapper:
        if classifier == 'glvq':
            dist = cdist(test_features, w, 'sqeuclidean')
        elif classifier == 'kglvq':
            dist = kernel_distance(
                torch.from_numpy(rbf_kernel(test_features, gamma=self.sigma)),
                torch.from_numpy(rbf_kernel(test_features,
                                            self.train_features,
                                            gamma=self.sigma)),
                torch.from_numpy(rbf_kernel(self.train_features,
                                            gamma=self.sigma)),
                torch.from_numpy(test_features),
                torch.from_numpy(w)).numpy()
        else:
            raise ValueError("Invalid classifier")
        test_acc = np.sum(
            test_labels == np.floor_divide(dist.argmin(1), self.ppc))
        return test_acc / len(test_labels)

    test_features = torch.from_numpy(test_features)
    test_labels = torch.from_numpy(test_labels)
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    w.to(device)
    testloader = torch.utils.data.DataLoader(
        TensorDataset(test_features, test_labels),
        batch_size=16, num_workers=0)
    test_acc = torch.tensor(0)
    with torch.no_grad():
        w.eval()
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            distances, plabels = w(inputs)
            _, prediction = torch.min(distances, 1)
            prediction = torch.floor_divide(prediction, self.ppc)
            test_acc = test_acc + torch.sum(prediction == targets)
    return test_acc.item() / len(test_labels)
def advance(self, wordLk):
    """
    Given prob over words for every last beam `wordLk` and attention
    `attnOut`: Compute and update the beam search.

    Parameters:

    * `wordLk`- probs of advancing from the last step (K x words)
    * `attnOut`- attention at the last step

    Returns: True if beam search is complete.
    """
    numWords = wordLk.size(1)

    # Sum the previous scores.
    if len(self.prevKs) > 0:
        beamLk = wordLk + self.scores.unsqueeze(1).expand_as(wordLk)

        # Don't let EOS have children.
        for i in range(self.nextYs[-1].size(0)):
            if self.nextYs[-1][i] == self._eos:
                beamLk[i] = -1e20
    else:
        beamLk = wordLk[0]
    flatBeamLk = beamLk.view(-1)

    bestScores, bestScoresId = flatBeamLk.topk(self.size, 0, True, True)
    self.scores = bestScores

    # bestScoresId is flattened beam x word array, so calculate which
    # word and beam each score came from
    # prevK = bestScoresId / numWords
    # prevK = torch.true_divide(bestScoresId, numWords)
    prevK = torch.floor_divide(bestScoresId, numWords)
    self.prevKs.append(prevK)
    self.nextYs.append(bestScoresId - prevK * numWords)

    for i in range(self.nextYs[-1].size(0)):
        if self.nextYs[-1][i] == self._eos:
            s = self.scores[i]
            self.finished.append((s, len(self.nextYs) - 1, i))

    # End condition is when top-of-beam is EOS and no global score.
    if self.nextYs[-1][0] == self._eos:
        self.eosTop = True
def floor_divide(a: NdarrayOrTensor, b) -> NdarrayOrTensor:
    """`np.floor_divide` with equivalent implementation for torch.

    As of pt1.8, use `torch.div(..., rounding_mode="floor")`, and
    before that, use `torch.floor_divide`.

    Args:
        a: first array/tensor
        b: scalar to divide by

    Returns:
        Element-wise floor division between two arrays/tensors.
    """
    if isinstance(a, torch.Tensor):
        if is_module_ver_at_least(torch, (1, 8, 0)):
            return torch.div(a, b, rounding_mode="floor")
        return torch.floor_divide(a, b)
    return np.floor_divide(a, b)
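# Hedged sketch (illustrative): on pt >= 1.8 the rounding_mode="floor" branch
# matches numpy's floor semantics, including for negative operands.
import numpy as np
import torch

a = torch.tensor([-7, 7])
print(torch.div(a, 2, rounding_mode="floor"))  # tensor([-4,  3])
print(np.floor_divide(a.numpy(), 2))           # [-4  3]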
def advance(self, decoder_output):
    vocab_size = decoder_output.size(1)
    beam_scores = decoder_output + self.cur_scores.unsqueeze(1).expand_as(
        decoder_output)
    flat_beam_scores = beam_scores.view(-1)
    # cur_rows_with_eos = self.rows_with_eos.unsqueeze(1).expand_as(
    #     decoder_output).view(-1)
    # flat_beam_scores = torch.where(cur_rows_with_eos == 1,
    #     torch.zeros_like(flat_beam_scores), flat_beam_scores)
    best_scores, best_score_ids = flat_beam_scores.data.topk(self.beam_size)

    self.cur_scores = best_scores
    previous_idxs = torch.floor_divide(best_score_ids, vocab_size)
    self.previous_idx_history.append(previous_idxs)
    self.states.append(best_score_ids - previous_idxs * vocab_size)
    if self.states[-1][0] == self.special_tokens['<EOS>']:
        return True
    return False