def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel,
           device: Union[str, torch.device], HLG: Fsa, symbols: SymbolTable):
    tot_num_cuts = len(dataloader.dataset.cuts)
    num_cuts = 0
    results = []  # a list of pairs (ref_words, hyp_words)
    for batch_idx, batch in enumerate(dataloader):
        feature = batch['inputs']
        supervisions = batch['supervisions']
        supervision_segments = torch.stack(
            (supervisions['sequence_idx'],
             torch.floor_divide(supervisions['start_frame'],
                                model.subsampling_factor),
             torch.floor_divide(supervisions['num_frames'],
                                model.subsampling_factor)), 1).to(torch.int32)
        indices = torch.argsort(supervision_segments[:, 2], descending=True)
        supervision_segments = supervision_segments[indices]
        texts = supervisions['text']
        assert feature.ndim == 3

        feature = feature.to(device)
        # at entry, feature is [N, T, C]
        feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
        with torch.no_grad():
            nnet_output = model(feature)
        # nnet_output is [N, C, T]
        nnet_output = nnet_output.permute(0, 2,
                                          1)  # now nnet_output is [N, T, C]

        #  blank_bias = -3.0
        #  nnet_output[:, :, 0] += blank_bias

        dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
        # assert HLG.is_cuda()
        assert HLG.device == nnet_output.device, \
            f"Check failed: HLG.device ({HLG.device}) == nnet_output.device ({nnet_output.device})"
        # TODO(haowen): with a small `beam`, we may get empty `target_graph`,
        # thus `tot_scores` will be `inf`. Definitely we need to handle this later.
        lattices = k2.intersect_dense_pruned(HLG, dense_fsa_vec, 20.0, 7.0, 30,
                                             10000)

        # lattices = k2.intersect_dense(HLG, dense_fsa_vec, 10.0)
        best_paths = k2.shortest_path(lattices, use_double_scores=True)
        assert best_paths.shape[0] == len(texts)
        hyps = get_texts(best_paths, indices)
        assert len(hyps) == len(texts)

        for i in range(len(texts)):
            hyp_words = [symbols.get(x) for x in hyps[i]]
            ref_words = texts[i].split(' ')
            results.append((ref_words, hyp_words))

        if batch_idx % 10 == 0:
            logging.info(
                'batch {}, cuts processed so far: {}/{} ({:.6f}%)'.format(
                    batch_idx, num_cuts, tot_num_cuts,
                    float(num_cuts) / tot_num_cuts * 100))

        num_cuts += len(texts)

    return results
Example #2
    def beam_search(self, h, final_sequence, i, k, max_len, mask, probas):

        if i < max_len:
            i += 1

            y, h = self.forward(final_sequence[-1].unsqueeze(0), h)
            y = y.squeeze()
            h = h.squeeze()
            y = torch.nn.functional.softmax(y, 1)
            # Scale each beam's word distribution by its running probability,
            # then flatten to a single (beam * vocab) score vector.
            y_flat = (probas * y.permute(1, 0)).permute(1, 0).flatten(0)
            values, indexes = torch.topk(y_flat, k)
            probas = probas * values / sum(probas * values)
            probas[torch.nonzero((mask == 0))] = 1000000

            # Word id within the vocabulary; finished beams (mask == 0) emit 0.
            words = torch.fmod(indexes, self.vocab_dim) * mask

            mask = mask * (words != 1)  # 1 is EOS

            # Beam each selection came from; reorder hidden state and history.
            h_new = h[torch.floor_divide(indexes, self.vocab_dim)]
            final_sequence = final_sequence.permute(1, 0)[torch.floor_divide(indexes, self.vocab_dim)].permute(1, 0)

            final_sequence = torch.cat((final_sequence, words.unsqueeze(0)))

            return self.beam_search(h_new.unsqueeze(0), final_sequence, i, k, max_len, mask, probas)

        else:

            return final_sequence
Example #3
def encode_supervisions(supervisions: Dict[str, torch.Tensor],
                        subsampling_factor) -> Tuple[torch.Tensor, List[str]]:
    """
    Encodes Lhotse's ``batch["supervisions"]`` dict into a pair of a torch Tensor
    and a list of transcription strings.

    The supervision tensor has shape ``(batch_size, 3)``.
    Its second dimension contains information about sequence index [0],
    start frames [1] and num frames [2].

    The batch items might become re-ordered during this operation -- the returned tensor
    and list of strings are guaranteed to be consistent with each other.

    This mimics subsampling by a factor of 4 with a Conv1D layer with no padding.
    """
    supervision_segments = torch.stack(
        (supervisions['sequence_idx'],
         torch.floor_divide(supervisions['start_frame'], subsampling_factor),
         torch.floor_divide(supervisions['num_frames'], subsampling_factor)),
        1).to(torch.int32)
    supervision_segments = torch.clamp(supervision_segments, min=0)
    indices = torch.argsort(supervision_segments[:, 2], descending=True)
    supervision_segments = supervision_segments[indices]
    texts = supervisions['text']
    texts = [texts[idx] for idx in indices]
    return supervision_segments, texts
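A minimal usage sketch for encode_supervisions, assuming the function above is in scope; the supervision values and texts below are invented for illustration:

import torch

# Hypothetical Lhotse-style supervisions dict for a batch of three cuts.
supervisions = {
    'sequence_idx': torch.tensor([0, 1, 2]),
    'start_frame': torch.tensor([0, 0, 4]),
    'num_frames': torch.tensor([100, 240, 60]),
    'text': ['hello world', 'the quick brown fox', 'goodbye'],
}

segments, texts = encode_supervisions(supervisions, subsampling_factor=4)
print(segments)
# tensor([[ 1,  0, 60],
#         [ 0,  0, 25],
#         [ 2,  1, 15]], dtype=torch.int32)
print(texts)  # ['the quick brown fox', 'hello world', 'goodbye']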
Example #4
    def step(self, step, lprobs, scores):
        super()._init_buffers(lprobs)
        bsz, beam_size, vocab_size = lprobs.size()

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            lprobs = lprobs[:, ::beam_size, :].contiguous()
        else:
            # make probs contain cumulative scores for each hypothesis
            lprobs.add_(scores[:, :, step - 1].unsqueeze(-1))

        torch.topk(
            lprobs.view(bsz, -1),
            k=min(
                # Take the best 2 x beam_size predictions. We'll choose the first
                # beam_size of these which don't predict eos to continue with.
                beam_size * 2,
                lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
            out=(self.scores_buf, self.indices_buf),
        )
        torch.floor_divide(self.indices_buf, vocab_size, out=self.beams_buf)
        self.indices_buf.fmod_(vocab_size)
        return self.scores_buf, self.indices_buf, self.beams_buf
Example #5
def create_mesh(
    decoder, filename, N=256, max_batch=64 ** 3, offset=None, scale=None
):
    start = time.time()
    ply_filename = filename

    decoder.eval()

    # NOTE: the voxel_origin is actually the (bottom, left, down) corner, not the middle
    voxel_origin = [-1, -1, -1]
    voxel_size = 2.0 / (N - 1)

    overall_index = torch.arange(0, N ** 3, 1, out=torch.LongTensor())
    samples = torch.zeros(N ** 3, 4)

    # transform first 3 columns
    # to be the x, y, z index
    samples[:, 2] = overall_index % N
    samples[:, 1] = torch.floor_divide(overall_index.long(), N) % N
    samples[:, 0] = torch.floor_divide(torch.floor_divide(overall_index.long(), N), N) % N

    # transform first 3 columns
    # to be the x, y, z coordinate
    samples[:, 0] = (samples[:, 0] * voxel_size) + voxel_origin[2]
    samples[:, 1] = (samples[:, 1] * voxel_size) + voxel_origin[1]
    samples[:, 2] = (samples[:, 2] * voxel_size) + voxel_origin[0]

    num_samples = N ** 3

    samples.requires_grad = False

    head = 0

    while head < num_samples:
        print(head)
        sample_subset = samples[head : min(head + max_batch, num_samples), 0:3].cuda()

        samples[head : min(head + max_batch, num_samples), 3] = (
            decoder(sample_subset)
            .squeeze()#.squeeze(1)
            .detach()
            .cpu()
        )
        head += max_batch

    sdf_values = samples[:, 3]
    sdf_values = sdf_values.reshape(N, N, N)

    end = time.time()
    print("sampling takes: %f" % (end - start))

    convert_sdf_samples_to_ply(
        sdf_values.data.cpu(),
        voxel_origin,
        voxel_size,
        ply_filename + ".ply",
        offset,
        scale,
    )
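The linear-index to (x, y, z) decomposition used above can be sanity-checked in isolation. A tiny sketch on an invented N = 3 grid, using torch.div(..., rounding_mode='floor') (the current spelling of torch.floor_divide); the layout is x-major, i.e. i = x*N*N + y*N + z:

import torch

N = 3
idx = torch.arange(N ** 3)
z = idx % N
y = torch.div(idx, N, rounding_mode='floor') % N
x = torch.div(idx, N * N, rounding_mode='floor') % N

i = 2 * N * N + 1 * N + 0                     # expect (x, y, z) == (2, 1, 0)
print(x[i].item(), y[i].item(), z[i].item())  # 2 1 0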
Example #6
    def forward(self, x, nframes=None):
        """
        If nframes is provided, remove padded parts from quant_losses,
        flat_inputs and flat_onehots. This is useful for training, when EMA 
        only requires pre-quantized inputs and assigned indices. Note that
        jitter() is only applied after VQ-{2,3}.

        Args:
            x (torch.Tensor): Spectral feature batch of shape (B, C, F, T) or 
                (B, F, T).
            nframes (torch.Tensor): Number of frames for each utterance. Shape
                is (B,)
        """
        quant_losses = [None] * 5  # quantization losses by layer
        flat_inputs = [None] * 5   # flattened pre-quantized inputs by layer
        flat_onehots = [None] * 5  # flattened one-hot codes by layer
        
        if x.dim() == 3:
            x = x.unsqueeze(1)
        L = x.size(-1)
        cur_nframes = None

        x = self.relu(self.bn1(self.conv1(x)))
        if nframes is not None:
            cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
        (quant_losses[0], x, flat_inputs[0],
         flat_onehots[0]) = self.maybe_quantize(x, 0, cur_nframes)
        x = self.maybe_jitter(x)

        x = self.layer1(x)
        if nframes is not None:
            cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
        (quant_losses[1], x, flat_inputs[1],
         flat_onehots[1]) = self.maybe_quantize(x, 1, cur_nframes)
        x = self.maybe_jitter(x)
        
        x = self.layer2(x)
        if nframes is not None:
            cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
        (quant_losses[2], x, flat_inputs[2],
         flat_onehots[2]) = self.maybe_quantize(x, 2, cur_nframes)

        x = self.layer3(x)
        if nframes is not None:
            cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
        (quant_losses[3], x, flat_inputs[3],
         flat_onehots[3]) = self.maybe_quantize(x, 3, cur_nframes)
        
        x = self.layer4(x)
        if nframes is not None:
            cur_nframes = torch.floor_divide(nframes, round(L / x.size(-1)))
        (quant_losses[4], x, flat_inputs[4],
         flat_onehots[4]) = self.maybe_quantize(x, 4, cur_nframes)
        
        return x, quant_losses, flat_inputs, flat_onehots
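The cur_nframes bookkeeping above rescales each utterance's frame count to the current layer's temporal resolution. A small numeric sketch with made-up sizes:

import torch

# Input has L = 400 frames; a layer halves the time axis to T_out = 200,
# so every utterance's frame count is floor-divided by the same factor.
L, T_out = 400, 200
nframes = torch.tensor([400, 317, 128])
cur_nframes = torch.floor_divide(nframes, round(L / T_out))
print(cur_nframes)  # tensor([200, 158,  64])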
Example #7
 def get_conv_output_lengths(self, input_lengths, axis=1):
     seq_len = input_lengths
     for m in self.conv.modules():
         if type(m) == nn.Conv2d:
             seq_len = torch.floor_divide(
                 (seq_len + 2 * m.padding[axis] - m.dilation[axis] *
                  (m.kernel_size[axis] - 1) - 1), m.stride[axis]) + 1
         elif type(m) == nn.MaxPool2d:
             seq_len = torch.floor_divide(
                 (seq_len + 2 * m.padding - m.dilation *
                  (m.kernel_size - 1) - 1), m.stride) + 1
     return seq_len
Example #8
def decode(dataloader: torch.utils.data.DataLoader, model: AcousticModel,
           device: Union[str, torch.device], LG: Fsa, symbols: SymbolTable):
    results = []  # a list of pairs (ref_words, hyp_words)
    for batch_idx, batch in enumerate(dataloader):
        feature = batch['features']
        supervisions = batch['supervisions']
        supervision_segments = torch.stack(
            (supervisions['sequence_idx'],
             torch.floor_divide(supervisions['start_frame'],
                                model.subsampling_factor),
             torch.floor_divide(supervisions['num_frames'],
                                model.subsampling_factor)), 1).to(torch.int32)
        texts = supervisions['text']
        assert feature.ndim == 3

        feature = feature.to(device)
        # at entry, feature is [N, T, C]
        feature = feature.permute(0, 2, 1)  # now feature is [N, C, T]
        with torch.no_grad():
            nnet_output = model(feature)
        # nnet_output is [N, C, T]
        nnet_output = nnet_output.permute(0, 2,
                                          1)  # now nnet_output is [N, T, C]

        dense_fsa_vec = k2.DenseFsaVec(nnet_output, supervision_segments)
        assert LG.is_cuda()
        assert LG.device == nnet_output.device, \
            f"Check failed: LG.device ({LG.device}) == nnet_output.device ({nnet_output.device})"
        # TODO(haowen): with a small `beam`, we may get empty `target_graph`,
        # thus `tot_scores` will be `inf`. Definitely we need to handle this later.
        lattices = k2.intersect_dense_pruned(LG, dense_fsa_vec, 2000.0, 20.0,
                                             30, 300)
        best_paths = k2.shortest_path(lattices, use_float_scores=True)
        best_paths = best_paths.to('cpu')
        assert best_paths.shape[0] == len(texts)

        for i in range(len(texts)):
            hyp_words = [
                symbols.get(x) for x in best_paths[i].aux_labels if x > 0
            ]
            results.append((texts[i].split(' '), hyp_words))

        if batch_idx % 10 == 0:
            logging.info('Processed batch {}/{} ({:.6f}%)'.format(
                batch_idx, len(dataloader),
                float(batch_idx) / len(dataloader) * 100))

    return results
Example #9
def create_coord_from_det(c, f):
    coord = zeros((f.shape[0], 3), dtype=int32)
    seg = zeros(c.shape, dtype=c.dtype)
    floor_divide(c, 2, out=seg)
    coord[:, 0] = seg % 14
    coord[:, 1] = floor_divide(seg, 14)
    features = zeros((f.shape[0], f.shape[1] * 2), dtype=f.dtype)
    n_samp = f.shape[1]
    for i in range(coord.shape[0]):
        coord[i, 2] = i
        if c[i] % 2 == 0:
            features[i, 0:n_samp] = f[i]
        else:
            features[i, n_samp:] = f[i]

    return coord, features
Example #10
 def forward(self, x):
     C = torch.floor_divide(x.shape[1], 4)
     filters = torch.cat([
         self.weight,
     ] * C, dim=0)
     y = F.conv_transpose2d(x, filters, groups=C, stride=2)
     return y
Example #11
    def forward(self,
                input_ids,
                attention_mask,
                token_type_ids,
                labels,
                eval_type="train"):

        batch_size = input_ids.size(0)
        num_slots = self.num_slots

        # encoder, a pretrained model, output is a tuple
        sequence_output = self.encoder(input_ids, attention_mask,
                                       token_type_ids)[0]

        # decoder
        loss, loss_slot, pred_slot = self.decoder(sequence_output,
                                                  attention_mask, labels,
                                                  self.slot_lookup,
                                                  self.value_lookup, eval_type)

        # calculate accuracy
        accuracy = pred_slot == labels
        acc_slot = torch.true_divide(
            torch.sum(accuracy, 0).float(),
            batch_size).cpu().detach().numpy()  # slot accuracy
        acc = torch.sum(torch.floor_divide(
            torch.sum(accuracy, 1),
            num_slots)).float().item() / batch_size  # joint accuracy

        return loss, loss_slot, acc, acc_slot, pred_slot
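The joint accuracy above relies on floor division: the row-wise sum of slot matches, floor-divided by num_slots, is 1 only when every slot of a dialogue is correct. A toy check with invented values:

import torch

# accuracy: (batch, num_slots) slot-level matches.
accuracy = torch.tensor([[1, 1, 1],
                         [1, 0, 1]])
num_slots = accuracy.size(1)
joint = torch.floor_divide(torch.sum(accuracy, 1), num_slots)
print(joint)                   # tensor([1, 0])
print(joint.sum().item() / 2)  # joint accuracy over the batch: 0.5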
Example #12
    def advance(self, word_prob):
        "Update beam status and check if finished or not."
        num_words = word_prob.size(1)

        # Sum the previous scores.
        if len(self.prev_ks) > 0:
            beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob)
        else:
            beam_lk = word_prob[0]

        flat_beam_lk = beam_lk.view(-1)

        best_scores, best_scores_id = flat_beam_lk.topk(
            self.size, 0, True, True)  # 1st sort
        best_scores, best_scores_id = flat_beam_lk.topk(
            self.size, 0, True, True)  # 2nd sort

        self.all_scores.append(self.scores)
        self.scores = best_scores

        # bestScoresId is flattened as a (beam x word) array,
        # so we need to calculate which word and beam each score came from
        prev_k = torch.floor_divide(best_scores_id, num_words)
        self.prev_ks.append(prev_k)
        self.next_ys.append(best_scores_id - prev_k * num_words)

        # End condition is when top-of-beam is EOS.
        if self.next_ys[-1][0].item() == config.EOS_idx:
            self._done = True
            self.all_scores.append(self.scores)

        return self._done
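As the comment above notes, topk is run over scores flattened to (beam x word), so the originating beam and the word within it are recovered with a floor-divide and a remainder. A toy illustration with two beams, five words, and invented scores:

import torch

beam_size, num_words = 2, 5
scores = torch.tensor([[0.1, 0.0, 0.9, 0.2, 0.0],   # beam 0
                       [0.0, 0.8, 0.0, 0.0, 0.7]])  # beam 1
best_scores, best_ids = scores.view(-1).topk(beam_size)
prev_k = torch.floor_divide(best_ids, num_words)  # beam each hit came from
next_y = best_ids - prev_k * num_words            # word within that beam
print(prev_k.tolist(), next_y.tolist())           # [0, 1] [2, 1]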
Example #13
    def get_kp_torch_batch(self, pred, conf, topk=100):
        b, c, h, w = pred.shape
        pred = pred.contiguous().view(-1)
        pred[pred < conf] = 0
        score, topk_idx = torch.topk(pred, k=topk)

        batch = torch.floor_divide(topk_idx, (h * w * c))

        cls = torch.floor_divide((topk_idx - batch * h * w * c), (h * w))

        channel = (topk_idx - batch * h * w * c) - (cls * h * w)

        x = channel % w
        y = torch.floor_divide(channel, w)

        return x.view(-1), y.view(-1), cls.view(-1), batch.view(-1)
Example #14
def train(model, iterator, optimizer, criterion):

    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in tqdm(iterator):
        bayes_loss = []
        optimizer.zero_grad()

        # Loop 50 times (N_CIRCLE): maximum-likelihood loss
        for i in range(N_CIRCLE):
            #
            predictions = model(batch.text, batch.gate)
            bayes_loss.append(
                torch.gather(predictions, 1,
                             batch.label.long().unsqueeze(-1)))

        possible = torch.cat(tuple([x for x in bayes_loss]), 1)
        possible_max = torch.max(possible, dim=1).values

        loss = sum(1 - possible_max)
        acc_num = sum(torch.gt(possible_max, 0.5))
        acc = torch.floor_divide(acc_num, len(possible_max))

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Example #15
def hann2d_clipped(sz: torch.Tensor,
                   effective_sz: torch.Tensor,
                   centered=True) -> torch.Tensor:
    """1D clipped cosine window."""

    # Ensure that the difference is even
    effective_sz += (effective_sz - sz) % 2
    effective_window = hann1d(effective_sz[0].item(), True).reshape(
        1, 1, -1, 1) * hann1d(effective_sz[1].item(), True).reshape(
            1, 1, 1, -1)

    pad = torch.floor_divide(sz - effective_sz, 2)

    window = F.pad(
        effective_window,
        (pad[1].item(), pad[1].item(), pad[0].item(), pad[0].item()),
        'replicate')

    if centered:
        return window
    else:
        mid = (sz / 2).int()
        window_shift_lr = torch.cat(
            (window[:, :, :, mid[1]:], window[:, :, :, :mid[1]]), 3)
        return torch.cat((window_shift_lr[:, :, mid[0]:, :],
                          window_shift_lr[:, :, :mid[0], :]), 2)
Example #16
    def select_actions(self,
                       policy,
                       sum_log_probs,
                       mask,
                       infer_type='stochastic'):

        beam_size, seq_size = policy.size()
        nzn = torch.nonzero(mask, as_tuple=False).shape[0]
        sample_size = min(nzn, self.beam_size)

        ourlogzero = sys.float_info.min
        lpolicy = policy.masked_fill(mask == 0, ourlogzero).log()
        npolicy = sum_log_probs.unsqueeze(1) + lpolicy
        if infer_type == 'stochastic':
            nnpolicy = npolicy.exp().masked_fill(mask == 0, 0).view(1, -1)

            m = Categorical(nnpolicy)
            gact_ind = torch.multinomial(nnpolicy, sample_size)
            log_select = m.log_prob(gact_ind)

        elif infer_type == 'greedy':
            nnpolicy = npolicy.exp().masked_fill(mask == 0, 0).view(1, -1)

            _, gact_ind = nnpolicy.topk(sample_size, dim=1)
            prob = policy.view(-1)[gact_ind]
            log_select = prob.log()

        beam_id = torch.floor_divide(gact_ind, seq_size).squeeze(0)
        act_ind = torch.fmod(gact_ind, seq_size)

        return act_ind, log_select, beam_id
Example #17
    def step(self, step: int, lprobs, scores: Optional[Tensor]):
        bsz, beam_size, vocab_size = lprobs.size()

        if step == 0:
            # at the first step all hypotheses are equally likely, so use
            # only the first beam
            lprobs = lprobs[:, ::beam_size, :].contiguous()
        else:
            # make probs contain cumulative scores for each hypothesis
            assert scores is not None
            lprobs = lprobs + scores[:, :, step - 1].unsqueeze(-1)

        top_prediction = torch.topk(
            lprobs.view(bsz, -1),
            k=min(
                # Take the best 2 x beam_size predictions. We'll choose the first
                # beam_size of these which don't predict eos to continue with.
                beam_size * 2,
                lprobs.view(bsz, -1).size(1) - 1,  # -1 so we never select pad
            ),
        )
        scores_buf = top_prediction[0]
        indices_buf = top_prediction[1]
        if torch.__version__ < '1.6.0':
            beams_buf = torch.div(indices_buf, vocab_size)
        else:
            beams_buf = torch.floor_divide(indices_buf, vocab_size)
        indices_buf = indices_buf.fmod(vocab_size)
        return scores_buf, indices_buf, beams_buf
Example #18
    def advance(self, workd_lk):
        """Advance the beam."""
        num_words = workd_lk.size(1)

        # Sum the previous scores.
        if len(self.prevKs) > 0:
            beam_lk = workd_lk + self.scores.unsqueeze(1).expand_as(workd_lk)
        else:
            beam_lk = workd_lk[0]

        flat_beam_lk = beam_lk.view(-1)

        bestScores, bestScoresId = flat_beam_lk.topk(self.size, 0, True, True)
        self.scores = bestScores

        # bestScoresId is flattened beam x word array, so calculate which
        # word and beam each score came from
        prev_k = torch.floor_divide(bestScoresId,
                                    num_words)  # note: double check here
        self.prevKs.append(prev_k)
        self.nextYs.append(bestScoresId - prev_k * num_words)

        # End condition is when top-of-beam is EOS.
        if self.nextYs[-1][0] == self.eos:
            self.done = True

        return self.done
Example #19
 def backward(ctx, grad_output):
     """
     In the backward pass we receive a Tensor containing the gradient of the loss
     with respect to the output, and we need to compute the gradient of the loss
     with respect to the input.
     """
     x, y = ctx.saved_tensors  # ctx.saved_variables is deprecated
     return grad_output * 1, grad_output * torch.neg(torch.floor_divide(x, y))
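For context, these gradients (1 w.r.t. x and -floor_divide(x, y) w.r.t. y) match a forward of the form x - y * floor_divide(x, y). The sketch below wraps them in a complete torch.autograd.Function; the forward shown is an assumption about the op this backward belongs to, not taken from the source:

import torch

class FloorMod(torch.autograd.Function):
    """Hypothetical remainder-style op whose backward matches the snippet
    above: forward is x - y * floor_divide(x, y), so d/dx = 1 and
    d/dy = -floor_divide(x, y)."""

    @staticmethod
    def forward(ctx, x, y):
        ctx.save_for_backward(x, y)
        return x - y * torch.floor_divide(x, y)

    @staticmethod
    def backward(ctx, grad_output):
        x, y = ctx.saved_tensors
        return grad_output * 1, grad_output * torch.neg(torch.floor_divide(x, y))

x = torch.tensor([7.0], requires_grad=True)
y = torch.tensor([3.0], requires_grad=True)
FloorMod.apply(x, y).backward()
print(x.grad, y.grad)  # tensor([1.]) tensor([-2.])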
Example #20
def _topk(scores, K=40):
    batch, cat, height, width = scores.size()

    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)

    topk_inds = topk_inds % (height * width)
    topk_ys = (torch.floor_divide(topk_inds, width)).float()
    topk_xs = (topk_inds % width).int().float()

    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (torch.floor_divide(topk_ind, K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1),
                             topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)

    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
Example #21
def getmaxn(tensor, n):
    tlen = tensor.shape[-1]
    tensor = tensor.reshape(-1)
    idx = tensor.argsort(descending=True)[:n]
    value = tensor[idx]
    maxn = torch.cat(
        (torch.true_divide(idx, tlen).float().unsqueeze(0),
         (torch.floor_divide(idx,
                             tlen)).float().unsqueeze(0), value.unsqueeze(0)),
        dim=0)
    return maxn
Example #22
    def get_seg_target(self, seg_scores, gt_bboxes, device):
        strides=[4, 8, 16, 32, 64]
        batch_size = len(gt_bboxes)
        feat_sizes = [each.size()[-2:] for each in seg_scores]
        seg_labels_list = []
        seg_weights_list = []
        for si, stride in enumerate(strides):
            seg_labels = torch.ones((batch_size, feat_sizes[si][0], feat_sizes[si][1]), dtype=torch.int64, device=device)
            seg_weights = torch.zeros((batch_size, feat_sizes[si][0], feat_sizes[si][1]), dtype=torch.float32, device=device)
            for pi, bbox_per in enumerate(gt_bboxes):
                bbox_per_down = (bbox_per / stride).type(torch.int32)
                for bi in range(bbox_per.size()[0]):
                    x_min, y_min, x_max, y_max = bbox_per_down[bi]
                    x_mid = torch.floor_divide((x_max + x_min), 2).type(torch.int32)
                    y_mid = torch.floor_divide((y_max + y_min), 2).type(torch.int32)
                    choice = random.randint(1, 8)
                    if choice == 1:
                        seg_weights[pi, y_mid:y_max, x_min:x_max] = 1
                    elif choice == 2:
                        seg_weights[pi, y_min:y_mid, x_min:x_max] = 1
                    elif choice == 3:
                        seg_weights[pi, y_min:y_max, x_mid:x_max] = 1
                    elif choice == 4:
                        seg_weights[pi, y_min:y_max, x_min:x_mid] = 1
                    elif choice == 5:
                        x_mid_l = torch.floor_divide((x_min + x_mid), 2).type(torch.int32)
                        x_mid_r = torch.floor_divide((x_mid + x_max), 2).type(torch.int32)
                        y_mid_t = torch.floor_divide((y_min + y_mid), 2).type(torch.int32)
                        y_mid_b = torch.floor_divide((y_mid + y_max), 2).type(torch.int32)
                        seg_weights[pi, y_min:y_max, x_min:x_max] = 1
                        seg_weights[pi, y_mid_t:y_mid_b,x_mid_l:x_mid_r] = 0
                    elif choice == 6:
                        x_mid_l = torch.floor_divide((x_min + x_mid), 2).type(torch.int32)
                        x_mid_r = torch.floor_divide((x_mid + x_max), 2).type(torch.int32)
                        y_mid_t = torch.floor_divide((y_min + y_mid), 2).type(torch.int32)
                        y_mid_b = torch.floor_divide((y_mid + y_max), 2).type(torch.int32)
                        seg_weights[pi, y_mid_t:y_mid_b,x_mid_l:x_mid_r] = 1
                    seg_labels[pi, y_min:y_max, x_min:x_max] = 0
            seg_labels_list.append(seg_labels.reshape(batch_size, -1))
            seg_weights_list.append(seg_weights.reshape(batch_size, -1))

        return seg_labels_list, seg_weights_list
Example #23
def floor_divide(input_, other):
    """Wrapper of `torch.floor_divide`.

    Parameters
    ----------
    input_ : DTensor
        The first operand.
    other : DTensor
        The second operand.
    """
    return torch.floor_divide(input_._data, other._data)
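A quick usage check of the wrapper; the real DTensor type comes from the surrounding library, so a minimal stand-in with the same `_data` attribute is used here purely for illustration:

import torch
from dataclasses import dataclass

@dataclass
class FakeDTensor:
    # Stand-in exposing only the `_data` attribute the wrapper reads.
    _data: torch.Tensor

a = FakeDTensor(torch.tensor([5, 7, 9]))
b = FakeDTensor(torch.tensor([2, 2, 4]))
print(floor_divide(a, b))  # tensor([2, 3, 2])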
Example #24
def hm2box(heatmap, offset, wh, scale_factor=4, topk=10, conf_th=0.3, normalized=False):
    height, width = heatmap.shape[-2:]
    
    max_pool = torch.nn.MaxPool2d(3, stride=1, padding=3//2)        
    
    isPeak = max_pool(heatmap) == heatmap
    peakmap = heatmap * isPeak

    scores, indices = peakmap.flatten().topk(topk)
    
    clss  = torch.floor_divide(indices, (height*width))
    inds  = torch.fmod(indices, (height*width))
    yinds = torch.floor_divide(inds, width)
    xinds = torch.fmod(inds, width)
    
    xoffs = offset[0, yinds, xinds]
    xsizs = wh[0, yinds, xinds]
    
    yoffs = offset[1, yinds, xinds]
    ysizs = wh[1, yinds, xinds]
    
    if normalized:
        xoffs = xoffs * scale_factor
        yoffs = yoffs * scale_factor        
        xsizs = xsizs * width
        ysizs = ysizs * height
    
    xmin = (xinds + xoffs - xsizs/2) * scale_factor
    ymin = (yinds + yoffs - ysizs/2) * scale_factor
    xmax = (xinds + xoffs + xsizs/2) * scale_factor
    ymax = (yinds + yoffs + ysizs/2) * scale_factor
        
    boxes = torch.stack([xmin, ymin, xmax, ymax], dim=1) # Tensor: topk x 4

    # confidence thresholding
    over_threshold = scores >= conf_th

    return boxes[over_threshold], clss[over_threshold], scores[over_threshold]
Example #25
 def get_seq_lens(self, input_length):
     """
     Given a 1D Tensor or Variable containing integer sequence lengths, return a 1D tensor or variable
     containing the size sequences that will be output by the network.
     :param input_length: 1D Tensor
     :return: 1D Tensor scaled by model
     """
     seq_len = input_length
     for m in self.conv.modules():
         if type(m) == nn.modules.conv.Conv2d:
             seq_len = torch.floor_divide(
                 (seq_len + 2 * m.padding[1] - m.dilation[1] *
                  (m.kernel_size[1] - 1) - 1), m.stride[1]) + 1
     return seq_len.int()
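The closed-form length update above mirrors the standard Conv2d output-size formula. A quick self-contained check against an actual layer, with toy sizes assumed only for illustration:

import torch
import torch.nn as nn

conv = nn.Conv2d(1, 1, kernel_size=(41, 11), stride=(2, 2), padding=(20, 5))
T_in = 173
# Same formula as above, applied along the time axis (index 1).
T_formula = (T_in + 2 * conv.padding[1] - conv.dilation[1]
             * (conv.kernel_size[1] - 1) - 1) // conv.stride[1] + 1
# Compare with the length actually produced by the layer.
x = torch.randn(1, 1, 80, T_in)
T_real = conv(x).size(-1)
print(T_formula, T_real)  # 87 87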
Example #26
def ind2sub(ind, shape, out=None):
    """Convert linear indices into sub indices (i, j, k).

    The rightmost dimension is the most rapidly changing one
    -> if shape == [D, H, W], the strides are therefore [H*W, W, 1]

    Parameters
    ----------
    ind : tensor_like
        Linear indices
    shape : (D,) vector_like
        Size of each dimension.
    out : tensor, optional
        Output placeholder

    Returns
    -------
    subs : (D, ...) tensor
        Sub-indices.
    """
    ind = torch.as_tensor(ind)
    bck = backend(ind)
    stride = py.cumprod(shape, reverse=True, exclusive=True)
    stride = torch.as_tensor(stride, **bck)
    if out is None:
        sub = ind.new_empty([len(shape), *ind.shape])
    else:
        sub = out.reshape([len(shape), *ind.shape])
    sub[:, ...] = ind
    for d in range(len(shape)):
        if d > 0:
            torch.remainder(sub[d],
                            torch.as_tensor(stride[d - 1], **bck),
                            out=sub[d])
        torch.floor_divide(sub[d], stride[d], out=sub[d])
    return sub
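A self-contained variant of the same conversion, without the py.cumprod/backend helpers, assuming the row-major strides described in the docstring:

import torch

def ind2sub_simple(ind, shape):
    # Row-major strides: for shape [D, H, W] they are [H*W, W, 1].
    strides, s = [], 1
    for dim in reversed(shape):
        strides.insert(0, s)
        s *= dim
    ind = torch.as_tensor(ind)
    return torch.stack([torch.div(ind, st, rounding_mode='floor') % dim
                        for st, dim in zip(strides, shape)])

# Linear index 7 in a (2, 3, 4) volume -> (i, j, k) = (0, 1, 3).
print(ind2sub_simple(torch.tensor([7]), [2, 3, 4]))
# tensor([[0],
#         [1],
#         [3]])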
Example #27
    def score(self, test_set, classifier=None, wrapper=None):
        w = self.train()
        test_features = self.preprocess(test_set)
        test_labels = test_set["clase"].values
        if wrapper:
            if classifier == 'glvq':
                dist = cdist(test_features, w, 'sqeuclidean')
            elif classifier == 'kglvq':
                dist = kernel_distance(
                    torch.from_numpy(
                        rbf_kernel(test_features, gamma=self.sigma)),
                    torch.from_numpy(
                        rbf_kernel(test_features,
                                   self.train_features,
                                   gamma=self.sigma)),
                    torch.from_numpy(
                        rbf_kernel(self.train_features, gamma=self.sigma)),
                    torch.from_numpy(test_features),
                    torch.from_numpy(w)).numpy()
            else:
                raise ValueError("Invalid classifier")
            test_acc = np.sum(
                test_labels == np.floor_divide(dist.argmin(1), self.ppc))
            return test_acc / len(test_labels)
        test_features = torch.from_numpy(test_features)
        test_labels = torch.from_numpy(test_labels)

        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            device = torch.device("cpu")
        w.to(device)
        testloader = torch.utils.data.DataLoader(TensorDataset(
            test_features, test_labels),
                                                 batch_size=16,
                                                 num_workers=0)
        test_acc = torch.tensor(0)
        with torch.no_grad():
            w.eval()
            for inputs, targets in testloader:
                inputs, targets = inputs.to(device), targets.to(device)
                distances, plabels = w(inputs)
                _, prediction = torch.min(distances, 1)
                prediction = torch.floor_divide(prediction, self.ppc)
                test_acc = test_acc + torch.sum(prediction == targets)
        return test_acc.item() / len(test_labels)
Example #28
    def advance(self, wordLk):
        """
        Given prob over words for every last beam `wordLk` and attention
        `attnOut`: Compute and update the beam search.

        Parameters:

        * `wordLk`- probs of advancing from the last step (K x words)
        * `attnOut`- attention at the last step

        Returns: True if beam search is complete.
        """
        numWords = wordLk.size(1)

        # Sum the previous scores.
        if len(self.prevKs) > 0:
            beamLk = wordLk + self.scores.unsqueeze(1).expand_as(wordLk)

            # Don't let EOS have children.
            for i in range(self.nextYs[-1].size(0)):
                if self.nextYs[-1][i] == self._eos:
                    beamLk[i] = -1e20
        else:
            beamLk = wordLk[0]
        flatBeamLk = beamLk.view(-1)
        bestScores, bestScoresId = flatBeamLk.topk(self.size, 0, True, True)

        self.scores = bestScores

        # bestScoresId is flattened beam x word array, so calculate which
        # word and beam each score came from
        #prevK = bestScoresId / numWords
        #prevK = torch.true_divide(bestScoresId, numWords)
        prevK = torch.floor_divide(bestScoresId, numWords)
        self.prevKs.append(prevK)
        self.nextYs.append((bestScoresId - prevK * numWords))

        for i in range(self.nextYs[-1].size(0)):
            if self.nextYs[-1][i] == self._eos:
                s = self.scores[i]
                self.finished.append((s, len(self.nextYs) - 1, i))

        # End condition is when top-of-beam is EOS and no global score.
        if self.nextYs[-1][0] == self._eos:
            self.eosTop = True
Example #29
def floor_divide(a: NdarrayOrTensor, b) -> NdarrayOrTensor:
    """`np.floor_divide` with equivalent implementation for torch.

    As of pt1.8, use `torch.div(..., rounding_mode="floor")`, and
    before that, use `torch.floor_divide`.

    Args:
        a: first array/tensor
        b: scalar to divide by

    Returns:
        Element-wise floor division between two arrays/tensors.
    """
    if isinstance(a, torch.Tensor):
        if is_module_ver_at_least(torch, (1, 8, 0)):
            return torch.div(a, b, rounding_mode="floor")
        return torch.floor_divide(a, b)
    return np.floor_divide(a, b)
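A quick usage check, assuming the wrapper above is in scope; non-negative values are used so the result is identical on both torch code paths (truncation and floor agree there):

import numpy as np
import torch

print(floor_divide(np.array([5, 7, 9]), 2))      # [2 3 4]
print(floor_divide(torch.tensor([5, 7, 9]), 2))  # tensor([2, 3, 4])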
Example #30
    def advance(self, decoder_output):
        vocab_size = decoder_output.size(1)
        beam_scores = decoder_output + self.cur_scores.unsqueeze(1).expand_as(
            decoder_output)
        flat_beam_scores = beam_scores.view(-1)

        # cur_rows_with_eos = self.rows_with_eos.unsqueeze(1).expand_as(decoder_output).view(-1)
        # flat_beam_scores = torch.where(cur_rows_with_eos == 1, torch.zeros_like(flat_beam_scores), flat_beam_scores)

        best_scores, best_score_ids = flat_beam_scores.data.topk(
            self.beam_size)
        self.cur_scores = best_scores

        previous_idxs = torch.floor_divide(best_score_ids, vocab_size)
        self.previous_idx_history.append(previous_idxs)
        self.states.append(best_score_ids - previous_idxs * vocab_size)

        if self.states[-1][0] == self.special_tokens['<EOS>']:
            return True
        return False