Example no. 1
 def unet_like_1d(x, usual_convolution):
     # u-net like steps for increasing / reducing dimensionality
     x = rearrange(x, 'b c t1 t2 -> b c (t1 t2)')  # reduce dimensionality
     y = rearrange(x, 'b c (t dt) -> b (dt c) t', dt=2)
     y = usual_convolution(y)
     x = x + rearrange(y, 'b (dt c) t -> b c (t dt)', dt=2)
     return x
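A minimal usage sketch (not from the original source) for the function above; the shapes and the identity stand-in for usual_convolution are assumptions chosen only to trace how the pattern collapses and restores the time axes:

    import numpy as np
    from einops import rearrange

    def identity_convolution(t):
        # stand-in for a real 1-D convolution that preserves the tensor shape (assumption)
        return t

    x = np.zeros([2, 8, 4, 4])                    # b c t1 t2 (sizes assumed)
    y = unet_like_1d(x, identity_convolution)
    assert y.shape == (2, 8, 16)                  # t1 and t2 merged into one time axis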
Example no. 2
    def forward(self, img: FloatTensor,
                img_mask: LongTensor) -> Tuple[FloatTensor, LongTensor]:
        """encode image to feature

        Parameters
        ----------
        img : FloatTensor
            [b, 1, h', w']
        img_mask: LongTensor
            [b, h', w']

        Returns
        -------
        Tuple[FloatTensor, LongTensor]
            [b, t, d], [b, t]
        """
        # extract feature
        feature, mask = self.model(img, img_mask)
        feature = self.feature_proj(feature)

        # proj
        feature = rearrange(feature, "b d h w -> b h w d")
        feature = self.norm(feature)

        # positional encoding
        feature = self.pos_enc_2d(feature, mask)

        # flat to 1-D
        feature = rearrange(feature, "b h w d -> b (h w) d")
        mask = rearrange(mask, "b h w -> b (h w)")
        return feature, mask
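A small numpy sketch (editor's addition, sizes assumed) tracing the flatten step above: a 2-D feature map and its mask become a token sequence of length h*w:

    import numpy as np
    from einops import rearrange

    feature = np.zeros([2, 256, 8, 32])           # b d h w (sizes assumed)
    mask = np.zeros([2, 8, 32], dtype=np.int64)   # b h w
    feature = rearrange(feature, "b d h w -> b h w d")
    tokens = rearrange(feature, "b h w d -> b (h w) d")
    flat_mask = rearrange(mask, "b h w -> b (h w)")
    assert tokens.shape == (2, 8 * 32, 256) and flat_mask.shape == (2, 8 * 32)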
Example no. 3
    def new_way(input, num_classes, num_anchors, anchors, stride_h, stride_w):
        raw_predictions = rearrange(
            input,
            ' b (anchor prediction) h w -> prediction b anchor h w',
            anchor=num_anchors)

        anchors = torch.FloatTensor(anchors).to(input.device)
        anchor_sizes = rearrange(anchors, 'anchor dim -> dim () anchor () ()')

        _, _, _, in_h, in_w = raw_predictions.shape
        grid_h = rearrange(torch.arange(in_h).float(),
                           'h -> () () h ()').to(input.device)
        grid_w = rearrange(torch.arange(in_w).float(),
                           'w -> () () () w').to(input.device)

        predicted_bboxes = torch.zeros_like(raw_predictions)
        predicted_bboxes[0] = (raw_predictions[0].sigmoid() +
                               grid_h) * stride_h  # center y
        predicted_bboxes[1] = (raw_predictions[1].sigmoid() +
                               grid_w) * stride_w  # center x
        predicted_bboxes[2:4] = (
            raw_predictions[2:4].exp()) * anchor_sizes  # bbox width and height
        predicted_bboxes[4] = raw_predictions[4].sigmoid()  # confidence
        predicted_bboxes[5:] = raw_predictions[5:].sigmoid()  # class predictions
        # only to match results of original code, not needed
        return rearrange(predicted_bboxes,
                         'prediction b anchor h w -> b anchor h w prediction')
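The key move above is splitting the fused channel axis into (anchor, prediction) and pulling prediction to the front so each prediction slice can be indexed directly. A numpy sketch with assumed YOLO-style sizes:

    import numpy as np
    from einops import rearrange

    num_anchors, num_predictions = 3, 85          # assumed sizes: 5 box/conf terms + 80 classes
    x = np.zeros([2, num_anchors * num_predictions, 13, 13])
    raw = rearrange(x, 'b (anchor prediction) h w -> prediction b anchor h w',
                    anchor=num_anchors)
    assert raw.shape == (num_predictions, 2, num_anchors, 13, 13)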
Example no. 4
def test_rearrange_permutations_numpy():
    # tests random permutation of axes against two independent numpy ways
    for n_axes in range(1, 10):
        input = numpy.arange(2**n_axes).reshape([2] * n_axes)
        permutation = numpy.random.permutation(n_axes)
        left_expression = ' '.join('i' + str(axis) for axis in range(n_axes))
        right_expression = ' '.join('i' + str(axis) for axis in permutation)
        expression = left_expression + ' -> ' + right_expression
        result = rearrange(input, expression)

        for pick in numpy.random.randint(0, 2, [10, n_axes]):
            assert input[tuple(pick)] == result[tuple(pick[permutation])]

    for n_axes in range(1, 10):
        input = numpy.arange(2**n_axes).reshape([2] * n_axes)
        permutation = numpy.random.permutation(n_axes)
        left_expression = ' '.join('i' + str(axis)
                                   for axis in range(n_axes)[::-1])
        right_expression = ' '.join('i' + str(axis)
                                    for axis in permutation[::-1])
        expression = left_expression + ' -> ' + right_expression
        result = rearrange(input, expression)
        assert result.shape == input.shape
        expected_result = numpy.zeros_like(input)
        for original_axis, result_axis in enumerate(permutation):
            expected_result |= ((input >> original_axis) & 1) << result_axis

        assert numpy.array_equal(result, expected_result)
Example no. 5
    def forward(self, feat_f0, feat_f1, feat_c0, feat_c1, data):
        W = self.W
        stride = data['hw0_f'][0] // data['hw0_c'][0]

        data.update({'W': W})
        if data['b_ids'].shape[0] == 0:
            feat0 = torch.empty(0,
                                self.W**2,
                                self.d_model_f,
                                device=feat_f0.device)
            feat1 = torch.empty(0,
                                self.W**2,
                                self.d_model_f,
                                device=feat_f0.device)
            return feat0, feat1

        # 1. unfold(crop) all local windows
        feat_f0_unfold = F.unfold(feat_f0,
                                  kernel_size=(W, W),
                                  stride=stride,
                                  padding=W // 2)
        feat_f0_unfold = rearrange(feat_f0_unfold,
                                   'n (c ww) l -> n l ww c',
                                   ww=W**2)
        feat_f1_unfold = F.unfold(feat_f1,
                                  kernel_size=(W, W),
                                  stride=stride,
                                  padding=W // 2)
        feat_f1_unfold = rearrange(feat_f1_unfold,
                                   'n (c ww) l -> n l ww c',
                                   ww=W**2)

        # 2. select only the predicted matches
        feat_f0_unfold = feat_f0_unfold[data['b_ids'],
                                        data['i_ids']]  # [n, ww, cf]
        feat_f1_unfold = feat_f1_unfold[data['b_ids'], data['j_ids']]

        # option: use coarse-level loftr feature as context: concat and linear
        if self.cat_c_feat:
            feat_c_win = self.down_proj(
                torch.cat([
                    feat_c0[data['b_ids'], data['i_ids']],
                    feat_c1[data['b_ids'], data['j_ids']]
                ], 0))  # [2n, c]
            feat_cf_win = self.merge_feat(
                torch.cat(
                    [
                        torch.cat([feat_f0_unfold, feat_f1_unfold],
                                  0),  # [2n, ww, cf]
                        repeat(feat_c_win, 'n c -> n ww c',
                               ww=W**2),  # [2n, ww, cf]
                    ],
                    -1))
            feat_f0_unfold, feat_f1_unfold = torch.chunk(feat_cf_win, 2, dim=0)

        return feat_f0_unfold, feat_f1_unfold
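A sketch (assumed sizes) of the unfold-plus-rearrange step above: F.unfold returns (n, c * W*W, l), and the rearrange exposes each of the l local windows as a (W*W, c) block:

    import torch
    import torch.nn.functional as F
    from einops import rearrange

    W, stride = 5, 4                              # window size and stride (assumed)
    feat = torch.zeros(2, 64, 32, 32)             # n c h w (sizes assumed)
    unfolded = F.unfold(feat, kernel_size=(W, W), stride=stride, padding=W // 2)
    # unfold gives (n, c * W*W, l); expose each of the l windows as a (W*W, c) block
    windows = rearrange(unfolded, 'n (c ww) l -> n l ww c', ww=W**2)
    assert windows.shape == (2, 8 * 8, W**2, 64)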
Example no. 6
    def test6(x):
        # parsing parameters
        t = rearrange(x, 'b c h w -> (b h w) c')
        t = t[:, ::2]  # replacement for dot-product, just changes size of second axis
        assert t.shape == (10 * 30 * 40, 10)

        y = rearrange(t, '(b h w) c2 -> b c2 h w', **parse_shape(x, 'b _ h w'))
        assert y.shape == (10, 10, 30, 40)
        return y
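A short sketch of the parse_shape round trip used above, with the sizes the test assumes; '_' marks an axis whose length is not captured:

    import numpy as np
    from einops import rearrange, parse_shape

    x = np.zeros([10, 20, 30, 40])                # b c h w, the sizes the test assumes
    assert parse_shape(x, 'b _ h w') == dict(b=10, h=30, w=40)
    t = rearrange(x, 'b c h w -> (b h w) c')
    y = rearrange(t, '(b h w) c -> b c h w', **parse_shape(x, 'b _ h w'))
    assert y.shape == x.shape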
Example no. 7
 def convolve_strided_2d(x, h_stride, w_stride, usual_convolution):
     x = rearrange(x,
                   'b c (h hs) (w ws) -> (hs ws b) c h w',
                   hs=h_stride,
                   ws=w_stride)
     x = usual_convolution(x)
     x = rearrange(x,
                   '(hs ws b) c h w -> b c (h hs) (w ws)',
                   hs=h_stride,
                   ws=w_stride)
     return x
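A usage sketch (not from the original source): with an identity stand-in for usual_convolution the two patterns above are exact inverses, so the input round-trips unchanged:

    import numpy as np
    from einops import rearrange

    def identity_convolution(t):
        # stand-in for a real convolution that keeps the spatial size (assumption)
        return t

    x = np.arange(2 * 3 * 8 * 8).reshape(2, 3, 8, 8)     # b c H W (sizes assumed)
    y = convolve_strided_2d(x, 2, 2, identity_convolution)
    assert y.shape == x.shape and np.array_equal(y, x)   # the two patterns are exact inverses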
Example no. 8
 def test9(x):
     # squeeze - unsqueeze
     y = reduce(x, 'b c h w -> b c () ()', reduction='max')
     assert y.shape == (10, 20, 1, 1)
     y = rearrange(y, 'b c () () -> c b')
     assert y.shape == (20, 10)
     return y
Example no. 9
    def forward(self, x):
        b, c, h, w = x.shape
        cls_tokens = repeat(self.CLS, "1 1 d -> b 1 d", b=b)

        # Divide into flattened patches
        x = self.patch_and_flat(x)

        # Linear projection
        x = self.linear_proj(x)

        # Token dropout
        x = self.token_dropout(x)

        # Concatenate CLS if not using multihead attention pooling
        if not self.use_multihead_attention_pooling:
            x = torch.cat([cls_tokens, x], dim=1) + self.position_code

        # Transformer
        x = self.transformer(x)

        # Use multihead attention pooling if specified
        if self.use_multihead_attention_pooling:
            cls_tokens = self.map(cls_tokens, x)
            cls_tokens = rearrange(cls_tokens, "b 1 d -> b d")
        else:
            cls_tokens = x.select(dim=1, index=0)
            cls_tokens = self.proj(cls_tokens)

        return cls_tokens
Example no. 10
def test_parse_shape_symbolic():
    backends = collect_test_backends(symbolic=True, layers=False)
    backends += collect_test_backends(symbolic=True, layers=True)
    for backend in backends:
        if backend.framework_name == 'keras':
            # need special way to compile, shape vars can be used only inside layers
            continue
        print('special shape parsing for', backend.framework_name)
        input_symbols = [
            backend.create_symbol([10, 20, 30, 40]),
            backend.create_symbol([10, 20, None, None]),
            backend.create_symbol([None, None, None, None]),
        ]
        if backend.framework_name in ['mxnet.symbol']:
            # mxnet can't normally run inference
            input_symbols = [backend.create_symbol([10, 20, 30, 40])]

        for input_symbol in input_symbols:
            shape_placeholder = parse_shape(input_symbol, 'a b c d')
            shape = {}
            for name, symbol in shape_placeholder.items():
                shape[name] = symbol if isinstance(symbol, int) \
                    else backend.eval_symbol(symbol, [(input_symbol, numpy.zeros([10, 20, 30, 40]))])
            print(shape)
            result_placeholder = rearrange(
                input_symbol,
                'a b (c1 c2) (d1 d2) -> (a b d1) c1 (c2 d2)',
                **parse_shape(input_symbol, 'a b c1 _'),
                d2=2)
            result = backend.eval_symbol(
                result_placeholder,
                [(input_symbol, numpy.zeros([10, 20, 30, 40]))])
            print(result.shape)
            assert result.shape == (10 * 20 * 20, 30, 1 * 2)
            assert numpy.allclose(result, 0)
Example no. 11
    def forward(self, k, q, nbhd_idx):
        # (bs, m, c_in) -> (bs, m, embed_dim) -> (bs, m, n_heads, h_dim)
        K = rearrange(self.fc_k(k), "b n (h d) -> b n h d", h=self.n_heads)
        # (bs, n, c_in) -> (bs, n, embed_dim) -> (bs, n, n_heads, h_dim)
        Q = rearrange(self.fc_q(q), "b n (h d) -> b n h d", h=self.n_heads)
        # Key features are just the same for each point
        K = K.unsqueeze(2).repeat(1, 1, nbhd_idx.shape[2], 1, 1)
        # Batch indices
        B = (torch.arange(
            Q.shape[0], device=Q.device).long()[:, None,
                                                None].expand(*nbhd_idx.shape))
        # Extract the points for each nbhd
        Q = Q[B, nbhd_idx]

        # Concat and return
        return self.fc_o(torch.cat([K, Q], dim=-1))
Example no. 12
    def forward(self, data):
        """ 
        Update:
            data (dict): {
                'image0': (torch.Tensor): (N, 1, H, W)
                'image1': (torch.Tensor): (N, 1, H, W)
                'mask0'(optional) : (torch.Tensor): (N, H, W) '0' indicates a padded position
                'mask1'(optional) : (torch.Tensor): (N, H, W)
            }
        """
        # 1. Local Feature CNN
        data.update({
            'bs': data['image0'].size(0),
            'hw0_i': data['image0'].shape[2:], 'hw1_i': data['image1'].shape[2:]
        })

        if data['hw0_i'] == data['hw1_i']:  # faster & better BN convergence
            feats_c, feats_f = self.backbone(torch.cat([data['image0'], data['image1']], dim=0))
            (feat_c0, feat_c1), (feat_f0, feat_f1) = feats_c.split(data['bs']), feats_f.split(data['bs'])
        else:  # handle different input shapes
            (feat_c0, feat_f0), (feat_c1, feat_f1) = self.backbone(data['image0']), self.backbone(data['image1'])

        data.update({
            'hw0_c': feat_c0.shape[2:], 'hw1_c': feat_c1.shape[2:],
            'hw0_f': feat_f0.shape[2:], 'hw1_f': feat_f1.shape[2:]
        })

        # 2. coarse-level loftr module
        # add featmap with positional encoding, then flatten it to sequence [N, HW, C]
        feat_c0 = rearrange(self.pos_encoding(feat_c0), 'n c h w -> n (h w) c')
        feat_c1 = rearrange(self.pos_encoding(feat_c1), 'n c h w -> n (h w) c')

        mask_c0 = mask_c1 = None  # mask is useful in training
        if 'mask0' in data:
            mask_c0, mask_c1 = data['mask0'].flatten(-2), data['mask1'].flatten(-2)
        feat_c0, feat_c1 = self.loftr_coarse(feat_c0, feat_c1, mask_c0, mask_c1)

        # 3. match coarse-level
        self.coarse_matching(feat_c0, feat_c1, data, mask_c0=mask_c0, mask_c1=mask_c1)

        # 4. fine-level refinement
        feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(feat_f0, feat_f1, feat_c0, feat_c1, data)
        if feat_f0_unfold.size(0) != 0:  # at least one coarse level predicted
            feat_f0_unfold, feat_f1_unfold = self.loftr_fine(feat_f0_unfold, feat_f1_unfold)

        # 5. match fine-level
        self.fine_matching(feat_f0_unfold, feat_f1_unfold, data)
Example no. 13
def test_collapsed_ellipsis_errors_out():
    x = numpy.zeros([1, 1, 1, 1, 1])
    rearrange(x, 'a b c d ... ->  a b c ... d')
    with assert_raises(EinopsError):
        rearrange(x, 'a b c d (...) ->  a b c ... d')

    rearrange(x, '... ->  (...)')
    with assert_raises(EinopsError):
        rearrange(x, '(...) -> (...)')
Example no. 14
 def test10(x):
     # stack
     tensors = list(
         x + 0
     )  # 0 is needed https://github.com/tensorflow/tensorflow/issues/23185
     tensors = rearrange(tensors, 'b c h w -> b h w c')
     assert tensors.shape == (10, 30, 40, 20)
     return tensors
Example no. 15
 def test11(x):
     # concatenate
     tensors = list(
         x + 0
     )  # 0 is needed https://github.com/tensorflow/tensorflow/issues/23185
     tensors = rearrange(tensors, 'b c h w -> h (b w) c')
     assert tensors.shape == (30, 10 * 40, 20)
     return tensors
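In both snippets rearrange receives a plain Python list of arrays: einops stacks the list along the new leading axis named first in the pattern, so one call covers both stacking and concatenation. A numpy sketch with the shapes these tests assume:

    import numpy as np
    from einops import rearrange

    frames = [np.zeros([20, 30, 40]) for _ in range(10)]   # ten c h w arrays
    stacked = rearrange(frames, 'b c h w -> b h w c')       # list axis becomes 'b'
    concatenated = rearrange(frames, 'b c h w -> h (b w) c')
    assert stacked.shape == (10, 30, 40, 20)
    assert concatenated.shape == (30, 10 * 40, 20)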
Example no. 16
def test_ellipsis_ops_numpy():
    x = numpy.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6])
    for pattern in identity_patterns:
        assert numpy.array_equal(x, rearrange(x, pattern)), pattern

    for pattern1, pattern2 in equivalent_rearrange_patterns:
        assert numpy.array_equal(rearrange(x, pattern1),
                                 rearrange(x, pattern2))

    for reduction in ['min', 'max', 'sum']:
        for pattern1, pattern2 in equivalent_reduction_patterns:
            assert numpy.array_equal(reduce(x, pattern1, reduction=reduction),
                                     reduce(x, pattern2, reduction=reduction))

    # now just check coincidence with numpy
    all_rearrange_patterns = [*identity_patterns]
    for pattern_pairs in equivalent_rearrange_patterns:
        all_rearrange_patterns.extend(pattern_pairs)
Example no. 17
    def ensemble_cross_rate_score(
        self,
        src_mask_list: List[Tuple[torch.Tensor, torch.Tensor]],
        hypotheses: List[Hypothesis],
        direction: str,
    ) -> None:
        """give hypotheses to another model, add score to hypotheses inplace

        Args:
            src_mask_list: [([1, len, d_model], [1, len])]
            hypotheses (List[Hypothesis]):
            direction (str): one of {"l2r", "r2l"}
        """
        indices = [h.seq for h in hypotheses]
        tgt, output = to_tgt_output(indices, direction, self.device)

        b, length = tgt.size()
        prob_sum = torch.zeros((b, length, vocab_size),
                               dtype=torch.float,
                               device=self.device)
        for i, m in enumerate(self.models):
            src, src_mask = src_mask_list[i]
            exp_src = repeat(src.squeeze(0), "s e -> b s e", b=b)
            exp_src_mask = repeat(src_mask.squeeze(0), "s -> b s", b=b)

            output_hat = m.bttr.decoder(exp_src, exp_src_mask, tgt)
            prob_sum = prob_sum + torch.softmax(output_hat, dim=-1)
        log_p = torch.log(prob_sum / len(self.models))

        flat_hat = rearrange(log_p, "b l e -> (b l) e")
        flat = rearrange(output, "b l -> (b l)")
        loss = F.nll_loss(flat_hat,
                          flat,
                          ignore_index=vocab.PAD_IDX,
                          reduction="none")

        loss = rearrange(loss, "(b l) -> b l", b=b)
        loss = torch.sum(loss, dim=-1)

        for i, length in enumerate(loss):
            score = -length
            hypotheses[i].score += score
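A minimal sketch of the repeat broadcast used above (sizes assumed): a single encoded source and its mask are expanded to one copy per hypothesis:

    import torch
    from einops import repeat

    src = torch.zeros(1, 7, 256)                  # [1, len, d_model] (sizes assumed)
    src_mask = torch.zeros(1, 7, dtype=torch.bool)
    b = 4                                         # number of hypotheses (assumed)
    exp_src = repeat(src.squeeze(0), "s e -> b s e", b=b)
    exp_src_mask = repeat(src_mask.squeeze(0), "s -> b s", b=b)
    assert exp_src.shape == (b, 7, 256) and exp_src_mask.shape == (b, 7)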
Example no. 18
    def get_fine_windows_prediction(self, desc_c1, desc_c2, desc_f1, desc_f2,
                                    matches, data):

        stride = data['fine_size_1'][0] // data['coarse_size_1'][0]

        if matches['b_ids'].shape[0] == 0:
            feat_f1_unfold = torch.empty(0,
                                         self.win_size**2,
                                         data["dim_f"],
                                         device=desc_c1.device)
            feat_f2_unfold = torch.empty(0,
                                         self.win_size**2,
                                         data["dim_f"],
                                         device=desc_c2.device)

        else:
            # 1. unfold(crop) all local windows
            feat_f1_unfold = functional.unfold(desc_f1,
                                               kernel_size=(self.win_size,
                                                            self.win_size),
                                               stride=stride,
                                               padding=self.win_size // 2)

            feat_f2_unfold = functional.unfold(desc_f2,
                                               kernel_size=(self.win_size,
                                                            self.win_size),
                                               stride=stride,
                                               padding=self.win_size // 2)

            feat_f1_unfold = rearrange(feat_f1_unfold,
                                       'n (c ww) l -> n l ww c',
                                       ww=self.win_size**2)
            feat_f2_unfold = rearrange(feat_f2_unfold,
                                       'n (c ww) l -> n l ww c',
                                       ww=self.win_size**2)

            # 2. select only the predicted matches
            feat_f1_unfold = feat_f1_unfold[matches['b_ids'],
                                            matches['i_ids']]  # [n, ww, cf]
            feat_f2_unfold = feat_f2_unfold[matches['b_ids'], matches['j_ids']]

        return feat_f1_unfold, feat_f2_unfold
Example no. 19
def tensor_train_example_numpy():
    # kept here just for a collection, only tested for numpy
    # https://arxiv.org/pdf/1509.06569.pdf, (5)
    x = numpy.ones([3, 4, 5, 6])
    rank = 4
    if numpy.__version__ < '1.15.0':
        # numpy.einsum fails here, skip test
        return
    # creating appropriate Gs
    Gs = [numpy.ones([d, d, rank, rank]) for d in x.shape]
    Gs[0] = Gs[0][:, :, :1, :]
    Gs[-1] = Gs[-1][:, :, :, :1]

    # einsum way
    y = x.reshape((1, ) + x.shape)
    for G in Gs:
        # taking partial results left-to-right
        # y = numpy.einsum('i j alpha beta, alpha i ...  -> beta ... j', G, y)
        y = numpy.einsum('i j a b, a i ...  -> b ... j', G, y)
    y1 = y.reshape(-1)

    # alternative way
    y = x.reshape(-1)
    for G in Gs:
        i, j, alpha, beta = G.shape
        y = rearrange(y, '(i rest alpha) -> rest (alpha i)', alpha=alpha, i=i)
        y = y @ rearrange(G, 'i j alpha beta -> (alpha i) (j beta)')
        y = rearrange(y, 'rest (beta j) -> (beta rest j)', beta=beta, j=j)
    y2 = y
    assert numpy.allclose(y1, y2)

    # yet another way
    y = x
    for G in Gs:
        i, j, alpha, beta = G.shape
        y = rearrange(y,
                      'i ... (j alpha) -> ... j (alpha i)',
                      alpha=alpha,
                      i=i)
        y = y @ rearrange(G, 'i j alpha beta -> (alpha i) (j beta)')
    y3 = y.reshape(-1)
    assert numpy.allclose(y1, y3)
Example no. 20
def test_rearrange_consistency_numpy():
    shape = [1, 2, 3, 5, 7, 11]
    x = numpy.arange(numpy.prod(shape)).reshape(shape)
    for pattern in [
            'a b c d e f -> a b c d e f',
            'b a c d e f -> a b d e f c',
            'a b c d e f -> f e d c b a',
            'a b c d e f -> (f e) d (c b a)',
            'a b c d e f -> (f e d c b a)',
    ]:
        result = rearrange(x, pattern)
        assert len(numpy.setdiff1d(x, result)) == 0
        assert result.dtype == x.dtype

    result = rearrange(x, 'a b c d e f -> a (b) (c d e) f')
    assert numpy.array_equal(x.flatten(), result.flatten())

    result = rearrange(x, 'a aa aa1 a1a1 aaaa a11 -> a aa aa1 a1a1 aaaa a11')
    assert numpy.array_equal(x, result)

    result1 = rearrange(x, 'a b c d e f -> f e d c b a')
    result2 = rearrange(x, 'f e d c b a -> a b c d e f')
    assert numpy.array_equal(result1, result2)

    result = rearrange(rearrange(x, 'a b c d e f -> (f d) c (e b) a'),
                       '(f d) c (e b) a -> a b c d e f',
                       b=2,
                       d=5)
    assert numpy.array_equal(x, result)

    sizes = dict(zip('abcdef', shape))
    temp = rearrange(x, 'a b c d e f -> (f d) c (e b) a', **sizes)
    result = rearrange(temp, '(f d) c (e b) a -> a b c d e f', **sizes)
    assert numpy.array_equal(x, result)

    x2 = numpy.arange(2 * 3 * 4).reshape([2, 3, 4])
    result = rearrange(x2, 'a b c -> b c a')
    assert x2[1, 2, 3] == result[2, 3, 1]
    assert x2[0, 1, 2] == result[1, 2, 0]
Example no. 21
    def forward(self, k, q, nbhd_idx):
        """
        Parameters
        ----------
        query_f : torch.Tensor
            shape (bs, n, c_in)
        key_f : torch.Tensor
            shape (bs, n, m, c_in)

        Returns
        -------
        torch.Tensor
            shape (bs, n, m, h)
        """
        # (bs, m, c_in) -> (bs, m, embed_dim) -> (bs * n_heads, m, h_dim)
        K = rearrange(self.fc_k(k), "b n (h d) -> (b h) n d", h=self.n_heads)
        # (bs, n, c_in) -> (bs, n, embed_dim) -> (bs * n_heads, n, h_dim)
        Q = rearrange(self.fc_q(q), "b n (h d) -> (b h) n d", h=self.n_heads)
        # (bs * n_heads, n, h_dim), (bs * n_heads, m, h_dim) -> (bs * n_heads, n, m)
        A_ = Q.bmm(K.transpose(1, 2)) / math.sqrt(self.head_dim)

        # (bs * n_heads, n, nbhd_size) -> (bs, n, nbhd_size, n_heads)
        A_ = rearrange(A_, "(b h) n m -> b n m h", h=self.n_heads)

        # Batch indices
        B = (torch.arange(
            A_.shape[0],
            device=A_.device).long()[:, None, None].expand(*nbhd_idx.shape))

        # Get NNS indexes
        NNS = (torch.arange(
            A_.shape[1],
            device=A_.device).long()[None, :, None].expand(*nbhd_idx.shape))

        A_ = A_[B, NNS, nbhd_idx]

        return A_
Example no. 22
def train_single_epoch(epoch, model, train_loader, optimizer, criterion,
                       device):
    """
	Train single epoch
	"""
    for i, batch in enumerate(iter(train_loader)):
        img, target = batch
        img, target = img.to(device), target.to(device)

        optimizer.zero_grad()

        aux = torch.ones(target.shape[0], 1,
                         dtype=int) * model.vocab.word_to_index['<PAD>']
        aux = aux.to(target.device)
        target = torch.cat([target, aux], dim=1)

        target_loss = target

        output = model(img, target[:, :-1])
        output = rearrange(output,
                           'bsz seq_len vocab_size -> bsz vocab_size seq_len')
        loss = criterion(output, target_loss[:, 1:])
        if i % 100 == 0:
            print(
                '--------------------------------------------------------------------------------------------------'
            )
            print(
                f'Epoch {epoch} batch: {i}/{len(train_loader)} loss: {loss.item()}'
            )

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.25)
        optimizer.step()

        generated_captions = torch.argmax(output.transpose(1, 2), dim=-1)
        expected_captions = target[..., 1:]
        generated_captions = generated_captions[:16, ...]
        expected_captions = expected_captions[:16, ...]
        images = img[:16, ...]

        write_on_tensorboard(epoch=len(train_loader) * (epoch - 1) + i,
                             model=model,
                             loss=loss.item(),
                             images=images,
                             expected_captions=expected_captions,
                             generated_captions=generated_captions)
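The rearrange before the loss is needed because PyTorch's cross-entropy expects class scores on dim 1, i.e. (batch, vocab, seq_len), while the model emits (batch, seq_len, vocab). A minimal sketch with assumed sizes, assuming the criterion is a cross-entropy-style loss:

    import torch
    import torch.nn.functional as F
    from einops import rearrange

    logits = torch.randn(4, 12, 100)              # bsz seq_len vocab_size (sizes assumed)
    targets = torch.randint(0, 100, (4, 12))      # bsz seq_len
    logits = rearrange(logits, 'bsz seq_len vocab_size -> bsz vocab_size seq_len')
    loss = F.cross_entropy(logits, targets)       # expects (N, C, d1) scores with (N, d1) targets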
Example no. 23
def test_concatenations_and_stacking():
    for backend in imp_op_backends:
        print('testing shapes for ', backend.framework_name)
        for n_arrays in [1, 2, 5]:
            shapes = [[], [1], [1, 1], [2, 3, 5, 7], [1] * 6]
            for shape in shapes:
                if backend.framework_name == 'mxnet.ndarray' and len(
                        shape) == 0:
                    # known bug of mxnet
                    continue
                arrays1 = [
                    numpy.arange(i, i + numpy.prod(shape)).reshape(shape)
                    for i in range(n_arrays)
                ]
                arrays2 = [backend.from_numpy(array) for array in arrays1]
                result0 = numpy.asarray(arrays1)
                result1 = rearrange(arrays1, '...->...')
                result2 = rearrange(arrays2, '...->...')
                assert numpy.array_equal(result0, result1)
                assert numpy.array_equal(result1, backend.to_numpy(result2))

                result1 = rearrange(arrays1, 'b ... -> ... b')
                result2 = rearrange(arrays2, 'b ... -> ... b')
                assert numpy.array_equal(result1, backend.to_numpy(result2))
Example no. 24
    def forward(self, pairwise_locations, mask, query_features, key_features,
                nbhd_idx):

        # (bs, m, c_in) -> (bs, m, embed_dim) -> (bs * n_heads, m, h_dim)
        K = rearrange(self.W_k(key_features),
                      "b n (h d) -> (b h) n d",
                      h=self.n_heads)
        # (bs, n, c_in) -> (bs, n, embed_dim) -> (bs * n_heads, n, h_dim)
        Q = rearrange(self.W_q(query_features),
                      "b n (h d) -> (b h) n d",
                      h=self.n_heads)
        e = rearrange(self.W_l(pairwise_locations),
                      "b n m (h d) -> (b h) n m d",
                      h=self.n_heads)
        u = self.u.repeat([mask.shape[0], 1, 1])
        v = self.v.repeat([mask.shape[0], 1, 1])
        nbhd_idx = nbhd_idx.repeat_interleave(self.n_heads, dim=0)

        # Get NNS indexes
        NNS = (torch.arange(
            nbhd_idx.shape[1],
            device=nbhd_idx.device)[None, :,
                                    None].long().expand(*nbhd_idx.shape))

        # Batch indices
        B = (torch.arange(
            nbhd_idx.shape[0],
            device=nbhd_idx.device)[:, None,
                                    None].long().expand(*nbhd_idx.shape))

        A_ = (Q.bmm(K.transpose(1, 2))[B, NNS, nbhd_idx] + self.lamda *
              (e @ (Q + v).unsqueeze(-1)).squeeze() +
              (u @ K.transpose(1, 2))[B, 0, nbhd_idx]) / math.sqrt(
                  self.head_dim)

        return rearrange(A_, "(b h) n m -> b n m h", h=self.n_heads)
Example no. 25
    def forward(self, img: FloatTensor,
                img_mask: LongTensor) -> Tuple[FloatTensor, LongTensor]:
        """encode image to feature

        Parameters
        ----------
        img : FloatTensor
            [b, 1, h', w']
        img_mask: LongTensor
            [b, h', w']

        Returns
        -------
        Tuple[FloatTensor, LongTensor]
            [b, t, d], [b, t]
        """
        feature, mask = self.model(img, img_mask)
        feature = self.feature_proj(feature)

        feature = self.pos_enc_2d(feature, mask)

        feature = rearrange(feature, "b d h w -> b (h w) d")
        mask = rearrange(mask, "b h w -> b (h w)")
        return feature, mask
Example no. 26
def evaluate_tr(model, test_loader, device, epoch, criterion):
    model.eval()

    total_loss = 0.

    with torch.no_grad():
        for idx, batch in enumerate(iter(test_loader)):
            img, target = batch
            img = img.to(device)
            target = target.to(device)

            aux = torch.ones(target.shape[0], 1,
                             dtype=int) * model.vocab.word_to_index['<PAD>']
            aux = aux.to(target.device)
            target = torch.cat([target, aux], dim=1)

            target_loss = target

            output = model(img, target[:, :-1])
            output = rearrange(
                output,
                'bsz seq_len vocab_size -> bsz vocab_size seq_len')
            total_loss = criterion(output, target_loss[:, 1:])

            if idx % 10 == 0:
                sentence = []
                num_img = random.randint(0, img.shape[0] - 1)
                sentence = model.generate(image=img[num_img].unsqueeze(0))
                reference = model.vocab.generate_caption(target[num_img, 1:])
                print(f'Evaluating batch {idx} / {len(test_loader)}...')
                print(f'Gen example (no teacher_forcing): {sentence}')
                print(f'Exp example: {reference}')
                # string = str(num_img) + '_epoch_' + str(epoch) + '_plot.png'
                # string_att = str(num_img) + '_epoch_' + str(epoch) + '_plot_att.png'
                # Visualization.show_image(img[num_img], title=example, fn=string)

            generated_captions = torch.argmax(output.transpose(1, 2), dim=-1)
            expected_captions = target[..., 1:]
            generated_captions = generated_captions[:16, ...]
            expected_captions = expected_captions[:16, ...]

            write_on_tensorboard_evaluate(
                model=model,
                epoch=len(test_loader) * (epoch - 1) + idx,
                loss=total_loss,
                expected_captions=expected_captions,
                generated_captions=generated_captions)

            # total_loss += corpus_bleu(target_s, sentences, (1.0/1.0,))

        return total_loss
Example no. 27
    def test_parse_shape_symbolic(self, shape):
        print('special shape parsing for', self.backend.framework_name)
        if self.backend.framework_name in ['mxnet.symbol']:
            # mxnet can't normally run inference
            shape = [10, 20, 30, 40]
        input_symbol = self.backend.create_symbol(shape)

        shape_placeholder = parse_shape(input_symbol, 'a b c d')
        shape = {}
        for name, symbol in shape_placeholder.items():
            shape[name] = symbol if isinstance(symbol, int) \
                else self.backend.eval_symbol(symbol, [(input_symbol, numpy.zeros([10, 20, 30, 40]))])
        print(shape)
        result_placeholder = rearrange(input_symbol, 'a b (c1 c2) (d1 d2) -> (a b d1) c1 (c2 d2)',
                                       **parse_shape(input_symbol, 'a b c1 _'), d2=2)
        result = self.backend.eval_symbol(result_placeholder, [(input_symbol, numpy.zeros([10, 20, 30, 40]))])
        print(result.shape)
        assert result.shape == (10 * 20 * 20, 30, 1 * 2)
        assert numpy.allclose(result, 0)
Example no. 28
def train_single_batch(epoch, model, batch, optimizer, criterion, device):
    img, target = batch
    img, target = img.to(device), target.to(device)

    optimizer.zero_grad()
    target[target == 2] = 0
    output = model(img, target[:, :-1])
    output = rearrange(output,
                       'bsz seq_len vocab_size -> bsz vocab_size seq_len')
    loss = criterion(output, target[..., 1:])
    print(
        '--------------------------------------------------------------------------------------------------'
    )
    print(f'Loss: {loss.item()}')

    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                   max_norm=0.25,
                                   error_if_nonfinite=True)
    optimizer.step()

    if not epoch % 2:
        generated_captions = torch.argmax(output.transpose(1, 2), dim=-1)
        expected_captions = target[..., 1:]
        generated_captions = generated_captions[:16, ...]
        expected_captions = expected_captions[:16, ...]
        images = img[:16, ...]
        write_on_tensorboard(epoch=epoch,
                             model=model,
                             loss=loss.item(),
                             images=images,
                             expected_captions=expected_captions,
                             generated_captions=generated_captions)
Example no. 29
 def operation(x):
     if reduction == 'rearrange':
         return rearrange(x, pattern, **axes_lengths)
     else:
         return reduce(x, pattern, reduction, **axes_lengths)
Example no. 30
    def forward(self, head_coarse, head_fine, x1, x2, kpts, scores, img_size_1,
                img_size_2, valid_size_1, valid_size_2, intrinsics1,
                intrinsics2, extrinsics1, extrinsics2):

        # select fpn levels
        x1_f, x1_c = self._get_level(x1)
        x2_f, x2_c = self._get_level(x2)

        data = {
            'B': x1[0].size(0),
            'img_size_1': img_size_1,
            'img_size_2': img_size_2,
            'coarse_size_1': x1_c.shape[2:],
            'coarse_size_2': x2_c.shape[2:],
            'fine_size_1': x1_f.shape[2:],
            'fine_size_2': x2_f.shape[2:],
            'dim_c': head_coarse.embedding_size,
            'dim_f': head_fine.embedding_size,
            'win_size': self.win_size
        }

        try:
            # 1. sparse annotation generation

            if kpts.all_none:
                raise Empty

            with torch.no_grad():
                # Compute F and P matrices
                P, E, F = self.get_transformation(intrinsics1, intrinsics2,
                                                  extrinsics1, extrinsics2)

                # Keypoint Generator
                kpts = self.generator(kpts, scores, valid_size_2, intrinsics1,
                                      intrinsics2, F, P)

            if kpts.all_none:
                raise Empty

            # Get contiguous view and valid transformations
            kpts, kpts_idx = kpts.contiguous

            # 2. coarse-level module
            # add featmap with positional encoding, then flatten it to sequence [N, HW, C]
            desc_c1 = rearrange(self.pos_encoding(x1_c),
                                'n c h w -> n (h w) c')
            desc_c2 = rearrange(self.pos_encoding(x2_c),
                                'n c h w -> n (h w) c')

            mask_c0 = mask_c1 = None  # mask is useful in training
            # if 'mask0' in data:
            #     mask_c0, mask_c1 = data['mask0'].flatten(-2), data['mask1'].flatten(-2)

            # Run coarse head
            set_active_group(head_coarse, active_group(True))
            desc_c1, desc_c2 = head_coarse(desc_c1, desc_c2, mask_c0, mask_c1)

            # 3. match coarse-level
            matches = self.coarse_matching(desc_c1,
                                           desc_c2,
                                           data,
                                           mask_c0=mask_c1,
                                           mask_c1=mask_c1)

            # 4. coarse to fine refinement
            # feat_f0_unfold, feat_f1_unfold = self.fine_preprocess(feat_f0, feat_f1, feat_c0, feat_c1, data)
            desc_f1, desc_f2 = self.get_fine_windows_prediction(
                desc_c1, desc_c2, x1_f, x2_f, matches, data)

            # 5. fine-level module
            if desc_f1.size(0) != 0:  # at least one coarse level predicted
                # Run fine head
                set_active_group(head_fine, active_group(True))
                desc_f1, desc_f2 = head_fine(desc_f1, desc_f2)

            # 6. match fine-level
            matches = self.fine_matching(desc_f1, desc_f2, matches, data)

            # Compute Loss
            F, _ = F.contiguous
            P, _ = P.contiguous

            epipolar_loss = self.loss(kpts, F[kpts_idx], P[kpts_idx], matches,
                                      data)

        except Empty:
            active_group(False)
            epipolar_loss = sum(x_i.sum() for x_i in x1) * 0

        return epipolar_loss