Example #1
    def get_triplet_batch(self, cross_modal_pairs, gpu=False):
        """Yield mini-batches of (anchor, positive, negative) tensors for
        each requested cross-modal pair, shuffling both modalities."""
        assert isinstance(cross_modal_pairs, list)
        # The first element of each pair names the anchor modality.
        modalities = set(pair[0] for pair in cross_modal_pairs)
        if (self.x_to_y_name[0] in modalities
                and self.y_to_x_name[0] in modalities):
            longest_modality = max(self.x_len, self.y_len)
        elif self.x_to_y_name[0] in modalities:
            longest_modality = self.x_len
        elif self.y_to_x_name[0] in modalities:
            longest_modality = self.y_len
        else:
            raise ValueError(
                'No modalities found in cross_modal_pairs: {}.'.format(
                    cross_modal_pairs))

        num_batches = int(np.ceil(longest_modality / self.batch_size))

        # Shuffle both modalities independently for this pass over the data.
        x_idxs = np.arange(self.x_len)
        y_idxs = np.arange(self.y_len)
        np.random.shuffle(x_idxs)
        np.random.shuffle(y_idxs)
        for i in range(num_batches):
            x_start, x_end = self._get_triplet_batch_start_end(i, self.x_len)
            y_start, y_end = self._get_triplet_batch_start_end(i, self.y_len)
            batch_dict = {}
            for cross_modal_pair in cross_modal_pairs:
                if cross_modal_pair[0] == self.x_to_y_name[0]:
                    anch_vals = self._x
                    start = x_start
                    end = x_end
                    idxs = x_idxs
                else:
                    anch_vals = self._y
                    start = y_start
                    end = y_end
                    idxs = y_idxs
                if cross_modal_pair[1] == self.x_to_y_name[1]:
                    other_vals = self._y
                else:
                    other_vals = self._x
                triplets = self.triplets[cross_modal_pair]
                batch_anchors_idxs = np.repeat(triplets[0][idxs[start:end]],
                                               self.num_triplets)
                batch_pos_idxs = triplets[1][idxs[start:end]].flatten()
                # Both branches produce an index array so that anch, pos and
                # neg below are all converted to tensors consistently.
                if self.sampling_method in ['random']:
                    batch_neg_idxs = triplets[2][idxs[start:end]].flatten()
                else:
                    batch_neg_idxs = np.repeat(triplets[2][idxs[start:end]],
                                               self.num_triplets)
                anch = to_tensor(anch_vals[batch_anchors_idxs, :], gpu=gpu)
                pos = to_tensor(other_vals[batch_pos_idxs, :], gpu=gpu)
                neg = to_tensor(other_vals[batch_neg_idxs, :], gpu=gpu)
                batch_dict[cross_modal_pair] = (anch, pos, neg)
            yield batch_dict
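
A minimal consumption sketch for this generator. The names dataset and triplet_loss, and the ('t', 'v')-style pair tuples, are stand-ins, not the repository's actual driver code:

for batch_dict in dataset.get_triplet_batch([('t', 'v'), ('v', 't')]):
    for pair, (anch, pos, neg) in batch_dict.items():
        # Each element is a (batch_size * num_triplets, dim) tensor.
        loss = triplet_loss(anch, pos, neg)
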
Example #2
def _prepare_data(img, img_transform, cfg, device):
    """Resize, normalise and batch a single image for test-time inference."""
    ori_shape = img.shape
    img, img_shape, pad_shape, scale_factor = img_transform(
        img,
        scale=cfg.data.test.img_scale,
        keep_ratio=cfg.data.test.get('resize_keep_ratio', True))
    # Add a batch dimension and move the image to the target device.
    img = to_tensor(img).to(device).unsqueeze(0)
    img_meta = [
        dict(ori_shape=ori_shape,
             img_shape=img_shape,
             pad_shape=pad_shape,
             scale_factor=scale_factor,
             flip=False)
    ]
    return dict(img=[img], img_meta=[img_meta])
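
This helper follows mmdetection's legacy single-image test path. A hedged driver sketch; model, cfg and img_transform are assumed to come from the surrounding inference script, and demo.jpg is a placeholder:

import mmcv
import torch

img = mmcv.imread('demo.jpg')  # HWC, BGR ndarray
data = _prepare_data(img, img_transform, cfg, device='cuda:0')
with torch.no_grad():
    # Legacy mmdet detectors take return_loss=False in test mode.
    result = model(return_loss=False, rescale=True, **data)
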
Example #3
    def forward(self, x):
        x1 = self.fc(x)

        if self.add_batch_norm:
            x1 = self.batch_norm(x1)

        # Concatenate the input with its projection and gate:
        # F.glu returns x * sigmoid(x1) when split along dim 1.
        x = th.cat((x, x1), 1)

        return F.glu(x, 1)


if __name__ == '__main__':
    sme = Single_Modality_Embedding([1024, 256], 4)
    from datasets import to_tensor
    sme(to_tensor(np.random.rand(64, 1024)))
    modality_dict = {
        'text': {
            'layer_sizes': [200, 256],
            'num_layers': 2
        },
        'visual': {
            'layer_sizes': [1024, 256],
            'num_layers': 2
        }
    }
    mmen = MMEN(modality_dict)
    xs = mmen([{
        'text': to_tensor(np.random.rand(64, 200))
    }, {
        'visual': to_tensor(np.random.rand(64, 1024))
    }])
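
The gated forward in this example leans on F.glu's split-and-gate behaviour; a small self-contained check (shapes are arbitrary) makes the equivalence explicit:

import torch as th
import torch.nn.functional as F

x = th.randn(4, 8)
x1 = th.randn(4, 8)
# F.glu splits its input in half along dim 1 and returns
# first_half * sigmoid(second_half), so cat-then-glu gates x by x1.
out = F.glu(th.cat((x, x1), 1), 1)
assert th.allclose(out, x * th.sigmoid(x1))
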
Example #4
    modality_dict = {
        'verb': {
            't': {
                'num_layers': 2,
                'layer_sizes': [200, 256]
            },
            'v': {
                'num_layers': 2,
                'layer_sizes': [1024, 256]
            }
        },
        'noun': {
            't': {
                'num_layers': 2,
                'layer_sizes': [200, 256]
            },
            'v': {
                'num_layers': 2,
                'layer_sizes': [1024, 256]
            }
        }
    }
    jpose = JPOSE(modality_dict)
    from datasets import to_tensor
    v = to_tensor(np.zeros((64, 1024)))
    t = to_tensor(np.zeros((64, 200)))
    verb = {'verb': [{'v': v}, {'t': t}, {'t': t + 1}]}
    noun = {'noun': [{'v': v}, {'t': t}, {'t': t + 1}]}
    jpose(verb)
    jpose(noun)
    action = {**verb, **noun}
    jpose(action, action_output=True)
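
Read alongside Example #6, the three dicts per part of speech look like (anchor, positive, negative) inputs, one modality dict each. A hedged restatement of the structure built above; the triplet interpretation is an assumption, not confirmed by the source:

# Assumed reading: each PoS maps to [anchor, positive, negative] batches,
# each batch being a {modality_name: tensor} dict.
anchor, positive, negative = verb['verb']
assert set(anchor) == {'v'} and set(positive) == {'t'}
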
Example #5
    def get_eval_batch(self, gpu=False):
        return to_tensor(self._x, gpu=gpu), to_tensor(self._y, gpu=gpu)
Example #6
    def forward(self, x, pos, neg):
        # Squared Euclidean distances from the anchor to positive/negative.
        d_pos = (x - pos).pow(2).sum(1)
        d_neg = (x - neg).pow(2).sum(1)
        # Hinge on the margin: max(0, margin + d_pos - d_neg).
        losses = F.relu(self.margin + d_pos - d_neg)
        return self.weight * self.reduction(losses)


if __name__ == '__main__':
    triplet_loss_m = TripletLoss(0.1, 1.0)
    triplet_loss_m_0_1 = TripletLoss(0.1, 0.1)
    triplet_loss_s = TripletLoss(0.1, 1.0, 'sum')
    triplet_loss_n = TripletLoss(0.1, 1.0, 'none')

    from datasets import to_tensor
    xs = to_tensor(np.random.rand(64, 256))
    pos = to_tensor(np.random.rand(64, 256))
    neg = to_tensor(np.random.rand(64, 256))

    loss_m = triplet_loss_m(xs, pos, neg)
    loss_m_0_1 = triplet_loss_m_0_1(xs, pos, neg)
    loss_s = triplet_loss_s(xs, pos, neg)
    loss_n = triplet_loss_n(xs, pos, neg)

    assert 0.1 * loss_m == loss_m_0_1

    assert loss_n.mean() == loss_m
    assert loss_n.sum() == loss_s

    assert loss_m.shape == loss_s.shape
    assert loss_n.shape == th.Size([64])
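
The test above implies a constructor TripletLoss(margin, weight, reduction='mean') in which reduction is stored as a callable. A minimal sketch of such a class, consistent with the calls above but not necessarily the repository's exact code:

import torch as th
import torch.nn.functional as F
from torch import nn

class TripletLoss(nn.Module):
    def __init__(self, margin, weight, reduction='mean'):
        super().__init__()
        self.margin = margin
        self.weight = weight
        # Map the reduction name to a callable so forward can apply it
        # directly via self.reduction(losses).
        self.reduction = {
            'mean': th.mean,
            'sum': th.sum,
            'none': lambda t: t,
        }[reduction]

    def forward(self, x, pos, neg):
        d_pos = (x - pos).pow(2).sum(1)
        d_neg = (x - neg).pow(2).sum(1)
        losses = F.relu(self.margin + d_pos - d_neg)
        return self.weight * self.reduction(losses)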