Example #1
def _get_sac_trainer_params(env, sac_model_params, use_gpu):
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)
    q1_network = FullyConnectedParametricDQN(
        state_dim,
        action_dim,
        sac_model_params.q_network.layers,
        sac_model_params.q_network.activations,
    )
    q2_network = None
    if sac_model_params.training.use_2_q_functions:
        q2_network = FullyConnectedParametricDQN(
            state_dim,
            action_dim,
            sac_model_params.q_network.layers,
            sac_model_params.q_network.activations,
        )
    value_network = FullyConnectedNetwork(
        [state_dim] + sac_model_params.value_network.layers + [1],
        sac_model_params.value_network.activations + ["linear"],
    )
    actor_network = GaussianFullyConnectedActor(
        state_dim,
        action_dim,
        sac_model_params.actor_network.layers,
        sac_model_params.actor_network.activations,
    )
    if use_gpu:
        q1_network.cuda()
        if q2_network:
            q2_network.cuda()
        value_network.cuda()
        actor_network.cuda()
    value_network_target = deepcopy(value_network)
    min_action_range_tensor_training = torch.full((1, action_dim), -1 + 1e-6)
    max_action_range_tensor_training = torch.full((1, action_dim), 1 - 1e-6)
    action_range_low = env.action_space.low.astype(np.float32)
    action_range_high = env.action_space.high.astype(np.float32)
    min_action_range_tensor_serving = torch.from_numpy(action_range_low).unsqueeze(
        dim=0
    )
    max_action_range_tensor_serving = torch.from_numpy(action_range_high).unsqueeze(
        dim=0
    )

    trainer_args = [
        q1_network,
        value_network,
        value_network_target,
        actor_network,
        sac_model_params,
    ]
    trainer_kwargs = {
        "q2_network": q2_network,
        "min_action_range_tensor_training": min_action_range_tensor_training,
        "max_action_range_tensor_training": max_action_range_tensor_training,
        "min_action_range_tensor_serving": min_action_range_tensor_serving,
        "max_action_range_tensor_serving": max_action_range_tensor_serving,
    }
    return trainer_args, trainer_kwargs
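A minimal, standalone sketch of the action-range construction above (the NumPy arrays stand in for env.action_space.low/high, which the original reads from the Gym environment):

import numpy as np
import torch

action_dim = 2
# Training-time bounds sit just inside (-1, 1), presumably so that
# inverse-tanh rescaling of sampled actions stays finite.
min_action_training = torch.full((1, action_dim), -1 + 1e-6)
max_action_training = torch.full((1, action_dim), 1 - 1e-6)
# Serving-time bounds come from the environment's true action space.
action_low = np.array([-2.0, -2.0], dtype=np.float32)   # stand-in values
action_high = np.array([2.0, 2.0], dtype=np.float32)    # stand-in values
min_action_serving = torch.from_numpy(action_low).unsqueeze(dim=0)
max_action_serving = torch.from_numpy(action_high).unsqueeze(dim=0)
print(min_action_training.shape, min_action_serving.shape)  # (1, 2) and (1, 2)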
Example #2
 def __init__(self, in_features, out_features, sigma_zero=0.4, bias=True):
     super(NoisyFactorizedLinear, self).__init__(in_features, out_features, bias=bias)
     sigma_init = sigma_zero / math.sqrt(in_features)
     self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
     self.register_buffer("epsilon_input", torch.zeros(1, in_features))
     self.register_buffer("epsilon_output", torch.zeros(out_features, 1))
     if bias:
         self.sigma_bias = nn.Parameter(torch.full((out_features,), sigma_init))
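The constructor above only allocates the noise buffers; it never shows how they are used. Below is a hedged sketch of a forward pass in the factorized-noise style of Fortunato et al. (2017), where input and output noise vectors are transformed by f(e) = sign(e)*sqrt(|e|) and combined by an outer product. Treat it as an illustration of the technique, not this repository's code:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class NoisyFactorizedLinear(nn.Linear):
    def __init__(self, in_features, out_features, sigma_zero=0.4, bias=True):
        super().__init__(in_features, out_features, bias=bias)
        sigma_init = sigma_zero / math.sqrt(in_features)
        self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
        self.register_buffer("epsilon_input", torch.zeros(1, in_features))
        self.register_buffer("epsilon_output", torch.zeros(out_features, 1))
        if bias:
            self.sigma_bias = nn.Parameter(torch.full((out_features,), sigma_init))

    def forward(self, x):
        # Resample the factorized noise on every call.
        self.epsilon_input.normal_()
        self.epsilon_output.normal_()
        f = lambda e: torch.sign(e) * torch.sqrt(torch.abs(e))
        eps_in = f(self.epsilon_input)      # (1, in_features)
        eps_out = f(self.epsilon_output)    # (out_features, 1)
        noise = eps_out * eps_in            # outer product: (out_features, in_features)
        bias = self.bias
        if bias is not None:
            bias = bias + self.sigma_bias * eps_out.squeeze(1)
        return F.linear(x, self.weight + self.sigma_weight * noise, bias)

layer = NoisyFactorizedLinear(4, 3)
print(layer(torch.randn(2, 4)).shape)  # torch.Size([2, 3]), re-noised each call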
Example #3
 def __init__(self, in_features, out_features, sigma_init=0.017, bias=True):
     super(NoisyLinear, self).__init__(in_features, out_features, bias=bias)
     self.sigma_weight = nn.Parameter(torch.full((out_features, in_features), sigma_init))
     self.register_buffer("epsilon_weight", torch.zeros(out_features, in_features))
     if bias:
         self.sigma_bias = nn.Parameter(torch.full((out_features,), sigma_init))
         self.register_buffer("epsilon_bias", torch.zeros(out_features))
     self.reset_parameters()
Example #4
def test_apply_init():
    this_tests(apply_leaf, apply_init)
    m = simple_cnn(b,bn=True)
    all2 = lambda m: nn.init.constant_(m.weight,0.2) if hasattr(m, 'weight') else m
    all7 = lambda m: nn.init.constant_(m,0.7)
    apply_leaf(m,all2)
    apply_init(m,all7)
    conv1_w = torch.full([6,3,3,3],0.7)
    bn1_w = torch.full([6],0.2)
    assert conv1_w.equal(m[0][0].weight), "Expected first convolution layer's weights to be %r" % conv1_w
    assert bn1_w.equal(m[0][2].weight), "Expected first batch norm layer's weights to be %r" % bn1_w
Example #5
 def __init__(self, memory_spec, algorithm, body):
     util.set_attr(self, memory_spec, [
         'alpha',
         'epsilon',
         'batch_size',
         'max_size',
         'use_cer',
     ])
     self.epsilon = torch.full((1,), self.epsilon)
     self.alpha = torch.full((1,), self.alpha)
     super(PrioritizedReplay, self).__init__(memory_spec, algorithm, body)
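The torch.full((1,), x) calls above are just lifting scalar hyperparameters into one-element tensors, so they can later be moved to a device and used in tensor arithmetic without re-wrapping. A tiny standalone illustration of the idiom (not SLM-Lab code):

import torch

epsilon = torch.full((1,), 0.01)       # tensor([0.0100])
alpha = torch.full((1,), 0.6)          # tensor([0.6000])
priority = (epsilon + 1.0) ** alpha    # stays a tensor end to end
print(priority)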
Example #6
    def pad(self, minibatch):
        """Pad a batch of examples to the length of the longest example.

        Args:
            minibatch (List[torch.FloatTensor]): A list of audio data,
                each having shape ``(n_feats, len)`` where ``len`` is variable.

        Returns:
            torch.FloatTensor or Tuple[torch.FloatTensor, List[int]]: The
                padded tensor of shape ``(batch_size, 1, n_feats, max_len)``,
                and a list of the lengths if ``self.include_lengths`` is
                ``True``; otherwise just the padded tensor.
        """

        assert not self.pad_first and not self.truncate_first \
            and not self.fix_length and self.sequential
        minibatch = list(minibatch)
        lengths = [x.size(1) for x in minibatch]
        max_len = max(lengths)
        nfft = minibatch[0].size(0)
        sounds = torch.full((len(minibatch), 1, nfft, max_len), self.pad_token)
        for i, (spect, len_) in enumerate(zip(minibatch, lengths)):
            sounds[i, :, :, 0:len_] = spect
        if self.include_lengths:
            return (sounds, lengths)
        return sounds
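A standalone version of the padding pattern above, with the field attributes replaced by plain arguments (the shapes below are made up for the demo):

import torch

def pad_audio(minibatch, pad_token=0.0):
    # Each entry has shape (n_feats, len) with variable len.
    minibatch = list(minibatch)
    lengths = [x.size(1) for x in minibatch]
    max_len = max(lengths)
    nfft = minibatch[0].size(0)
    sounds = torch.full((len(minibatch), 1, nfft, max_len), pad_token)
    for i, (spect, len_) in enumerate(zip(minibatch, lengths)):
        sounds[i, :, :, 0:len_] = spect
    return sounds, lengths

batch = [torch.randn(80, 12), torch.randn(80, 7)]
sounds, lengths = pad_audio(batch)
print(sounds.shape, lengths)  # torch.Size([2, 1, 80, 12]) [12, 7]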
Example #7
    def __init__(self, pad, bos, eos, batch_size, device, parallel_paths,
                 min_length, block_ngram_repeat, exclusion_tokens,
                 return_attention, max_length):

        # magic indices
        self.pad = pad
        self.bos = bos
        self.eos = eos

        # result caching
        self.predictions = [[] for _ in range(batch_size)]
        self.scores = [[] for _ in range(batch_size)]
        self.attention = [[] for _ in range(batch_size)]

        self.alive_seq = torch.full(
            [batch_size * parallel_paths, 1], self.bos,
            dtype=torch.long, device=device)
        self.is_finished = torch.zeros(
            [batch_size, parallel_paths],
            dtype=torch.uint8, device=device)
        self.alive_attn = None

        self.min_length = min_length
        self.max_length = max_length
        self.block_ngram_repeat = block_ngram_repeat
        self.exclusion_tokens = exclusion_tokens
        self.return_attention = return_attention

        self.done = False
Example #8
    def _viterbi_decode(self, feats):  # just for decode
        backpointers = []
        init_vvars = torch.full((1, self.tagset_size), -10000.)  # Initialize the viterbi variables in log space
        init_vvars[0][self.tag_to_ix[START_TAG]] = 0

        forward_var = init_vvars  # forward_var at step i holds the viterbi variables for step i-1
        for feat in feats:
            bptrs_t = []  # holds the backpointers for this step
            viterbivars_t = []  # holds the viterbi variables for this step
            for next_tag in range(self.tagset_size):
                # next_tag_var[i] holds the viterbi variable for tag i at the previous step, plus the score of transitioning from tag i to next_tag.
                # We don't include the emission scores here because the max does not depend on them (we add them in below)
                next_tag_var = forward_var + self.transitions[next_tag]
                best_tag_id = argmax(next_tag_var)
                bptrs_t.append(best_tag_id)
                viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))
            # Now add in the emission scores, and assign forward_var to the set of viterbi variables we just computed
            forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
            backpointers.append(bptrs_t)

        # Transition to STOP_TAG
        terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
        best_tag_id = argmax(terminal_var)
        path_score = terminal_var[0][best_tag_id]

        # Follow the back pointers to decode the best path.
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = bptrs_t[best_tag_id]
            best_path.append(best_tag_id)
        # Pop off the start tag (we don't want to return that to the caller)
        start = best_path.pop()
        assert start == self.tag_to_ix[START_TAG]  # Sanity check
        best_path.reverse()
        return path_score, best_path
Example #9
    def _forward_alg(self, feats):
        # Do the forward algorithm to compute the partition function
        # Initialize alphas
        init_alphas = torch.full((1, self.tagset_size), -10000.)

        # START_TAG has all of the score.
        init_alphas[0][self.tag_to_ix[START_TAG]] = 0.

        # Wrap in a variable so that we will get automatic backprop
        forward_var = init_alphas

        # Iterate through the sentence
        for feat in feats:
            alphas_t = []  # The forward tensors at this timestep
            for next_tag in range(self.tagset_size):
                # broadcast the emission score: it is the same regardless of
                # the previous tag
                emit_score = feat[next_tag].view(
                    1, -1).expand(1, self.tagset_size)
                # the ith entry of trans_score is the score of transitioning to
                # next_tag from i
                trans_score = self.transitions[next_tag].view(1, -1)
                # The ith entry of next_tag_var is the value for the
                # edge (i -> next_tag) before we do log-sum-exp
                next_tag_var = forward_var + trans_score + emit_score
                # The forward variable for this tag is log-sum-exp of all the
                # scores.
                alphas_t.append(log_sum_exp(next_tag_var).view(1))
            forward_var = torch.cat(alphas_t).view(1, -1)
        terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        return alpha
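The CRF snippets in this collection lean on argmax and log_sum_exp helpers that are not shown. The standard definitions from the PyTorch BiLSTM-CRF tutorial are reproduced below as a sketch; the repositories these examples come from may use their own variants:

import torch

def argmax(vec):
    # Argmax of a (1, tagset_size) row vector, as a Python int.
    _, idx = torch.max(vec, 1)
    return idx.item()

def log_sum_exp(vec):
    # Numerically stable log-sum-exp over a (1, tagset_size) row vector.
    max_score = vec[0, argmax(vec)]
    max_score_broadcast = max_score.view(1, -1).expand(1, vec.size(1))
    return max_score + torch.log(torch.sum(torch.exp(vec - max_score_broadcast)))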
Example #10
 def test_repeating_excluded_index_does_not_die(self):
      # batch 0 will repeat the excluded idx, batch 1 the blocked idx
     n_words = 100
     repeat_idx = 47  # will be repeated and should be blocked
     repeat_idx_ignored = 7  # will be repeated and should not be blocked
     ngram_repeat = 3
     for batch_sz in [1, 3, 17]:
         samp = RandomSampling(
             0, 1, 2, batch_sz, torch.device("cpu"), 0, ngram_repeat,
             {repeat_idx_ignored}, False, 30, 1., 5,
             torch.randint(0, 30, (batch_sz,)))
         for i in range(ngram_repeat + 4):
             word_probs = torch.full(
                 (batch_sz, n_words), -float('inf'))
             word_probs[0, repeat_idx_ignored] = 0
             if batch_sz > 1:
                 word_probs[1, repeat_idx] = 0
                 word_probs[2:, repeat_idx + i] = 0
             attns = torch.randn(1, batch_sz, 53)
             samp.advance(word_probs, attns)
             if i <= ngram_repeat:
                 self.assertFalse(samp.topk_scores.eq(
                     self.BLOCKED_SCORE).any())
             else:
                 # now batch 1 dies
                 self.assertFalse(samp.topk_scores[0].eq(
                     self.BLOCKED_SCORE).any())
                 if batch_sz > 1:
                     self.assertTrue(samp.topk_scores[1].eq(
                         self.BLOCKED_SCORE).all())
                     self.assertFalse(samp.topk_scores[2:].eq(
                         self.BLOCKED_SCORE).any())
Example #11
 def test_advance_with_all_repeats_gets_blocked(self):
     # all beams repeat (beam >= 1 repeat dummy scores)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47
     ngram_repeat = 3
     for batch_sz in [1, 3]:
         beam = BeamSearch(
             beam_sz, batch_sz, 0, 1, 2, 2,
             torch.device("cpu"), GlobalScorerStub(), 0, 30,
             False, ngram_repeat, set(),
             torch.randint(0, 30, (batch_sz,)), False, 0.)
         for i in range(ngram_repeat + 4):
             # predict repeat_idx over and over again
             word_probs = torch.full(
                 (batch_sz * beam_sz, n_words), -float('inf'))
             word_probs[0::beam_sz, repeat_idx] = 0
             attns = torch.randn(1, batch_sz * beam_sz, 53)
             beam.advance(word_probs, attns)
             if i <= ngram_repeat:
                 expected_scores = torch.tensor(
                             [0] + [-float('inf')] * (beam_sz - 1))\
                         .repeat(batch_sz, 1)
                 self.assertTrue(beam.topk_log_probs.equal(expected_scores))
             else:
                 self.assertTrue(
                     beam.topk_log_probs.equal(
                         torch.tensor(self.BLOCKED_SCORE)
                         .repeat(batch_sz, beam_sz)))
Example #12
 def test_advance_with_some_repeats_gets_blocked(self):
     # beam 0 and beam >=2 will repeat (beam >= 2 repeat dummy scores)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47
     ngram_repeat = 3
     beam = Beam(beam_sz, 0, 1, 2, n_best=2,
                 exclusion_tokens=set(),
                 global_scorer=GlobalScorerStub(),
                 block_ngram_repeat=ngram_repeat)
     for i in range(ngram_repeat + 4):
         # non-interesting beams are going to get dummy values
         word_probs = torch.full((beam_sz, n_words), -float('inf'))
         if i == 0:
             # on initial round, only predicted scores for beam 0
             # matter. Make two predictions. Top one will be repeated
             # in beam zero, second one will live on in beam 1.
             word_probs[0, repeat_idx] = -0.1
             word_probs[0, repeat_idx + i + 1] = -2.3
         else:
             # predict the same thing in beam 0
             word_probs[0, repeat_idx] = 0
             # continue pushing around what beam 1 predicts
             word_probs[1, repeat_idx + i + 1] = 0
         attns = torch.randn(beam_sz)
         beam.advance(word_probs, attns)
         if i <= ngram_repeat:
             self.assertFalse(beam.scores[0].eq(self.BLOCKED_SCORE))
             self.assertFalse(beam.scores[1].eq(self.BLOCKED_SCORE))
         else:
             # now beam 0 dies (along with the others), beam 1 -> beam 0
             self.assertFalse(beam.scores[0].eq(self.BLOCKED_SCORE))
             self.assertTrue(
                 beam.scores[1:].equal(torch.tensor(
                     [self.BLOCKED_SCORE] * (beam_sz - 1))))
Example #13
 def test_advance_with_all_repeats_gets_blocked(self):
     # all beams repeat (beam >= 1 repeat dummy scores)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47
     ngram_repeat = 3
     beam = Beam(beam_sz, 0, 1, 2, n_best=2,
                 exclusion_tokens=set(),
                 global_scorer=GlobalScorerStub(),
                 block_ngram_repeat=ngram_repeat)
     for i in range(ngram_repeat + 4):
         # predict repeat_idx over and over again
         word_probs = torch.full((beam_sz, n_words), -float('inf'))
         word_probs[0, repeat_idx] = 0
         attns = torch.randn(beam_sz)
         beam.advance(word_probs, attns)
         if i <= ngram_repeat:
             self.assertTrue(
                 beam.scores.equal(
                     torch.tensor(
                         [0] + [-float('inf')] * (beam_sz - 1))))
         else:
             self.assertTrue(
                 beam.scores.equal(torch.tensor(
                     [self.BLOCKED_SCORE] * beam_sz)))
Example #14
def script_viterbi(unary, trans, start_idx, end_idx):
    # type: (Tensor, Tensor, int, int) -> Tuple[Tensor, Tensor]
    backpointers = []
    alphas = torch.full((1, unary.size(1)), -1e4, dtype=unary.dtype, device=unary.device)
    alphas[0, start_idx] = 0

    for i in range(unary.size(0)):
        unary_t = unary[i, :]
        next_tag_var = alphas + trans
        viterbi, best_tag_ids = torch.max(next_tag_var, 1)
        backpointers.append(best_tag_ids)
        alphas = viterbi + unary_t
        alphas = alphas.unsqueeze(0)

    terminal_vars = alphas.squeeze(0) + trans[end_idx, :]
    path_score, best_tag_id = torch.max(terminal_vars, 0)

    best_path = [best_tag_id]
    for i in range(len(backpointers)):
        i = len(backpointers) - i - 1
        best_tag_id = backpointers[i][best_tag_id]
        best_path.append(best_tag_id)

    new_path = []
    for i in range(len(best_path)):
        i = len(best_path) - i - 1
        new_path.append(best_path[i])
    return torch.stack(new_path[1:]), path_score
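The `# type:` comment makes the function TorchScript-compatible. A hedged usage sketch, compiling it with torch.jit.script on random scores (tag count and transition layout below are invented for the demo; script_viterbi from the example above is assumed in scope):

import torch

n_steps, n_tags = 4, 5
unary = torch.randn(n_steps, n_tags)   # emission scores, one row per step
trans = torch.randn(n_tags, n_tags)    # trans[i, j]: score of moving j -> i
scripted = torch.jit.script(script_viterbi)
path, score = scripted(unary, trans, 0, n_tags - 1)
print(path, float(score))              # one tag id per step, plus the path score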
Example #15
def cross_entropy_loss(input, target):
    total_loss = torch.tensor(0.0)
    for i in range(input.size(1)):
        cls_idx = torch.full((input.size(0),), i, dtype=torch.long)
        loss = F.cross_entropy(input, cls_idx, reduction='none')
        total_loss += target[:, i].dot(loss)
    return total_loss / input.shape[0]
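A short usage sketch for the soft-target loss above: target is a (batch, n_classes) matrix of per-class weights rather than hard labels (values below are arbitrary, and cross_entropy_loss from the example is assumed in scope):

import torch

logits = torch.randn(4, 3)                              # (batch, n_classes)
soft_targets = torch.softmax(torch.randn(4, 3), dim=1)  # rows sum to 1
print(cross_entropy_loss(logits, soft_targets))         # scalar mean loss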
Example #16
    def test_beam_is_done_when_n_best_beams_eos_using_min_length(self):
        # this is also a test that when block_ngram_repeat=0,
        # repeating is acceptable
        beam_sz = 5
        batch_sz = 3
        n_words = 100
        _non_eos_idxs = [47, 51, 13, 88, 99]
        valid_score_dist = torch.log_softmax(torch.tensor(
            [6., 5., 4., 3., 2., 1.]), dim=0)
        min_length = 5
        eos_idx = 2
        beam = BeamSearch(
            beam_sz, batch_sz, 0, 1, 2, 2,
            torch.device("cpu"), GlobalScorerStub(),
            min_length, 30, False, 0, set(),
            torch.randint(0, 30, (batch_sz,)), False, 0.)
        for i in range(min_length + 4):
            # non-interesting beams are going to get dummy values
            word_probs = torch.full(
                (batch_sz * beam_sz, n_words), -float('inf'))
            if i == 0:
                # "best" prediction is eos - that should be blocked
                word_probs[0::beam_sz, eos_idx] = valid_score_dist[0]
                # include at least beam_sz predictions OTHER than EOS
                # that are greater than -1e20
                for j, score in zip(_non_eos_idxs, valid_score_dist[1:]):
                    word_probs[0::beam_sz, j] = score
            elif i <= min_length:
                # predict eos in beam 1
                word_probs[1::beam_sz, eos_idx] = valid_score_dist[0]
                # provide beam_sz other good predictions in other beams
                for k, (j, score) in enumerate(
                        zip(_non_eos_idxs, valid_score_dist[1:])):
                    beam_idx = min(beam_sz-1, k)
                    word_probs[beam_idx::beam_sz, j] = score
            else:
                word_probs[0::beam_sz, eos_idx] = valid_score_dist[0]
                word_probs[1::beam_sz, eos_idx] = valid_score_dist[0]
                # provide beam_sz other good predictions in other beams
                for k, (j, score) in enumerate(
                        zip(_non_eos_idxs, valid_score_dist[1:])):
                    beam_idx = min(beam_sz-1, k)
                    word_probs[beam_idx::beam_sz, j] = score

            attns = torch.randn(1, batch_sz * beam_sz, 53)
            beam.advance(word_probs, attns)
            if i < min_length:
                self.assertFalse(beam.done)
            elif i == min_length:
                # beam 1 dies on min_length
                self.assertTrue(beam.is_finished[:, 1].all())
                beam.update_finished()
                self.assertFalse(beam.done)
            else:  # i > min_length
                # beam 0 dies on the step after beam 1 dies
                self.assertTrue(beam.is_finished[:, 0].all())
                beam.update_finished()
                self.assertTrue(beam.done)
Example #17
    def _train(self, epoch):
        """Perform the actual train."""
        # put model into train mode
        self.d_model.train()
        # TODO: why?
        cp_loader = deepcopy(self.train_loader)
        if self.verbose:
            progress_bar = tqdm(total=len(cp_loader),
                                desc='Current Epoch',
                                file=sys.stdout,
                                leave=False,
                                ncols=75,
                                position=0,
                                unit=' Batch')
        else:
            progress_bar = None
        real_label = 1
        fake_label = 0
        for batch_idx, inputs in enumerate(cp_loader):
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            # train with real
            self.optimizer_d.zero_grad()
            inputs = inputs.to(self.device)
            batch_size = inputs.size(0)
            outputs = self.d_model(inputs)

            label = torch.full((batch_size,), real_label, device=self.device)
            loss_d_real = self.loss_function(outputs, label)
            loss_d_real.backward()

            # train with fake
            noise = torch.randn((batch_size, self.g_model.nz, 1, 1,), device=self.device)
            fake_outputs = self.g_model(noise)
            label.fill_(fake_label)
            outputs = self.d_model(fake_outputs.detach())
            loss_d_fake = self.loss_function(outputs, label)
            loss_d_fake.backward()
            self.optimizer_d.step()
            # (2) Update G network: maximize log(D(G(z)))
            self.g_model.zero_grad()
            label.fill_(real_label)
            outputs = self.d_model(fake_outputs)
            loss_g = self.loss_function(outputs, label)
            loss_g.backward()
            self.optimizer_g.step()

            if self.verbose:
                if batch_idx % 10 == 0:
                    progress_bar.update(10)
            if self.out_f is not None and batch_idx % 100 == 0:
                fake = self.g_model(self.sample_noise)
                vutils.save_image(
                    fake.detach(),
                    '%s/fake_samples_epoch_%03d.png' % (self.out_f, epoch),
                    normalize=True)
        if self.verbose:
            progress_bar.close()
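One portability note on the label tensor above: in newer PyTorch releases, torch.full((batch_size,), 1) infers an integer dtype, which BCE-style losses reject. Passing the dtype explicitly, as in this standalone sketch, keeps the labels float:

import torch

batch_size, real_label, fake_label = 4, 1, 0
label = torch.full((batch_size,), real_label, dtype=torch.float)
label.fill_(fake_label)        # reuse the same buffer for the fake pass
print(label.dtype, label)      # torch.float32 tensor([0., 0., 0., 0.])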
Example #18
    def test_beam_is_done_when_n_best_beams_eos_using_min_length(self):
        # this is also a test that when block_ngram_repeat=0,
        # repeating is acceptable
        beam_sz = 5
        n_words = 100
        _non_eos_idxs = [47, 51, 13, 88, 99]
        valid_score_dist = torch.log_softmax(torch.tensor(
            [6., 5., 4., 3., 2., 1.]), dim=0)
        min_length = 5
        eos_idx = 2
        beam = Beam(beam_sz, 0, 1, eos_idx, n_best=2,
                    exclusion_tokens=set(),
                    min_length=min_length,
                    global_scorer=GlobalScorerStub(),
                    block_ngram_repeat=0)
        for i in range(min_length + 4):
            # non-interesting beams are going to get dummy values
            word_probs = torch.full((beam_sz, n_words), -float('inf'))
            if i == 0:
                # "best" prediction is eos - that should be blocked
                word_probs[0, eos_idx] = valid_score_dist[0]
                # include at least beam_sz predictions OTHER than EOS
                # that are greater than -1e20
                for j, score in zip(_non_eos_idxs, valid_score_dist[1:]):
                    word_probs[0, j] = score
            elif i <= min_length:
                # predict eos in beam 1
                word_probs[1, eos_idx] = valid_score_dist[0]
                # provide beam_sz other good predictions in other beams
                for k, (j, score) in enumerate(
                        zip(_non_eos_idxs, valid_score_dist[1:])):
                    beam_idx = min(beam_sz-1, k)
                    word_probs[beam_idx, j] = score
            else:
                word_probs[0, eos_idx] = valid_score_dist[0]
                word_probs[1, eos_idx] = valid_score_dist[0]
                # provide beam_sz other good predictions in other beams
                for k, (j, score) in enumerate(
                        zip(_non_eos_idxs, valid_score_dist[1:])):
                    beam_idx = min(beam_sz-1, k)
                    word_probs[beam_idx, j] = score

            attns = torch.randn(beam_sz)
            beam.advance(word_probs, attns)
            if i < min_length:
                self.assertFalse(beam.done)
            elif i == min_length:
                # beam 1 dies on min_length
                self.assertEqual(beam.finished[0][1], beam.min_length + 1)
                self.assertEqual(beam.finished[0][2], 1)
                self.assertFalse(beam.done)
            else:  # i > min_length
                # beam 0 dies on the step after beam 1 dies
                self.assertEqual(beam.finished[1][1], beam.min_length + 2)
                self.assertEqual(beam.finished[1][2], 0)
                self.assertTrue(beam.done)
Example #19
 def test_neg_styblinski_tang_global_maximum(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         X = torch.full(
             (3,), GLOBAL_MAXIMIZER, device=device, dtype=dtype, requires_grad=True
         )
         res = neg_styblinski_tang(X)
         res.backward()
         self.assertAlmostEqual(res.item(), 3 * GLOBAL_MAXIMUM, places=4)
         self.assertLess(X.grad.abs().max().item(), 1e-5)
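For reference, the test function and constants exercised above look roughly like the following; the optimum values are the well-known Styblinski-Tang figures quoted to a few decimals, so treat this as a sketch rather than the library source:

import torch

GLOBAL_MAXIMIZER = -2.903534   # per-dimension argmax of the negated function
GLOBAL_MAXIMUM = 39.166166     # per-dimension maximum value

def neg_styblinski_tang(X):
    # Negated Styblinski-Tang: -0.5 * sum(x^4 - 16*x^2 + 5*x) over the last dim.
    return -0.5 * (X ** 4 - 16 * X ** 2 + 5 * X).sum(dim=-1)

X = torch.full((3,), GLOBAL_MAXIMIZER, dtype=torch.double, requires_grad=True)
res = neg_styblinski_tang(X)
res.backward()
print(res.item(), X.grad.abs().max().item())  # ~3 * 39.166..., gradient ~0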
Example #20
    def __init__(self, label_smoothing, tgt_vocab_size, padding_idx=0):
        assert 0.0 < label_smoothing <= 1.0
        self.padding_idx = padding_idx
        super(LabelSmoothingLoss, self).__init__()

        smoothing_value = label_smoothing / (tgt_vocab_size - 2)  # -1 for pad, -1 for gold-standard word
        one_hot = torch.full((tgt_vocab_size,), smoothing_value)
        one_hot[self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot.unsqueeze(0))

        self.confidence = 1.0 - label_smoothing
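The constructor stores a smoothed prior in one_hot; a forward pass in the usual OpenNMT style scatters the confidence onto the gold indices and takes a KL divergence against the model's log-probabilities. A self-contained sketch under that assumption (not necessarily this repository's exact forward):

import torch
import torch.nn as nn
import torch.nn.functional as F

class LabelSmoothingLoss(nn.Module):
    def __init__(self, label_smoothing, tgt_vocab_size, padding_idx=0):
        assert 0.0 < label_smoothing <= 1.0
        super().__init__()
        self.padding_idx = padding_idx
        smoothing_value = label_smoothing / (tgt_vocab_size - 2)
        one_hot = torch.full((tgt_vocab_size,), smoothing_value)
        one_hot[self.padding_idx] = 0
        self.register_buffer('one_hot', one_hot.unsqueeze(0))
        self.confidence = 1.0 - label_smoothing

    def forward(self, output, target):
        # output: (batch, vocab) log-probs; target: (batch,) gold indices.
        model_prob = self.one_hot.repeat(target.size(0), 1)
        model_prob.scatter_(1, target.unsqueeze(1), self.confidence)
        model_prob.masked_fill_((target == self.padding_idx).unsqueeze(1), 0)
        return F.kl_div(output, model_prob, reduction='sum')

criterion = LabelSmoothingLoss(0.1, tgt_vocab_size=6)
log_probs = torch.log_softmax(torch.randn(4, 6), dim=1)
print(criterion(log_probs, torch.tensor([1, 2, 3, 0])))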
Example #21
    def test_doesnt_predict_eos_if_shorter_than_min_len(self):
        # beam 0 will always predict EOS. The other beams will predict
        # non-eos scores.
        for batch_sz in [1, 3]:
            beam_sz = 5
            n_words = 100
            _non_eos_idxs = [47, 51, 13, 88, 99]
            valid_score_dist = torch.log_softmax(torch.tensor(
                [6., 5., 4., 3., 2., 1.]), dim=0)
            min_length = 5
            eos_idx = 2
            lengths = torch.randint(0, 30, (batch_sz,))
            beam = BeamSearch(beam_sz, batch_sz, 0, 1, 2, 2,
                              torch.device("cpu"), GlobalScorerStub(),
                              min_length, 30, False, 0, set(),
                              lengths, False, 0.)
            all_attns = []
            for i in range(min_length + 4):
                # non-interesting beams are going to get dummy values
                word_probs = torch.full(
                    (batch_sz * beam_sz, n_words), -float('inf'))
                if i == 0:
                    # "best" prediction is eos - that should be blocked
                    word_probs[0::beam_sz, eos_idx] = valid_score_dist[0]
                    # include at least beam_sz predictions OTHER than EOS
                    # that are greater than -1e20
                    for j, score in zip(_non_eos_idxs, valid_score_dist[1:]):
                        word_probs[0::beam_sz, j] = score
                else:
                    # predict eos in beam 0
                    word_probs[0::beam_sz, eos_idx] = valid_score_dist[0]
                    # provide beam_sz other good predictions
                    for k, (j, score) in enumerate(
                            zip(_non_eos_idxs, valid_score_dist[1:])):
                        beam_idx = min(beam_sz-1, k)
                        word_probs[beam_idx::beam_sz, j] = score

                attns = torch.randn(1, batch_sz * beam_sz, 53)
                all_attns.append(attns)
                beam.advance(word_probs, attns)
                if i < min_length:
                    expected_score_dist = \
                        (i+1) * valid_score_dist[1:].unsqueeze(0)
                    self.assertTrue(
                        beam.topk_log_probs.allclose(
                            expected_score_dist))
                elif i == min_length:
                    # now the top beam has ended and no others have
                    self.assertTrue(beam.is_finished[:, 0].eq(1).all())
                    self.assertTrue(beam.is_finished[:, 1:].eq(0).all())
                else:  # i > min_length
                    # not of interest, but want to make sure it keeps running
                    # since only beam 0 terminates and n_best = 2
                    pass
Example #22
    def __init__(self, label_smoothing, tgt_vocab_size, ignore_index=-100):
        assert 0.0 < label_smoothing <= 1.0
        self.ignore_index = ignore_index
        super(LabelSmoothingLoss, self).__init__()

        smoothing_value = label_smoothing / (tgt_vocab_size - 2)
        one_hot = torch.full((tgt_vocab_size,), smoothing_value)
        one_hot[self.ignore_index] = 0  # note: assumes 0 <= ignore_index < tgt_vocab_size
        self.register_buffer('one_hot', one_hot.unsqueeze(0))

        self.confidence = 1.0 - label_smoothing
Example #23
 def test_repeating_excluded_index_does_not_die(self):
     # beam 0 and beam >= 2 will repeat (beam 2 repeats excluded idx)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47  # will be repeated and should be blocked
     repeat_idx_ignored = 7  # will be repeated and should not be blocked
     ngram_repeat = 3
     for batch_sz in [1, 3]:
         beam = BeamSearch(
             beam_sz, batch_sz, 0, 1, 2, 2,
             torch.device("cpu"), GlobalScorerStub(), 0, 30,
             False, ngram_repeat, {repeat_idx_ignored},
             torch.randint(0, 30, (batch_sz,)), False, 0.)
         for i in range(ngram_repeat + 4):
             # non-interesting beams are going to get dummy values
             word_probs = torch.full(
                 (batch_sz * beam_sz, n_words), -float('inf'))
             if i == 0:
                 word_probs[0::beam_sz, repeat_idx] = -0.1
                 word_probs[0::beam_sz, repeat_idx + i + 1] = -2.3
                 word_probs[0::beam_sz, repeat_idx_ignored] = -5.0
             else:
                 # predict the same thing in beam 0
                 word_probs[0::beam_sz, repeat_idx] = 0
                 # continue pushing around what beam 1 predicts
                 word_probs[1::beam_sz, repeat_idx + i + 1] = 0
                 # predict the allowed-repeat again in beam 2
                 word_probs[2::beam_sz, repeat_idx_ignored] = 0
             attns = torch.randn(1, batch_sz * beam_sz, 53)
             beam.advance(word_probs, attns)
             if i <= ngram_repeat:
                 self.assertFalse(beam.topk_log_probs[:, 0].eq(
                     self.BLOCKED_SCORE).any())
                 self.assertFalse(beam.topk_log_probs[:, 1].eq(
                     self.BLOCKED_SCORE).any())
                 self.assertFalse(beam.topk_log_probs[:, 2].eq(
                     self.BLOCKED_SCORE).any())
             else:
                 # now beam 0 dies, beam 1 -> beam 0, beam 2 -> beam 1
                 # and the rest die
                 self.assertFalse(beam.topk_log_probs[:, 0].eq(
                     self.BLOCKED_SCORE).any())
                 # since all preds after i=0 are 0, we can check
                 # that the beam is the correct idx by checking that
                 # the curr score is the initial score
                 self.assertTrue(beam.topk_log_probs[:, 0].eq(-2.3).all())
                 self.assertFalse(beam.topk_log_probs[:, 1].eq(
                     self.BLOCKED_SCORE).all())
                 self.assertTrue(beam.topk_log_probs[:, 1].eq(-5.0).all())
                 self.assertTrue(
                     beam.topk_log_probs[:, 2:].equal(
                         torch.tensor(self.BLOCKED_SCORE)
                         .repeat(batch_sz, beam_sz - 2)))
Example #24
 def convert_to_roi_format(self, boxes):
     concat_boxes = cat([b.bbox for b in boxes], dim=0)
     device, dtype = concat_boxes.device, concat_boxes.dtype
     ids = cat(
         [
             torch.full((len(b), 1), i, dtype=dtype, device=device)
             for i, b in enumerate(boxes)
         ],
         dim=0,
     )
     rois = torch.cat([ids, concat_boxes], dim=1)
     return rois
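The id-column trick above is easy to see in isolation: each image's boxes get a leading batch-index column built with torch.full. Plain tensors stand in below for the BoxList objects the detector actually passes:

import torch

boxes_per_image = [torch.rand(2, 4), torch.rand(3, 4)]  # stand-ins for b.bbox
concat_boxes = torch.cat(boxes_per_image, dim=0)
ids = torch.cat(
    [torch.full((len(b), 1), i, dtype=concat_boxes.dtype)
     for i, b in enumerate(boxes_per_image)],
    dim=0,
)
rois = torch.cat([ids, concat_boxes], dim=1)
print(rois.shape)  # torch.Size([5, 5]); column 0 holds the image index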
Example #25
    def test_doesnt_predict_eos_if_shorter_than_min_len(self):
        # beam 0 will always predict EOS. The other beams will predict
        # non-eos scores.
        # this is also a test that when block_ngram_repeat=0,
        # repeating is acceptable
        beam_sz = 5
        n_words = 100
        _non_eos_idxs = [47, 51, 13, 88, 99]
        valid_score_dist = torch.log_softmax(torch.tensor(
            [6., 5., 4., 3., 2., 1.]), dim=0)
        min_length = 5
        eos_idx = 2
        beam = Beam(beam_sz, 0, 1, eos_idx, n_best=2,
                    exclusion_tokens=set(),
                    min_length=min_length,
                    global_scorer=GlobalScorerStub(),
                    block_ngram_repeat=0)
        for i in range(min_length + 4):
            # non-interesting beams are going to get dummy values
            word_probs = torch.full((beam_sz, n_words), -float('inf'))
            if i == 0:
                # "best" prediction is eos - that should be blocked
                word_probs[0, eos_idx] = valid_score_dist[0]
                # include at least beam_sz predictions OTHER than EOS
                # that are greater than -1e20
                for j, score in zip(_non_eos_idxs, valid_score_dist[1:]):
                    word_probs[0, j] = score
            else:
                # predict eos in beam 0
                word_probs[0, eos_idx] = valid_score_dist[0]
                # provide beam_sz other good predictions
                for k, (j, score) in enumerate(
                        zip(_non_eos_idxs, valid_score_dist[1:])):
                    beam_idx = min(beam_sz-1, k)
                    word_probs[beam_idx, j] = score

            attns = torch.randn(beam_sz)
            beam.advance(word_probs, attns)
            if i < min_length:
                expected_score_dist = (i+1) * valid_score_dist[1:]
                self.assertTrue(beam.scores.allclose(expected_score_dist))
            elif i == min_length:
                # now the top beam has ended and no others have
                # first beam finished had length beam.min_length
                self.assertEqual(beam.finished[0][1], beam.min_length + 1)
                # first beam finished was 0
                self.assertEqual(beam.finished[0][2], 0)
            else:  # i > min_length
                # not of interest, but want to make sure it keeps running
                # since only beam 0 terminates and n_best = 2
                pass
Example #26
    def __init__(self, beam_size, batch_size, pad, bos, eos, n_best, mb_device,
                 global_scorer, min_length, max_length, return_attention,
                 block_ngram_repeat, exclusion_tokens, memory_lengths,
                 stepwise_penalty, ratio):
        super(BeamSearch, self).__init__(
            pad, bos, eos, batch_size, mb_device, beam_size, min_length,
            block_ngram_repeat, exclusion_tokens, return_attention,
            max_length)
        # beam parameters
        self.global_scorer = global_scorer
        self.beam_size = beam_size
        self.n_best = n_best
        self.batch_size = batch_size
        self.ratio = ratio

        # result caching
        self.hypotheses = [[] for _ in range(batch_size)]

        # beam state
        self.top_beam_finished = torch.zeros([batch_size], dtype=torch.uint8)
        self.best_scores = torch.full([batch_size], -1e10, dtype=torch.float,
                                      device=mb_device)

        self._batch_offset = torch.arange(batch_size, dtype=torch.long)
        self._beam_offset = torch.arange(
            0, batch_size * beam_size, step=beam_size, dtype=torch.long,
            device=mb_device)
        self.topk_log_probs = torch.tensor(
            [0.0] + [float("-inf")] * (beam_size - 1), device=mb_device
        ).repeat(batch_size)
        self.select_indices = None
        self._memory_lengths = memory_lengths

        # buffers for the topk scores and 'backpointer'
        self.topk_scores = torch.empty((batch_size, beam_size),
                                       dtype=torch.float, device=mb_device)
        self.topk_ids = torch.empty((batch_size, beam_size), dtype=torch.long,
                                    device=mb_device)
        self._batch_index = torch.empty([batch_size, beam_size],
                                        dtype=torch.long, device=mb_device)
        self.done = False
        # "global state" of the old beam
        self._prev_penalty = None
        self._coverage = None

        self._stepwise_cov_pen = (
                stepwise_penalty and self.global_scorer.has_cov_pen)
        self._vanilla_cov_pen = (
            not stepwise_penalty and self.global_scorer.has_cov_pen)
        self._cov_pen = self.global_scorer.has_cov_pen
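The topk_log_probs initialization above is what makes step 0 work: only the first beam of each batch entry carries probability mass, so the first top-k selection cannot pick duplicate hypotheses. In isolation:

import torch

beam_size, batch_size = 3, 2
topk_log_probs = torch.tensor(
    [0.0] + [float("-inf")] * (beam_size - 1)).repeat(batch_size)
print(topk_log_probs)  # tensor([0., -inf, -inf, 0., -inf, -inf])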
Example #27
 def numericalize_inputs(cls, init_case, params):
     bs = params["batch_size"]
     max_len = params["max_len"]
     lengths = torch.randint(1, max_len, (bs,))
     lengths[params["full_length_seq"]] = max_len
     nfeats = params["nfeats"]
     fake_input = torch.full(
         (bs, 1, nfeats, max_len), init_case["pad_index"])
     for b in range(bs):
         fake_input[b, :, :, :lengths[b]] = torch.randn(
             (1, nfeats, lengths[b]))
     if init_case["include_lengths"]:
         fake_input = (fake_input, lengths)
     return fake_input, lengths
Example #28
 def _forward_alg(self, feats):  # Reference: /Users/coder352/github/jKnowledge/Math_Manual/Machine_Learning/pdf/l85_Named-Entity-Recognition.pdf
     init_alphas = torch.full((1, self.tagset_size), -10000.)  # (1, 5); Do the forward algorithm to compute the partition function
     init_alphas[0][self.tag_to_ix[START_TAG]] = 0.  # START_TAG has all of the score.
     forward_var = init_alphas  # Wrap in a variable so that we will get automatic backprop
     for feat in feats:  # Iterate through the sentence
         alphas_t = []  # The forward tensors at this timestep
         for next_tag in range(self.tagset_size):  # broadcast the emission score: it is the same regardless of the previous tag
             emit_score = feat[next_tag].view(1, -1).expand(1, self.tagset_size)
             trans_score = self.transitions[next_tag].view(1, -1)  # the ith entry of trans_score is the score of transitioning to next_tag from i
             next_tag_var = forward_var + trans_score + emit_score  # The ith entry of next_tag_var is the value for the edge (i -> next_tag) before we do log-sum-exp
             alphas_t.append(log_sum_exp(next_tag_var).view(1))  # The forward variable for this tag is log-sum-exp of all the scores.
         forward_var = torch.cat(alphas_t).view(1, -1)
     terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
     alpha = log_sum_exp(terminal_var)
     return alpha
Example #29
 def test_advance_with_some_repeats_gets_blocked(self):
     # beam 0 and beam >=2 will repeat (beam >= 2 repeat dummy scores)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47
     ngram_repeat = 3
     for batch_sz in [1, 3]:
         beam = BeamSearch(
             beam_sz, batch_sz, 0, 1, 2, 2,
             torch.device("cpu"), GlobalScorerStub(), 0, 30,
             False, ngram_repeat, set(),
             torch.randint(0, 30, (batch_sz,)), False, 0.)
         for i in range(ngram_repeat + 4):
             # non-interesting beams are going to get dummy values
             word_probs = torch.full(
                 (batch_sz * beam_sz, n_words), -float('inf'))
             if i == 0:
                 # on initial round, only predicted scores for beam 0
                 # matter. Make two predictions. Top one will be repeated
                 # in beam zero, second one will live on in beam 1.
                 word_probs[0::beam_sz, repeat_idx] = -0.1
                 word_probs[0::beam_sz, repeat_idx + i + 1] = -2.3
             else:
                 # predict the same thing in beam 0
                 word_probs[0::beam_sz, repeat_idx] = 0
                 # continue pushing around what beam 1 predicts
                 word_probs[1::beam_sz, repeat_idx + i + 1] = 0
             attns = torch.randn(1, batch_sz * beam_sz, 53)
             beam.advance(word_probs, attns)
             if i <= ngram_repeat:
                 self.assertFalse(
                     beam.topk_log_probs[0::beam_sz].eq(
                         self.BLOCKED_SCORE).any())
                 self.assertFalse(
                     beam.topk_log_probs[1::beam_sz].eq(
                         self.BLOCKED_SCORE).any())
             else:
                 # now beam 0 dies (along with the others), beam 1 -> beam 0
                 self.assertFalse(
                     beam.topk_log_probs[:, 0].eq(
                         self.BLOCKED_SCORE).any())
                 self.assertTrue(
                     beam.topk_log_probs[:, 1:].equal(
                         torch.tensor(self.BLOCKED_SCORE)
                         .repeat(batch_sz, beam_sz-1)))
Example #30
 def test_repeating_excluded_index_does_not_die(self):
     # beam 0 and beam >= 2 will repeat (beam 2 repeats excluded idx)
     beam_sz = 5
     n_words = 100
     repeat_idx = 47  # will be repeated and should be blocked
     repeat_idx_ignored = 7  # will be repeated and should not be blocked
     ngram_repeat = 3
     beam = Beam(beam_sz, 0, 1, 2, n_best=2,
                 exclusion_tokens=set([repeat_idx_ignored]),
                 global_scorer=GlobalScorerStub(),
                 block_ngram_repeat=ngram_repeat)
     for i in range(ngram_repeat + 4):
         # non-interesting beams are going to get dummy values
         word_probs = torch.full((beam_sz, n_words), -float('inf'))
         if i == 0:
             word_probs[0, repeat_idx] = -0.1
             word_probs[0, repeat_idx + i + 1] = -2.3
             word_probs[0, repeat_idx_ignored] = -5.0
         else:
             # predict the same thing in beam 0
             word_probs[0, repeat_idx] = 0
             # continue pushing around what beam 1 predicts
             word_probs[1, repeat_idx + i + 1] = 0
             # predict the allowed-repeat again in beam 2
             word_probs[2, repeat_idx_ignored] = 0
         attns = torch.randn(beam_sz)
         beam.advance(word_probs, attns)
         if i <= ngram_repeat:
             self.assertFalse(beam.scores[0].eq(self.BLOCKED_SCORE))
             self.assertFalse(beam.scores[1].eq(self.BLOCKED_SCORE))
             self.assertFalse(beam.scores[2].eq(self.BLOCKED_SCORE))
         else:
             # now beam 0 dies, beam 1 -> beam 0, beam 2 -> beam 1
             # and the rest die
             self.assertFalse(beam.scores[0].eq(self.BLOCKED_SCORE))
             # since all preds after i=0 are 0, we can check
             # that the beam is the correct idx by checking that
             # the curr score is the initial score
             self.assertTrue(beam.scores[0].eq(-2.3))
             self.assertFalse(beam.scores[1].eq(self.BLOCKED_SCORE))
             self.assertTrue(beam.scores[1].eq(-5.0))
             self.assertTrue(
                 beam.scores[2:].equal(torch.tensor(
                     [self.BLOCKED_SCORE] * (beam_sz - 2))))
Example #31
def test_draw_boxes_grayscale():
    img = torch.full((1, 4, 4), fill_value=255, dtype=torch.uint8)
    boxes = torch.tensor([[0, 0, 3, 3]], dtype=torch.int64)
    bboxed_img = utils.draw_bounding_boxes(image=img, boxes=boxes, colors=["#1BBC9B"])
    assert bboxed_img.size(0) == 3
Example #32
    def filter_proposals(
            self, proposals: Tensor, objectness: Tensor,
            image_shapes: List[Tuple[int,
                                     int]], num_anchors_per_level: List[int]
    ) -> Tuple[List[Tensor], List[Tensor]]:
        """
        Args:
        Returns:
        """
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        levels = [
            torch.full((n, ), i, dtype=torch.int64, device=device)
            for i, n in enumerate(num_anchors_per_level)
        ]

        levels = torch.cat(levels, dim=0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob,
                                                 levels, image_shapes):
            boxes = clip_boxes_to_image(boxes, img_shape)

            # remove small boxes
            keep = remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # remove low scoring boxes
            # use >= for backwards compatibility
            keep = torch.where(scores >= self.score_thresh)[0]
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)

        return final_boxes, final_scores
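A minimal sketch of the per-level index tensor built near the top of filter_proposals, assuming three feature levels contributing 4, 2, and 2 anchors:

import torch

num_anchors_per_level = [4, 2, 2]
levels = torch.cat(
    [torch.full((n,), i, dtype=torch.int64)
     for i, n in enumerate(num_anchors_per_level)]
)
print(levels)  # tensor([0, 0, 0, 0, 1, 1, 2, 2]) -- one level id per anchor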
Example #33
def est(cnn, mu, Str, fg, pro=None, pos=None, H=None):
    global max_norm
    if (fg == True):
        cnn.bid = 6
        pro.bid = pos + 1
        pro.to(device)
        pro.eval()
    else:
        cnn.to(device)
        torch.save(cnn.state_dict(), 'model_{}.pt'.format(Str))
#    exit(0)
    loss_f = nn.CrossEntropyLoss()
    cnn.eval()
    test_correct = 0
    test_loss = 0
    train_correct = 0
    train_loss = 0
    i = 1
    tr = 0
    with torch.no_grad():

        for X_train, y_train in train_loader:
            X_train = X_train.to(device)
            y_train = y_train.to(device)

            outputs = cnn(X_train)
            if (fg == True):
                pro(X_train)
                #                loss = (1-mu)*loss_f(outputs, y_train)+mu *
                loss = torch.dist(cnn.feat[pos], pro.feat[pos], p=2)
                tr += 1 - loss / pro.feat[pos].norm(p=2)
            else:
                loss = loss_f(outputs, y_train)
                label = y_train.cpu().numpy()
                #                for i in range(6):
                #                    tmp.append(cnn.feat[i].cpu().detach().numpy())
                #                for j in range(len(y_train)):
                #                    y=label[j]
                #                    S_n[y]+=1
                #                    if(S_n[y]<=S_limit):
                #                        for i in range(6):
                #                            if(S_de[i]==True):
                #                                for k in range(dim[i]):
                #                                    X[i][y][S_n[y]][k]=np.dot(tmp[i][j],vec[k][0:re_dim[i]].T)
                #                            else:
                #                                X[i][y][S_n[y]]=tmp[i][j]
                for j in range(label.size):
                    y = label[j]
                    for i in range(6):
                        X[i][y].append(cnn.feat[i][j])
                        if max_norm[i] < cnn.feat[i][j].norm(p=2):
                            max_norm[i] = cnn.feat[i][j].norm(p=2)
                        tmp = len(X[i][y])
                        if (tmp > S_limit[i]):
                            Q = torch.full([tmp, X[i][y][0].size()[0]],
                                           0,
                                           dtype=torch.float32).to(device)
                            for k in range(tmp):
                                Q[k] = X[i][y][k]
#                            H_tmp = entropy(Q)
                            Cnt[i][y] += 1
                            #                            print(i)
                            #                            print(y)
                            #                            print(H_tmp)
                            #                            print('\n')
                            H[i][y] += entropy(Q, max_norm[i])
                            X[i][y] = []

            y_pred = torch.max(outputs, 1).indices

            train_correct += torch.sum(y_pred == y_train).item()
            train_loss += loss.item()

        for X_test, y_test in test_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            outputs = cnn(X_test)
            if (fg == True):
                pro(X_test)
                #                loss = (1-mu)*loss_f(outputs, y_test)+mu *
                loss = torch.dist(cnn.feat[pos], pro.feat[pos], p=2)
                tr += 1 - loss / pro.feat[pos].norm(p=2)
            else:
                loss = loss_f(outputs, y_test)
                label = y_test.cpu().numpy()
                for j in range(label.size):
                    y = label[j]
                    for i in range(6):
                        X[i][y].append(cnn.feat[i][j])
                        if max_norm[i] < cnn.feat[i][j].norm(p=2):
                            max_norm[i] = cnn.feat[i][j].norm(p=2)
                        tmp = len(X[i][y])
                        if (tmp > S_limit[i]):
                            Q = torch.full([tmp, X[i][y][0].size()[0]],
                                           0,
                                           dtype=torch.float32).to(device)
                            for k in range(tmp):
                                Q[k] = X[i][y][k]


#                            H_tmp = entropy(Q)
                            Cnt[i][y] += 1
                            #                            print(i)
                            #                            print(y)
                            #                            print(H_tmp)
                            #                            print('\n')
                            H[i][y] += entropy(Q, max_norm[i])
                            X[i][y] = []

            y_pred = torch.max(outputs, 1).indices

            test_correct += torch.sum(y_pred == y_test).item()
            test_loss += loss.item()

    torch.cuda.empty_cache()

    if (fg == False):
        for i in range(6):
            for y in range(10):
                tmp = len(X[i][y])
                if (tmp > 0.5 * S_limit[i]):
                    Q = torch.full([tmp, X[i][y][0].size()[0]],
                                   0,
                                   dtype=torch.float32).to(device)
                    for k in range(tmp):
                        Q[k] = X[i][y][k]
                    Cnt[i][y] += 1
                    H[i][y] += entropy(Q, max_norm[i])
                    X[i][y] = []
                H[i][y] /= Cnt[i][y]
    print(Str)
    print('Train Loss {:.4f}'.format(train_loss / len(train_loader)))
    print('Test Loss {:.4f}'.format(test_loss / len(test_loader)))

    print('Train Acc {:.4f}%'.format(train_correct / len(train_data) * 100))
    print('Test Acc {:.4f}%'.format(test_correct / len(test_data) * 100))

    if (fg == True):
        #        print(tr)
        return tr / (len(train_loader) + len(test_loader)), train_loss / len(
            train_loader), test_loss / len(test_loader), train_correct / len(
                train_data), test_correct / len(test_data)
Example #34
def collate(batch):
    if len(batch) == 1:
        batch[0]['a_batch_size'] = batch[0]['image'].size(0)
        return batch[0]
    batch = [b for b in batch if b is not None]
    a_batch_size = len(batch[0]['gt'])

    dim1 = batch[0]['image'].shape[1]
    dim3 = max([b['image'].shape[3] for b in batch])
    dim2 = batch[0]['image'].shape[2]

    max_label_len = max([b['label'].size(0) for b in batch])
    if batch[0]['spaced_label'] is not None:
        max_spaced_label_len = max([b['spaced_label'].size(0) for b in batch])
    else:
        max_spaced_label_len = None

    input_batch = torch.full((len(batch) * a_batch_size, dim1, dim2, dim3),
                             PADDING_CONSTANT)
    mask_batch = torch.full((len(batch) * a_batch_size, dim1, dim2, dim3),
                            PADDING_CONSTANT)
    if 'fg_mask' in batch[0]:
        fg_masks = torch.full((len(batch) * a_batch_size, 1, dim2, dim3), 0)
    if 'changed_image' in batch[0]:
        changed_batch = torch.full(
            (len(batch) * a_batch_size, dim1, dim2, dim3), PADDING_CONSTANT)
    top_and_bottom_batch = torch.full((len(batch) * a_batch_size, 2, dim3), 0)
    center_line_batch = torch.full((len(batch) * a_batch_size, dim3), dim2 / 2)
    labels_batch = torch.IntTensor(max_label_len,
                                   len(batch) * a_batch_size).fill_(0)
    if max_spaced_label_len is not None:
        spaced_labels_batch = torch.IntTensor(max_spaced_label_len,
                                              len(batch) *
                                              a_batch_size).fill_(0)
    else:
        spaced_labels_batch = None

    for i in range(len(batch)):
        b_img = batch[i]['image']
        b_mask = batch[i]['mask']
        b_top_and_bottom = batch[i]['top_and_bottom']
        b_center_line = batch[i]['center_line']
        l = batch[i]['label']
        #toPad = (dim3-b_img.shape[3])
        input_batch[i * a_batch_size:(i + 1) * a_batch_size, :, :,
                    0:b_img.shape[3]] = b_img
        mask_batch[i * a_batch_size:(i + 1) * a_batch_size, :, :,
                   0:b_img.shape[3]] = b_mask
        if 'fg_mask' in batch[i]:
            fg_masks[i * a_batch_size:(i + 1) * a_batch_size, :, :,
                     0:b_img.shape[3]] = batch[i]['fg_mask']
        if 'changed_image' in batch[i]:
            changed_batch[i * a_batch_size:(i + 1) * a_batch_size, :, :,
                          0:b_img.shape[3]] = batch[i]['changed_image']
        if b_top_and_bottom is not None:
            top_and_bottom_batch[i * a_batch_size:(i + 1) * a_batch_size, :,
                                 0:b_img.shape[3]] = b_top_and_bottom
        else:
            top_and_bottom_batch = None
        if b_center_line is not None:
            center_line_batch[i * a_batch_size:(i + 1) * a_batch_size,
                              0:b_img.shape[3]] = b_center_line
        else:
            center_line_batch = None
        labels_batch[0:l.size(0), i * a_batch_size:(i + 1) * a_batch_size] = l
        if max_spaced_label_len is not None:
            sl = batch[i]['spaced_label']
            spaced_labels_batch[0:sl.size(0),
                                i * a_batch_size:(i + 1) * a_batch_size] = sl

    if batch[0]['style'] is None:
        style = None
    else:
        style = torch.cat([b['style'] for b in batch], dim=0)

    toRet = {
        "image": input_batch,
        "mask": mask_batch,
        "top_and_bottom": top_and_bottom_batch,
        "center_line": center_line_batch,
        "label": labels_batch,
        "style": style,
        #"style": torch.cat([b['style'] for b in batch],dim=0),
        #"label_lengths": [l for b in batch for l in b['label_lengths']],
        "label_lengths": torch.cat([b['label_lengths'] for b in batch], dim=0),
        "gt": [l for b in batch for l in b['gt']],
        "spaced_label": spaced_labels_batch,
        "author": [l for b in batch for l in b['author']],
        "name": [l for b in batch for l in b['name']],
        "a_batch_size": a_batch_size
    }
    if 'fg_mask' in batch[0]:
        toRet['fg_mask'] = fg_masks
    if 'changed_image' in batch[0]:
        toRet['changed_image'] = changed_batch
    return toRet
Example #35
    def _fast_translate_batch(self,
                              batch,
                              data,
                              max_length,
                              min_length=0,
                              n_best=1,
                              return_attention=False):
        # TODO: faster code path for beam_size == 1.

        # TODO: support these blacklisted features.
        assert data.data_type == 'text'
        assert not self.copy_attn
        assert not self.dump_beam
        assert not self.use_filter_pred
        assert self.block_ngram_repeat == 0
        assert self.global_scorer.beta == 0

        beam_size = self.beam_size
        batch_size = batch.batch_size
        vocab = self.fields["tgt"].vocab
        start_token = vocab.stoi[inputters.BOS_WORD]
        end_token = vocab.stoi[inputters.EOS_WORD]

        # Encoder forward.
        src = inputters.make_features(batch, 'src', data.data_type)
        _, src_lengths = batch.src
        enc_states, memory_bank, src_lengths \
            = self.model.encoder(src, src_lengths)
        dec_states = self.model.decoder.init_decoder_state(src,
                                                           memory_bank,
                                                           enc_states,
                                                           with_cache=True)

        # Tile states and memory beam_size times.
        dec_states.map_batch_fn(
            lambda state, dim: tile(state, beam_size, dim=dim))

        if isinstance(memory_bank, tuple):
            device = memory_bank[0].device
            memory_bank = tuple(tile(m, beam_size, dim=1) for m in memory_bank)
        else:
            memory_bank = tile(memory_bank, beam_size, dim=1)
            device = memory_bank.device
        memory_lengths = tile(src_lengths, beam_size)

        top_beam_finished = torch.zeros([batch_size], dtype=torch.bool)
        batch_offset = torch.arange(batch_size, dtype=torch.long)

        beam_offset = torch.arange(0,
                                   batch_size * beam_size,
                                   step=beam_size,
                                   dtype=torch.long,
                                   device=device)
        alive_seq = torch.full([batch_size * beam_size, 1],
                               start_token,
                               dtype=torch.long,
                               device=device)
        alive_attn = None

        # Give full probability to the first beam on the first step.
        topk_log_probs = (torch.tensor([0.0] + [float("-inf")] *
                                       (beam_size - 1),
                                       device=device).repeat(batch_size))

        # Structure that holds finished hypotheses.
        hypotheses = [[] for _ in range(batch_size)]  # noqa: F812

        results = {}
        results["predictions"] = [[] for _ in range(batch_size)]  # noqa: F812
        results["scores"] = [[] for _ in range(batch_size)]  # noqa: F812
        results["attention"] = [[] for _ in range(batch_size)]  # noqa: F812
        results["gold_score"] = [0] * batch_size
        results["batch"] = batch

        if self.mask is not None:
            mask = self.mask.get_log_probs_masking_tensor(
                src.squeeze(2), beam_size).to(memory_bank.device)

        for step in range(max_length):
            decoder_input = alive_seq[:, -1].view(1, -1, 1)

            # Decoder forward.
            dec_out, dec_states, attn = self.model.decoder(
                decoder_input,
                memory_bank,
                dec_states,
                memory_lengths=memory_lengths,
                step=step)

            # Generator forward.
            log_probs = self.model.generator.forward(dec_out.squeeze(0))
            vocab_size = log_probs.size(-1)

            if step < min_length:
                log_probs[:, end_token] = -1e20

            if self.mask is not None:
                log_probs = log_probs * mask

            # Multiply probs by the beam probability.
            log_probs += topk_log_probs.view(-1).unsqueeze(1)

            alpha = self.global_scorer.alpha
            length_penalty = ((5.0 + (step + 1)) / 6.0)**alpha

            # Flatten probs into a list of possibilities.
            curr_scores = log_probs / length_penalty
            curr_scores = curr_scores.reshape(-1, beam_size * vocab_size)
            topk_scores, topk_ids = curr_scores.topk(beam_size, dim=-1)

            # Recover log probs.
            topk_log_probs = topk_scores * length_penalty

            # Resolve beam origin and true word ids.
            topk_beam_index = topk_ids // vocab_size
            topk_ids = topk_ids.fmod(vocab_size)

            # Map beam_index to batch_index in the flat representation.
            batch_index = (topk_beam_index +
                           beam_offset[:topk_beam_index.size(0)].unsqueeze(1))
            select_indices = batch_index.view(-1)

            # Append last prediction.
            alive_seq = torch.cat([
                alive_seq.index_select(0, select_indices),
                topk_ids.view(-1, 1)
            ], -1)
            if return_attention:
                current_attn = attn["std"].index_select(1, select_indices)
                if alive_attn is None:
                    alive_attn = current_attn
                else:
                    alive_attn = alive_attn.index_select(1, select_indices)
                    alive_attn = torch.cat([alive_attn, current_attn], 0)

            is_finished = topk_ids.eq(end_token)
            if step + 1 == max_length:
                is_finished.fill_(1)

            # Save finished hypotheses.
            if is_finished.any():
                # Penalize beams that finished.
                topk_log_probs.masked_fill_(is_finished, -1e10)
                is_finished = is_finished.to('cpu')
                top_beam_finished |= is_finished[:, 0].eq(1)

                predictions = alive_seq.view(-1, beam_size, alive_seq.size(-1))
                attention = (alive_attn.view(alive_attn.size(0), -1, beam_size,
                                             alive_attn.size(-1))
                             if alive_attn is not None else None)
                non_finished_batch = []
                for i in range(is_finished.size(0)):
                    b = batch_offset[i]
                    finished_hyp = is_finished[i].nonzero().view(-1)
                    # Store finished hypotheses for this batch.
                    for j in finished_hyp:
                        # if (predictions[i, j, 1:] == end_token).sum() <= 1:
                        hypotheses[b].append((
                            topk_scores[i, j],
                            predictions[i, j, 1:],  # Ignore start_token.
                            attention[:, i, j, :memory_lengths[i]]
                            if attention is not None else None))
                    # End condition is the top beam finished and we can return
                    # n_best hypotheses.
                    if top_beam_finished[i] and len(hypotheses[b]) >= n_best:
                        best_hyp = sorted(hypotheses[b],
                                          key=lambda x: x[0],
                                          reverse=True)
                        for n, (score, pred, attn) in enumerate(best_hyp):
                            if n >= n_best:
                                break
                            results["scores"][b].append(score)
                            results["predictions"][b].append(pred)
                            results["attention"][b].append(
                                attn if attn is not None else [])
                    else:
                        non_finished_batch.append(i)
                non_finished = torch.tensor(non_finished_batch)
                # If all sentences are translated, no need to go further.
                if len(non_finished) == 0:
                    break
                # Remove finished batches for the next step.
                top_beam_finished = top_beam_finished.index_select(
                    0, non_finished)
                batch_offset = batch_offset.index_select(0, non_finished)
                non_finished = non_finished.to(topk_ids.device)
                topk_log_probs = topk_log_probs.index_select(0, non_finished)
                batch_index = batch_index.index_select(0, non_finished)
                select_indices = batch_index.view(-1)
                alive_seq = predictions.index_select(0, non_finished) \
                    .view(-1, alive_seq.size(-1))
                if alive_attn is not None:
                    alive_attn = attention.index_select(1, non_finished) \
                        .view(alive_attn.size(0),
                              -1, alive_attn.size(-1))

            # Reorder states.
            if isinstance(memory_bank, tuple):
                memory_bank = tuple(
                    m.index_select(1, select_indices) for m in memory_bank)
            else:
                memory_bank = memory_bank.index_select(1, select_indices)
            memory_lengths = memory_lengths.index_select(0, select_indices)
            dec_states.map_batch_fn(
                lambda state, dim: state.index_select(dim, select_indices))

            if self.mask is not None:
                mask = mask.index_select(0, select_indices)

        return results
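
The scoring above divides by the GNMT-style length penalty ((5 + t) / 6)^alpha before topk and multiplies it back afterwards, so `topk_log_probs` stays a raw log-probability. A minimal self-contained illustration (the alpha value is an arbitrary assumption standing in for `global_scorer.alpha`):

import torch

alpha = 0.6                                        # assumed value
step = 4
length_penalty = ((5.0 + (step + 1)) / 6.0) ** alpha
log_probs = torch.tensor([[-1.2, -0.7, -3.0]])
curr_scores = log_probs / length_penalty           # normalized only for ranking
topk_scores, topk_ids = curr_scores.topk(2, dim=-1)
topk_log_probs = topk_scores * length_penalty      # undo the normalization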
Example #36
    def beam_search(
        self,
        encoder_output,
        beam_size,
        store_alphas=False,
        store_beam=False,
        print_beam=False,
    ):
        """Generate and return the top k sequences using beam search."""

        current_beam_width = beam_size

        enc_image_size = encoder_output.size(1)
        encoder_dim = encoder_output.size()[-1]

        # Flatten encoding
        encoder_output = encoder_output.view(1, -1, encoder_dim)

        # We'll treat the problem as having a batch size of k
        encoder_output = encoder_output.expand(
            beam_size, encoder_output.size(1), encoder_dim
        )

        # Tensor to store top k sequences; now they're just <start>
        top_k_sequences = torch.full(
            (beam_size, 1), self.word_map[TOKEN_START], dtype=torch.int64, device=device
        )

        # Tensor to store top k sequences' scores; now they're just 0
        top_k_scores = torch.zeros(beam_size, device=device)

        if store_alphas:
            # Tensor to store top k sequences' alphas; now they're just 1s
            seqs_alpha = torch.ones(beam_size, 1, enc_image_size, enc_image_size).to(
                device
            )

        # Lists to store completed sequences, scores, and alphas and the full decoding beam
        complete_seqs = []
        complete_seqs_alpha = []
        complete_seqs_scores = []
        beam = []

        # Initialize hidden states
        states = self.init_hidden_states(encoder_output)

        # Start decoding
        for step in range(0, self.params["max_caption_len"] - 1):
            prev_words = top_k_sequences[:, step]

            prev_word_embeddings = self.word_embedding(prev_words)
            predictions, states, alpha = self.forward_step(
                encoder_output, prev_word_embeddings, states
            )
            scores = F.log_softmax(predictions, dim=1)

            # Add the new scores
            scores = top_k_scores.unsqueeze(1).expand_as(scores) + scores

            # For the first timestep, the scores from previous decoding are all the same, so in order to create 5
            # different sequences, we should only look at one branch
            if step == 0:
                scores = scores[0]

            # Find the top k of the flattened scores
            top_k_scores, top_k_words = scores.view(-1).topk(
                current_beam_width, 0, largest=True, sorted=True
            )

            # Convert flattened indices to actual indices of scores
            prev_seq_inds = top_k_words // self.vocab_size  # (k)
            next_words = top_k_words % self.vocab_size  # (k)

            # Add new words to sequences
            top_k_sequences = torch.cat(
                (top_k_sequences[prev_seq_inds], next_words.unsqueeze(1)), dim=1
            )

            if print_beam:
                print_current_beam(top_k_sequences, top_k_scores, self.word_map)
            if store_beam:
                beam.append(top_k_sequences)

            # Store the new alphas
            if store_alphas:
                alpha = alpha.view(-1, enc_image_size, enc_image_size)
                seqs_alpha = torch.cat(
                    (seqs_alpha[prev_seq_inds], alpha[prev_seq_inds].unsqueeze(1)),
                    dim=1,
                )

            # Check for complete and incomplete sequences (based on the <end> token)
            incomplete_inds = (
                torch.nonzero(next_words != self.word_map[TOKEN_END]).view(-1).tolist()
            )
            complete_inds = (
                torch.nonzero(next_words == self.word_map[TOKEN_END]).view(-1).tolist()
            )

            # Set aside complete sequences and reduce beam size accordingly
            if len(complete_inds) > 0:
                complete_seqs.extend(top_k_sequences[complete_inds].tolist())
                complete_seqs_scores.extend(top_k_scores[complete_inds])
                if store_alphas:
                    complete_seqs_alpha.extend(seqs_alpha[complete_inds].tolist())

            # Stop if k captions have been completely generated
            current_beam_width = len(incomplete_inds)
            if current_beam_width == 0:
                break

            # Proceed with incomplete sequences
            top_k_sequences = top_k_sequences[incomplete_inds]
            for i in range(len(states)):
                states[i] = states[i][prev_seq_inds[incomplete_inds]]
            encoder_output = encoder_output[prev_seq_inds[incomplete_inds]]
            top_k_scores = top_k_scores[incomplete_inds]
            if store_alphas:
                seqs_alpha = seqs_alpha[incomplete_inds]

        if len(complete_seqs) < beam_size:
            complete_seqs.extend(top_k_sequences.tolist())
            complete_seqs_scores.extend(top_k_scores)
            if store_alphas:
                complete_seqs_alpha.extend(seqs_alpha)

        sorted_sequences = [
            sequence
            for _, sequence in sorted(
                zip(complete_seqs_scores, complete_seqs), reverse=True
            )
        ]
        sorted_alphas = None
        if store_alphas:
            sorted_alphas = [
                alpha
                for _, alpha in sorted(
                    zip(complete_seqs_scores, complete_seqs_alpha), reverse=True
                )
            ]
        return sorted_sequences, sorted_alphas, beam
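
Both beam searches above use the same flattening trick: topk over beam_size * vocab_size scores, then floor division to recover the originating beam and modulo to recover the token id. A minimal sketch with made-up sizes:

import torch

beam_size, vocab_size = 3, 10
scores = torch.randn(beam_size * vocab_size)      # flattened (beam, vocab) scores
top_scores, flat_ids = scores.topk(beam_size)
prev_beam = flat_ids // vocab_size                # which beam each winner extends
next_word = flat_ids % vocab_size                 # which token it appends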
Example #37
    def data2x(self, features, device, y):
        who = features['who']
        hand = features['hand']
        batch_size = hand.size()[0]
        cls_ids = torch.full((batch_size, 1),
                             self.cls_token_id,
                             dtype=torch.long,
                             device=device)
        sep_ids = torch.full((batch_size, 1),
                             self.sep_token_id,
                             dtype=torch.long,
                             device=device)

        x = torch.cat(
            [
                cls_ids,
                hand,  #14
                features['discards'][:, 0, :],
                features['discards'][:, 1, :],
                features['discards'][:, 2, :],
                features['discards'][:, 3, :],  # 100(25)
                features['melds'][0][:, 0],
                features['melds'][1][:, 0],
                features['melds'][2][:, 0],
                features['melds'][3][:, 0],  # 80(20)
                features['action_meld_tiles'],  # 4
                features['menzen'] + self.menzen_offset,
                features['reach_state'] + self.reach_state_offset,
                features['n_reach'] + self.n_reach_offset,
                features['reach_ippatsu'] + self.reach_ippatsu_offset,
                features['doras'],
                features['dans'] + self.dans_offset,
                features['rates'] + self.rates_offset,
                features['scores'] + self.scores_offset,
                features['oya'] + self.oya_offset,
                features['n_honba'] + self.n_honba_offset,
                features['n_round'] + self.n_round_offset,
                features['sanma_or_yonma'] + self.sanma_or_yonma_offset,
                features['han_or_ton'] + self.han_or_ton_offset,
                features['aka_ari'] + self.aka_ari_offset,
                features['kui_ari'] + self.kui_ari_offset,
                features['shanten'] + self.shanten_offset,
                features['who'] + self.who_offset,
                features['sum_discards'] + self.sum_discards_offset
            ],
            dim=1)

        hand_length = hand.size()[1]
        discard_length = features['discards'].size()[2]
        dora_length = features['doras'].size()[1]
        pad_token_type_ids = torch.full((batch_size, 1),
                                        self.pad_token_id,
                                        dtype=torch.long,
                                        device=device)

        def type_ids(width, token_id):
            # helper: a (batch_size, width) block filled with one token-type id
            return torch.full((batch_size, width),
                              token_id,
                              dtype=torch.long,
                              device=device)

        token_types = torch.cat(
            [
                pad_token_type_ids,
                type_ids(hand_length, self.hand_token_id),
                type_ids(discard_length, self.discard_0_token_id),
                type_ids(discard_length, self.discard_1_token_id),
                type_ids(discard_length, self.discard_2_token_id),
                type_ids(discard_length, self.discard_3_token_id),
                features['melds'][0][:, 1] + self.meld_0_base_token_id,
                features['melds'][1][:, 1] + self.meld_1_base_token_id,
                features['melds'][2][:, 1] + self.meld_2_base_token_id,
                features['melds'][3][:, 1] + self.meld_3_base_token_id,
                type_ids(4, self.action_meld_tiles_token_id),
                type_ids(1, self.menzen_0_token_id),
                type_ids(1, self.menzen_1_token_id),
                type_ids(1, self.menzen_2_token_id),
                type_ids(1, self.menzen_3_token_id),
                type_ids(1, self.reach_state_0_token_id),
                type_ids(1, self.reach_state_1_token_id),
                type_ids(1, self.reach_state_2_token_id),
                type_ids(1, self.reach_state_3_token_id),
                type_ids(1, self.n_reach_token_id),
                type_ids(1, self.reach_ippatsu_0_token_id),
                type_ids(1, self.reach_ippatsu_1_token_id),
                type_ids(1, self.reach_ippatsu_2_token_id),
                type_ids(1, self.reach_ippatsu_3_token_id),
                type_ids(dora_length, self.dora_token_id),
                type_ids(1, self.dans_0_token_id),
                type_ids(1, self.dans_1_token_id),
                type_ids(1, self.dans_2_token_id),
                type_ids(1, self.dans_3_token_id),
                type_ids(1, self.rates_0_token_id),
                type_ids(1, self.rates_1_token_id),
                type_ids(1, self.rates_2_token_id),
                type_ids(1, self.rates_3_token_id),
                type_ids(1, self.scores_0_token_id),
                type_ids(1, self.scores_1_token_id),
                type_ids(1, self.scores_2_token_id),
                type_ids(1, self.scores_3_token_id),
                type_ids(1, self.oya_token_id),
                type_ids(1, self.n_honba_token_id),
                type_ids(1, self.n_round_token_id),
                type_ids(1, self.sanma_or_yonma_token_id),
                type_ids(1, self.han_or_ton_token_id),
                type_ids(1, self.aka_ari_token_id),
                type_ids(1, self.kui_ari_token_id),
                type_ids(1, self.shanten_token_id),
                type_ids(1, self.who_token_id),
                type_ids(1, self.sum_discards_token_id)
            ],
            dim=1)

        cls_tokens = torch.tensor([self.tgt_cls_token_id] * batch_size,
                                  dtype=torch.long,
                                  device=device).reshape((batch_size, 1))
        tgt_ids = torch.cat([cls_tokens, y[:, :-1]], dim=1)
        tgt_ids[tgt_ids == -100] = self.tgt_pad_token_id

        return x, token_types, tgt_ids
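
The target construction at the end (prepend a CLS token, drop the last label, map the -100 ignore index to the pad id) is the usual decoder-input shift. A tiny illustration with arbitrary toy ids:

import torch

tgt_cls, tgt_pad = 101, 0                      # arbitrary ids for illustration
y = torch.tensor([[5, 7, -100, -100]])         # -100 marks ignored positions
cls_tokens = torch.full((y.size(0), 1), tgt_cls, dtype=torch.long)
tgt_ids = torch.cat([cls_tokens, y[:, :-1]], dim=1)
tgt_ids[tgt_ids == -100] = tgt_pad             # -> tensor([[101, 5, 7, 0]])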
Example #38
def add_lsqmodule(net, constr_weight):
    for name, module in net.named_modules():
        if isinstance(module, (Conv2d, Linear)):
            # initialize the quantization step from the mean absolute weight
            scale_init = torch.full((1, ), module.weight.abs().mean().item())
            module.wquantizer = LsqWeight(constraint=constr_weight,
                                          scale_init=scale_init.clone())
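
A hedged usage sketch for add_lsqmodule; the constraint object expected by LsqWeight is not shown in the snippet, so it is left as an opaque placeholder here:

import torch.nn as nn
from torch.nn import Conv2d, Linear

net = nn.Sequential(Conv2d(3, 16, 3), nn.ReLU(), Linear(16, 10))
add_lsqmodule(net, constr_weight=None)  # placeholder constraint
# every Conv2d/Linear in `net` now carries a `wquantizer` attribute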
Example #39
fake_label = 0

niter = 25
g_loss = []
d_loss = []

for epoch in range(niter):
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        # train with real
        netD.zero_grad()
        real_cpu = data[0].to(device)
        batch_size = real_cpu.size(0)
        label = torch.full((batch_size, ), real_label,
                           dtype=torch.float, device=device)

        output = netD(real_cpu)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # train with fake
        noise = torch.randn(batch_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
Example #40
    def forward(self, inputs, triples, lengths, elmo_embedding, id2_ids_batch):
        if self.args.pretrain_model_type == 'elmo':
            elmo_inputs = torch.Tensor().cuda()
            for i in range(len(inputs)):
                elmo_input = torch.from_numpy(elmo_embedding[' '.join(map(str, inputs[i].cpu().numpy()))].value).type(torch.cuda.FloatTensor)
                try:
                    elmo_inputs = torch.cat((elmo_inputs, elmo_input.unsqueeze(dim=0)))
                except RuntimeError:
                    # shape mismatch: truncate over-long ELMo sequences to 128 steps
                    elmo_inputs = torch.cat((elmo_inputs, elmo_input.unsqueeze(dim=0)[:, :128, :]), dim=0)
            inputs = elmo_inputs
        else:
            inputs = self.embedding(inputs)

        # Introducing external knowledge in different ways.
        t = torch.zeros(inputs.size(0), self.seq_length, self.input_dim + self.triples_embedding_dim).cuda()
        if self.args.concat_mode=="graph_attention":
            for i in range(len(inputs)):
                b = torch.full([self.seq_length, self.triples_number], -1, dtype=torch.long).cuda()
                bb = torch.zeros(self.seq_length, self.triples_embedding_dim).cuda()
                if (torch.equal(id2_ids_batch[i], b)):
                    t[i] = torch.cat((inputs[i], bb), dim=-1)
                else:
                    for k in range(len(id2_ids_batch[i])):
                        c = torch.full([self.triples_number], -1, dtype=torch.long).cuda()
                        cc = torch.zeros(self.triples_embedding_dim).cuda()
                        if (torch.equal(id2_ids_batch[i][k], c)):
                            t[i][k] = torch.cat((inputs[i][k], cc), dim=-1)
                        else:
                            list1 = torch.Tensor().cuda()
                            list2 = torch.Tensor().cuda()
                            head_id, tail_id, relation_id = torch.chunk(triples[i][k], 3, dim=1)
                            t2 = self.embeddings_entity(head_id).cuda()
                            t21 = self.embeddings_entity(tail_id).cuda()
                            t22 = self.embeddings_relation(relation_id).cuda()
                            head_tail = torch.cat((t2, t21), dim=2)
                            list1 = torch.cat((list1, head_tail), dim=0)
                            list2 = torch.cat((list2, t22), dim=0)
                            head_tail_transformed = self.entity_transformed(list1)
                            head_tail_transformed_final = torch.tanh(head_tail_transformed)
                            relation_transformed1 = torch.tanh(list2)
                            e_weight = (head_tail_transformed_final * relation_transformed1).sum(dim=2)
                            alpha_weight = F.softmax(e_weight, dim=0)
                            graph_embed = (alpha_weight.unsqueeze(1) * head_tail).sum(dim=0)
                            aa = torch.cat((inputs[i][k], graph_embed.squeeze(0)))
                            t[i][k] = aa
        else:
            for i in range(len(inputs)):
                fused = {}  # token index -> averaged (input + entity) vector
                b = torch.full([self.seq_length, self.triples_number], -1, dtype=torch.long).cuda()
                bb = torch.zeros(self.seq_length, self.triples_embedding_dim).cuda()
                if (torch.equal(id2_ids_batch[i], b)):
                    t[i] = torch.cat((inputs[i], bb), dim=-1)
                else:
                    for k in range(len(id2_ids_batch[i])):
                        a = 0
                        input = torch.Tensor().cuda()
                        c = torch.full([self.triples_number], -1, dtype=torch.long).cuda()
                        cc = torch.zeros(self.triples_embedding_dim).cuda()
                        if (torch.equal(id2_ids_batch[i][k], c)):
                            t[i][k] = torch.cat((inputs[i][k], cc), dim=-1)
                        else:
                            for j in range(len(id2_ids_batch[i][k])):
                                if id2_ids_batch[i][k][j].cpu().numpy() == 1:
                                    inputs_triples = torch.cat(
                                        (inputs[i][k], self.embeddings_entity(triples[i][k][j][1])))
                                elif id2_ids_batch[i][k][j].cpu().numpy() == 2:
                                    inputs_triples = torch.cat(
                                        (inputs[i][k], self.embeddings_entity(triples[i][k][j][0])))
                                else:
                                    continue

                                if a == 0:
                                    a = a + 1
                                    input = torch.cat((inputs_triples, input))
                                else:
                                    a = a + 1
                                    input = input + inputs_triples

                        if a != 0:
                            input = input / a
                            fused[k] = input

                    for k in fused:
                        t[i][k] = fused[k]


        # 1. input
        embedded_input = self.dropout_on_input_to_LSTM(t)
        (sorted_input, sorted_lengths, input_unsort_indices, _) = sort_batch_by_length(embedded_input, lengths)
        packed_input = pack_padded_sequence(sorted_input, sorted_lengths.data.tolist(), batch_first=True)
        packed_sorted_output, _ = self.rnn(packed_input)
        sorted_output, _ = pad_packed_sequence(packed_sorted_output, batch_first=True)
        output = sorted_output[input_unsort_indices]

        # 2. use attention
        if self.args.attention_layer == 'att':
            attention_logits = self.attention_weights(output).squeeze(-1)
            mask_attention_logits = (attention_logits != 0).type(
                torch.cuda.FloatTensor if inputs.is_cuda else torch.FloatTensor)
            softmax_attention_logits = last_dim_softmax(attention_logits, mask_attention_logits)
            softmax_attention_logits0 = softmax_attention_logits.unsqueeze(dim=1)
            input_encoding = torch.bmm(softmax_attention_logits0, output)
            input_encoding0 = input_encoding.squeeze(dim=1)
        else:
            input_encoding = torch.Tensor().cuda()
            querys = self.query_embedding(torch.arange(0,self.args.num_classes,1).cuda())
            attention_weights = torch.Tensor(self.args.num_classes, len(output), len(output[0])).cuda()
            for i in range(self.args.num_classes):
                attention_logits = self.proquery_weights_mp(output)
                attention_logits = torch.bmm(attention_logits, querys[i].unsqueeze(dim=1).repeat(len(output),1,1)).squeeze(dim=-1)
                mask_attention_logits = (attention_logits != 0).type(
                    torch.cuda.FloatTensor if inputs.is_cuda else torch.FloatTensor)
                softmax_attention_logits = last_dim_softmax(attention_logits, mask_attention_logits)
                input_encoding_part = torch.bmm(softmax_attention_logits.unsqueeze(dim=1), output)
                input_encoding = torch.cat((input_encoding,input_encoding_part.squeeze(dim=1)), dim=-1)
                attention_weights[i] = softmax_attention_logits

        # 3. run linear layer
        if self.args.attention_layer == 'att':
            input_encodings = self.dropout_on_input_to_linear_layer(input_encoding0)
            unattized_output = self.output_projection(input_encodings)
            output_distribution = F.log_softmax(unattized_output, dim=-1)
            return output_distribution, softmax_attention_logits.squeeze(dim=1)
        else:
            input_encodings = self.dropout_on_input_to_linear_layer(input_encoding)
            unattized_output = self.multi_output_projection(input_encodings)
            output_distribution = F.log_softmax(unattized_output, dim=-1)
            cos = torch.nn.CosineSimilarity(dim=0, eps=1e-16)
            attention_loss = abs(cos(querys[0], querys[1])) + abs(cos(querys[1], querys[2])) \
                                                            + abs(cos(querys[0], querys[2]))
            return output_distribution, attention_weights, attention_loss
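
The graph_attention branch above scores each triple as tanh(W[h;t]) . tanh(r), softmax-normalizes over triples, and mixes the [h;t] embeddings with those weights. A compact, self-contained restatement of just that aggregation (all dimensions are arbitrary):

import torch
import torch.nn.functional as F

n_triples, ent_dim = 5, 8
head_tail = torch.randn(n_triples, 1, 2 * ent_dim)        # [h;t] per triple
relation = torch.randn(n_triples, 1, 2 * ent_dim)
entity_transformed = torch.nn.Linear(2 * ent_dim, 2 * ent_dim)

e = (torch.tanh(entity_transformed(head_tail)) * torch.tanh(relation)).sum(dim=2)
alpha = F.softmax(e, dim=0)                               # one weight per triple
graph_embed = (alpha.unsqueeze(1) * head_tail).sum(dim=0) # (1, 2 * ent_dim)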
Example #41
    def forward(
        self, images: List[Tensor], targets: Optional[List[Dict[str, Tensor]]] = None
    ) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]:
        if self.training:
            if targets is None:
                torch._assert(False, "targets should not be none when in training mode")
            else:
                for target in targets:
                    boxes = target["boxes"]
                    if isinstance(boxes, torch.Tensor):
                        torch._assert(
                            len(boxes.shape) == 2 and boxes.shape[-1] == 4,
                            f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.",
                        )
                    else:
                        torch._assert(False, f"Expected target boxes to be of type Tensor, got {type(boxes)}.")

        # get the original image sizes
        original_image_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            torch._assert(
                len(val) == 2,
                f"expecting the last two dimensions of the Tensor to be H and W instead got {img.shape[-2:]}",
            )
            original_image_sizes.append((val[0], val[1]))

        # transform the input
        images, targets = self.transform(images, targets)

        # Check for degenerate boxes
        if targets is not None:
            for target_idx, target in enumerate(targets):
                boxes = target["boxes"]
                degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
                if degenerate_boxes.any():
                    bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                    degen_bb: List[float] = boxes[bb_idx].tolist()
                    torch._assert(
                        False,
                        "All bounding boxes should have positive height and width."
                        f" Found invalid box {degen_bb} for target at index {target_idx}.",
                    )

        # get the features from the backbone
        features = self.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([("0", features)])

        features = list(features.values())

        # compute the ssd heads outputs using the features
        head_outputs = self.head(features)

        # create the set of anchors
        anchors = self.anchor_generator(images, features)

        losses = {}
        detections: List[Dict[str, Tensor]] = []
        if self.training:
            matched_idxs = []
            if targets is None:
                torch._assert(False, "targets should not be none when in training mode")
            else:
                for anchors_per_image, targets_per_image in zip(anchors, targets):
                    if targets_per_image["boxes"].numel() == 0:
                        matched_idxs.append(
                            torch.full(
                                (anchors_per_image.size(0),), -1, dtype=torch.int64, device=anchors_per_image.device
                            )
                        )
                        continue

                    match_quality_matrix = box_ops.box_iou(targets_per_image["boxes"], anchors_per_image)
                    matched_idxs.append(self.proposal_matcher(match_quality_matrix))

                losses = self.compute_loss(targets, head_outputs, anchors, matched_idxs)
        else:
            detections = self.postprocess_detections(head_outputs, anchors, images.image_sizes)
            detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)

        if torch.jit.is_scripting():
            if not self._has_warned:
                warnings.warn("SSD always returns a (Losses, Detections) tuple in scripting")
                self._has_warned = True
            return losses, detections
        return self.eager_outputs(losses, detections)
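
In the training branch, an image with no ground-truth boxes gets a match index of -1 for every anchor, which the loss treats as background. A minimal illustration of that sentinel fill:

import torch

num_anchors = 4
matched_idxs = torch.full((num_anchors,), -1, dtype=torch.int64)  # all background
# when boxes exist, the matcher overwrites entries with box indices >= 0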
Example #42
 def forward(self, int_fill: int):
     size = torch.Size((2, 2))
     a = torch.full(size, int_fill)
     b = torch.full(size, 1)
     return (a, b)
Example #43
step = 0

print("<---------Training Images------------>")
plot_imgs(device, batch_size, img_size, data_root)

print("<---------Start Training------------>")
for epoch in range(epochs):
    for i, data in enumerate(dataloader, 0):

        #---------------TRAIN D-----------------#
        # train real imgs
        netD.zero_grad()  # clear gradients
        real_img = data[0].to(device)
        b_size = real_img.size(0)
        label = torch.full((b_size, ),
                           real_label,
                           dtype=torch.float,
                           device=device)
        output = netD(real_img).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # train fake imgs
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
Example #44
    def nucleus_sampling(self, encoder_output, beam_size, top_p, print_beam=False):
        """Generate and return the top k sequences using nucleus sampling."""

        current_beam_width = beam_size

        encoder_dim = encoder_output.size()[-1]

        # Flatten encoding
        encoder_output = encoder_output.view(1, -1, encoder_dim)

        # We'll treat the problem as having a batch size of k
        encoder_output = encoder_output.expand(
            beam_size, encoder_output.size(1), encoder_dim
        )

        # Tensor to store top k sequences; now they're just <start>
        top_k_sequences = torch.full(
            (beam_size, 1), self.word_map[TOKEN_START], dtype=torch.int64, device=device
        )

        # Tensor to store top k sequences' scores; now they're just 0
        top_k_scores = torch.zeros(beam_size, device=device)

        # Lists to store completed sequences, scores, and alphas and the full decoding beam
        complete_seqs = []
        complete_seqs_scores = []

        # Initialize hidden states
        states = self.init_hidden_states(encoder_output)

        # Start decoding
        for step in range(0, self.params["max_caption_len"] - 1):
            prev_words = top_k_sequences[:, step]

            prev_word_embeddings = self.word_embedding(prev_words)
            predictions, states, alpha = self.forward_step(
                encoder_output, prev_word_embeddings, states
            )
            scores = F.log_softmax(predictions, dim=1)

            sorted_logits, sorted_indices = torch.sort(scores, descending=True, dim=-1)
            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

            # Remove tokens with cumulative probability above the threshold
            sorted_indices_to_remove = cumulative_probs > top_p
            # Shift the indices to the right to keep also the first token above the threshold
            sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[
                ..., :-1
            ].clone()
            sorted_indices_to_remove[..., 0] = 0

            top_k_scores = torch.zeros(
                current_beam_width, dtype=torch.float, device=device
            )
            top_k_words = torch.zeros(
                current_beam_width, dtype=torch.long, device=device
            )

            for i in range(0, current_beam_width):
                scores[i][sorted_indices[i][sorted_indices_to_remove[i]]] = -float(
                    "inf"
                )

                # Sample from the scores
                top_k_words[i] = torch.multinomial(torch.softmax(scores[i], -1), 1)
                top_k_scores[i] = scores[i][top_k_words[i]]

            # Add new words to sequences
            top_k_sequences = torch.cat(
                (top_k_sequences, top_k_words.unsqueeze(1)), dim=1
            )

            if print_beam:
                print_current_beam(top_k_sequences, top_k_scores, self.word_map)

            # Check for complete and incomplete sequences (based on the <end> token)
            incomplete_inds = (
                torch.nonzero(top_k_words != self.word_map[TOKEN_END]).view(-1).tolist()
            )
            complete_inds = (
                torch.nonzero(top_k_words == self.word_map[TOKEN_END]).view(-1).tolist()
            )

            # Set aside complete sequences and reduce beam size accordingly
            if len(complete_inds) > 0:
                complete_seqs.extend(top_k_sequences[complete_inds].tolist())
                complete_seqs_scores.extend(top_k_scores[complete_inds])

            # Stop if k captions have been completely generated
            current_beam_width = len(incomplete_inds)
            if current_beam_width == 0:
                break

            # Proceed with incomplete sequences
            top_k_sequences = top_k_sequences[incomplete_inds]
            for i in range(len(states)):
                states[i] = states[i][incomplete_inds]
            encoder_output = encoder_output[incomplete_inds]
            top_k_scores = top_k_scores[incomplete_inds]

        if len(complete_seqs) < beam_size:
            complete_seqs.extend(top_k_sequences.tolist())
            complete_seqs_scores.extend(top_k_scores)

        sorted_sequences = [
            sequence
            for _, sequence in sorted(
                zip(complete_seqs_scores, complete_seqs), reverse=True
            )
        ]
        return sorted_sequences, None, None
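
The top-p filter above, isolated into a runnable sketch: sort the scores, take the cumulative softmax, mask everything past the threshold (shifted right so the first token crossing it survives), then sample from what remains:

import torch
import torch.nn.functional as F

logits = torch.randn(10)
top_p = 0.9
sorted_logits, sorted_indices = torch.sort(logits, descending=True)
cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
remove = cumulative_probs > top_p
remove[1:] = remove[:-1].clone()   # keep the first token above the threshold
remove[0] = False
logits[sorted_indices[remove]] = -float("inf")
token = torch.multinomial(torch.softmax(logits, -1), 1)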
Example #45
def mvae(mix, model, n_iter, device, proj_back=True, return_sigma=False):
    """Implementation of Multichannel Conditional VAE.
    It only works in the determined case (n_sources == n_channels).

    Args:
        mix (ndarray): (n_frequencies, n_channels, n_frames)
            STFT representation of the observed signal.
        model (cvae.CVAE): Trained Conditional VAE model.
        n_iter (int): Number of iterations.
        device (torch.device): Device used for computation.
        proj_back (bool): If use back-projection technique.
        return_sigma (bool): If also return estimated power spectrogram for
            each speaker.

    Returns:
        tuple[ndarray, ndarray]: Tuple of separated signal and
            separation matrix. The shapes of separated signal and separation
            matrix are (n_frequencies, n_sources, n_frames) and
            (n_frequencies, n_sources, n_channels), respectively. If
            `return_sigma` is True, the estimated power spectrograms are
            returned as a third element.
    """
    if isinstance(mix, np.ndarray):
        if_use_cuda = False
        xp = np
    elif isinstance(mix, cp.ndarray):
        if_use_cuda = True
        xp = cp
    else:
        raise ValueError('A numpy.ndarray or cupy.ndarray instance should be '
                         'given as `mix` argument')

    n_freq, n_src, n_frame = mix.shape

    sep, sep_mat = ilrma(mix, n_iter=30, n_basis=2)
    sep_pow = xp.power(xp.abs(sep), 2)  # (n_freq, n_src, n_frame)
    c = torch.full((n_src, model.n_speakers), 1 / model.n_speakers,
                   device=device, requires_grad=True)
    log_g = torch.full((n_src, 1, 1), model.log_g.item(), device=device)

    with torch.no_grad():
        if if_use_cuda:
            sep_pow_tensor = to_tensor(sep_pow).transpose(0, 1)
        else:
            sep_pow_tensor =\
                torch.from_numpy(sep_pow).transpose(0, 1).to(device)
        sep_pow_tensor.clamp_(EPS)
        z, _ = model.encode(sep_pow_tensor, c)
        sigma_sq = (model.decode(z, c) + log_g).exp()
        sigma_sq.clamp_(min=EPS)
        sigma_reci = 1 / sigma_sq
        if if_use_cuda:
            sigma_reci = to_cupy(sigma_reci)
        else:
            sigma_reci = sigma_reci.numpy()

    z.requires_grad = True

    eye = xp.tile(xp.eye(n_src), (n_freq, 1, 1))

    for _ in range(n_iter):
        for src in range(n_src):
            h = sigma_reci[src, :, :, None] @ xp.ones((1, n_src))
            h = mix.conj() @ (mix.swapaxes(1, 2) * h)
            u_mat = h.swapaxes(1, 2) / n_frame
            h = sep_mat @ u_mat + EPS * eye
            sep_mat[:, src, :] = xp.linalg.solve(h, eye[:, :, src]).conj()
            h = sep_mat[:, src, None, :] @ u_mat
            h = (h @ sep_mat[:, src, :, None].conj()).squeeze(2)
            sep_mat[:, src, :] = (sep_mat[:, src, :] / xp.sqrt(h).conj())

        sep = sep_mat @ mix
        xp.power(xp.abs(sep), 2, out=sep_pow)
        xp.clip(sep_pow, a_min=EPS, a_max=None, out=sep_pow)

        optimizer = torch.optim.Adam((z, c), lr=1e-3)
        if if_use_cuda:
            sep_pow_tensor = to_tensor(sep_pow).transpose(0, 1)
        else:
            sep_pow_tensor = \
                torch.from_numpy(sep_pow).transpose(0, 1).to(device)
        for _ in range(50):
            log_sigma_sq = model.decode(z, torch.softmax(c, dim=1)) + log_g
            loss = torch.sum(
                log_sigma_sq + (sep_pow_tensor.log() - log_sigma_sq).exp())
            model.zero_grad()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            sigma_sq = (model.decode(z, torch.softmax(c, dim=1)) + log_g).exp()
            lbd = torch.sum(sep_pow_tensor / sigma_sq, dim=(1, 2))
            lbd = lbd / n_freq / n_frame / log_g.squeeze(2).squeeze(1).exp()
            log_g[:, 0, 0] += torch.log(lbd)
            sigma_sq *= lbd.unsqueeze(1).unsqueeze(2)
            if if_use_cuda:
                sep_mat *= to_cupy(lbd.unsqueeze(0).unsqueeze(2))
                sigma_reci = to_cupy(1 / sigma_sq)
            else:
                sep_mat *= lbd.unsqueeze(0).unsqueeze(2).numpy()
                sigma_reci = (1 / sigma_sq).numpy()

    # Back-projection technique
    if proj_back:
        z = projection_back(sep, mix[:, 0, :])
        sep *= xp.conj(z[:, :, None])

    if return_sigma:
        return sep, sep_mat, sigma_sq.cpu().numpy()
    else:
        return sep, sep_mat
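
A hedged sketch of calling mvae; the mixture shape follows the docstring, while `model` is a placeholder that would have to be a trained cvae.CVAE loaded elsewhere:

import numpy as np
import torch

device = torch.device('cpu')
# (n_frequencies, n_channels, n_frames) complex STFT, random for illustration
mix = np.random.randn(513, 2, 128) + 1j * np.random.randn(513, 2, 128)
# model = ...  # a trained cvae.CVAE must be provided here
sep, sep_mat = mvae(mix, model, n_iter=40, device=device, proj_back=True)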
Example #46
    def forward(self, encoder_output, target_captions=None, decode_lengths=None):
        """
        Forward propagation.

        :param encoder_output: output features of the encoder
        :param target_captions: encoded target captions, shape: (batch_size, max_caption_length)
        :param decode_lengths: caption lengths, shape: (batch_size, 1)
        :return: scores for vocabulary, decode lengths, weights
        """

        batch_size = encoder_output.size(0)

        # Flatten image
        encoder_output = encoder_output.view(batch_size, -1, encoder_output.size(-1))

        if not self.training:
            decode_lengths = torch.full(
                (batch_size,),
                self.params["max_caption_len"],
                dtype=torch.int64,
                device=device,
            )

        # Initialize LSTM state
        states = self.init_hidden_states(encoder_output)

        # Tensors to hold word prediction scores and alphas
        scores = torch.zeros(
            (batch_size, max(decode_lengths), self.vocab_size), device=device
        )
        alphas = torch.zeros(
            batch_size, max(decode_lengths), encoder_output.size(1), device=device
        )

        # At the start, all 'previous words' are the <start> token
        prev_words = torch.full(
            (batch_size,), self.word_map[TOKEN_START], dtype=torch.int64, device=device
        )

        for t in range(max(decode_lengths)):
            if not self.training:
                # Find all sequences where an <end> token has been produced in the last timestep
                ind_end_token = (
                    torch.nonzero(prev_words == self.word_map[TOKEN_END])
                    .view(-1)
                    .tolist()
                )

                # Update the decode lengths accordingly
                decode_lengths[ind_end_token] = torch.min(
                    decode_lengths[ind_end_token],
                    torch.full_like(decode_lengths[ind_end_token], t, device=device),
                )

            # Check if all sequences are finished:
            indices_incomplete_sequences = torch.nonzero(decode_lengths > t).view(-1)
            if len(indices_incomplete_sequences) == 0:
                break

            prev_words_embedded = self.word_embedding(prev_words)
            scores_for_timestep, states, alphas_for_timestep = self.forward_step(
                encoder_output, prev_words_embedded, states
            )

            # Update the previously predicted words
            prev_words = self.update_previous_word(
                scores_for_timestep, target_captions, t
            )

            scores[indices_incomplete_sequences, t, :] = scores_for_timestep[
                indices_incomplete_sequences
            ]
            if alphas_for_timestep is not None:
                alphas[indices_incomplete_sequences, t, :] = alphas_for_timestep[
                    indices_incomplete_sequences
                ]

        return scores, decode_lengths, alphas
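
The helper update_previous_word is not shown. A plausible implementation, under the common convention of teacher forcing during training and greedy decoding otherwise (this is an assumption, not the snippet's code):

    def update_previous_word(self, scores, target_captions, t):
        # assumed: feed the gold token while training, else the argmax prediction
        if self.training and target_captions is not None:
            return target_captions[:, t + 1]
        return scores.argmax(dim=1)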
Example #47
    for i, data in enumerate(dataloader):
        niter = epoch * len(dataloader) + i

        # Save just first batch of real data for displaying
        if i == 0:
            real_display = data.cpu()

        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################

        # Train with real data
        netD.zero_grad()
        real = data.to(device)
        batch_size, seq_len = real.size(0), real.size(1)
        label = torch.full((batch_size, seq_len, 1), real_label,
                           dtype=torch.float, device=device)

        # real = real.type(torch.DoubleTensor)
        output = netD(real)
        # .type(torch.DoubleTensor)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        # Train with fake data
        noise = torch.randn(batch_size, seq_len, nz, device=device)
        if opt.delta_condition:
            # Sample a delta for each batch and concatenate to the noise for each timestep
            deltas = dataset.sample_deltas(batch_size).unsqueeze(2).repeat(
                1, seq_len, 1)
            noise = torch.cat((noise, deltas), dim=2)
Example #48
 def generate_square_subsequent_mask(sz: int) -> Tensor:
     r"""Generate a square mask for the sequence. The masked positions are filled with float('-inf').
         Unmasked positions are filled with float(0.0).
     """
     return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)
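
A quick check of the mask, assuming the generator is callable as a plain function (the indentation suggests a method or staticmethod): position (i, j) is -inf exactly when j > i, so each step attends only to itself and earlier positions:

mask = generate_square_subsequent_mask(3)
# tensor([[0., -inf, -inf],
#         [0., 0., -inf],
#         [0., 0., 0.]])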
Example #49
def main(noise_factor, data, gan_model):

    ############################
    result_dir = './gan_mnist/' + gan_model + data + str(noise_factor)
    BATCH_SIZE = 64
    WORKERS = 2
    NGPU = 1

    Z_dim = 100
    X_dim = 784
    Img_dim = 28

    LR = 0.0002
    N_EPOCHS = 200
    ###########################

    transform = transforms.Compose([transforms.ToTensor()])
    #                transforms.Normalize([0.5], [0.5])])

    dataset_class = getattr(torchvision.datasets, data)
    trainset = dataset_class(root='./data',
                             train=True,
                             download=True,
                             transform=transform)

    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=True,
                                              num_workers=WORKERS)

    # Decide which device we want to run on
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and NGPU > 0) else "cpu")

    netG = Gen(NGPU).to(device)
    netD = Dis(NGPU).to(device)
    # Handle multi-gpu if desired
    if (device.type == 'cuda') and (NGPU > 1):
        netG = nn.DataParallel(netG, list(range(NGPU)))
        netD = nn.DataParallel(netD, list(range(NGPU)))

    print(netG)
    print(netD)
    print(device)

    criterion = nn.BCEWithLogitsLoss()
    sig = nn.Sigmoid()
    # Create batch of latent vectors that we will use to visualize
    # the result of the generator
    fixed_noise = torch.randn(64, Z_dim, device=device)
    # Establish convention for real and fake labels
    real_label = 1
    fake_label = 0

    # Setup Adam optimizers
    optimizerD = optim.Adam(netD.parameters(), lr=LR)
    optimizerG = optim.Adam(netG.parameters(), lr=LR)

    # Training Loop

    # Lists to keep track of progress
    G_losses = []
    D_losses = []

    # results save folder
    if not os.path.isdir(result_dir):
        os.makedirs(result_dir)  # create parents ('./gan_mnist/') as needed

    print("Starting Training Loop...")
    # For each epoch
    for epoch in range(N_EPOCHS):
        # For each batch
        for i, data in enumerate(trainloader):
            ############################
            # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
            ###########################
            ## Train with all-real batch
            netD.zero_grad()
            # Format batch
            real = data[0].to(device)
            b_size = real.size(0)
            label = torch.full((b_size, ), real_label,
                               dtype=torch.float, device=device)
            # Forward pass real batch through D
            real = real.view(-1, X_dim)
            real = salt_and_pepper(real, device, p=noise_factor).to(device)
            output = netD(real).view(-1)
            # Calculate loss on all-real batch
            errD_real = criterion(output, label)
            # Calculate gradients for D in backward pass
            errD_real.backward()
            output = sig(output)
            D_x = output.mean().item()

            ## Train with all-fake batch
            # Generate batch of latent vectors
            noise = torch.randn(b_size, Z_dim, device=device)
            # Generate fake image batch with G
            fake = netG(noise)
            label.fill_(fake_label)
            # Classify all fake batch with D
            # Detach to avoid training G on these labels (&save time)
            output = netD(fake.detach()).view(-1)
            # Calculate D's loss on the all-fake batch
            errD_fake = criterion(output, label)
            # Calculate the gradients for this batch
            errD_fake.backward()
            output = sig(output)
            D_G_z1 = output.mean().item()
            # Add the gradients from the all-real and all-fake batches
            errD = errD_real + errD_fake
            # Update D
            optimizerD.step()

            ############################
            # (2) Update G network: maximize log(D(G(z)))
            ###########################
            netG.zero_grad()
            label.fill_(real_label)  # fake labels are real for generator cost
            # Since we just updated D, perform another forward pass of all-fake batch through D
            output = netD(fake).view(-1)
            # Calculate G's loss based on this output
            errG = criterion(output, label)
            # Calculate gradients for G
            errG.backward()
            output = sig(output)
            D_G_z2 = output.mean().item()
            # Update G
            optimizerG.step()

            # Output training stats
            if i % 1000 == 0:
                print(
                    f'[{epoch}/{N_EPOCHS}], {i}, {len(trainloader)}, Loss_D: {errD.item()}, '
                    f'Loss_G: {errG.item()}, D(x): {D_x}, D(G(z)): {D_G_z1}/{D_G_z2}'
                )

            # Save Losses for plotting later
            G_losses.append(errG.item())
            D_losses.append(errD.item())

            # Check how the generator is doing by saving G's output on
            # fixed_noise; this fires once per epoch, at batch index len % 10
            if i == len(trainloader) % 10:
                with torch.no_grad():
                    fake = netG(fixed_noise).detach().cpu()
                    np.save(result_dir + '/' + str(epoch), fake.numpy())

    showloss(G_losses, D_losses, result_dir)
    #     imshow(torch.reshape(fake, (64, 1, Img_dim, Img_dim)), result_dir)
    # result for FID
    z_ = torch.randn(10000, Z_dim, device=device)  # 10,000 samples for the FID score
    z_fid = netG(z_).detach().cpu()
    np.save(result_dir + '/result4FID', z_fid.numpy())
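The training loop above calls a salt_and_pepper helper that is not part of the snippet. A minimal sketch of what such a helper could look like, assuming p is the total corruption probability, split evenly between black and white pixels:

def salt_and_pepper(x, device, p=0.1):
    # corrupt a fraction p of the entries: half set to 0 ("pepper"), half to 1 ("salt")
    mask = torch.rand(x.shape, device=device)
    noisy = x.clone()
    noisy[mask < p / 2] = 0.0
    noisy[(mask >= p / 2) & (mask < p)] = 1.0
    return noisy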
Beispiel #50
0
def dcgan(dat, netG, netD, args):
    device = args.device
    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        criterion.cuda()
        criterion_mse.cuda()
    X_training = dat['X_train'].to(device)
    fixed_noise = torch.randn(args.num_gen_images,
                              args.nz,
                              1,
                              1,
                              device=device)
    optimizerD = optim.Adam(netD.parameters(),
                            lr=args.lrD,
                            betas=(args.beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=args.lrG,
                            betas=(args.beta1, 0.999))

    for epoch in range(1, args.epochs + 1):
        for i in range(0, len(X_training), args.batchSize):
            netD.zero_grad()
            stop = min(args.batchSize, len(X_training[i:]))
            real_cpu = X_training[i:i + stop].to(device)

            batch_size = real_cpu.size(0)
            label = torch.full((batch_size, ), real_label, dtype=torch.float, device=device)

            output = netD(real_cpu)
            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item()

            # train with fake
            noise = torch.randn(batch_size, args.nz, 1, 1, device=device)
            fake = netG(noise)
            label.fill_(fake_label)
            output = netD(fake.detach())
            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            errD = errD_real + errD_fake
            optimizerD.step()

            # (2) Update G network: maximize log(D(G(z)))

            netG.zero_grad()
            label.fill_(real_label)
            output = netD(fake)
            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()
            optimizerG.step()

            ## log performance
            if i % args.log == 0:
                print(
                    'Epoch [%d/%d] .. Batch [%d/%d] .. Loss_D: %.4f .. Loss_G: %.4f .. D(x): %.4f .. D(G(z)): %.4f / %.4f'
                    % (epoch, args.epochs, i, len(X_training), errD.item(),
                       errG.item(), D_x, D_G_z1, D_G_z2))

        print('*' * 100)
        print('End of epoch {}'.format(epoch))
        print('*' * 100)

        if epoch % args.save_imgs_every == 0:
            fake = netG(fixed_noise).detach()
            vutils.save_image(fake,
                              '%s/dcgan_%s_fake_epoch_%03d.png' %
                              (args.results_folder, args.dataset, epoch),
                              normalize=True,
                              nrow=20)

        if epoch % args.save_ckpt_every == 0:
            torch.save(
                netG.state_dict(),
                os.path.join(
                    args.results_folder,
                    'netG_dcgan_%s_epoch_%s.pth' % (args.dataset, epoch)))
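dcgan above (and presgan further down) reference module-level names that the snippets do not define: criterion, criterion_mse, real_label, fake_label. A plausible setup, assuming netD ends in a sigmoid so that plain BCE applies (an assumption, not the original code):

criterion = nn.BCELoss()      # assumes netD outputs probabilities
criterion_mse = nn.MSELoss()  # moved to GPU above, unused on this path
real_label = 1.0
fake_label = 0.0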
Beispiel #51
0
def beam_search(
        decoder: Decoder,
        size: int,
        bos_index: int, eos_index: int, pad_index: int,
        encoder_output: Tensor, encoder_hidden: Tensor,
        src_mask: Tensor, max_output_length: int, alpha: float,
        embed: Embeddings, n_best: int = 1) -> (np.array, np.array):
    """
    Beam search with size k.
    Inspired by OpenNMT-py, adapted for Transformer.

    In each decoding step, find the k most likely partial hypotheses.

    :param decoder:
    :param size: size of the beam
    :param bos_index:
    :param eos_index:
    :param pad_index:
    :param encoder_output:
    :param encoder_hidden:
    :param src_mask:
    :param max_output_length:
    :param alpha: `alpha` factor for length penalty
    :param embed:
    :param n_best: return this many hypotheses, <= beam (currently only 1)
    :return:
        - stacked_output: output hypotheses (2d array of indices),
        - stacked_attention_scores: attention scores (3d array)
    """
    assert size > 0, 'Beam size must be >0.'
    assert n_best <= size, 'Can only return {} best hypotheses.'.format(size)

    # init
    transformer = isinstance(decoder, TransformerDecoder)
    batch_size = src_mask.size(0)
    att_vectors = None  # not used for Transformer

    # Recurrent models only: initialize RNN hidden state
    # pylint: disable=protected-access
    if not transformer:
        hidden = decoder._init_hidden(encoder_hidden)
    else:
        hidden = None

    # tile encoder states and decoder initial states beam_size times
    if hidden is not None:
        hidden = tile(hidden, size, dim=1)  # layers x batch*k x dec_hidden_size

    encoder_output = tile(encoder_output.contiguous(), size,
                          dim=0)  # batch*k x src_len x enc_hidden_size
    src_mask = tile(src_mask, size, dim=0)  # batch*k x 1 x src_len

    # Transformer only: create target mask
    if transformer:
        trg_mask = src_mask.new_ones([1, 1, 1])  # transformer only
    else:
        trg_mask = None

    # numbering elements in the batch
    batch_offset = torch.arange(
        batch_size, dtype=torch.long, device=encoder_output.device)

    # numbering elements in the extended batch, i.e. beam size copies of each
    # batch element
    beam_offset = torch.arange(
        0,
        batch_size * size,
        step=size,
        dtype=torch.long,
        device=encoder_output.device)

    # keeps track of the top beam size hypotheses to expand for each element
    # in the batch to be further decoded (that are still "alive")
    alive_seq = torch.full(
        [batch_size * size, 1],
        bos_index,
        dtype=torch.long,
        device=encoder_output.device)

    # Give full probability to the first beam on the first step.
    topk_log_probs = torch.zeros(batch_size, size, device=encoder_output.device)
    topk_log_probs[:, 1:] = float("-inf")

    # Structure that holds finished hypotheses.
    hypotheses = [[] for _ in range(batch_size)]

    results = {
        "predictions": [[] for _ in range(batch_size)],
        "scores": [[] for _ in range(batch_size)],
        "gold_score": [0] * batch_size,
    }

    for step in range(max_output_length):

        # This decides which part of the predicted sentence we feed to the
        # decoder to make the next prediction.
        # For Transformer, we feed the complete predicted sentence so far.
        # For Recurrent models, only feed the previous target word prediction
        if transformer:  # Transformer
            decoder_input = alive_seq  # complete prediction so far
        else:  # Recurrent
            decoder_input = alive_seq[:, -1].view(-1, 1)  # only the last word

        # expand current hypotheses
        # decode one single step
        # logits: logits for final softmax
        # pylint: disable=unused-variable
        trg_embed = embed(decoder_input)
        logits, hidden, att_scores, att_vectors = decoder(
            encoder_output=encoder_output,
            encoder_hidden=encoder_hidden,
            src_mask=src_mask,
            trg_embed=trg_embed,
            hidden=hidden,
            prev_att_vector=att_vectors,
            unroll_steps=1,
            trg_mask=trg_mask  # subsequent mask for Transformer only
        )

        # For the Transformer we made predictions for all time steps up to
        # this point, so we only want to know about the last time step.
        if transformer:
            logits = logits[:, -1]  # keep only the last time step
            hidden = None           # we don't need to keep it for transformer

        # batch*k x trg_vocab
        log_probs = F.log_softmax(logits, dim=-1).squeeze(1)

        # multiply probs by the beam probability (=add logprobs)
        log_probs += topk_log_probs.view(-1).unsqueeze(1)
        curr_scores = log_probs.clone()

        # compute length penalty
        if alpha > -1:
            length_penalty = ((5.0 + (step + 1)) / 6.0) ** alpha
            curr_scores /= length_penalty

        # flatten log_probs into a list of possibilities
        curr_scores = curr_scores.reshape(-1, size * decoder.output_size)

        # pick currently best top k hypotheses (flattened order)
        topk_scores, topk_ids = curr_scores.topk(size, dim=-1)

        if alpha > -1:
            # recover original log probs
            topk_log_probs = topk_scores * length_penalty
        else:
            topk_log_probs = topk_scores.clone()

        # reconstruct beam origin and true word ids from flattened order
        topk_beam_index = topk_ids.div(decoder.output_size,
                                       rounding_mode="floor")
        topk_ids = topk_ids.fmod(decoder.output_size)

        # map beam_index to batch_index in the flat representation
        batch_index = (
            topk_beam_index
            + beam_offset[:topk_beam_index.size(0)].unsqueeze(1))
        select_indices = batch_index.view(-1)

        # append latest prediction
        alive_seq = torch.cat(
            [alive_seq.index_select(0, select_indices),
             topk_ids.view(-1, 1)], -1)  # batch_size*k x hyp_len

        is_finished = topk_ids.eq(eos_index)
        if step + 1 == max_output_length:
            is_finished.fill_(True)
        # end condition is whether the top beam is finished
        end_condition = is_finished[:, 0].eq(True)

        # save finished hypotheses
        if is_finished.any():
            predictions = alive_seq.view(-1, size, alive_seq.size(-1))
            for i in range(is_finished.size(0)):
                b = batch_offset[i]
                if end_condition[i]:
                    is_finished[i].fill_(1)
                finished_hyp = is_finished[i].nonzero().view(-1)
                # store finished hypotheses for this batch
                for j in finished_hyp:
                    # Check if the prediction has more than one EOS.
                    # If it has more than one EOS, it means that the
                    # prediction should have already been added to
                    # the hypotheses, so you don't have to add them again.
                    if (predictions[i, j, 1:] == eos_index).nonzero().numel() \
                            < 2:
                        # ignore start_token
                        hypotheses[b].append(
                            (topk_scores[i, j], predictions[i, j, 1:])
                        )
                # if the batch reached the end, save the n_best hypotheses
                if end_condition[i]:
                    best_hyp = sorted(
                        hypotheses[b], key=lambda x: x[0], reverse=True)
                    for n, (score, pred) in enumerate(best_hyp):
                        if n >= n_best:
                            break
                        results["scores"][b].append(score)
                        results["predictions"][b].append(pred)
            non_finished = end_condition.eq(False).nonzero().view(-1)
            # if all sentences are translated, no need to go further
            # pylint: disable=len-as-condition
            if len(non_finished) == 0:
                break
            # remove finished batches for the next step
            topk_log_probs = topk_log_probs.index_select(0, non_finished)
            batch_index = batch_index.index_select(0, non_finished)
            batch_offset = batch_offset.index_select(0, non_finished)
            alive_seq = predictions.index_select(0, non_finished) \
                .view(-1, alive_seq.size(-1))

        # reorder indices, outputs and masks
        select_indices = batch_index.view(-1)
        encoder_output = encoder_output.index_select(0, select_indices)
        src_mask = src_mask.index_select(0, select_indices)

        if hidden is not None and not transformer:
            if isinstance(hidden, tuple):
                # for LSTMs, states are tuples of tensors
                h, c = hidden
                h = h.index_select(1, select_indices)
                c = c.index_select(1, select_indices)
                hidden = (h, c)
            else:
                # for GRUs, states are single tensors
                hidden = hidden.index_select(1, select_indices)

        if att_vectors is not None:
            att_vectors = att_vectors.index_select(0, select_indices)

    def pad_and_stack_hyps(hyps, pad_value):
        filled = np.ones((len(hyps), max([h.shape[0] for h in hyps])),
                         dtype=int) * pad_value
        for j, h in enumerate(hyps):
            for k, i in enumerate(h):
                filled[j, k] = i
        return filled

    # from results to stacked outputs
    assert n_best == 1
    # only works for n_best=1 for now
    final_outputs = pad_and_stack_hyps([r[0].cpu().numpy() for r in
                                        results["predictions"]],
                                       pad_value=pad_index)

    return final_outputs, None
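The beam-origin / token-id reconstruction above relies on the scores being flattened into beam_size * vocab columns per sentence: integer division recovers the beam, modulo recovers the word. The trick in isolation, with toy sizes:

import torch

beam_size, vocab = 3, 5
scores = torch.randn(2, beam_size * vocab)                       # 2 sentences
topk_scores, topk_ids = scores.topk(beam_size, dim=-1)
beam_index = torch.div(topk_ids, vocab, rounding_mode="floor")   # beam of origin
token_ids = topk_ids.fmod(vocab)                                 # word within vocab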
Beispiel #52
0
def presgan(dat, netG, netD, log_sigma, args):
    writer = SummaryWriter(log_dir='tensorboard' + args.dataset)
    device = args.device
    if torch.cuda.is_available():
        print("cuda")
        netG.cuda()
        netD.cuda()
        criterion.cuda()
        criterion_mse.cuda()
    X_training = dat['X_train'].to(device)  # [60000, 1, 64, 64]
    fixed_noise = torch.randn(args.num_gen_images,
                              args.nz,
                              1,
                              1,
                              device=device)
    torch.manual_seed(123)
    # NEW
    Y_training = dat['Y_train'].to(device)
    # NUM_CLASS = 10
    NUM_CLASS = args.n_classes

    optimizerD = optim.Adam(netD.parameters(),
                            lr=args.lrD,
                            betas=(args.beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=args.lrG,
                            betas=(args.beta1, 0.999))
    sigma_optimizer = optim.Adam([log_sigma],
                                 lr=args.sigma_lr,
                                 betas=(args.beta1, 0.999))
    if args.restrict_sigma:
        logsigma_min = math.log(math.exp(args.sigma_min) - 1.0)
        logsigma_max = math.log(math.exp(args.sigma_max) - 1.0)

    stepsize = args.stepsize_num / args.nz

    Y_forY_training = dat['Y_train'].to(device)  # labels mutated by the class-filtering loop below

    bsz = args.batchSize

    for epoch in range(1, args.epochs + 1):
        for i in range(0, len(X_training), bsz):
            stop = min(bsz, len(X_training[i:]))
            real_cpu = X_training[i:i + stop].to(device)
            y_real_cpu = Y_forY_training[i:i + stop].to(device)

            # replace every class-2 sample with a neighboring sample,
            # effectively removing class 2 from the training data
            for idx in range(len(y_real_cpu)):
                if (idx > 0) and (y_real_cpu[idx] == 2):
                    y_real_cpu[idx] = y_real_cpu[idx - 1]
                    real_cpu[idx, :] = real_cpu[idx - 1, :]
                elif (idx == 0) and (y_real_cpu[idx] == 2):
                    y_real_cpu[idx] = y_real_cpu[idx + 1]
                    real_cpu[idx, :] = real_cpu[idx + 1, :]

            X_training[i:i + stop] = real_cpu
            Y_forY_training[i:i + stop] = y_real_cpu

            sigma_x = F.softplus(log_sigma).view(1, 1, args.imageSize,
                                                 args.imageSize)

            netD.zero_grad()

            batch_size = real_cpu.size(0)
            labelv = torch.full((batch_size, ), real_label, dtype=torch.float).to(device)

            # train discriminator on real (noised) data and real labels
            y_labels = Y_training[i:i + stop].to(device)
            y_one_hot = torch.FloatTensor(batch_size, NUM_CLASS).to(
                device)  # adding cuda here
            # print(batch_size, bsz, y_labels.size())
            y_one_hot = y_one_hot.zero_().scatter_(
                1, y_labels.view(batch_size, 1), 1).to(device)

            noise_eta = torch.randn_like(real_cpu).to(device)
            noised_data = real_cpu + sigma_x.detach() * noise_eta
            out_real = netD(noised_data, y_one_hot)
            errD_real = criterion(out_real, labelv)
            errD_real.backward()
            D_x = out_real.mean().item()

            # make generator output image from random labels; make discriminator classify
            rand_y_one_hot = torch.FloatTensor(
                batch_size, NUM_CLASS).zero_().to(device)  # adding cuda here
            rand_y_one_hot.scatter_(
                1,
                torch.randint(0,
                              NUM_CLASS,
                              size=(batch_size, 1),
                              device=device), 1
            )

            noise = torch.randn(batch_size, args.nz, 1, 1, device=device)
            mu_fake = netG(noise, rand_y_one_hot)
            fake = mu_fake + sigma_x * noise_eta
            labelv = labelv.fill_(fake_label).to(device)
            out_fake = netD(fake.detach(), rand_y_one_hot)
            errD_fake = criterion(out_fake, labelv)
            errD_fake.backward()
            D_G_z1 = out_fake.mean().item()
            errD = errD_real + errD_fake
            optimizerD.step()

            # update G network: maximize log(D(G(z)))

            netG.zero_grad()
            sigma_optimizer.zero_grad()

            rand_y_one_hot = torch.FloatTensor(batch_size,
                                               NUM_CLASS).zero_().to(device)
            rand_y_one_hot = rand_y_one_hot.scatter_(
                1,
                torch.randint(0,
                              NUM_CLASS,
                              size=(batch_size, 1),
                              device=device), 1).to(device)
            labelv = labelv.fill_(real_label).to(device)
            gen_input = torch.randn(batch_size, args.nz, 1, 1, device=device)
            out = netG(gen_input, rand_y_one_hot)  # add rand y labels
            noise_eta = torch.randn_like(out)
            g_fake_data = out + noise_eta * sigma_x

            dg_fake_decision = netD(g_fake_data,
                                    rand_y_one_hot)  # add rand y labels
            g_error_gan = criterion(dg_fake_decision, labelv)
            D_G_z2 = dg_fake_decision.mean().item()

            #             # TO TEST WITHOUT ENTROPY, SET:
            #             if epoch < 10 and args.lambda_ != 0 and args.dataset != 'mnist':
            #                 args.lambda_ = 0
            #             elif epoch < 20 and args.lambda_ != 0 and args.dataset != 'mnist':
            #                 args.lambda_ = 0.0001
            #             elif args.lambda_ != 0 and args.dataset != 'mnist':
            #                 args.lambda_ = 0.0002

            if args.lambda_ == 0:
                g_error_gan.backward()
                optimizerG.step()
                sigma_optimizer.step()

            else:
                # added y_tilde param (rand_y_one_hot)
                hmc_samples, hmc_labels, acceptRate, stepsize = hmc.get_samples(
                    netG, g_fake_data.detach(), rand_y_one_hot.detach(),
                    gen_input.clone(), sigma_x.detach(), args.burn_in,
                    args.num_samples_posterior, args.leapfrog_steps, stepsize,
                    args.flag_adapt, args.hmc_learning_rate,
                    args.hmc_opt_accept)

                bsz, d = hmc_samples.size()
                hmc_samples = hmc_samples.view(bsz, d, 1, 1).to(device)
                hmc_labels = hmc_labels.to(device)
                mean_output = netG(hmc_samples, hmc_labels)
                bsz = g_fake_data.size(0)

                mean_output_summed = torch.zeros_like(g_fake_data).to(device)
                for cnt in range(args.num_samples_posterior):
                    mean_output_summed = mean_output_summed + mean_output[
                        cnt * bsz:(cnt + 1) * bsz]
                mean_output_summed = mean_output_summed / args.num_samples_posterior

                c = ((g_fake_data - mean_output_summed) / sigma_x**2).detach()
                g_error_entropy = torch.mul(c, out +
                                            sigma_x * noise_eta).mean(0).sum()

                g_error = g_error_gan - args.lambda_ * g_error_entropy
                g_error.backward()
                optimizerG.step()
                sigma_optimizer.step()

            if args.restrict_sigma:
                log_sigma.data.clamp_(min=logsigma_min, max=logsigma_max)

            ## log performance
            if i % args.log == 0:
                print(
                    'Epoch [%d/%d] .. Batch [%d/%d] .. Loss_D: %.4f .. Loss_G: %.4f .. D(x): %.4f .. D(G(z)): %.4f / %.4f'
                    % (epoch, args.epochs, i, len(X_training), errD.item(),
                       g_error_gan.item(), D_x, D_G_z1, D_G_z2))
                with open('%s/log.csv' % args.results_folder, 'a') as f:
                    r = csv.writer(f)
                    # Loss_G, Loss_D, D(x), D(G(z))
                    r.writerow([g_error_gan.item(), errD.item(), D_x, D_G_z2])

            if i % (2 * args.log) == 0:
                t_iter = (epoch * len(X_training) + i) / bsz
                writer.add_scalar('Loss_G', g_error_gan.data, t_iter)
                writer.add_scalar('Loss_D', errD.data, t_iter)
                writer.add_scalar('D(x)', D_x, t_iter)
                writer.add_scalar('D(G(z))', D_G_z2, t_iter)

        print('*' * 100)
        print('End of epoch {}'.format(epoch))
        print('sigma min: {} .. sigma max: {}'.format(torch.min(sigma_x),
                                                      torch.max(sigma_x)))
        print('*' * 100)
        if args.lambda_ > 0:
            print(
                '| MCMC diagnostics ====> | stepsize: {} | min ar: {} | mean ar: {} | max ar: {} |'
                .format(stepsize,
                        acceptRate.min().item(),
                        acceptRate.mean().item(),
                        acceptRate.max().item()))

        if epoch % args.save_imgs_every == 0:
            rand_y_one_hot = torch.FloatTensor(args.num_gen_images,
                                               NUM_CLASS).zero_().to(
                                                   device)  # adding cuda here
            rand_y_one_hot = rand_y_one_hot.scatter_(
                1,
                torch.randint(0,
                              NUM_CLASS,
                              size=(args.num_gen_images, 1),
                              device=device), 1
            ).to(device)
            fake = netG(fixed_noise, rand_y_one_hot).detach()

            vutils.save_image(fake,
                              '%s/presgan_%s_fake_epoch_%03d.png' %
                              (args.results_folder, args.dataset, epoch),
                              normalize=True,
                              nrow=20)

        if epoch % args.save_ckpt_every == 0:
            torch.save(
                netG.state_dict(),
                os.path.join(
                    args.results_folder,
                    'netG_presgan_%s_epoch_%s.pth' % (args.dataset, epoch)))
            torch.save(
                log_sigma,
                os.path.join(args.results_folder,
                             'log_sigma_%s_%s.pth' % (args.dataset, epoch)))
            torch.save(
                netD.state_dict(),
                os.path.join(
                    args.results_folder,
                    'netD_presgan_%s_epoch_%s.pth' % (args.dataset, epoch)))
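presgan builds one-hot label tensors with scatter_ in several places. The pattern in isolation, on toy data:

import torch

labels = torch.tensor([2, 0, 1])
one_hot = torch.zeros(3, 4).scatter_(1, labels.view(-1, 1), 1)
# tensor([[0., 0., 1., 0.],
#         [1., 0., 0., 0.],
#         [0., 1., 0., 0.]])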
Beispiel #53
0
    def learn(self, batch, max_episode_len, train_step):
        """
        在learn的时候,抽取到的数据是四维的,四个维度分别为
        1——第几个episode
        2——episode中第几个transition
        3——第几个agent的数据
        4——具体obs维度。
        因为在选动作时不仅需要输入当前的inputs,还要给神经网络输入hidden_state,
        hidden_state和之前的经验相关,因此就不能随机抽取经验进行学习。所以这里一次抽取多个episode,
        然后一次给神经网络传入每个episode的同一个位置的transition
        :param batch:
        :param max_episode_len:
        :param train_step:
        :param epsilon:
        :return:
        """
        # number of episodes in the batch
        episode_num = batch['o'].shape[0]
        # initialize the hidden states
        self.init_hidden(episode_num)
        # convert the batch data to tensors
        for key in batch.keys():
            if key == 'a':
                batch[key] = torch.as_tensor(batch[key],
                                             dtype=torch.long,
                                             device=self.args.device)
            else:
                batch[key] = torch.as_tensor(batch[key],
                                             dtype=torch.float,
                                             device=self.args.device)

        s, next_s, a, r, avail_a, next_avail_a, done = batch['s'], batch['next_s'], batch['a'], \
                                                       batch['r'], batch['avail_a'], batch['next_avail_a'], \
                                                       batch['done']
        # mask out padded steps so their TD-error does not affect training
        mask = 1 - batch["padded"].float()
        # q-values for the current and the next state, (episode, max_episode_len, n_agents, n_actions)
        eval_qs, target_qs = self.get_q(batch, episode_num, max_episode_len)
        # q-value of the chosen action for every agent
        eval_qsa = torch.gather(eval_qs, dim=3, index=a).squeeze(3)
        # compute Q_tot
        if self.args.alg == 'qatten':
            eval_q_total, q_attend_regs, head_entropies = self.eval_mix_net(
                eval_qsa, s, a)
        else:
            eval_q_total = self.eval_mix_net(eval_qsa, s)
        qstar_q_total, qstar_loss, q_attend_regs = None, None, None
        # mask out unavailable actions first
        target_qs[next_avail_a == 0.0] = -9999999
        target_qsa = target_qs.max(dim=3)[0]
        if self.wqmix > 0:
            # TODO find the joint action that maximizes Q_tot; under QMIX's
            # monotonicity assumption, Q_tot is maximal when each agent's own
            # q-value is maximal, so the joint action is every agent's argmax
            argmax_u = target_qs.argmax(dim=3).unsqueeze(3)
            qstar_eval_qs, qstar_target_qs = self.get_q(
                batch, episode_num, max_episode_len, True)
            # q-values of the corresponding actions
            qstar_eval_qs = torch.gather(qstar_eval_qs, dim=3,
                                         index=a).squeeze(3)
            qstar_target_qs = torch.gather(qstar_target_qs,
                                           dim=3,
                                           index=argmax_u).squeeze(3)
            # obtain Q* through the feed-forward mixing network
            qstar_q_total = self.qstar_eval_mix(qstar_eval_qs, s)
            next_q_total = self.qstar_target_mix(qstar_target_qs, next_s)
        elif self.args.alg == 'qatten':
            target_next_actions = target_qs.max(
                dim=3)[1].unsqueeze(-1).detach()
            next_q_total, q_attend_regs, _ = self.target_mix_net(
                target_qsa, next_s, target_next_actions)
        else:
            # compute the target Q_tot (masking with float('-inf') produced
            # NaNs, hence the large negative constant used above)
            next_q_total = self.target_mix_net(target_qsa, next_s)

        target_q_total = r + self.args.gamma * next_q_total * (1 - done)
        weights = torch.as_tensor(np.ones(eval_q_total.shape),
                                  dtype=torch.float,
                                  device=self.args.device)
        if self.wqmix > 0:
            # 1 - ranf keeps the weights in (0, 1]
            # TODO: the paper only states (0, 1]; it also gives the concrete settings
            # weights = torch.Tensor(1 - np.random.ranf(eval_q_total.shape))
            weights = torch.full(eval_q_total.shape,
                                 self.alpha,
                                 device=self.args.device)
            if self.args.alg == 'cwqmix':
                error = mask * (target_q_total - qstar_q_total)
            elif self.args.alg == 'owqmix':
                error = mask * (target_q_total - eval_q_total)
            else:
                raise Exception("unknown model")
            weights[error > 0] = 1.
            # update the Q* parameters
            qstar_error = mask * (qstar_q_total - target_q_total.detach())

            qstar_loss = (qstar_error**2).sum() / mask.sum()

        # compute the TD error
        # TODO does detaching the weights here have any effect?
        td_error = mask * (eval_q_total - target_q_total.detach())

        loss = (weights.detach() * td_error**2).sum() / mask.sum()
        if self.args.alg == 'qatten':
            loss += q_attend_regs
        elif self.wqmix > 0:
            loss += qstar_loss

        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.eval_params, self.args.clip_norm)
        self.optimizer.step()

        if train_step > 0 and train_step % self.args.target_update_period == 0:
            self.target_rnn.load_state_dict(self.eval_rnn.state_dict())
            self.target_mix_net.load_state_dict(self.eval_mix_net.state_dict())
            if self.wqmix > 0:
                self.qstar_target_rnn.load_state_dict(
                    self.qstar_eval_rnn.state_dict())
                self.qstar_target_mix.load_state_dict(
                    self.qstar_eval_mix.state_dict())
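The weighting step above implements the weighted-QMIX idea: every sample is down-weighted by alpha, and full weight is restored wherever the TD target exceeds the current estimate. In isolation, with assumed toy values:

import torch

alpha = 0.5
error = torch.tensor([0.3, -0.2, 0.1, -0.5])  # target_q_total - eval_q_total
weights = torch.full(error.shape, alpha)
weights[error > 0] = 1.0                      # underestimated samples get weight 1
# weights -> tensor([1.0000, 0.5000, 1.0000, 0.5000])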
Beispiel #54
0
def full(shape, fill_value, dtype, ctx):
    return th.full(shape, fill_value, dtype=dtype, device=ctx)
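A usage example for the wrapper above, assuming th is the torch module:

import torch as th

t = full((2, 3), 7, dtype=th.float32, ctx=th.device("cpu"))
# tensor([[7., 7., 7.],
#         [7., 7., 7.]])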
Beispiel #55
0
    def aug_test_vote(self, imgs, img_metas, rescale=False):
        # recompute feats to save memory
        feats = self.extract_feats(imgs)

        aug_bboxes = []
        aug_labels = []
        for i, (x, img_meta) in enumerate(zip(feats, img_metas)):
            # only one image in the batch
            # TODO more flexible
            outs = self.bbox_head(x)
            bbox_inputs = outs + (img_meta, self.test_cfg, False, True)
            det_bboxes, det_labels = self.bbox_head.get_bboxes(*bbox_inputs)[0]
            kept = self.remove_boxes(det_bboxes,
                                     self.test_cfg.scale_ranges[i // 2][0],
                                     self.test_cfg.scale_ranges[i // 2][1])
            det_bboxes, det_labels = det_bboxes[kept, :], det_labels[kept]
            aug_bboxes.append(det_bboxes)
            aug_labels.append(det_labels)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_labels = self.merge_aug_vote_results(
            aug_bboxes, aug_labels, img_metas)

        det_bboxes = []
        det_labels = []
        for j in range(80):  # per-class voting; the 80 classes are hard-coded
            inds = (merged_labels == j).nonzero().squeeze(1)

            scores_j = merged_bboxes[inds, 4]
            bboxes_j = merged_bboxes[inds, :4].view(-1, 4)
            bboxes_j, scores_j = self.bboxes_vote(bboxes_j, scores_j)

            if len(bboxes_j) > 0:
                det_bboxes.append(
                    torch.cat([bboxes_j, scores_j[:, None]], dim=1))
                det_labels.append(
                    torch.full((bboxes_j.shape[0], ),
                               j,
                               dtype=torch.int64,
                               device=scores_j.device))

        if len(det_bboxes) > 0:
            det_bboxes = torch.cat(det_bboxes, dim=0)
            det_labels = torch.cat(det_labels)
        else:
            det_bboxes = merged_bboxes.new_zeros((0, 5))
            det_labels = merged_bboxes.new_zeros((0, ), dtype=torch.long)

        if det_bboxes.shape[0] > 1000:  # cap at the 1000 highest-scoring boxes
            cls_scores = det_bboxes[:, 4]
            image_thresh, _ = torch.kthvalue(cls_scores.cpu(),
                                             det_bboxes.shape[0] - 1000 + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep, as_tuple=False).squeeze(1)
            det_bboxes = det_bboxes[keep]
            det_labels = det_labels[keep]

        if rescale:
            _det_bboxes = det_bboxes
        else:
            _det_bboxes = det_bboxes.clone()
            _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
        bbox_results = bbox2result(_det_bboxes, det_labels,
                                   self.bbox_head.num_classes)
        return bbox_results
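The 1000-box cap above uses torch.kthvalue to find the cutoff score without fully sorting. The same idea in isolation, with toy sizes:

import torch

scores = torch.rand(5000)
k = scores.numel() - 1000 + 1            # the k-th smallest value is the cutoff
thresh, _ = torch.kthvalue(scores, k)
keep = torch.nonzero(scores >= thresh, as_tuple=False).squeeze(1)
# keep now indexes (at least, up to ties) the 1000 highest-scoring entries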
Beispiel #56
0
def train(model, model2, dset_loaders, criterion, BCEcriterion, epoch, phase,
          optimizer, optimizer_Global, args, logger, use_gpu):

    model.train()
    logger.info('-' * 10)
    logger.info('Epoch {}/{}'.format(epoch, args.epochs - 1))
    logger.info('Current Learning rate: {}'.format(showLR(optimizer)))

    running_loss, running_corrects, global_loss, running_all = 0., 0., 0., 0.
    since = time.time()
    last_time_batch_idx = -1
    for batch_idx, (inputs, targets) in enumerate(dset_loaders[phase]):

        label_real = torch.full((inputs.size(0), ), 1, dtype=torch.float)
        label_fake = torch.full((inputs.size(0), ), 0, dtype=torch.float)

        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda()

            target_mi = make_one_hot_global(
                targets, 1000
            )  # concatenated with the final (global) representations
            label_fake = label_fake.cuda()
            label_real = label_real.cuda()

        outputs = model(inputs)

        _, preds = torch.max(outputs.data, 1)

        optimizer.zero_grad()
        optimizer_Global.zero_grad()
        loss = criterion(outputs, targets)
        loss.backward()

        # Paired samples(Global)
        info_real_output_global = model2(target_mi, outputs)
        loss_real_global = BCEcriterion(info_real_output_global.squeeze(),
                                        label_real)
        loss_real_global.backward(retain_graph=True)

        # Unpaired samples(Global)
        info_fake_output_global = model2(
            target_mi, torch.cat((outputs[2:, ...], outputs[0:2, ...]), dim=0))
        loss_fake_global = BCEcriterion(info_fake_output_global.squeeze(),
                                        label_fake)
        loss_fake_global.backward()

        optimizer.step()
        optimizer_Global.step()

        # statistics
        running_loss += loss.item() * inputs.size(0)
        batch_correct = (preds == targets.data).sum().item()
        running_corrects += batch_correct
        running_all += len(inputs)

        error_info_global = loss_real_global.item() + loss_fake_global.item()
        global_loss += error_info_global * inputs.size(0)

        if batch_idx % args.interval == 0 or (batch_idx
                                              == len(dset_loaders[phase]) - 1):
            print(
                'Process: [{:5.0f}/{:5.0f} ({:.0f}%)]\tLoss batch: {:.4f}\tLoss total: {:.4f}\tAcc batch:{:.4f}\tAcc total:{:.4f}\tEstimated time:{:5.0f}s\r'
                .format(running_all, len(dset_loaders[phase].dataset),
                        100. * batch_idx / (len(dset_loaders[phase]) - 1),
                        float(loss),
                        float(running_loss) / running_all,
                        float(batch_correct) / len(inputs),
                        float(running_corrects) / running_all,
                        (time.time() - since) /
                        (batch_idx - last_time_batch_idx) *
                        (len(dset_loaders[phase]) - batch_idx - 1)))

            last_time_batch_idx = batch_idx
            since = time.time()

    loss_epoch = float(running_loss) / len(dset_loaders[phase].dataset)
    acc_epoch = float(running_corrects) / len(dset_loaders[phase].dataset)
    global_loss_epoch = float(global_loss) / len(dset_loaders[phase].dataset)

    logger.info(
        '{} Epoch:\t{:2}\tLoss: {:.4f}\tAcc:{:.4f}\tglobal:{:.4f}\n'.format(
            phase, epoch, loss_epoch, acc_epoch, global_loss_epoch))
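train above logs the learning rate via a showLR helper that is not shown. A minimal sketch under that assumption:

def showLR(optimizer):
    # report the learning rate of every parameter group, comma-separated
    return ','.join(str(group['lr']) for group in optimizer.param_groups)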
Beispiel #57
0
def test_draw_boxes_colors(colors):
    img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
    utils.draw_bounding_boxes(img, boxes, fill=False, width=7, colors=colors)
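The test above assumes a module-level boxes tensor alongside torchvision's utils; a plausible setup (an assumption, not the original fixtures):

import torch
from torchvision import utils

# two boxes in (xmin, ymin, xmax, ymax) format inside the 100x100 image
boxes = torch.tensor([[10, 10, 50, 50], [30, 40, 90, 80]], dtype=torch.float)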
Beispiel #58
0
 def evaluate(self, input: RLEstimatorInput, **kwargs) -> EstimatorResults:
     assert input.value_function is not None
     logging.info(f"{self}: start evaluating")
     stime = time.process_time()
     results = EstimatorResults()
     num_resamples = kwargs["num_resamples"] if "num_resamples" in kwargs else 200
     loss_threshold = (
         kwargs["loss_threshold"] if "loss_threshold" in kwargs else 0.00001
     )
     lr = kwargs["lr"] if "lr" in kwargs else 0.0001
     logging.info(
         f"  params: num_resamples[{num_resamples}], "
         f"loss_threshold[{loss_threhold}], "
         f"lr[{lr}]"
     )
     for state, mdps in input.log.items():
         n = len(mdps)
         horizon = len(reduce(lambda a, b: a if len(a) > len(b) else b, mdps))
         ws = self._calc_weights(n, horizon, zip_longest(*mdps), input.target_policy)
         last_ws = torch.zeros((n, horizon), device=self._device)
         last_ws[:, 0] = 1.0 / n
         last_ws[:, 1:] = ws[:, :-1]
         discount = torch.full((horizon,), input.gamma, device=self._device)
         discount[0] = 1.0
         discount = discount.cumprod(0)
         rs = torch.zeros((n, horizon))
         vs = torch.zeros((n, horizon))
         qs = torch.zeros((n, horizon))
         for ts, j in zip(zip_longest(*mdps), count()):
             for t, i in zip(ts, count()):
                 if t is not None and t.action is not None:
                     qs[i, j] = input.value_function(t.last_state, t.action)
                     vs[i, j] = input.value_function(t.last_state)
                     rs[i, j] = t.reward
         vs = vs.to(device=self._device)
         qs = qs.to(device=self._device)
         rs = rs.to(device=self._device)
         wdrs = ((ws * (rs - qs) + last_ws * vs) * discount).cumsum(1)
         wdr = wdrs[:, -1].sum(0)
         next_vs = torch.zeros((n, horizon), device=self._device)
         next_vs[:, :-1] = vs[:, 1:]
         gs = wdrs + ws * next_vs * discount
         gs_normal = gs.sub(torch.mean(gs, 0))
         assert n > 1
         omega = (n / (n - 1.0)) * torch.einsum("ij,ik->jk", gs_normal, gs_normal)
         resample_wdrs = torch.zeros((num_resamples,))
         for i in range(num_resamples):
             samples = random.choices(range(n), k=n)
             sws = ws[samples, :]
             last_sws = last_ws[samples, :]
             srs = rs[samples, :]
             svs = vs[samples, :]
             sqs = qs[samples, :]
             resample_wdrs[i] = (
                 ((sws * (srs - sqs) + last_sws * svs).sum(0) * discount)
                 .sum()
                 .item()
             )
         resample_wdrs, _ = resample_wdrs.to(device=self._device).sort(0)
         lb = torch.min(wdr, resample_wdrs[int(round(0.05 * num_resamples))])
         ub = torch.max(wdr, resample_wdrs[int(round(0.95 * num_resamples)) - 1])
         b = torch.tensor(
             list(
                 map(
                     lambda a: a - ub if a > ub else (a - lb if a < lb else 0.0),
                     # pyre-fixme[6]: Expected `Iterable[Variable[_T1]]` for 2nd
                     #  param but got `Tensor`.
                     gs.sum(0),
                 )
             ),
             device=self._device,
         )
         b.unsqueeze_(0)
         bb = b * b.t()
         cov = omega + bb
         # x = torch.rand((1, horizon), device=self.device, requires_grad=True)
         x = torch.zeros((1, horizon), device=self._device, requires_grad=True)
         # using SGD to find min x
         optimizer = torch.optim.SGD([x], lr=lr)
         last_y = 0.0
         for i in range(100):
             # keep x as the leaf tensor the optimizer updates; reassigning x
             # to its own softmax would apply softmax repeatedly across steps
             p = torch.nn.functional.softmax(x, dim=1)
             y = torch.mm(torch.mm(p, cov), p.t())
             if abs(y.item() - last_y) < loss_threshold:
                 print(f"{i}: {last_y} -> {y.item()}")
                 break
             last_y = y.item()
             optimizer.zero_grad()
             y.backward(retain_graph=True)
             optimizer.step()
         x = torch.nn.functional.softmax(x, dim=1)
         estimate = torch.mm(x, gs.sum(0, keepdim=True).t())
         if input.ground_truth is not None:
             ground_truth = input.ground_truth(state)
         else:
             ground_truth = None
         results.append(
             EstimatorResult(
                 self._log_reward(input.gamma, mdps), estimate, ground_truth
             )
         )
     logging.info(
         f"{self}: finishing evaluating["
         f"process_time={time.process_time() - stime}]"
     )
     return results
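The confidence bounds above come from a simple bootstrap: resample episodes with replacement, recompute the estimate, and take the 5th/95th percentiles of the resampled values. The idea in isolation, with toy per-episode values:

import random
import torch

n, num_resamples = 50, 200
vals = torch.randn(n)                     # per-episode estimates (toy data)
boots = torch.tensor(
    [vals[random.choices(range(n), k=n)].mean().item()
     for _ in range(num_resamples)])
boots, _ = boots.sort(0)
lb = boots[int(round(0.05 * num_resamples))]        # lower bound
ub = boots[int(round(0.95 * num_resamples)) - 1]    # upper bound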
Beispiel #59
0
def find_top_rrpn_proposals(
    proposals,
    pred_objectness_logits,
    image_sizes,
    nms_thresh,
    pre_nms_topk,
    post_nms_topk,
    min_box_size,
    training,
):
    """
    For each feature map, select the `pre_nms_topk` highest scoring proposals,
    apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk`
    highest scoring proposals among all the feature maps if `training` is True,
    otherwise, returns the highest `post_nms_topk` scoring proposals for each
    feature map.

    Args:
        proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5).
            All proposal predictions on the feature maps.
        pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A).
        image_sizes (list[tuple]): sizes (h, w) for each image
        nms_thresh (float): IoU threshold to use for NMS
        pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS.
            When RRPN is run on multiple feature maps (as in FPN) this number is per
            feature map.
        post_nms_topk (int): number of top k scoring proposals to keep after applying NMS.
            When RRPN is run on multiple feature maps (as in FPN) this number is total,
            over all feature maps.
        min_box_size(float): minimum proposal box side length in pixels (absolute units wrt
            input images).
        training (bool): True if proposals are to be used in training, otherwise False.
            This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..."
            comment.

    Returns:
        proposals (list[Instances]): list of N Instances. The i-th Instances
            stores post_nms_topk object proposals for image i.
    """
    num_images = len(image_sizes)
    device = proposals[0].device

    # 1. Select top-k anchor for every level and every image
    topk_scores = []  # #lvl Tensor, each of shape N x topk
    topk_proposals = []
    level_ids = []  # #lvl Tensor, each of shape (topk,)
    batch_idx = torch.arange(num_images, device=device)
    for level_id, proposals_i, logits_i in zip(itertools.count(), proposals,
                                               pred_objectness_logits):
        Hi_Wi_A = logits_i.shape[1]
        num_proposals_i = min(pre_nms_topk, Hi_Wi_A)

        # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812)
        # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1)
        logits_i, idx = logits_i.sort(descending=True, dim=1)
        topk_scores_i = logits_i[batch_idx, :num_proposals_i]
        topk_idx = idx[batch_idx, :num_proposals_i]

        # each is N x topk
        topk_proposals_i = proposals_i[batch_idx[:, None],
                                       topk_idx]  # N x topk x 5

        topk_proposals.append(topk_proposals_i)
        topk_scores.append(topk_scores_i)
        level_ids.append(
            torch.full((num_proposals_i, ),
                       level_id,
                       dtype=torch.int64,
                       device=device))

    # 2. Concat all levels together
    topk_scores = cat(topk_scores, dim=1)
    topk_proposals = cat(topk_proposals, dim=1)
    level_ids = cat(level_ids, dim=0)

    # 3. For each image, run a per-level NMS, and choose topk results.
    results = []
    for n, image_size in enumerate(image_sizes):
        boxes = RotatedBoxes(topk_proposals[n])
        scores_per_img = topk_scores[n]
        valid_mask = torch.isfinite(
            boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img)
        if not valid_mask.all():
            boxes = boxes[valid_mask]
            scores_per_img = scores_per_img[valid_mask]
        boxes.clip(image_size)

        # filter empty boxes
        keep = boxes.nonempty(threshold=min_box_size)
        lvl = level_ids
        if keep.sum().item() != len(boxes):
            boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep],
                                          level_ids[keep])

        keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl,
                                   nms_thresh)
        # In Detectron1, there was different behavior during training vs. testing.
        # (https://github.com/facebookresearch/Detectron/issues/459)
        # During training, topk is over the proposals from *all* images in the training batch.
        # During testing, it is over the proposals for each image separately.
        # As a result, the training behavior becomes batch-dependent,
        # and the configuration "POST_NMS_TOPK_TRAIN" ends up relying on the batch size.
        # This bug is addressed in Detectron2 to make the behavior independent of batch size.
        keep = keep[:post_nms_topk]

        res = Instances(image_size)
        res.proposal_boxes = boxes[keep]
        res.objectness_logits = scores_per_img[keep]
        results.append(res)
    return results
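Per the comment in the selection step above, a descending sort plus slicing stands in for topk (see the linked PyTorch issue for the performance rationale). The equivalence on toy data:

import torch

logits = torch.randn(2, 10)
k = 3
sorted_logits, idx = logits.sort(descending=True, dim=1)
topk_scores, topk_idx = sorted_logits[:, :k], idx[:, :k]
# matches logits.topk(k, dim=1), up to the ordering of ties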
Beispiel #60
0
    def forward(self,
                s,
                nrows=None,
                ncols=None,
                exp=False,
                exp_alpha=20,
                dummy_row=False,
                dtype=torch.float32):
        batch_size = s.shape[0]

        if dummy_row:
            dummy_shape = list(s.shape)
            dummy_shape[1] = s.shape[2] - s.shape[1]
            s = torch.cat((s, torch.full(dummy_shape, 0.).to(s.device)), dim=1)
            new_nrows = ncols
            for b in range(batch_size):
                s[b, nrows[b]:new_nrows[b], :ncols[b]] = self.epsilon
            nrows = new_nrows

        row_norm_ones = torch.zeros(batch_size,
                                    s.shape[1],
                                    s.shape[1],
                                    device=s.device)  # size: row x row
        col_norm_ones = torch.zeros(batch_size,
                                    s.shape[2],
                                    s.shape[2],
                                    device=s.device)  # size: col x col
        for b in range(batch_size):
            row_slice = slice(0, nrows[b] if nrows is not None else s.shape[2])
            col_slice = slice(0, ncols[b] if ncols is not None else s.shape[1])
            row_norm_ones[b, row_slice, row_slice] = 1
            col_norm_ones[b, col_slice, col_slice] = 1

        # for Sinkhorn stacked on last dimension
        if len(s.shape) == 4:
            row_norm_ones = row_norm_ones.unsqueeze(-1)
            col_norm_ones = col_norm_ones.unsqueeze(-1)

        s += self.epsilon

        for i in range(self.max_iter):
            if exp:
                s = torch.exp(exp_alpha * s)
            if i % 2 == 1:
                # column normalization
                norm_sum = torch.sum(torch.mul(s.unsqueeze(3),
                                               col_norm_ones.unsqueeze(1)),
                                     dim=2)
            else:
                # row normalization
                norm_sum = torch.sum(torch.mul(row_norm_ones.unsqueeze(3),
                                               s.unsqueeze(1)),
                                     dim=2)

            tmp = torch.zeros_like(s)
            for b in range(batch_size):
                row_slice = slice(
                    0, nrows[b] if nrows is not None else s.shape[2])
                col_slice = slice(
                    0, ncols[b] if ncols is not None else s.shape[1])
                tmp[b, row_slice, col_slice] = 1 / norm_sum[b, row_slice, col_slice]
            s = s * tmp

        if dummy_row and dummy_shape[1] > 0:
            s = s[:, :-dummy_shape[1]]

        return s
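The core of the loop above, stripped of batching, masking, and dummy rows: alternately normalizing columns and rows drives a positive square matrix toward a doubly-stochastic one.

import torch

s = torch.rand(4, 4) + 1e-3
for i in range(10):
    if i % 2 == 1:
        s = s / s.sum(dim=0, keepdim=True)  # column normalization
    else:
        s = s / s.sum(dim=1, keepdim=True)  # row normalization
# row sums and column sums are now both close to 1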