Python Beamの例、utils.Beam Pythonの例

コード例 #1

0

ファイルを表示

ファイル: unittest.py プロジェクト: ForeverZyh/certified_lstms

 def test_beam(self):
     """Check that a Beam returns its best-scoring entries, capped at its budget.

     Each beam is fed (item, score) pairs in random order; the result of
     ``check_balance()`` must equal the pairs sorted by descending score
     and truncated to ``beam.budget``.
     """
     cases = [
         (Beam(4), [("aaa", 1), ("bbb", 0), ("ccc", -3)]),
         (Beam(3), [("aaa", 1), ("bbb", -3), ("ccc", 0)]),
         (Beam(2), [("aaa", 1), ("bbb", -3), ("ccc", 0), ("ddd", 2)]),
     ]
     for beam, pairs in cases:
         # Insertion order must not influence the final ranking.
         random.shuffle(pairs)
         for item, score in pairs:
             beam.add(item, score)

         expected = sorted(pairs, key=lambda pair: -pair[1])[:beam.budget]
         actual = beam.check_balance()

         self.assertEqual(len(expected), len(actual))
         for want, got in zip(expected, actual):
             self.assertTupleEqual(want, got)

コード例 #2

0

ファイルを表示

ファイル: multi_seq2seq.py プロジェクト: HLTCHKUST/sentiment-lookahead

    def beam_search(self, seqs, lens, B, L, vocab):
        """Decode a batch with beam search and return one string per sequence.

        :param seqs: source sequences, forwarded unchanged to ``self.encode``
        :param lens: source lengths, forwarded unchanged to ``self.encode``
        :param B: number of ``Beam`` objects created (one per batch element)
        :param L: maximum number of decoding steps
        :param vocab: object exposing ``index2word`` for id -> token lookup
        :return: list with the best hypothesis of every beam, tokens joined
            by spaces, with ``constant.eou_idx`` / ``constant.pad_idx``
            tokens filtered out
        """
        # Only the single best hypothesis per beam is extracted at the end.
        n_best = 1
        K = constant.beam_size
        beam = [Beam(K, 
                     n_best=n_best,
                     global_scorer=GNMTGlobalScorer(),)
                     # min_length=self.min_length,
                     # stepwise_penalty=self.stepwise_penalty,
                     # block_ngram_repeat=self.block_ngram_repeat,
                     # exclusion_tokens=exclusion_tokens)
                for _ in range(B)]

        # (1) Run the encoder on the src.
        src_h, dec_h_t = self.encode(seqs, lens)

        # (2) Repeat src objects `beam_size` times.
        # Tile states and inputs K times
        # NOTE(review): presumably `tile` repeats each entry K times along
        # the given dim so that it lines up with the flattened B*K layout
        # used below - confirm against the helper's definition.
        dec_h_t = tile(dec_h_t, K, dim=1)

        if self.use_attn:
            src_h = tile(src_h.contiguous(), K, dim=0)

        # We use now  batch_size x beam_size (same as fast mode)

        # (3) run the decoder to generate sentences, using beam search.
        for t in range(L):
            # Stop early once every beam reports that it is finished.
            if all((b.done() for b in beam)):
                break

            # (a) Construct batch x beam_size nxt words.
            # Get all the pending current beam words and arrange for forward.
            x_t = torch.stack([b.get_current_state() for b in beam])#.t().contiguous()
            # Flatten to a 1-D tensor of next-input tokens for the decoder.
            x_t = x_t.view(-1)

            # (b) Decode and forward
            y_t, dec_h_t = self.decoder(x_t, dec_h_t, src_h)
            y_t = y_t.view(B, K, -1) # B, K, V
            y_t = F.log_softmax(y_t, dim=2)

            # (c) Advance each beam.
            select_indices_array = []
            # Loop over the batch_size number of beams (beam search per sequence)
            for j, b in enumerate(beam):
                b.advance(y_t[j], None)
                # Offset the beam-local origins by j * K so they index into
                # the flattened B*K dimension of the decoder state.
                select_indices_array.append(
                    b.get_current_origin() + j * K)

            select_indices = torch.cat(select_indices_array)
            dec_h_t = dec_h_t.index_select(1, select_indices) # select correct nodes

        # (4) Extract sentences from beam.
        preds = []
        for b in beam:
            # `scores` is returned alongside the ranking but not used here.
            scores, ks = b.sort_finished(minimum=n_best)
            hyps = []
            for times, k in ks[:n_best]:
                hyp, _ = b.get_hyp(times, k)
                hyps.append(" ".join([vocab.index2word[word.item()] for word in hyp if word.item() not in [constant.eou_idx, constant.pad_idx]]))
            # Keep only the top hypothesis for this sequence.
            preds.append(hyps[0])
        return preds

コード例 #3

0

ファイルを表示

    def try_all_pos(self, pos: list, tran: Transformation, gradients,
                    get_embed, candidates: Beam):
        """
        Apply ``tran`` at every usable position and push the results into ``candidates``.

        A position is usable only when every original index it covers still
        maps to an index of ``self.x`` (i.e. has not been consumed by an
        earlier transformation).

        :param pos: possible positions, a list of (start_pos, end_pos) in original coordinates
        :param tran: the transformation to apply
        :param gradients: the gradients tensor with respect to self.x, or None
        :param get_embed: a function returning the embedding of a list of tokens, or None
        :param candidates: beam receiving the new candidates; modified in place
        :return: None
        """
        # Without both ingredients the score falls back to a random draw.
        use_gradients = get_embed is not None and gradients is not None

        for (start_pos_ori, end_pos_ori) in pos:
            if any(self.map_ori2x[i] is None
                   for i in range(start_pos_ori, end_pos_ori)):
                continue

            start_pos_x = self.map_ori2x[start_pos_ori]
            # map_ori2x[end_pos_ori] itself may be None, so derive the end
            # from the last covered index instead.
            end_pos_x = self.map_ori2x[end_pos_ori - 1] + 1

            for new_x in tran.transformer(self.x, start_pos_x, end_pos_x):
                delta_len = len(new_x) - len(self.x)

                if use_gradients:
                    old_embedding = get_embed(self.x[start_pos_x:])
                    # ret_len pads the new embedding up to the old length.
                    shared_end = min(len(new_x), len(self.x))
                    new_embedding = get_embed(
                        new_x[start_pos_x:shared_end],
                        ret_len=len(self.x) - start_pos_x)
                    # gradients[start_pos_x:] has shape (len(self.x) - start_pos_x, dim)
                    embedding_shift = new_embedding - old_embedding
                    new_score = self.score + np.sum(
                        gradients[start_pos_x:] * embedding_shift)
                else:
                    new_score = np.random.random()

                # Covered positions become unavailable; later ones shift by
                # the change in sequence length.
                consumed = [None] * (end_pos_ori - start_pos_ori)
                shifted_tail = [
                    None if p is None else p + delta_len
                    for p in self.map_ori2x[end_pos_ori:]
                ]
                new_map_ori2x = (self.map_ori2x[:start_pos_ori] + consumed +
                                 shifted_tail)

                candidates.add(Candidate(new_x, new_score, new_map_ori2x),
                               new_score)

コード例 #4

0

ファイルを表示

    def gen_adv(self,
                model,
                tree,
                x: list,
                top_n: int,
                dataset_vocab,
                return_score=False):
        """
        Beam search for the perturbation space. The order of beam search is the same in the perturbation DSL.
        Some adversarial attack tries to rearrange the order of beam search for better performance.
        TODO: the order of beam search can be rearranged for better performance.
        :param model: the victim model, which has to support a method get_grad.
        :param tree: the target tree
        :param x: a list of input tokens.
        :param top_n: maximum number of adversarial candidates given the perturbation space.
        :param dataset_vocab: the vocabulary in the dataset, not the vocabulary used in training
        :param return_score: whether return the score as a list [(sen, score)], default False, i.e., return [sen]
        :return: a list of adversarial examples
        :raises AttributeError: if ``model`` has no ``get_grad`` attribute
        """

        # hasattr only swallows AttributeError; the previous bare `except:`
        # also rewrote unrelated failures (and KeyboardInterrupt) into a
        # misleading "does not support get_grad" error.
        if not hasattr(model, "get_grad"):
            raise AttributeError(
                "The victim model does not support get_grad method.")

        meta_data = (x, tree, dataset_vocab)
        candidate = CandidateTree(
            tree, x, 0 if not self.use_random_aug else np.random.random())
        candidates = Beam(top_n)
        candidates.add(candidate, candidate.score)
        for (tran, delta) in self.perturbation:
            possible_pos = tran.get_pos(x)  # get a list of possible positions
            # remembers already-expanded candidates to eliminate duplications
            perturbed_set = set()
            for _ in range(delta):
                # snapshot the beam because try_all_pos mutates `candidates`
                old_candidates = candidates.check_balance()
                for (candidate, _score) in old_candidates:
                    if candidate in perturbed_set:
                        continue
                    if len(x) > 0:
                        if self.use_random_aug:
                            # random augmentation: no gradients are needed
                            gradients = None
                        else:
                            gradients = model.get_grad(
                                candidate.tree, isinstance(tran, Ins))
                        candidate.try_all_pos(meta_data, possible_pos, tran,
                                              gradients, model, candidates)
                    perturbed_set.add(candidate)

        ret = candidates.check_balance()
        if return_score:
            return [(cand.tree, cand.score) for (cand, _) in ret]
        return [cand.tree for (cand, _) in ret]

コード例 #5

0

ファイルを表示

    def gen_adv(self,
                model,
                x: list,
                y,
                top_n: int,
                get_embed,
                return_score=False):
        """
        Beam search for the perturbation space. The order of beam search is the same in the perturbation DSL.
        Some adversarial attack tries to rearrange the order of beam search for better performance.
        TODO: the order of beam search can be rearranged for better performance.
        :param model: the victim model, which has to support a method get_grad.
        :param x: a list of input tokens.
        :param y: the correct label of input x.
        :param top_n: maximum number of adversarial candidates given the perturbation space.
        :param get_embed: get_embed(x, ret_len) takes a list of tokens as inputs and output the embedding matrix with
        shape (ret_len, dim).
        if ret_len > len(x), then padding is needed.
        if ret_len < len(x), then truncating is needed. (Currently, we do not need truncate)
        :param return_score: whether return the score as a list [(sen, score)], default False, i.e., return [sen]
        :return: a list of adversarial examples
        :raises AttributeError: if ``model`` has no ``get_grad`` attribute
        """

        # hasattr only swallows AttributeError; the previous bare `except:`
        # also rewrote unrelated failures (and KeyboardInterrupt) into a
        # misleading "does not support get_grad" error.
        if not hasattr(model, "get_grad"):
            raise AttributeError(
                "The victim model does not support get_grad method.")

        candidate = Candidate(
            x, 0 if not self.use_random_aug else np.random.random())
        candidates = Beam(top_n)
        candidates.add(candidate, candidate.score)
        for (tran, delta) in self.perturbation:
            possible_pos = tran.get_pos(x)  # get a list of possible positions
            # remembers already-expanded candidates to eliminate duplications
            perturbed_set = set()
            for _ in range(delta):
                # snapshot the beam because try_all_pos mutates `candidates`
                old_candidates = candidates.check_balance()
                for (candidate, _score) in old_candidates:
                    if candidate in perturbed_set:
                        continue
                    if len(candidate.x) > 0:
                        if self.use_random_aug:
                            # random augmentation: neither gradients nor
                            # embeddings are needed for scoring
                            candidate.try_all_pos(possible_pos, tran, None,
                                                  None, candidates)
                        else:
                            candidate.try_all_pos(
                                possible_pos, tran,
                                model.get_grad(candidate.x, y), get_embed,
                                candidates)
                    perturbed_set.add(candidate)

        ret = candidates.check_balance()
        if return_score:
            return [(cand.x, cand.score) for (cand, _) in ret]
        return [cand.x for (cand, _) in ret]

コード例 #6

0

ファイルを表示

def eval(model,
         token_tables,
         num_samples=100,
         beam_size=10,
         em=True,
         num_examples=4):
    '''
    Evaluate model on num_samples sampled tasks with beam-search decoding
    and print how many tasks were solved.

    model: the network under test; switched to eval mode first.
    token_tables: forwarded to sample() and num_consistent().
    num_samples: number of tasks to sample; non-positive values report 0.0%.
    beam_size: capacity of every beam and number of expansions per element.
    em: if True a task counts as solved when a decoded program equals the
        expected one (exact match); otherwise when a decoded program is
        consistent with num_examples examples according to num_consistent().
    num_examples: consistency threshold used when em is False.

    Prints a histogram of consistent programs per beam and the accuracy;
    returns None.
    '''
    model.eval()
    num_match = 0
    print("Evaluatng Examples...")
    num_in_beams_consistent = [0] * (beam_size + 1)
    # A dedicated name for the sample counter: the original reused `idx`
    # for the topk indices below, clobbering the loop variable.
    for sample_idx in range(num_samples):
        if sample_idx % 10 == 0:
            print("On example {}".format(sample_idx))
        expected_programs, examples = sample(token_tables)

        # do not allow generating programs longer than max_len!
        max_len = len(expected_programs) + 5

        # Elements in beam are tuples ([sequence], output_all_hidden, hidden)
        beam = Beam(beam_size)
        res_beam = Beam(beam_size)
        output_all_hidden, hidden = model.encode_io([examples])
        beam.add(([model.program_size], output_all_hidden, hidden), 0)

        iteration = 0
        while len(beam) > 0 and iteration < max_len:
            next_beam = Beam(beam_size)
            for elt, score in beam.get_elts_and_scores():
                # Get the next probabilities
                sequence, output_all_hidden, hidden = elt
                inp_idx = sequence[-1]
                decoder_input = [
                    model.decoder_embedding(
                        torch.tensor([inp_idx],
                                     device=model.device,
                                     dtype=torch.long))
                    for _ in range(hidden[0].size()[1])
                ]
                probs, output_all_hidden, hidden = model.next_probs(
                    decoder_input, hidden, output_all_hidden)
                probs = F.log_softmax(probs.squeeze(0), dim=-1)

                # Add the top beam_size candidates
                top_scores, top_indices = torch.topk(probs,
                                                     dim=-1,
                                                     k=beam_size)
                for next_score, next_idx in zip(top_scores, top_indices):
                    next_sequence = copy.deepcopy(sequence)
                    next_sequence.append(next_idx.item())
                    if next_idx == 0:  # EOS: the program is finished
                        res_beam.add(next_sequence, score + next_score)
                    else:
                        next_beam.add(
                            (next_sequence, output_all_hidden, hidden),
                            score + next_score)
            # Swap beams once per step, after every element was expanded.
            beam = next_beam
            iteration += 1

        # Evaluate this beam!
        this_beam = 0
        matched = False
        for sequence, _ in res_beam.get_elts_and_scores():
            sequence = sequence[1:]  # drop the start token
            if em and sequence == expected_programs:
                num_match += 1
                break
            elif not em:
                expected_in = copy.deepcopy(expected_programs)
                if (num_consistent((expected_in, examples), sequence,
                                   token_tables) == num_examples):
                    # A task counts at most once towards the accuracy.
                    if not matched:
                        num_match += 1
                        matched = True
                    this_beam += 1
        num_in_beams_consistent[this_beam] += 1

    print("Number of beams with [index] many consistent programs in the beam:")
    print(num_in_beams_consistent)

    # Guard against division by zero when nothing was sampled.
    accuracy = (num_match / num_samples) * 100 if num_samples > 0 else 0.0
    # '\%' was an invalid escape sequence that printed a stray backslash.
    print('{}% Accuracy!'.format(accuracy))

コード例 #7

0

ファイルを表示

    def beam_search(self,
                    teststring,
                    beam_width=5,
                    clip_len=7,
                    end_token='<end>',
                    start_token='<start>'):
        """Lazily yield ``(labels, probability)`` pairs found by beam search.

        Beam entries are tuples ``(logprob, complete, prefix, labels)``.
        After each expansion step, the best entries are yielded and removed
        for as long as the current best is complete or its prefix has
        reached ``clip_len`` generated items. Yielded labels exclude the
        start token; the probability is ``exp(logprob)``.
        """
        encoded_word = self.__encode_string(teststring)
        beam = Beam(beam_width)
        # seed the search with a single, incomplete hypothesis
        beam.add((0.0, False, [self.__label_edge_index], [start_token]))

        while True:
            expanded = Beam(beam_width)

            for (logprob, complete, prefix, labels) in beam:
                if complete:
                    # finished hypotheses are carried over unchanged
                    expanded.add((logprob, True, prefix, labels))
                    continue

                for (next_prob, i, next_word) in self.__distribution(
                        encoded_word, prefix):
                    total = logprob + next_prob
                    if next_word == end_token:
                        expanded.add((total, True, prefix, labels))
                    else:
                        expanded.add((total, False, prefix + [i],
                                      labels + [next_word]))

            emitted = False
            while True:
                (best_prob, best_complete, best_prefix,
                 best_labels) = expanded.get_best()[0]

                ready = best_complete or len(best_prefix) - 1 == clip_len
                if not ready:
                    break

                yield (best_labels[1:], math.exp(best_prob))
                expanded.remove(
                    (best_prob, best_complete, best_prefix, best_labels))
                emitted = True
                if expanded.is_empty():
                    break

            if not emitted:
                beam = expanded
            elif expanded.is_empty():
                # everything has been yielded; the search is over
                break
            else:
                # rebuild the beam from what is left after the removals
                beam = Beam(beam_width, expanded)

コード例 #8

0

ファイルを表示

    def beam_decode(self, dec_hidden, enc_out, beam_width=3, topk=3):
        """Decode with beam search and return the collected token-id sequences.

        :param dec_hidden: initial decoder hidden state
        :param enc_out: encoder outputs; ``enc_out.size(2)`` sizes the
            initial input feed and the tensor is passed to ``decode_rnn``
        :param beam_width: nodes expanded per step and children per node
        :param topk: number of finished sequences to collect before stopping
        :return: list of token-id lists (start token first), ordered by
            ascending queue score; holds at most ``topk`` entries and may be
            shorter when fewer hypotheses are available
        """
        # Start decoding step with <SOS> token and empty input feed, stored in a Beam node
        dec_input = torch.zeros(1, 1).fill_(2).long().to(self.device)
        input_feed = torch.zeros(1, 1, enc_out.size(2), device=self.device)
        node = BeamSearchNode(dec_hidden, None, dec_input, 0, 1, input_feed)

        # Initialize Beam queue objects and an output list
        in_nodes = Beam()
        out_nodes = Beam()
        endnodes = []

        # Feed the input Beam queue with the start token
        in_nodes.put((node.eval(), node))

        # Start Beam search
        for _step in range(self.max_len_sentence):
            # At each step, keep the beam_width best nodes
            for _slot in range(beam_width):
                # Check for exhaustion BEFORE fetching: the old end-of-body
                # check was skipped by the `continue` below, so get() could
                # be called on an empty queue.
                if len(in_nodes) == 0:
                    break

                # Get the best node in the input Beam queue
                score, n = in_nodes.get()
                # Collect the values of the node to decode
                dec_input = n.wordid
                dec_hidden = n.hidden
                input_feed = n.feed

                # If we find an <EOS> token, then stop the decoding for this Beam
                if (n.wordid.item() in self.special_tokens_ids
                        and n.prevnode is not None):
                    endnodes.append((score, n))
                    # Break the loop if we have enough decoded sentences
                    if len(endnodes) >= topk:
                        break
                    continue

                # Decode with the RNN
                dec_input = self.embedding(
                    dec_input)  # (batch size, 1, emb dim)
                dec_input = torch.cat((dec_input, input_feed), 2)
                dec_output, dec_hidden = self.decode_rnn(
                    dec_input, dec_hidden, enc_out)
                out = self.gen(dec_output)
                # Extract the top K most likely tokens and their log probability (log softmax)
                log_prob, indexes = torch.topk(out, beam_width)

                # Create a node for each of the K outputs, score it (see
                # BeamSearchNode.eval) and push it to the output Beam queue
                for new_k in range(beam_width):
                    out_t = indexes[0][new_k].view(1, -1)
                    log_p = log_prob[0][new_k].item()
                    child = BeamSearchNode(dec_hidden, n, out_t,
                                           n.logp + log_p, n.leng + 1,
                                           dec_output)
                    out_nodes.put((child.eval(), child))

            # Fill the input Beam queue with the previously computed output Beam nodes
            in_nodes = out_nodes
            out_nodes = Beam()
            # Stop when we have enough output sequences or nothing is left to expand
            if len(endnodes) >= topk or len(in_nodes) == 0:
                break

        # In the case where we did not encounter a <EOS> token, take the most likely sequences
        if len(endnodes) == 0:
            # Never request more nodes than the queue actually holds.
            available = min(topk, len(in_nodes))
            endnodes = [in_nodes.get() for _ in range(available)]

        # Now we unpack the sequences in reverse order to retrieve the sentences
        utterances = []
        for score, n in sorted(endnodes, key=operator.itemgetter(0)):
            utterance = [n.wordid.item()]
            while n.prevnode is not None:
                n = n.prevnode
                utterance.append(n.wordid.item())
            # Reverse the sentence
            utterance = utterance[::-1]
            utterances.append(utterance)

        return utterances

コード例 #9

0

ファイルを表示

ファイル: predict.py プロジェクト: Wasim37/marketing_text_generation

    def beam_search(self, x, max_sum_len, beam_width, len_oovs,
                    x_padding_masks):
        """Generate a summary with beam search.

        Args:
            x (Tensor): Input sequence as the source.
            max_sum_len (int): Maximum number of decoding steps.
            beam_width (int): Beam size.
            len_oovs (Tensor): Out-of-vocabulary counts; its maximum is
                forwarded to ``self.best_k``.
            x_padding_masks (Tensor):
                The padding masks for the input sequences.

        Returns:
            The ``tokens`` of the hypothesis with the highest ``seq_score()``.
        """
        # Encode the source (OOV ids replaced) and derive the first decoder state.
        encoder_output, encoder_states = self.model.encoder(
            replace_oovs(x, self.vocab), self.model.decoder.embedding)
        coverage_vector = torch.zeros((1, x.shape[1])).to(self.DEVICE)
        decoder_states = self.model.reduce_state(encoder_states)

        # The search starts from a single hypothesis holding only SOS.
        start = Beam([self.vocab.SOS], [0], decoder_states,
                     coverage_vector)

        # `k` is the number of slots still open; it shrinks as hypotheses finish.
        k = beam_width
        live = [start]
        completed = []

        for _ in range(max_sum_len):
            heap = []
            for hypo in live:
                if hypo.tokens[-1] == self.vocab.EOS:
                    # A finished hypothesis frees up one beam slot.
                    completed.append(hypo)
                    k -= 1
                    continue

                expansions = self.best_k(hypo, k,
                                         encoder_output, x_padding_masks, x,
                                         torch.max(len_oovs))
                for can in expansions:
                    # Heap entries are ordered by sequence score, with the
                    # object id breaking ties.
                    entry = (can.seq_score(), id(can), can)
                    add2heap(heap, entry, k)

            live = [entry[2] for entry in heap]
            # stop when there are enough completed hypotheses
            if len(completed) == beam_width:
                break

        # If too few hypotheses finished, the remaining live ones compete too.
        completed.extend(live)
        ranked = sorted(completed,
                        key=lambda hypo: hypo.seq_score(),
                        reverse=True)
        best = ranked[0]
        return best.tokens

コード例 #10

0

ファイルを表示

ファイル: unittest.py プロジェクト: ForeverZyh/certified_lstms

    def test_map_ori2x(self):
        """Check that try_all_pos honours map_ori2x for Sub, Del and Ins.

        A candidate whose map is all None must produce nothing; a candidate
        that still maps "floor" must yield the expected substituted,
        deleted and inserted sentences.
        """
        model = TestModel(100)
        perturb = [(Sub("data/pddb", True), 3), (Del({"floor"}), 2),
                   (Ins(), 1)]
        sub_tran, del_tran, ins_tran = (tran for (tran, _) in perturb)
        sen = "i see that a cat sits on the floor .".split()
        sub_pos = sub_tran.get_pos(sen)
        del_pos = del_tran.get_pos(sen)
        ins_pos = ins_tran.get_pos(sen)

        def perturb_once(cand, positions, tran):
            # Run one transformation against a fresh single-slot beam.
            beam = Beam(1)
            cand.try_all_pos(positions, tran, model.get_grad(cand.x, 0),
                             model.get_embed, beam)
            return beam

        # No original position is mapped, so nothing can be applied.
        unmapped = Candidate(["dummy"], 0, [None] * len(sen))
        result = perturb_once(unmapped, ins_pos, ins_tran)
        self.assertEqual(0, len(result.queue))

        # Only "floor" is mapped (to index 1): all perturbations can be applied.
        map_ori2x = [None] * len(sen)
        map_ori2x[sen.index("floor")] = 1
        mapped = Candidate(["dummy1", "floor", "dummy2"], 0, map_ori2x)

        result = perturb_once(mapped, sub_pos, sub_tran)
        self.assertSequenceEqual(["dummy1", "flooring", "dummy2"],
                                 result.check_balance()[0][0].x)

        result = perturb_once(mapped, del_pos, del_tran)
        self.assertSequenceEqual(["dummy1", "dummy2"],
                                 result.check_balance()[0][0].x)

        result = perturb_once(mapped, ins_pos, ins_tran)
        self.assertSequenceEqual(["dummy1", "floor", "floor", "dummy2"],
                                 result.check_balance()[0][0].x)

コード例 #11

0

ファイルを表示

    def try_all_pos(self, meta_data, pos: list, tran: Transformation,
                    gradients, victim_model, candidates: Beam):
        """
        Apply ``tran`` at every usable position and push the resulting tree candidates into ``candidates``.

        The per-position bookkeeping records code 1 for Del, 2 for Ins and
        3 for Sub in ``trans_on_pos``; Sub additionally records the
        substituted token in ``syns_on_pos``. Substitutions that are not in
        the dataset vocabulary are skipped.

        :param meta_data: (ori, old_tree, vocab) original input tokens, original tree, dataset vocab (not the vocab in
        training). Difference: dataset vocab contains out of vocab (OOV) words, while vocab in training map OOV to UNK.
        :param pos: possible positions, a list of (start_pos, end_pos)
        :param tran: the target transformation
        :param gradients: the gradients with respect to self.x, or None for random scores
        :param victim_model: the victim model
        :param candidates: a beam of candidates, will be modified by this method
        :return: None
        :raises NotImplementedError: if ``tran`` is not a Del, Ins or Sub
        """

        get_embed = victim_model.get_embed
        ori, old_tree, vocab = meta_data
        for (start_pos_ori, end_pos_ori) in pos:
            if any(self.map_ori2x[i] is None
                   for i in range(start_pos_ori, end_pos_ori)):
                continue

            start_pos_x = self.map_ori2x[start_pos_ori]
            # map_ori2x[end_pos_ori] itself may be None, so derive the end
            # from the last covered index instead.
            end_pos_x = self.map_ori2x[end_pos_ori - 1] + 1

            for new_x in tran.transformer(self.x, start_pos_x, end_pos_x):
                delta_len = len(new_x) - len(self.x)

                if isinstance(tran, Del):
                    new_trans_on_pos = (self.trans_on_pos[:start_pos_ori] +
                                        [1] +
                                        self.trans_on_pos[end_pos_ori:])
                    new_syns_on_pos = copy.copy(self.syns_on_pos)
                    if gradients is None:
                        new_score = np.random.random()
                    else:
                        old_embedding = get_embed(
                            [self.x[start_pos_x]])[0]  # (dim)
                        # deletion is scored as moving the embedding to zero
                        new_score = self.score + np.sum(
                            (0 - old_embedding) * gradients[start_pos_x])
                elif isinstance(tran, Ins):
                    new_trans_on_pos = (self.trans_on_pos[:start_pos_ori] +
                                        [2] +
                                        self.trans_on_pos[end_pos_ori:])
                    new_syns_on_pos = copy.copy(self.syns_on_pos)
                    if gradients is None:
                        new_score = np.random.random()
                    else:
                        old_embedding = get_embed(
                            [self.x[start_pos_x]])  # (1, dim)
                        # for Ins the gradients arrive as a pair
                        ioux_grads, c_grads = gradients
                        delta_ioux, delta_c = victim_model.model.cal_delta_Ins(
                            old_embedding)
                        new_score = self.score + np.sum(
                            ioux_grads[start_pos_x] * delta_ioux) + np.sum(
                                c_grads[start_pos_x] * delta_c)
                elif isinstance(tran, Sub):
                    replacement = new_x[start_pos_x]
                    if vocab.get(replacement, -1) == -1:
                        # not in the dataset vocabulary
                        continue
                    new_trans_on_pos = (self.trans_on_pos[:start_pos_ori] +
                                        [3] +
                                        self.trans_on_pos[end_pos_ori:])
                    new_syns_on_pos = (self.syns_on_pos[:start_pos_ori] +
                                       [new_x[start_pos_x]] +
                                       self.syns_on_pos[end_pos_ori:])
                    if gradients is None:
                        new_score = np.random.random()
                    else:
                        old_embedding = get_embed(
                            [self.x[start_pos_x]])[0]  # (dim)
                        new_embedding = get_embed(
                            [new_x[start_pos_x]])[0]  # (dim)
                        new_score = self.score + np.sum(
                            (new_embedding - old_embedding) *
                            gradients[start_pos_x])
                else:
                    raise NotImplementedError

                new_tree = cons_tree(ori, new_trans_on_pos,
                                     new_syns_on_pos, old_tree, vocab)

                # Covered positions become unavailable; later ones shift by
                # the change in sequence length.
                consumed = [None] * (end_pos_ori - start_pos_ori)
                shifted_tail = [
                    None if p is None else p + delta_len
                    for p in self.map_ori2x[end_pos_ori:]
                ]
                new_map_ori2x = (self.map_ori2x[:start_pos_ori] + consumed +
                                 shifted_tail)

                new_candidate = CandidateTree(new_tree, new_x, new_score,
                                              new_map_ori2x,
                                              new_trans_on_pos,
                                              new_syns_on_pos)
                candidates.add(new_candidate, new_score)