Example #1
    def cube_pruning(self):

        for bidx in range(1, self.maxlen + 1):

            eq_classes = OrderedDict()
            self.pop_subcube_approx_cache, self.push_subcube_approx_cache, \
                    self.subcube_lines_cache = [], [], []

            # group the previous beam into equivalence classes for the cube
            self.merge(bidx, eq_classes)

            # create cube and generate next beam from cube
            if self.ifbatch:
                cube = self.create_cube_batch(bidx, eq_classes)
            else:
                cube = self.create_cube(bidx, eq_classes)

            if self.cube_prune(bidx, cube):
                sorted_samples = sorted(self.translations,
                                        key=lambda tup: tup[0])
                best_sample = sorted_samples[0]
                return back_tracking(self.beam, best_sample, False)

            self.beam[bidx] = sorted(self.beam[bidx], key=lambda tup: tup[0])

        # no early stop, back tracking
        if len(self.translations) == 0:
            best_sample = (self.beam[self.maxlen][0][0],) + \
                self.beam[self.maxlen][0][2:] + (self.maxlen, )
            return back_tracking(self.beam, best_sample, False)
        else:
            sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
            best_sample = sorted_samples[0]
            return back_tracking(self.beam, best_sample, False)
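The `merge` call above groups the previous beam into equivalence classes before the cube is built; its body is not shown on this page. Below is a minimal sketch of one plausible grouping, assuming each hypothesis is a tuple whose last element is its most recent word id. The function name and tuple layout are illustrative assumptions, not the repository's actual API.

    from collections import OrderedDict

    def group_by_last_word(prev_beam):
        # Hypotheses sharing a last word fall into one class, so their
        # continuations can be scored together inside the cube.
        eq_classes = OrderedDict()
        for item in prev_beam:
            eq_classes.setdefault(item[-1], []).append(item)
        return eq_classes

    # toy usage with (cost, state, word_id) items: two hypotheses end with
    # word 7, one with word 3
    beam = [(1.2, 's0', 7), (1.5, 's1', 7), (2.0, 's2', 3)]
    print(group_by_last_word(beam))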
Example #2
    def cube_pruning(self):

        for bidx in range(1, self.maxlen + 1):

            eq_classes = OrderedDict()
            self.approx_items, self.cube_lines_mergeout = [], []
            self.merge(bidx, eq_classes)

            # create cube and generate next beam from cube
            cube = self.create_cube(bidx, eq_classes)

            if self.cube_prune(bidx, cube):
                _log('early stop! found {} samples ending with EOS.'.format(
                    self.k))
                avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
                _log('average location of back pointers [{}/{}={}]'.format(
                    self.locrt[0], self.locrt[1], avg_bp))
                sorted_samples = sorted(self.translations,
                                        key=lambda tup: tup[0])
                best_sample = sorted_samples[0]
                _log('translation length (with EOS) [{}]'.format(
                    best_sample[-1]))
                for sample in sorted_samples:  # tuples
                    _log('{}'.format(sample))

                return back_tracking(self.beam, best_sample, False)

            # Because P(f|abcd) is approximated as P(f|cd), the beam produced
            # by cube pruning may be out of order by loss, so re-sort it here
            # (loss ascending).
            self.beam[bidx] = sorted(self.beam[bidx], key=lambda tup: tup[0])
            debug('beam {} ----------------------------'.format(bidx))
            for b in self.beam[bidx]:
                debug('{}'.format(b))

        # no early stop, back tracking
        avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
        _log('average location of back pointers [{}/{}={}]'.format(
            self.locrt[0], self.locrt[1], avg_bp))
        if len(self.translations) == 0:
            _log('no early stop: no candidate ends with EOS, selecting from '
                 'length-{} candidates, which may not end with EOS.'.format(
                     self.maxlen))
            best_sample = (self.beam[self.maxlen][0][0],) + \
                self.beam[self.maxlen][0][2:] + (self.maxlen, )
            _log('translation length (with EOS) [{}]'.format(best_sample[-1]))
            return back_tracking(self.beam, best_sample, False)
        else:
            _log(
                'no early stop: fewer than {} candidates end with EOS, '
                'selecting the best sample ending with EOS from {} '
                'samples.'.format(self.k, len(self.translations)))
            sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
            best_sample = sorted_samples[0]
            _log('translation length (with EOS) [{}]'.format(best_sample[-1]))
            for sample in sorted_samples:  # tuples
                _log('{}'.format(sample))
            return back_tracking(self.beam, best_sample, False)
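Every exit path in these examples ends in `back_tracking(self.beam, best_sample, ...)`, which follows back pointers through the per-step beams to recover the output word sequence. A self-contained sketch of that walk, assuming a simplified entry layout of `(cost, state, word_id, backpointer)` (the real beam entries carry more fields, but the traversal is the same):

    def back_track(beam, length, last_word, bp):
        # Walk from the final step back to step 1, collecting word ids.
        words = [last_word]
        for i in range(length - 1, 0, -1):
            _cost, _state, word_id, prev_bp = beam[i][bp]
            words.append(word_id)
            bp = prev_bp
        return words[::-1]

    # toy beams: beam[i][j] = (cost, state, word_id, backpointer into beam[i-1])
    beam = {1: [(0.5, None, 4, 0)], 2: [(1.1, None, 9, 0)]}
    print(back_track(beam, 3, last_word=2, bp=0))  # -> [4, 9, 2]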
Example #3
    def beam_search_comb(self, np_src_sent):

        maxlen = self.maxlen
        hyp_scores = np.zeros(1).astype('float32')
        s_init, ctx0, c_x0 = self.fn_init(
            np_src_sent)  # np_src_sent (sl, 1), beam==1
        detail = False
        y_emb_im1 = self.fn_emb([-1])
        init_beam_sm(self.beam,
                     cnt=maxlen,
                     init_state=s_init[0],
                     init_y_emb_im1=y_emb_im1)
        for i in range(1, maxlen + 1):
            # beam search here
            if (i - 1) % 10 == 0:
                debug(str(i - 1))

            prevb = self.beam[i - 1]
            len_prevb = len(prevb)
            # batch states of previous beam
            s_im1 = np.array([b[1] for b in prevb])
            yemb_im1 = np.array([b[3][0] for b in prevb])
            # (src_sent_len, 1, 2*src_nhids) -> (src_sent_len, len_prevb, 2*src_nhids)
            context = np.tile(ctx0, [len_prevb, 1])
            c_x = np.tile(c_x0, [len_prevb, 1])

            if self.ifsplit:
                hi = self.fn_nh(yemb_im1, s_im1)
                _, ai = self.fn_na(context, c_x, hi)
                si = self.fn_ns(hi, ai)
                mo = self.fn_mo(yemb_im1, ai, si)
                next_scores = self.fn_pws(mo, self.ptv)
                next_probs = -next_scores if self.ifscore else self.fn_ce(
                    next_scores)
            else:
                y_im1 = np.array([b[2] for b in prevb])
                next_probs, si = self.fn_next(y_im1, context, s_im1)

            next_ces = -np.log(next_probs)
            # broadcast: row j adds hypothesis j's accumulated score to the
            # per-word cross-entropies, giving a (len_prevb, voc_size) grid
            cand_scores = hyp_scores[:, None] + next_ces
            cand_scores_flat = cand_scores.flatten()
            ranks_flat = part_sort(cand_scores_flat,
                                   self.k - len(self.translations))
            voc_size = next_ces.shape[1]
            prevb_id = ranks_flat // voc_size
            word_indices = ranks_flat % voc_size
            costs = cand_scores_flat[ranks_flat]

            for b in zip(costs, si[prevb_id], word_indices, prevb_id):
                if b[2] == self.eos_id:
                    if self.ifnorm:
                        self.translations.append(((b[0] / i), b[0]) + b[2:] +
                                                 (i, ))
                    else:
                        self.translations.append((b[0], ) + b[2:] + (i, ))
                    if len(self.translations) == self.k:
                        # early stop: k finished samples collected, return the best
                        debug('early stop! found {} samples ending with EOS.'.
                              format(self.k))
                        avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
                        debug('average location of back pointers [{}/{}={}]'.
                              format(self.locrt[0], self.locrt[1], avg_bp))
                        sorted_samples = sorted(self.translations,
                                                key=lambda tup: tup[0])
                        best_sample = sorted_samples[0]
                        debug('translation length (with EOS) [{}]'.format(
                            best_sample[-1]))
                        for sample in sorted_samples:  # tuples
                            debug('{}'.format(sample))
                        return back_tracking(self.beam, best_sample, detail)
                else:
                    # update back-pointer stats as the item enters the current beam
                    self.locrt[0] += (b[-1] + 1)
                    self.locrt[1] += 1
                    self.beam[i].append(
                        (b[0], b[1], b[2], self.fn_emb([b[2]]), b[3]))
            debug('beam {} ----------------------------'.format(i))
            for b in self.beam[i]:
                debug(b[0:1] + b[2:])  # do not output state
            hyp_scores = np.array([b[0] for b in self.beam[i]])

        # no early stop, back tracking
        avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
        debug('average location of back pointers [{}/{}={}]'.format(
            self.locrt[0], self.locrt[1], avg_bp))
        if len(self.translations) == 0:
            debug('no early stop: no candidate ends with EOS, selecting from '
                  'length-{} candidates, which may not end with EOS.'.format(
                      maxlen))
            best_sample = ((self.beam[maxlen][0][0], ) +
                           self.beam[maxlen][0][2:] + (maxlen, ))
            debug('translation length (with EOS) [{}]'.format(best_sample[-1]))
            return back_tracking(self.beam, best_sample, detail)
        else:
            debug(
                'no early stop: fewer than {} candidates end with EOS, '
                'selecting the best sample ending with EOS from {} '
                'samples.'.format(self.k, len(self.translations)))
            sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
            best_sample = sorted_samples[0]
            debug('translation length (with EOS) [{}]'.format(best_sample[-1]))
            for sample in sorted_samples:  # tuples
                debug('{}'.format(sample))
            return back_tracking(self.beam, best_sample, detail)
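`part_sort(vec, num)` above selects the `num` lowest-cost candidates without fully sorting the score array. The helper is defined elsewhere in the repository; a plausible stand-in built on `np.argpartition`, returning the indices of the `num` smallest values in ascending order, would be:

    import numpy as np

    def part_sort(vec, num):
        # argpartition picks the `num` smallest indices in O(n); only that
        # slice is then sorted, so the full array is never ordered.
        if num >= len(vec):
            return np.argsort(vec)
        idx = np.argpartition(vec, num)[:num]
        return idx[np.argsort(vec[idx])]

    scores = np.array([3.2, 0.1, 5.0, 0.7, 2.4])
    print(part_sort(scores, 3))  # -> [1 3 4], i.e. values 0.1, 0.7, 2.4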
Example #4
    def beam_search(self, np_src_sent):

        maxlen = self.maxlen
        s_init, context, c_x = self.fn_init(
            np_src_sent)  # np_src_sent (sl, 1), beam==1
        # (1, trg_nhids), (src_len, 1, src_nhids*2)
        detail = False
        y_emb_im1 = self.fn_emb([-1])
        init_beam_sm(self.beam,
                     cnt=maxlen,
                     init_state=s_init,
                     init_y_emb_im1=y_emb_im1)

        for i in range(1, maxlen + 1):
            if (i - 1) % 10 == 0:
                debug(str(i - 1))
            cands = []
            for j in range(len(self.beam[i - 1])):  # size of last beam
                # beam entry: (accum_loss, state, y_im1, y_emb_im1, backpointer)
                accum_im1, s_im1, y_im1, yemb_im1, bp_im1 = self.beam[i - 1][j]

                if self.ifsplit:
                    hi = self.fn_nh(yemb_im1, s_im1)
                    # pi (attention weights over src_len, summing to 1) is
                    # unused here
                    _, ai = self.fn_na(context, c_x, hi)
                    si = self.fn_ns(hi, ai)
                    mo = self.fn_mo(yemb_im1, ai, si)
                    next_scores = self.fn_pws(mo, self.ptv)
                    next_probs = -next_scores if self.ifscore else self.fn_ce(
                        next_scores)
                else:
                    next_probs, si = self.fn_next(y_im1, context, s_im1)

                next_ces = -np.log(next_probs)
                next_ces_flat = next_ces.flatten()  # (1,vocsize) -> (vocsize,)
                ranks_idx_flat = part_sort(next_ces_flat,
                                           self.k - len(self.translations))
                k_avg_loss_flat = next_ces_flat[
                    ranks_idx_flat]  # -log p(y|x) for the selected words

                accum_i = accum_im1 + k_avg_loss_flat
                cands += [(accum_i[idx], si, wid, self.fn_emb([wid]), j)
                          for idx, wid in enumerate(ranks_idx_flat)]

            k_ranks_flat = part_sort(
                np.asarray([cand[0] for cand in cands] + [np.inf]),
                self.k - len(self.translations))
            k_sorted_cands = [cands[r] for r in k_ranks_flat]

            for b in k_sorted_cands:
                if b[2] == self.eos_id:
                    debug('add: {}'.format(((b[0] / i), b[0]) + b[-2:] +
                                           (i, )))
                    if self.ifnorm:
                        self.translations.append(((b[0] / i), b[0]) + b[-2:] +
                                                 (i, ))
                    else:
                        self.translations.append((b[0], ) + b[-2:] + (i, ))
                    if len(self.translations) == self.k:
                        # early stop: k finished samples collected, return the best
                        debug('early stop! found {} samples ending with EOS.'.
                              format(self.k))
                        avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
                        debug('average location of back pointers [{}/{}={}]'.
                              format(self.locrt[0], self.locrt[1], avg_bp))
                        sorted_samples = sorted(self.translations,
                                                key=lambda tup: tup[0])
                        best_sample = sorted_samples[0]
                        debug('translation length (with EOS) [{}]'.format(
                            best_sample[-1]))
                        for sample in sorted_samples:  # tuples
                            debug('{}'.format(sample))

                        return back_tracking(self.beam, best_sample, detail)
                else:
                    # update back-pointer stats as the item enters the current beam
                    self.locrt[0] += (b[-1] + 1)
                    self.locrt[1] += 1
                    self.beam[i].append(b)
            debug('beam {} ----------------------------'.format(i))
            for b in self.beam[i]:
                debug(b[0:2] + b[-2:])  # do not output state

        # no early stop, back tracking
        avg_bp = format(self.locrt[0] / self.locrt[1], '0.3f')
        debug('average location of back pointers [{}/{}={}]'.format(
            self.locrt[0], self.locrt[1], avg_bp))
        if len(self.translations) == 0:
            debug('no early stop: no candidate ends with EOS, selecting from '
                  'length-{} candidates, which may not end with EOS.'.format(
                      maxlen))
            best_sample = (self.beam[maxlen][0][0],
                           ) + self.beam[maxlen][0][-2:] + (maxlen, )
            debug('translation length (with EOS) [{}]'.format(best_sample[-1]))
            return back_tracking(self.beam, best_sample, detail)
        else:
            debug(
                'no early stop: fewer than {} candidates end with EOS, '
                'selecting the best sample ending with EOS from {} '
                'samples.'.format(self.k, len(self.translations)))
            sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
            best_sample = sorted_samples[0]
            debug('translation length (with EOS) [{}]'.format(best_sample[-1]))
            for sample in sorted_samples:  # tuples
                debug('{}'.format(sample))
            return back_tracking(self.beam, best_sample, detail)
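When `self.ifnorm` is set, a finished hypothesis is stored with its accumulated loss divided by its length `i` (the `(b[0] / i, b[0])` pair above), so short and long candidates compete on mean per-token loss at the final sort. A minimal numeric illustration:

    def normalized(accum_loss, length):
        # length normalization: mean negative log-probability per token
        return accum_loss / length

    # the longer hypothesis wins after normalization despite a higher raw loss
    print(normalized(6.0, 10))  # 0.6
    print(normalized(4.0, 5))   # 0.8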