Example #1
0
    def create_cube(self, bidx, eq_classes):
        """Build one sub-cube per equivalence class of the previous beam.

        For every class the decoder is advanced one step from the state of
        the class's first row, ``part_sort`` is applied to the negated
        next-word probabilities to pick ``self.k - len(self.translations)``
        word ids, and each kept row is paired with each picked word.

        Args:
            bidx: index of the beam being built (not read in this method).
            eq_classes: mapping (with ``iteritems``) from sub-cube id to a
                sequence of 5-tuples; the first row is unpacked as
                ``(score, state, y_im1, y_emb_im1, _)``.

        Returns:
            A list of sub-cubes.  A sub-cube is a list of rows and a row is a
            list of tuples ``row + (state, ce, wid, row_idx, col_idx,
            subcube_id, n_rows)``.

        Side effects:
            Appends one entry per class to ``self.pop_subcube_approx_cache``,
            ``self.push_subcube_approx_cache`` and
            ``self.subcube_lines_cache``.
        """
        n_finished = len(self.translations)
        result = []

        for subcube_id, rows in eq_classes.iteritems():
            n_rows = len(rows)
            # Only the first row of the class drives the one-step prediction.
            _, prev_state, y_im1, y_emb_im1, _ = rows[0]

            attention = None
            if not self.ifsplit:
                probs, next_state = self.fn_next(y_im1, self.context,
                                                 prev_state)
                probs = probs.flatten()
            else:
                hidden = self.fn_nh(y_emb_im1, prev_state)
                _, attention = self.fn_na(self.context, self.uh, hidden)
                next_state = self.fn_ns(hidden, attention)
                merged_out = self.fn_mo(y_emb_im1, attention, next_state)
                # the larger the better
                scores = self.fn_pws(merged_out, self.ptv)
                probs = self.fn_ce(scores).flatten()

            top_wids = part_sort(-probs, self.k - n_finished)
            # negative log-probability of each picked word
            top_ces = -numpy.log(probs[top_wids])

            self.pop_subcube_approx_cache.append(
                (attention, next_state, top_ces))
            self.push_subcube_approx_cache.append(None)

            subcube, line_cache = [], []
            for row_idx, row in enumerate(rows):
                # at most the first two rows of a class are expanded
                if row_idx >= 2:
                    break
                cells = []
                for col_idx, wid in enumerate(top_wids):
                    cells.append(row + (prev_state, top_ces[col_idx], wid,
                                        row_idx, col_idx, subcube_id,
                                        n_rows))
                subcube.append(cells)
                line_cache.append(None)

            result.append(subcube)
            self.subcube_lines_cache.append(line_cache)

        return result
Example #2
0
    def create_cube_batch(self, bidx, eq_classes):
        """Build the cube for one step, scoring all sub-cubes in one batch.

        A first pass collects, for every equivalence class, the previous word
        and the (averaged) previous state; ``self.fn_next`` is then called
        once on the whole batch, and a second pass expands every class into a
        sub-cube using the words picked by ``part_sort`` on the negated
        probabilities.

        Args:
            bidx: index of the beam being built (not read in this method).
            eq_classes: mapping from sub-cube id to a sequence of 6-tuples;
                the first row is unpacked as
                ``(score, state, y_im1, y_im2, y_im3, _)``.

        Returns:
            A list of sub-cubes.  A sub-cube is a list of rows and a row is a
            list of tuples ``row + (state, None, ce, wid, row_idx, col_idx,
            subcube_id, n_rows)``.

        Side effects:
            Updates ``self.prev_beam_ptrs`` and ``self.onerow_subcube_cnt``,
            appends to ``self.push_subcube_approx_cache``,
            ``self.pop_subcube_approx_cache`` and
            ``self.subcube_lines_cache``, and calls ``self.printCube``.
        """
        cube = []
        batch_y_im1, batch_s_im1 = [], []
        for whichsubcub, leq_class in eq_classes.iteritems():  # sub cube

            each_subcube_rowsz = len(leq_class)
            self.prev_beam_ptrs += each_subcube_rowsz
            score_im1_r0, s_im1_r0, y_im1, y_im2, y_im3, _ = leq_class[0]

            if each_subcube_rowsz == 1:
                _avg_sim1 = s_im1_r0
                self.onerow_subcube_cnt += 1
            else:
                # NOTE(review): the slice [0:1] keeps only the first row, so
                # this "mean" is taken over a single state -- confirm whether
                # all rows of the class were meant to be averaged.
                merged_sim1 = [tup[1] for tup in leq_class[0:1]]
                np_merged_sim1 = numpy.array(merged_sim1)
                # arithmetic mean
                _avg_sim1 = numpy.mean(np_merged_sim1, axis=0)
            batch_y_im1.append(y_im1)
            batch_s_im1.append(_avg_sim1)
            self.push_subcube_approx_cache.append(None)

        np_batch_s_im1 = numpy.array(batch_s_im1, dtype='float32')
        subcube_num = len(batch_y_im1)
        ctx = numpy.tile(self.context, [subcube_num, 1])
        # A single sub-cube whose state carries an extra leading axis is
        # unwrapped before being fed to fn_next.
        if np_batch_s_im1.shape[0] == 1 and 3 == len(np_batch_s_im1.shape):
            np_batch_s_im1 = np_batch_s_im1[0]
        next_probs, next_states = self.fn_next(
            *[batch_y_im1, ctx, np_batch_s_im1])

        topn = self.k - len(self.translations)
        # NOTE(review): eq_classes is indexed by position below, which
        # assumes its keys are 0..n-1 and that iteritems() above visited them
        # in that order -- verify against the caller.
        for which in range(len(eq_classes)):
            _avg_sim1, leq_class, next_prob, _avg_si = batch_s_im1[which], \
                    eq_classes[which], next_probs[which], next_states[which]
            each_subcube_rowsz = len(leq_class)
            next_prob_flat = next_prob.flatten()
            _next_krank_wids = part_sort(-next_prob_flat, topn)
            k_avg_loss_flat = -numpy.log(next_prob_flat[_next_krank_wids])

            # Slots that this batch path does not compute stay None.
            self.pop_subcube_approx_cache.append(
                (None, None, None, _avg_si, None, None, _next_krank_wids,
                 k_avg_loss_flat))

            subcube = []
            # Every sub-cube gets its own line cache; sharing one list would
            # alias the cached lines of all sub-cubes.
            subcube_line_cache = []
            for i, tup in enumerate(leq_class):
                # The second appended slot (language-model score) is always
                # None here.
                subcube.append([
                    tup + (_avg_sim1, None, k_avg_loss_flat[j], wid, i, j,
                           which, each_subcube_rowsz)
                    for j, wid in enumerate(_next_krank_wids)
                ])
                subcube_line_cache.append(None)

            cube.append(subcube)
            self.subcube_lines_cache.append(subcube_line_cache)

        self.printCube(cube)

        return cube
Example #3
0
    def create_cube(self, bidx, eq_classes):
        """Build one sub-cube per equivalence class of the previous beam.

        The column words of a sub-cube come from the language model when
        ``self.lm`` is set and ``bidx >= 4``; otherwise they come from one
        decoder step run on the (averaged) previous state of the class.

        Args:
            bidx: index of the beam being built; selects the LM branch.
            eq_classes: mapping (with ``iteritems``) from sub-cube id to a
                sequence of 6-tuples; the first row is unpacked as
                ``(score, state, y_im1, y_im2, y_im3, _)``.

        Returns:
            A list of sub-cubes.  A sub-cube is a list of rows and a row is a
            list of tuples ``row + (state, lm_ce_or_None, ce_or_None, wid,
            row_idx, col_idx, subcube_id, n_rows)``.

        Raises:
            NotImplementedError: in the LM branch when ``self.ngram`` is not
                2, 3 or 4.

        Side effects:
            Updates ``self.prev_beam_ptrs`` and ``self.onerow_subcube_cnt``,
            appends to ``self.pop_subcube_approx_cache``,
            ``self.push_subcube_approx_cache`` and
            ``self.subcube_lines_cache``, and calls ``self.printCube`` once
            on the finished cube.
        """
        cube = []
        cnt_transed = len(self.translations)
        for whichsubcub, leq_class in eq_classes.iteritems():  # sub cube

            each_subcube_rowsz = len(leq_class)
            self.prev_beam_ptrs += each_subcube_rowsz

            score_im1_r0, s_im1_r0, y_im1, y_im2, y_im3, _ = leq_class[0]
            subcube = []
            subcube_line_cache = []
            _cube_lm_krank_ces_i, _cube_krank_scores_i = None, None

            if each_subcube_rowsz == 1:
                _avg_sim1 = s_im1_r0
                self.onerow_subcube_cnt += 1
            else:
                # NOTE(review): the slice [0:1] keeps only the first row, so
                # this "mean" is taken over a single state -- confirm whether
                # all rows of the class were meant to be averaged.
                merged_sim1 = [tup[1] for tup in leq_class[0:1]]
                # Arithmetic mean.  Other schemes tried earlier: geometric
                # and quadratic mean (did not work), harmonic mean,
                # score-weighted harmonic mean, softmax-score-weighted mean.
                _avg_sim1 = numpy.mean(numpy.array(merged_sim1), axis=0)

            if self.lm is not None and bidx >= 4:
                # TODO sort the row dimension by language model words distribution
                debug('sort by lm: -3 -2 -1 => {} {} {}'.format(
                    y_im3, y_im2, y_im1))
                # Build the LM history; -1 marks a missing earlier word.
                if self.ngram == 2:
                    gram = [y_im1]
                elif self.ngram == 3:
                    gram = [y_im1] if y_im2 == -1 else [y_im2, y_im1]
                elif self.ngram == 4:
                    gram = [y_im1] if y_im3 == -1 and y_im2 == -1 else (
                        [y_im2, y_im1]
                        if y_im3 == -1 else [y_im3, y_im2, y_im1])
                else:
                    raise NotImplementedError

                lm_next_logps, next_ids = vocab_prob_given_ngram(
                    self.lm, gram, self.tvcb, self.tvcb_i2w)
                np_lm_next_neg_logps = -numpy.asarray(lm_next_logps)
                np_next_ids = numpy.asarray(next_ids)

                _next_krank_ids = part_sort(np_lm_next_neg_logps,
                                            self.k - cnt_transed)
                _cube_lm_krank_ces_i = np_lm_next_neg_logps[_next_krank_ids]
                _next_krank_wids = np_next_ids[_next_krank_ids]

                for idx in gram:
                    _log(idx if idx == -1 else self.tvcb_i2w[idx] + ' ',
                         nl=False)
                _log('=> ', nl=False)
                # Pair each word with its own score: the score array is
                # positional (aligned with next_ids), not indexed by word id.
                for wid, lm_ce in zip(_next_krank_wids,
                                      _cube_lm_krank_ces_i):
                    _log('{}({}) '.format(self.tvcb_i2w[wid], lm_ce),
                         nl=False)
                _log('')
                self.pop_subcube_approx_cache.append(None)
            else:
                # TODO sort the row dimension by average scores
                debug('sort by averge scores')
                _y_emb_im1, _avg_hi = self.fn_nh(y_im1, _avg_sim1)
                _, _avg_ai = self.fn_na(self.context, self.uh, _avg_hi)
                _avg_si = self.fn_ns(_avg_hi, _avg_ai)
                _avg_moi = self.fn_mo(_y_emb_im1, _avg_ai, _avg_si)
                _avg_scores_i = self.fn_pws(_avg_moi,
                                            self.ptv)  # the larger the better
                _avg_ces_i = self.fn_ce(_avg_scores_i).flatten()
                _next_krank_wids = part_sort(_avg_ces_i, self.k - cnt_transed)
                _cube_krank_scores_i = _cube_krank_ces_ith = _avg_ces_i[
                    _next_krank_wids]

                self.pop_subcube_approx_cache.append(
                    (_y_emb_im1, _avg_hi, _avg_ai, _avg_si, _avg_moi,
                     _avg_scores_i, _next_krank_wids, _cube_krank_ces_ith))
            self.push_subcube_approx_cache.append(None)

            # Explicit "is None" tests: truth-testing a numpy array with more
            # than one element raises "truth value ... is ambiguous".
            for i, tup in enumerate(leq_class):
                subcube.append([
                    tup +
                    (_avg_sim1, None if _cube_lm_krank_ces_i is None else
                     _cube_lm_krank_ces_i[j], None if
                     _cube_krank_scores_i is None else _cube_krank_scores_i[j],
                     wid, i, j, whichsubcub, each_subcube_rowsz)
                    for j, wid in enumerate(_next_krank_wids)
                ])
                subcube_line_cache.append(None)

            cube.append(subcube)
            self.subcube_lines_cache.append(subcube_line_cache)

        # Print the finished cube once rather than a partial cube per class.
        self.printCube(cube)

        return cube
Example #4
0
    def create_cube(self, bidx, eq_classes):
        """Build one sub-cube per equivalence class and dump it via debug.

        The column words of a sub-cube come from the language model when
        ``self.lm`` is set and ``bidx >= 4``; otherwise they come from one
        decoder step run on the mean previous state of the class, ranked by
        the raw scores returned by ``self.fn_pws``.

        Args:
            bidx: index of the beam being built; selects the LM branch.
            eq_classes: mapping (with ``iteritems``) from sub-cube id to a
                sequence of 6-tuples; the first row is unpacked as
                ``(score, state, y_im1, y_im2, y_im3, _)``.

        Returns:
            A list of sub-cubes.  A sub-cube is a list of rows and a row is a
            list of tuples ``row + (state, lm_ce_or_None, score_or_None, wid,
            row_idx, col_idx, subcube_id, n_rows)``.

        Raises:
            NotImplementedError: in the LM branch when ``self.ngram`` is not
                2, 3 or 4.

        Side effects:
            Appends one entry per class to ``self.approx_items`` and
            ``self.cube_lines_mergeout``.
        """
        cube = []
        cnt_transed = len(self.translations)
        for whichsubcub, leq_class in eq_classes.iteritems():  # sub cube

            each_subcube_rowsz = len(leq_class)

            score_im1_r0, s_im1_r0, y_im1, y_im2, y_im3, _ = leq_class[0]
            subcube = []
            subcube_line_mergeout = []
            _cube_lm_krank_ces_i, _cube_krank_scores_i = None, None

            if each_subcube_rowsz == 1:
                _avg_sim1 = s_im1_r0
            else:
                # arithmetic mean of the previous states of all rows
                merged_sim1 = [tup[1] for tup in leq_class]
                _avg_sim1 = numpy.mean(numpy.array(merged_sim1), axis=0)

            if self.lm is not None and bidx >= 4:
                # TODO sort the row dimension by language model words distribution
                debug('sort by lm: -3 -2 -1 => {} {} {}'.format(
                    y_im3, y_im2, y_im1))
                # Build the LM history; -1 marks a missing earlier word.
                if self.ngram == 2:
                    gram = [y_im1]
                elif self.ngram == 3:
                    gram = [y_im1] if y_im2 == -1 else [y_im2, y_im1]
                elif self.ngram == 4:
                    gram = [y_im1] if y_im3 == -1 and y_im2 == -1 else (
                        [y_im2, y_im1]
                        if y_im3 == -1 else [y_im3, y_im2, y_im1])
                else:
                    raise NotImplementedError

                lm_next_logps, next_wids = vocab_prob_given_ngram(
                    self.lm, gram, self.tvcb, self.tvcb_i2w)
                np_lm_next_neg_logps = -numpy.asarray(lm_next_logps)
                np_next_wids = numpy.asarray(next_wids)

                _next_krank_ids = part_sort(np_lm_next_neg_logps,
                                            self.k - cnt_transed)
                _cube_lm_krank_ces_i = np_lm_next_neg_logps[_next_krank_ids]
                _next_krank_wids = np_next_wids[_next_krank_ids]

                for idx in gram:
                    _log(idx if idx == -1 else self.tvcb_i2w[idx] + ' ',
                         nl=False)
                _log('=> ', nl=False)
                # Pair each word with its own score: the score array is
                # positional (aligned with next_wids), not indexed by word id.
                for wid, lm_ce in zip(_next_krank_wids,
                                      _cube_lm_krank_ces_i):
                    _log('{}({}) '.format(self.tvcb_i2w[wid], lm_ce),
                         nl=False)
                _log('')
                self.approx_items.append(None)
            else:
                # TODO sort the row dimension by average scores
                debug('sort by averge scores')
                _y_emb_im1, _avg_hi = self.fn_nh(y_im1, _avg_sim1)
                _, _avg_ai = self.fn_na(self.context, self.uh, _avg_hi)
                _avg_si = self.fn_ns(_avg_hi, _avg_ai)
                _avg_moi = self.fn_mo(_y_emb_im1, _avg_ai, _avg_si)
                _avg_scores_i = self.fn_pws(_avg_moi,
                                            self.ptv)  # the larger the better
                _avg_scores_i_flat = _avg_scores_i.flatten()
                # Scores are negated before part_sort because larger is
                # better; the resulting positions are used as word ids.
                _next_krank_wids = part_sort(-_avg_scores_i_flat,
                                             self.k - cnt_transed)
                _cube_krank_scores_i = _avg_scores_i_flat[_next_krank_wids]

                self.approx_items.append(
                    (_y_emb_im1, _avg_hi, _avg_ai, _avg_si, _avg_moi,
                     _avg_scores_i, _next_krank_wids))

            # Explicit "is None" tests: truth-testing a numpy array with more
            # than one element raises "truth value ... is ambiguous".
            for i, tup in enumerate(leq_class):
                subcube.append([
                    tup +
                    (_avg_sim1, None if _cube_lm_krank_ces_i is None else
                     _cube_lm_krank_ces_i[j], None if
                     _cube_krank_scores_i is None else _cube_krank_scores_i[j],
                     wid, i, j, whichsubcub, each_subcube_rowsz)
                    for j, wid in enumerate(_next_krank_wids)
                ])
                subcube_line_mergeout.append(None)

            cube.append(subcube)
            self.cube_lines_mergeout.append(subcube_line_mergeout)

        # print created cube before generating current beam for debug ...
        debug(
            '\n################################ CUBE ################################'
        )
        debug('MERGE => ', nl=False)
        for subcube in cube:
            debug('{} '.format(len(subcube)), nl=False)
        debug('')
        for subcube_id, subcube in enumerate(cube):
            debug('Group: {} contains {} mergings:'.format(
                subcube_id, len(subcube)))
            for line_in_subcube in subcube:
                # The row's own fields sit at the front of every tuple.
                first_item = line_in_subcube[0]
                score_im1, y_im1 = first_item[0], first_item[2]
                y_im1_w = None if y_im1 == -1 else self.tvcb_i2w[y_im1]
                debug('{}={}({: >7}) => '.format(y_im1, y_im1_w,
                                                 format(score_im1, '0.2f')),
                      nl=False)
                for cubetup in line_in_subcube:
                    # Fields appended above, counted from the tuple's end.
                    wid = cubetup[-5]
                    lm_score = cubetup[-7]
                    model_score = cubetup[-6]
                    debug('{}={}({: >5}&+{: >5}={: >5}) | '.format(
                        wid, self.tvcb_i2w[wid],
                        None if lm_score is None else format(lm_score, '0.2f'),
                        None if model_score is None else format(
                            model_score, '0.2f'),
                        None if model_score is None else format(
                            score_im1 + model_score, '0.2f')),
                          nl=False)
                debug('')
        debug(
            '######################################################################'
        )

        return cube
Example #5
0
    def create_cube_batch(self, bidx, eq_classes):
        """Build the cube for one step, scoring all sub-cubes in one batch.

        A first pass collects the previous word, previous state and previous
        word embedding of the first row of every equivalence class.  The
        decoder is then run once on the whole batch (split functions when
        ``self.ifsplit`` is set, ``self.fn_next`` otherwise), and a second
        pass expands every class into a sub-cube using the words picked by
        ``part_sort`` on the negated probabilities.

        Args:
            bidx: index of the beam being built (not read in this method).
            eq_classes: mapping from sub-cube id to a sequence of 5-tuples;
                the first row is unpacked as
                ``(score, state, y_im1, y_emb_im1, _)``.

        Returns:
            A list of sub-cubes.  A sub-cube is a list of rows and a row is a
            list of tuples ``row + (state, ce, wid, row_idx, col_idx,
            subcube_id, n_rows)``.

        Side effects:
            Appends to ``self.push_subcube_approx_cache``,
            ``self.pop_subcube_approx_cache`` and
            ``self.subcube_lines_cache``, and calls ``self.printCube``.
        """
        cube = []
        batch_y_im1, batch_s_im1, batch_y_emb = [], [], []
        for whichsubcub, leq_class in eq_classes.iteritems():  # sub cube

            score_im1_r0, s_im1_r0, y_im1, y_emb_im1, _ = leq_class[0]
            # Drop a leading axis from a 2-d state so states stack cleanly.
            if len(s_im1_r0.shape) == 2:
                s_im1_r0 = s_im1_r0[0]

            batch_y_im1.append(y_im1)
            batch_s_im1.append(s_im1_r0)
            batch_y_emb.append(y_emb_im1[0])
            self.push_subcube_approx_cache.append(None)

        np_batch_s_im1 = numpy.array(batch_s_im1, dtype='float32')
        np_batch_y_emb = numpy.array(batch_y_emb, dtype='float32')
        subcube_num = len(batch_y_im1)
        ctx = numpy.tile(self.context, [subcube_num, 1])
        uh = numpy.tile(self.uh, [subcube_num, 1])
        if np_batch_s_im1.shape[0] == 1 and 3 == len(np_batch_s_im1.shape):
            np_batch_s_im1 = np_batch_s_im1[0]

        _avg_ai = None
        if self.ifsplit:
            _avg_hi = self.fn_nh(np_batch_y_emb, np_batch_s_im1)
            _, _avg_ai = self.fn_na(ctx, uh, _avg_hi)
            next_states = self.fn_ns(_avg_hi, _avg_ai)
            _avg_moi = self.fn_mo(np_batch_y_emb, _avg_ai, next_states)
            _avg_scores_i = self.fn_pws(_avg_moi,
                                        self.ptv)  # the larger the better
            next_probs = self.fn_ce(_avg_scores_i)
        else:
            next_probs, next_states = self.fn_next(
                *[batch_y_im1, ctx, np_batch_s_im1])

        topn = self.k - len(self.translations)
        # NOTE(review): eq_classes is indexed by position below, which
        # assumes its keys are 0..n-1 and that iteritems() above visited them
        # in that order -- verify against the caller.
        for which in range(len(eq_classes)):
            _avg_sim1, leq_class, next_prob = batch_s_im1[which], \
                    eq_classes[which], next_probs[which]
            _avg_si = next_states if len(
                next_states) == 1 else next_states[which]
            each_subcube_rowsz = len(leq_class)
            next_prob_flat = next_prob.flatten()
            _next_krank_wids = part_sort(-next_prob_flat, topn)
            k_avg_loss_flat = -numpy.log(next_prob_flat[_next_krank_wids])

            # NOTE(review): in the split path _avg_ai is the value computed
            # for the whole batch, not a per-sub-cube slice -- confirm that
            # consumers of this cache expect that.
            self.pop_subcube_approx_cache.append(
                (_avg_ai, _avg_si, k_avg_loss_flat))

            subcube = []
            # Every sub-cube gets its own line cache; sharing one list would
            # alias the cached lines of all sub-cubes.
            subcube_line_cache = []
            for i, tup in enumerate(leq_class):
                subcube.append([
                    tup + (_avg_sim1, k_avg_loss_flat[j], wid, i, j, which,
                           each_subcube_rowsz)
                    for j, wid in enumerate(_next_krank_wids)
                ])
                subcube_line_cache.append(None)

            cube.append(subcube)
            self.subcube_lines_cache.append(subcube_line_cache)

        self.printCube(cube)

        return cube
Example #6
0
    def original_trans(self, x):
        """Translate one source sentence with a plain beam search.

        At every step all live hypotheses are extended, the
        ``self.k - dead_k`` candidates selected by ``part_sort`` on the
        accumulated cost are kept, and hypotheses ending in ``self.eos_id``
        are moved to the finished set.  The search runs for at most twice the
        source length and stops early once no hypothesis is live or
        ``self.k`` hypotheses have finished.

        Args:
            x: the source sentence (word ids); when ``self.ifvalid`` is set
                the sentence is taken from ``x[0]``.

        Returns:
            Whatever ``_filter_reidx`` returns for the hypothesis with the
            lowest (length-normalised when ``self.ifnorm``) cost.

        Side effects:
            Sets ``self.ptv`` and writes progress through ``debug``/``_log``.
        """
        x = x[0] if self.ifvalid else x  # numpy ndarray
        # subdict set [0,2,6,29999, 333]
        # NOTE(review): x has already been rebound to x[0] on the line above,
        # so x[1] here is taken from that value -- confirm the input layout.
        self.ptv = numpy.asarray(
            x[1], dtype='int32') if self.ifvalid and self.ifmv else None

        # k is the beam size we have
        x = numpy.asarray(x, dtype='int64')
        if x.ndim == 1:
            x = x[None, :]
        src_sent_len = x.shape[1]
        maxlen = src_sent_len * 2
        x = x.T

        sample = []
        sample_score = []

        live_k = 1
        dead_k = 0

        hyp_samples = [[]] * live_k
        hyp_scores = numpy.zeros(live_k).astype('float32')
        hyp_states = []

        # get initial state of decoder rnn and encoder context
        s_im1, ctx0, c_x0 = self.fn_init(x)
        y_im1 = [-1]  # indicator for the first target word (bos target)

        for ii in xrange(maxlen):
            # (src_sent_len, 1, 2*src_nhids) -> (src_sent_len, live_k, 2*src_nhids)
            ctx = numpy.tile(ctx0, [live_k, 1])
            debug('ctx')
            debug(ctx)
            c_x = numpy.tile(c_x0, [live_k, 1])
            debug('y_im1.................................................')
            debug(y_im1)
            debug('s_im1.................................................')
            debug(s_im1)
            yemb_im1, hi = self.fn_nh(y_im1, s_im1)
            debug('hi.................................................')
            debug(hi)
            pi, ai = self.fn_na(ctx, c_x, hi)
            debug('pi.................................................')
            debug(pi)
            debug('ai.................................................')
            debug(ai)
            s_im1 = s_i = self.fn_ns(hi, ai)  # note, s_im1 should be updated!
            debug('si')
            debug(s_i)
            mo = self.fn_mo(yemb_im1, ai, s_i)
            next_scores = self.fn_pws(mo, self.ptv)  # the larger the better

            next_ces = -next_scores if self.ifscore else self.fn_ce(
                next_scores)
            cand_scores = hyp_scores[:, None] + next_ces
            debug(str(ii) + ' ===============================================')
            debug('ce... i')
            debug(next_ces)
            cand_flat = cand_scores.flatten()
            # we do not need to generate k candidate here, because we just need to generate k-dead_k
            # more candidates ending with eos, so for each previous candidate we just need to expand
            # k-dead_k candidates
            ranks_flat = part_sort(cand_flat, self.k - dead_k)

            # Recover (hypothesis, word) from positions in the flattened
            # (hypothesis x vocabulary) cost matrix.
            voc_size = next_scores.shape[1]
            trans_indices = ranks_flat // voc_size
            word_indices = ranks_flat % voc_size
            costs = cand_flat[ranks_flat]
            debug('ce... prev i')
            debug(costs)

            new_hyp_samples = []
            new_hyp_scores = numpy.zeros(self.k - dead_k).astype('float32')
            new_hyp_states = []

            for idx, [ti, wi] in enumerate(zip(trans_indices, word_indices)):
                new_hyp_samples.append(hyp_samples[ti] + [wi])
                new_hyp_scores[idx] = copy.copy(costs[idx])
                new_hyp_states.append(copy.copy(
                    s_i[ti]))  # here should be s_i !!!

            # check the finished samples
            new_live_k = 0
            hyp_samples = []
            hyp_scores = []
            hyp_states = []
            # hypotheses ending with eos are finished and leave the beam
            for idx in xrange(len(new_hyp_samples)):
                if new_hyp_samples[idx][-1] == self.eos_id:
                    sample.append(new_hyp_samples[idx])
                    sample_score.append(new_hyp_scores[idx])
                    dead_k += 1
                else:
                    new_live_k += 1
                    hyp_samples.append(new_hyp_samples[idx])
                    hyp_scores.append(new_hyp_scores[idx])
                    hyp_states.append(new_hyp_states[idx])
            hyp_scores = numpy.array(hyp_scores)
            live_k = new_live_k
            debug('hyp_scores... prev i')
            debug(hyp_scores)
            debug('hyp_samples... prev i')
            for hyp_sample in hyp_samples:
                debug(hyp_sample)

            if new_live_k < 1:
                break
            if dead_k >= self.k:
                break

            y_im1 = numpy.array([w[-1] for w in hyp_samples])
            s_im1 = numpy.array(hyp_states)

        # Hypotheses still live when the search ends are candidates too.
        if live_k > 0:
            for idx in xrange(live_k):
                sample.append(hyp_samples[idx])
                sample_score.append(hyp_scores[idx])

        # lengths is needed by the log line below even without
        # normalisation, so it is always computed.
        lengths = numpy.array([len(s) for s in sample])
        if self.ifnorm:
            avg_sample_score = sample_score / lengths
        else:
            avg_sample_score = sample_score
        sidx = numpy.argmin(avg_sample_score)

        best_trans = sample[sidx]

        _log(
            '@source length[{}], translation length(with eos)[{}], maxlen[{}], avg loss'
            '[{}]={}/{}'.format(src_sent_len, len(best_trans), maxlen,
                                avg_sample_score[sidx], sample_score[sidx],
                                lengths[sidx]))
        _log('init[{}] nh[{}] na[{}] ns[{}] mo[{}] ws[{}] ps[{}] p[{}]'.format(
            *self.lqc))
        return _filter_reidx(self.bos_id, self.eos_id, best_trans,
                             self.tvcb_i2w, self.ifmv, self.ptv)
Example #7
0
    sys.stderr.write('use {}-gram langauge model\n'.format(lm.order))

    state_in = kenlm.State()
    lm.NullContextWrite(state_in)
    v_prev_ngram_w = ['it', 'is', 'revealed']
    v_prev_ngram_w = ['bolivia', 'holds', 'presidential', 'and']
    v_prev_ngram_w = ['organization', 'of', 'american', 'states']
    v_prev_ngram_w = ['according', 'the']

    probs, wids = vocab_prob_given_ngram(
        lm, v_prev_ngram_w, trg_vocab, trg_vocab_i2w, given=False, wid=False)

    np_probs = numpy.asarray(probs)
    np_wids = numpy.asarray(wids)
    probs_id = part_sort(-np_probs, 10)
    # print probs_id
    print np_probs[probs_id]
    print np_wids[probs_id]
    for i in np_wids[probs_id]:
        print trg_vocab_i2w[i],

    # print probs
    '''
    i = 0
    _k_rank_idx = part_sort(nprobs, 10)
    _k_ith_neg_log_prob = nprobs[_k_rank_idx]
    print _k_ith_neg_log_prob
    for idx in _k_rank_idx:
        print words[idx],
    print
Example #8
0
    def beam_search_comb(self, np_src_sent):
        """Run beam search over ``np_src_sent`` and back-track the best path.

        Each step extends all items of the previous beam in one batch, keeps
        the ``self.k - len(self.translations)`` candidates selected by
        ``part_sort`` on the accumulated cost, records candidates ending in
        ``self.eos_id`` in ``self.translations`` and puts the rest into
        ``self.beam[i]``.  The search returns as soon as ``self.k``
        translations have finished, or after ``self.maxlen`` steps.

        Args:
            np_src_sent: the source sentence, passed to ``self.fn_init``.

        Returns:
            The result of ``back_tracking`` for the selected sample.

        Side effects:
            Fills ``self.beam``, appends to ``self.translations`` and updates
            the back-pointer statistics in ``self.locrt``.
        """
        maxlen = self.maxlen
        hyp_scores = np.zeros(1).astype('float32')
        s_init, ctx0, c_x0 = self.fn_init(
            np_src_sent)  # np_src_sent (sl, 1), beam==1
        detail = False
        y_emb_im1 = self.fn_emb([-1])
        init_beam_sm(self.beam,
                     cnt=maxlen,
                     init_state=s_init[0],
                     init_y_emb_im1=y_emb_im1)
        for i in range(1, maxlen + 1):
            # beam search here
            if (i - 1) % 10 == 0:
                debug(str(i - 1))

            prevb = self.beam[i - 1]
            len_prevb = len(prevb)
            # batch states of previous beam
            s_im1 = np.array([b[1] for b in prevb])
            yemb_im1 = np.array([b[3][0] for b in prevb])
            # (src_sent_len, 1, 2*src_nhids) -> (src_sent_len, len_prevb, 2*src_nhids)
            context = np.tile(ctx0, [len_prevb, 1])
            c_x = np.tile(c_x0, [len_prevb, 1])

            if self.ifsplit:
                hi = self.fn_nh(yemb_im1, s_im1)
                pi, ai = self.fn_na(context, c_x, hi)
                si = self.fn_ns(hi, ai)
                mo = self.fn_mo(yemb_im1, ai, si)
                next_scores = self.fn_pws(mo, self.ptv)
                # NOTE(review): with self.ifscore set this is the negated
                # score, yet it still goes through -log below -- confirm
                # that is intended.
                next_probs = -next_scores if self.ifscore else self.fn_ce(
                    next_scores)
            else:
                y_im1 = np.array([b[2] for b in prevb])
                next_probs, si = self.fn_next(*[y_im1, context, s_im1])

            next_ces = -np.log(next_probs)
            cand_scores = hyp_scores[:, None] + next_ces
            cand_scores_flat = cand_scores.flatten()
            ranks_flat = part_sort(cand_scores_flat,
                                   self.k - len(self.translations))
            # Recover (previous beam item, word) from positions in the
            # flattened (item x vocabulary) cost matrix.
            voc_size = next_ces.shape[1]
            prevb_id = ranks_flat // voc_size
            word_indices = ranks_flat % voc_size
            costs = cand_scores_flat[ranks_flat]

            for cost, state, wid, prev_id in zip(costs, si[prevb_id],
                                                 word_indices, prevb_id):
                if wid == self.eos_id:
                    if self.ifnorm:
                        # sort key first: cost averaged over the length
                        self.translations.append(
                            (cost / i, cost, wid, prev_id, i))
                    else:
                        self.translations.append((cost, wid, prev_id, i))
                    if len(self.translations) == self.k:
                        # output sentence, early stop, best one in k
                        debug('early stop! see {} samples ending with EOS.'.
                              format(self.k))
                        self._debug_avg_back_pointer()
                        best_sample = self._best_finished_sample()
                        return back_tracking(self.beam, best_sample, detail)
                else:
                    # should calculate when generate item in current beam
                    self.locrt[0] += (prev_id + 1)
                    self.locrt[1] += 1
                    self.beam[i].append(
                        (cost, state, wid, self.fn_emb([wid]), prev_id))
            debug('beam {} ----------------------------'.format(i))
            for b in self.beam[i]:
                debug(b[0:1] + b[2:])  # do not output state
            hyp_scores = np.array([b[0] for b in self.beam[i]])

        # no early stop, back tracking
        self._debug_avg_back_pointer()
        if len(self.translations) == 0:
            debug('no early stop, no candidates ends with EOS, selecting from '
                  'len {} candidates, may not end with EOS.'.format(maxlen))
            best_sample = ((self.beam[maxlen][0][0], ) +
                           self.beam[maxlen][0][2:] + (maxlen, ))
            debug('translation length(with EOS) [{}]'.format(best_sample[-1]))
            return back_tracking(self.beam, best_sample, detail)

        debug(
            'no early stop, not enough {} candidates end with EOS, selecting the best '
            'sample ending with EOS from {} samples.'.format(
                self.k, len(self.translations)))
        best_sample = self._best_finished_sample()
        return back_tracking(self.beam, best_sample, detail)

    def _debug_avg_back_pointer(self):
        """Log the mean back-pointer location accumulated in ``self.locrt``."""
        total, count = self.locrt[0], self.locrt[1]
        # Float division, guarded so an empty count cannot divide by zero.
        avg = float(total) / count if count else 0.0
        debug('average location of back pointers [{}/{}={}]'.format(
            total, count, format(avg, '0.3f')))

    def _best_finished_sample(self):
        """Log all finished translations by cost and return the cheapest."""
        sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
        best_sample = sorted_samples[0]
        debug('translation length(with EOS) [{}]'.format(best_sample[-1]))
        for sample in sorted_samples:  # tuples
            debug('{}'.format(sample))
        return best_sample
Example #9
0
    def beam_search(self, np_src_sent):
        """Beam-search one source sentence and back-track the best hypothesis.

        Each step expands every entry of the previous beam, keeps the
        ``self.k - len(self.translations)`` cheapest continuations and either
        stores them as finished samples (word id equals ``self.eos_id``) or
        appends them to ``self.beam[i]``. The search stops early once
        ``self.k`` finished samples have been collected.

        Beam entries and candidates are 5-tuples
        ``(accumulated cost, decoder state, word id, word embedding,
        back pointer)``.

        Args:
            np_src_sent: source sentence passed unchanged to ``self.fn_init``
                (original note: shape ``(src_len, 1)`` -- not checked here).

        Returns:
            Whatever ``back_tracking(self.beam, best_sample, detail)`` returns
            for the selected sample.

        Side effects:
            Mutates ``self.beam``, ``self.translations`` and ``self.locrt``.
        """
        maxlen = self.maxlen
        # Original note on the outputs: (1, trg_nhids) and
        # (src_len, 1, src_nhids*2) -- TODO confirm against fn_init.
        s_init, context, c_x = self.fn_init(np_src_sent)
        detail = False
        y_emb_im1 = self.fn_emb([-1])
        init_beam_sm(self.beam,
                     cnt=maxlen,
                     init_state=s_init,
                     init_y_emb_im1=y_emb_im1)

        def log_avg_back_pointer():
            # Log the mean (1-based) back-pointer position seen so far.
            total, count = self.locrt[0], self.locrt[1]
            # float() forces true division for the integer counters; the
            # guard covers a stop before any non-EOS hypothesis was counted.
            avg = float(total) / count if count else float('nan')
            debug('average location of back pointers [{}/{}={}]'.format(
                total, count, format(avg, '0.3f')))

        def backtrack_best_finished():
            # Pick the finished sample with the smallest leading score.
            sorted_samples = sorted(self.translations, key=lambda tup: tup[0])
            best_sample = sorted_samples[0]
            debug('translation length(with EOS) [{}]'.format(best_sample[-1]))
            for sample in sorted_samples:  # tuples
                debug('{}'.format(sample))
            return back_tracking(self.beam, best_sample, detail)

        for i in range(1, maxlen + 1):
            if (i - 1) % 10 == 0:
                debug(str(i - 1))

            # Number of hypotheses still wanted; self.translations only grows
            # after the candidates of this step have been ranked.
            n_open = self.k - len(self.translations)

            cands = []
            for j, (accum_im1, s_im1, y_im1, yemb_im1,
                    _bp_im1) in enumerate(self.beam[i - 1]):
                if self.ifsplit:
                    hi = self.fn_nh(yemb_im1, s_im1)
                    _, ai = self.fn_na(context, c_x, hi)
                    si = self.fn_ns(hi, ai)
                    mo = self.fn_mo(yemb_im1, ai, si)
                    next_scores = self.fn_pws(mo, self.ptv)
                    # NOTE(review): with ifscore the negated scores are still
                    # passed through -log below -- confirm this is intended.
                    if self.ifscore:
                        next_probs = -next_scores
                    else:
                        next_probs = self.fn_ce(next_scores)
                else:
                    next_probs, si = self.fn_next(*[y_im1, context, s_im1])

                next_ces = -np.log(next_probs)
                next_ces_flat = next_ces.flatten()
                ranks_idx_flat = part_sort(next_ces_flat, n_open)
                # Cost of each kept word added to the parent's running cost.
                accum_i = accum_im1 + next_ces_flat[ranks_idx_flat]
                cands += [(accum_i[idx], si, wid, self.fn_emb([wid]), j)
                          for idx, wid in enumerate(ranks_idx_flat)]

            # The trailing inf keeps the array one longer than cands.
            k_ranks_flat = part_sort(
                np.asarray([cand[0] for cand in cands] + [np.inf]), n_open)
            k_sorted_cands = [cands[r] for r in k_ranks_flat]

            for b in k_sorted_cands:
                if b[2] == self.eos_id:
                    # NOTE(review): b[-2:] is (embedding, back pointer), not
                    # (word id, back pointer) -- confirm this is the layout
                    # back_tracking expects.
                    debug('add: {}'.format(((b[0] / i), b[0]) + b[-2:] +
                                           (i, )))
                    if self.ifnorm:
                        self.translations.append(((b[0] / i), b[0]) + b[-2:] +
                                                 (i, ))
                    else:
                        self.translations.append((b[0], ) + b[-2:] + (i, ))
                    if len(self.translations) == self.k:
                        # output sentence, early stop, best one in k
                        debug('early stop! see {} samples ending with EOS.'.
                              format(self.k))
                        log_avg_back_pointer()
                        return backtrack_best_finished()
                else:
                    # should calculate when generate item in current beam
                    self.locrt[0] += (b[-1] + 1)
                    self.locrt[1] += 1
                    self.beam[i].append(b)

            debug('beam {} ----------------------------'.format(i))
            for b in self.beam[i]:
                debug(b[0:1] + b[2:])  # do not output state (b[1])

        # no early stop, back tracking
        log_avg_back_pointer()
        if not self.translations:
            debug('no early stop, no candidates ends with EOS, selecting from '
                  'len {} candidates, may not end with EOS.'.format(maxlen))
            first = self.beam[maxlen][0]
            best_sample = (first[0], ) + first[-2:] + (maxlen, )
            debug('translation length(with EOS) [{}]'.format(best_sample[-1]))
            return back_tracking(self.beam, best_sample, detail)

        debug(
            'no early stop, not enough {} candidates end with EOS, selecting the best '
            'sample ending with EOS from {} samples.'.format(
                self.k, len(self.translations)))
        return backtrack_best_finished()