Example #1
def beamsearch(model, seq, mask=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None, dtype=None):
    size = beamsize
    dtype = dtype or theano.config.floatX

    # get the target vocabulary from the model
    vocab = model.option["vocabulary"][1][1]
    eosid = model.option["eosid"]
    bosid = model.option["bosid"]

    if maxlen is None:
        maxlen = seq.shape[0] * 3

    if minlen is None:
        minlen = seq.shape[0] // 2

    # encoding source
    if mask is None:
        mask = numpy.ones(seq.shape, dtype)

    annotation, states, mapped_annot = model.encode(seq, mask)

    initial_beam = beam(size)
    # bosid must be 0
    initial_beam.candidate = [[bosid]]
    initial_beam.score = numpy.zeros([1], dtype)

    hypo_list = []
    beam_list = [initial_beam]
    cond = lambda x: x[-1] == eosid

    for k in range(maxlen):
        # get previous results
        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(candidate)
        last_words = numpy.array([t[-1] for t in candidate], "int32")

        # compute context first, then compute word distribution
        batch_mask = numpy.repeat(mask, num, 1)
        batch_annot = numpy.repeat(annotation, num, 1)
        batch_mannot = numpy.repeat(mapped_annot, num, 1)

        outputs = model.predict(last_words, states, batch_annot, batch_mannot,
                                batch_mask)
        prob_dists, contexts, alpha = outputs
        logprobs = numpy.log(prob_dists)

        # do not generate eos symbol
        if k < minlen:
            logprobs[:, eosid] = -numpy.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -numpy.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, cond, prev_beam)

        # translation complete
        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        if size == 0:
            break

        # generate next state
        candidate = next_beam.candidate
        num = len(candidate)
        last_words = numpy.array([t[-1] for t in candidate], "int32")

        states = select_nbest(states, batch_indices)
        contexts = select_nbest(contexts, batch_indices)
        states = model.generate(last_words, states, contexts)

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = numpy.array(hypo_list)[numpy.argsort(score_list)]
    score_list = numpy.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = [vocab[x] for x in trans]
        output.append((trans, score))

    return output
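
All of these examples rely on a beam container and a select_nbest helper that are defined elsewhere. The following is a minimal sketch of what they might look like, reconstructed only from how they are used above; the exact names, return values and internal details are assumptions, not the original implementation (Example #2, for instance, stores candidates/scores in the plural and only unpacks two values from prune).

# Sketch of the helpers assumed by the examples; reconstructed from the call
# sites above, not taken from the original code base.
import numpy


class beam(object):
    def __init__(self, size):
        self.size = size
        self.candidate = []                       # list of token-id sequences
        self.score = numpy.zeros([0], "float32")  # accumulated log-probs

    def prune(self, logprobs, done_predicate, prev_beam):
        # logprobs: [num_hypotheses, vocab_size] log-probabilities of the
        # next word for every hypothesis in prev_beam
        num, vocab_size = logprobs.shape
        scores = prev_beam.score[:, None] + logprobs
        flat_scores = scores.reshape(-1)
        best = numpy.argsort(-flat_scores)[:self.size]

        finished = []
        keep_batch, keep_word, keep_score = [], [], []

        for index in best:
            batch_index = int(index) // vocab_size
            word_index = int(index) % vocab_size
            hypothesis = prev_beam.candidate[batch_index] + [word_index]

            if done_predicate(hypothesis):
                # completed translation: (token ids, accumulated log-prob)
                finished.append((hypothesis, float(flat_scores[index])))
            else:
                self.candidate.append(hypothesis)
                keep_batch.append(batch_index)
                keep_word.append(word_index)
                keep_score.append(flat_scores[index])

        self.score = numpy.array(keep_score, "float32")

        return (finished,
                numpy.array(keep_batch, "int32"),
                numpy.array(keep_word, "int32"))


def select_nbest(states, indices):
    # keep only the states of the surviving hypotheses; hypotheses are laid
    # out along the first axis of every state array passed through here
    if isinstance(states, (list, tuple)):
        return [select_nbest(state, indices) for state in states]
    return states[indices]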
Example #2
def beamsearch(models,
               seq,
               mask=None,
               beamsize=10,
               normalize=False,
               maxlen=None,
               minlen=None,
               arithmetic=False,
               dtype=None):
    dtype = dtype or theano.config.floatX

    if not isinstance(models, (list, tuple)):
        models = [models]

    num_models = len(models)

    # get vocabulary from the first model
    option = models[0].option
    vocab = option["vocabulary"][1][1]
    eosid = option["eosid"]
    bosid = option["bosid"]

    if maxlen is None:
        maxlen = seq.shape[0] * 3

    if minlen is None:
        minlen = seq.shape[0] // 2

    # encoding source
    if mask is None:
        mask = numpy.ones(seq.shape, dtype)

    outputs = [model.encode(seq, mask) for model in models]

    states = [item[0] for item in outputs]
    annotations0 = [item[1] for item in outputs]
    annotations1 = [item[2] for item in outputs]
    mapped_annots0 = [item[3] for item in outputs]
    mapped_annots1 = [item[4] for item in outputs]
    soft_masks = [item[5] for item in outputs]

    # sys.stderr.write("l-src={}\nl-soft-tgt={}\n".format(numpy.sum(mask), numpy.sum(soft_masks[0])))
    # soft_prob = soft_probs[0]
    # soft_mask = soft_masks[0]
    # soft_y = numpy.argmax(soft_prob, 1)

    initial_beam = beam(beamsize)
    size = beamsize
    # bosid must be 0
    initial_beam.candidates = [[bosid]]
    initial_beam.scores = numpy.zeros([1], dtype)

    hypo_list = []
    beam_list = [initial_beam]
    done_predicate = lambda x: x[-1] == eosid

    for k in range(maxlen):
        # get previous results
        prev_beam = beam_list[-1]
        candidates = prev_beam.candidates
        num = len(candidates)
        last_words = numpy.array([cand[-1] for cand in candidates], "int32")

        # compute context first, then compute word distribution
        batch_mask = numpy.repeat(mask, num, 1)
        batch_annots0 = [numpy.repeat(annot, num, 1) for annot in annotations0]
        batch_annots1 = [numpy.repeat(annot, num, 1) for annot in annotations1]
        batch_mannots0 = [numpy.repeat(annot, num, 1) for annot in mapped_annots0]
        batch_mannots1 = [numpy.repeat(annot, num, 1) for annot in mapped_annots1]
        batch_soft_mask = [numpy.repeat(smask, num, 1) for smask in soft_masks]

        # predict returns [probs, context, alpha]
        outputs = [
            model.predict(last_words, state, annot0, mannot0, batch_mask,
                          annot1, mannot1, softmask)
            for model, state, annot0, annot1, mannot0, mannot1, softmask in
            zip(models, states, batch_annots0, batch_annots1, batch_mannots0,
                batch_mannots1, batch_soft_mask)
        ]
        prob_dists = [item[0] for item in outputs]

        # search nbest given word distribution
        if arithmetic:
            logprobs = numpy.log(sum(prob_dists) / num_models)
        else:
            # geometric mean
            logprobs = sum(numpy.log(prob_dists)) / num_models

        if k < minlen:
            logprobs[:, eosid] = -numpy.inf  # make sure eos won't be selected

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -numpy.inf
            logprobs[:, eosid] = eosprob  # make sure eos will be selected

        next_beam = beam(size)
        finished, remain_beam_indices = next_beam.prune(
            logprobs, done_predicate, prev_beam)

        hypo_list.extend(finished)  # completed translation
        size -= len(finished)

        if size == 0:  # reach k completed translation before maxlen
            break

        # generate next state
        candidates = next_beam.candidates
        num = len(candidates)
        last_words = numpy.array([t[-1] for t in candidates], "int32")

        if option["decoder"] == "GruSimple":
            contexts = [item[1] for item in outputs]
            states = select_nbest(
                states, remain_beam_indices
            )  # select corresponding states for each model
            contexts = select_nbest(contexts, remain_beam_indices)

            states = [
                model.generate(last_words, state, context)
                for model, state, context in zip(models, states, contexts)
            ]
        elif option["decoder"] == "GruCond":
            states = [item[1] for item in outputs]
            states = select_nbest(
                states, remain_beam_indices
            )  # select corresponding states for each model

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = numpy.array(hypo_list)[numpy.argsort(score_list)]
    score_list = numpy.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = [vocab[x] for x in trans]
        output.append((trans, score))

    return output
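
Example #2 above combines the per-model word distributions before pruning. Pulled out as a standalone helper (a sketch; the name combine_distributions does not appear in the original code), the two ensembling modes look like this:

import numpy

def combine_distributions(prob_dists, arithmetic=False):
    # prob_dists: one [num_hypotheses, vocab_size] probability matrix per model
    num_models = len(prob_dists)
    if arithmetic:
        # arithmetic mean of the probabilities, then log
        return numpy.log(sum(prob_dists) / num_models)
    # geometric mean of the probabilities == arithmetic mean of the log-probs
    return sum(numpy.log(p) for p in prob_dists) / num_models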
Example #3
def beamsearch(model,
               seq,
               seqlen=None,
               beamsize=10,
               normalize=False,
               maxlen=None,
               minlen=None):
    size = beamsize
    vocabulary = model.option["vocabulary"]
    eos_symbol = model.option["eos"]

    hidden_size = model.option["hidden"]
    num_layers = model.option["num_layers"]

    encode = model.encode
    predict = model.predict
    vocab = vocabulary[1][1]
    eosid = vocabulary[1][0][eos_symbol]

    batch_dim = 0
    time_dim = 1

    if seqlen is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = seqlen

    if maxlen is None:
        maxlen = seq_len[0] * 3

    if minlen is None:
        minlen = seq_len[0] // 2

    annotation, enc_attn_mask = encode(seq, seq_len)
    batch = annotation.shape[batch_dim]
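    # per-layer self-attention caches start empty along the time axis and are
    # presumably extended by one position inside predict() at every step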
    state = {
        "layer_%d" % i: {
            "key": np.zeros([batch, 0, hidden_size], "float32"),
            "value": np.zeros([batch, 0, hidden_size], "float32")
        }
        for i in range(num_layers)
    }

    initial_beam = beam(size)
    initial_beam.candidate = [[eosid]]
    initial_beam.alignment = [[-1]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]

    for k in range(maxlen):
        if size == 0:
            break

        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(prev_beam.candidate)
        #last_words = np.array(map(lambda t: t[-1], candidate), "int32")
        partial_translation = np.array(candidate, "int32")

        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mask = np.repeat(enc_attn_mask, num, batch_dim)

        prob_dist, state = predict(partial_translation, state, batch_annot,
                                   batch_mask)

        logprobs = np.log(prob_dist)

        # do not allow eos to be selected yet
        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, lambda x: x[-1] == eosid,
                                  prev_beam)

        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        new_state = {}
        for key, value in state.items():
            state_k = select_nbest(value["key"], batch_indices)
            state_v = select_nbest(value["value"], batch_indices)

            new_state[key] = {"key": state_k, "value": state_v}

        state = new_state
        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                # length penalty in the style of GNMT: ((5 + len) / 6) ** alpha, here alpha = 1.0
                length_penalty = ((5.0 + count) / (5.0 + 1.0)) ** 1.0
                score_list[i] = score / length_penalty
            else:
                score_list[i] = score

    # sort
    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = [vocab[x] for x in trans]
        output.append((trans, score))

    return output
Example #4
def beamsearch(model, seq, length=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None):
    size = beamsize

    # get the target vocabulary from the model
    vocab = model.option["vocabulary"][1][1]
    eosid = model.option["eosid"]
    bosid = model.option["bosid"]

    time_dim = 0
    batch_dim = 1

    if maxlen is None:
        maxlen = seq.shape[time_dim] * 3

    if minlen is None:
        minlen = seq.shape[time_dim] // 2

    if length is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = length

    annotation, initial_state = model.encode(seq, seq_len)
    mapped_states = model.precompute(annotation)

    initial_beam = beam(size)
    # </s>
    initial_beam.candidate = [[bosid]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]
    cond = lambda hypo: hypo[-1] == eosid

    state = initial_state

    for k in range(maxlen):
        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(candidate)
        last_words = np.array([t[-1] for t in candidate], "int32")

        # prediction
        batch_seq_len = np.repeat(seq_len, num, 0)
        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mannot = np.repeat(mapped_states, num, batch_dim)
        alpha, context = model.align(state, batch_annot, batch_mannot,
                                     batch_seq_len)
        prob_dist = model.predict(last_words, state, context)

        # select nbest
        logprobs = np.log(prob_dist)

        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, cond, prev_beam)

        # translation complete
        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        if size == 0:
            break

        state = select_nbest(state, batch_indices)
        context = select_nbest(context, batch_indices)

        # generate next state
        candidate = next_beam.candidate
        num = len(candidate)
        current_words = np.array([t[-1] for t in candidate], "int32")
        state = model.generate(current_words, state, context)
        beam_list.append(next_beam)

    # postprocessing
    score_list = [item[1] for item in hypo_list]
    # remove BOS symbol
    hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans) - 1
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = [vocab[x] for x in trans]
        output.append((trans, score))

    return output
Example #5
def beamsearch(model,
               tvocab,
               itvocab,
               eos_symbol,
               seq,
               seqlen=None,
               beamsize=10,
               normalize=False,
               maxlen=None,
               minlen=None):
    size = beamsize
    encode = model.encode
    predict = model.predict

    eosid = tvocab[eos_symbol]

    time_dim = 0
    batch_dim = 1

    if seqlen is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = seqlen

    if maxlen is None:
        maxlen = seq_len[0] * 3

    if minlen is None:
        minlen = seq_len[0] // 2

    annotation, mapped_states, initial_state, attn_mask = encode(seq, seq_len)
    state = initial_state

    initial_beam = beam(size)
    initial_beam.candidate = [[eosid]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]

    for k in range(maxlen):
        if size == 0:
            break

        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(prev_beam.candidate)
        last_words = np.array([t[-1] for t in candidate], "int32")

        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mannot = np.repeat(mapped_states, num, batch_dim)
        batch_mask = np.repeat(attn_mask, num, batch_dim)

        prob_dist, state, alpha = predict(last_words, state, batch_annot,
                                          batch_mannot, batch_mask)

        logprobs = np.log(prob_dist)

        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, lambda x: x[-1] == eosid,
                                  prev_beam)

        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])
        state = select_nbest(state, batch_indices)
        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = [itvocab[x] for x in trans]
        output.append((trans, score))

    return output
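
All five variants return a list of (tokens, score) pairs sorted by score in ascending order, so with log-probability scores the last entry is the best hypothesis. A hypothetical call (the model object and the preprocessing of seq are assumptions, not shown in the examples) might look like:

# seq: int32 array of source word ids, shaped the way the chosen model expects
nbest = beamsearch(model, seq, beamsize=10, normalize=True)

for tokens, score in nbest:
    print(" ".join(tokens), score)

# ascending sort over log-probabilities: the highest-scoring hypothesis is last
best_tokens, best_score = nbest[-1]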