Code example #1
0
File: dynety.py  Project: switchfootsid/baseline
def show_examples_dynet(model, es, rlut1, rlut2, embed2, mxlen, sample,
                        prob_clip, max_examples, reverse):
    """Print source / gold / decoded sentences for one random batch (DyNet).

    ``embed2``, ``mxlen``, ``sample`` and ``prob_clip`` are unused here; they
    are accepted only to mirror the other backends' signatures.
    """
    batch_dict = es[np.random.randint(0, len(es))]

    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']

    if max_examples > 0:
        keep = min(max_examples, src_array.shape[0])
        src_array = src_array[:keep]
        tgt_array = tgt_array[:keep]
        src_len = src_len[:keep]

    for length, src_i, tgt_i in zip(src_len, src_array, tgt_array):
        print(
            '========================================================================'
        )
        print('[OP] %s' % lookup_sentence(rlut1, src_i, reverse=reverse))
        print('[Actual] %s' % lookup_sentence(rlut2, tgt_i))
        # run() wants batched inputs, so add a leading batch dimension of 1
        decoded = model.run({'src': src_i.reshape(1, -1),
                             'src_len': np.array(length, ndmin=1)})[0]
        print('Guess: %s' % lookup_sentence(rlut2, decoded))
        print(
            '------------------------------------------------------------------------'
        )
Code example #2
0
File: torchy.py  Project: NickRuiz/baseline
def show_examples_pytorch(model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip, max_examples, reverse):
    """Decode and print a few examples from a random batch (PyTorch backend).

    Picks one random batch from ``es``, optionally truncates it to
    ``max_examples`` rows, and for each row prints the source sentence, the
    gold target and the model's decoded guess.

    :param model: seq2seq model exposing ``run(batch_dict)``
    :param es: indexable collection of batch dicts with 'src', 'dst', 'src_len'
    :param rlut1: reverse lookup table (index -> token) for the source side
    :param rlut2: reverse lookup table (index -> token) for the target side
    :param embed2: unused here; kept for interface parity with other backends
    :param mxlen: unused here; kept for interface parity
    :param sample: unused here; kept for interface parity
    :param prob_clip: unused here; kept for interface parity
    :param max_examples: print at most this many rows (<= 0 means all)
    :param reverse: if True, reverse the source tokens when printing
    """
    si = np.random.randint(0, len(es))

    batch_dict = es[si]

    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']

    if max_examples > 0:
        max_examples = min(max_examples, src_array.size(0))
        src_array = src_array[0:max_examples]
        tgt_array = tgt_array[0:max_examples]
        src_len = src_len[0:max_examples]

    # Only move the batch to the GPU when one is present; the previous
    # unconditional .cuda() call crashed on CPU-only machines.
    if torch.cuda.is_available():
        src_array = src_array.cuda()

    for src_len_i, src_i, tgt_i in zip(src_len, src_array, tgt_array):

        print('========================================================================')
        # Re-wrap the scalar length as a 1-element tensor of the batch's dtype
        src_len_i = torch.ones(1).fill_(src_len_i).type_as(src_len)

        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=reverse)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i.cpu().numpy())
        print('[Actual] %s' % sent)
        src_dict = {'src': torch.autograd.Variable(src_i.view(1, -1), requires_grad=False),
                    'src_len': torch.autograd.Variable(src_len_i, requires_grad=False)}
        dst_i = model.run(src_dict)[0][0]
        dst_i = [idx.item() for idx in dst_i]
        sent = lookup_sentence(rlut2, dst_i)
        print('Guess: %s' % sent)
        print('------------------------------------------------------------------------')
Code example #3
0
def show_examples_tf(model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip,
                     max_examples, reverse):
    """Step-decode and print a few examples from one random batch (TensorFlow).

    For each example the source sentence, the gold target, and a token-by-token
    decode (greedy, or clipped sampling when ``sample`` is truthy) are printed.
    """
    batch_dict = es[np.random.randint(0, len(es))]

    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']

    if max_examples > 0:
        limit = min(max_examples, src_array.shape[0])
        src_array = src_array[:limit]
        tgt_array = tgt_array[:limit]
        src_len = src_len[:limit]

    go_idx = embed2.vocab['<GO>']
    eos_idx = embed2.vocab['<EOS>']

    for length_i, src_i, tgt_i in zip(src_len, src_array, tgt_array):
        print(
            '========================================================================'
        )
        print('[OP] %s' % lookup_sentence(rlut1, src_i, reverse=reverse))
        print('[Actual] %s' % lookup_sentence(rlut2, tgt_i))

        # Re-batch this single example and decode one position at a time.
        dst_i = np.zeros((1, mxlen))
        feed = {
            'src': src_i[np.newaxis, :],
            'src_len': np.array([length_i]),
            'dst': dst_i
        }
        token = go_idx
        for j in range(mxlen):
            dst_i[0, j] = token
            feed['dst_len'] = np.array([j + 1])
            output = model.step(feed)[j]
            if sample is False:
                token = np.argmax(output)
            else:
                # This is going to zero out low prob. events so they are not
                # sampled from
                token = beam_multinomial(prob_clip, output)

            if token == eos_idx:
                break

        print('Guess: %s' % lookup_sentence(rlut2, dst_i.squeeze()))
        print(
            '------------------------------------------------------------------------'
        )
Code example #4
0
File: tfy.py  Project: tanthml/baseline
def show_examples_tf(model, es, rlut1, rlut2, vocab, mxlen, sample, prob_clip,
                     max_examples, reverse):
    """Step-decode and print examples from one random batch (TensorFlow).

    Iterates rows of the batch until a feature runs out of rows or
    ``max_examples`` rows have been printed, whichever comes first.
    """
    batch_dict = es[np.random.randint(0, len(es))]
    src_lengths_key = model.src_lengths_key
    src_key = src_lengths_key.split('_')[0]

    i = 0
    while True:
        example = {}
        for key in batch_dict:
            batch = batch_dict[key]
            if i >= len(batch):
                return
            example[key] = batch[i]

        print(
            '========================================================================'
        )

        src_i = example[src_key]
        src_len_i = example[src_lengths_key]

        print('[OP] %s' % lookup_sentence(rlut1, src_i, reverse=reverse))
        print('[Actual] %s' % lookup_sentence(rlut2, example['tgt']))

        # Re-batch the single example and decode one token position at a time.
        decoded = np.zeros((1, mxlen))
        example['tgt'] = decoded
        example[src_key] = src_i[np.newaxis, :]
        example[src_lengths_key] = np.array([src_len_i])

        next_value = Offsets.GO
        for j in range(mxlen):
            decoded[0, j] = next_value
            example['tgt_lengths'] = np.array([j + 1])
            output = model.step(example).squeeze()[j]
            if sample is False:
                next_value = np.argmax(output)
            else:
                # This is going to zero out low prob. events so they are not
                # sampled from
                next_value = beam_multinomial(prob_clip, output)

            if next_value == Offsets.EOS:
                break

        print('Guess: %s' % lookup_sentence(rlut2, decoded.squeeze()))
        print(
            '------------------------------------------------------------------------'
        )
        i += 1
        if i == max_examples:
            return
Code example #5
0
def show_examples_pytorch(model, es, rlut1, rlut2, embed2, mxlen, sample, prob_clip, max_examples, reverse):
    """Greedy/sampled decode of a few examples from a random batch (PyTorch).

    For each row of one randomly-chosen batch, prints the source, the gold
    target, and a token-by-token decode: argmax when ``sample is False``,
    otherwise multinomial sampling restricted to the ``prob_clip`` highest
    probability tokens.

    :param model: callable taking ``(src, dst)`` Variables and returning logits
    :param es: indexable dataset yielding ``(src, tgt, src_len, _)`` tuples
    :param rlut1: reverse lookup table (index -> token) for the source side
    :param rlut2: reverse lookup table (index -> token) for the target side
    :param embed2: target embedding; its vocab supplies '<GO>' / '<EOS>'
    :param mxlen: maximum decode length
    :param sample: if not False, sample instead of taking the argmax
    :param prob_clip: top-k cutoff used when sampling
    :param max_examples: print at most this many rows (<= 0 means all)
    :param reverse: if True, reverse the source tokens when printing
    """
    si = np.random.randint(0, len(es))

    src_array, tgt_array, src_len, _ = es[si]

    if max_examples > 0:
        max_examples = min(max_examples, src_array.size(0))
        src_array = src_array[0:max_examples]
        tgt_array = tgt_array[0:max_examples]
        src_len = src_len[0:max_examples]

    GO = embed2.vocab['<GO>']
    EOS = embed2.vocab['<EOS>']

    # Only use the GPU when one is present; the previous unconditional
    # .cuda() calls crashed on CPU-only machines.
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        src_array = src_array.cuda()

    # NOTE: the loop variable used to shadow the batch tensor `src_len`;
    # renamed to `src_len_i` (it is unused inside the loop).
    for src_len_i, src_i, tgt_i in zip(src_len, src_array, tgt_array):

        print('========================================================================')

        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=reverse)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i)
        print('[Actual] %s' % sent)
        dst_i = torch.zeros(1, mxlen).long()
        if use_gpu:
            dst_i = dst_i.cuda()

        next_value = GO
        src_i = src_i.view(1, -1)
        for j in range(mxlen):
            dst_i[0, j] = next_value
            probv = model((torch.autograd.Variable(src_i), torch.autograd.Variable(dst_i)))
            output = probv.squeeze()[j]
            if sample is False:
                _, next_value = torch.max(output, 0)
                next_value = int(next_value.data[0])
            else:
                probs = output.data.exp()
                # This is going to zero out low prob. events so they are not
                # sampled from
                best, ids = probs.topk(prob_clip, 0, largest=True, sorted=True)
                probs.zero_()
                probs.index_copy_(0, ids, best)
                probs.div_(torch.sum(probs))
                fv = torch.multinomial(probs, 1)[0]
                next_value = fv

            if next_value == EOS:
                break

        sent = lookup_sentence(rlut2, dst_i.squeeze())
        print('Guess: %s' % sent)
        print('------------------------------------------------------------------------')
Code example #6
0
File: tfy.py  Project: dpressel/baseline
def show_examples_tf(model, es, rlut1, rlut2, vocab, mxlen, sample, prob_clip, max_examples, reverse):
    """Step-decode and print examples from one random batch (TensorFlow).

    Walks the rows of a randomly chosen batch, printing source, gold target
    and the model's step-by-step decode, until a feature runs out of rows or
    ``max_examples`` rows have been shown.
    """
    batch_dict = es[np.random.randint(0, len(es))]
    length_key = model.src_lengths_key
    feature_key = length_key.split('_')[0]

    row = 0
    while True:
        # Stop as soon as any feature runs out of rows.
        if any(row >= len(v) for v in batch_dict.values()):
            return
        example = {name: batch_dict[name][row] for name in batch_dict}

        print('========================================================================')

        src_seq = example[feature_key]
        src_length = example[length_key]
        gold = example['tgt']

        print('[OP] %s' % lookup_sentence(rlut1, src_seq, reverse=reverse))
        print('[Actual] %s' % lookup_sentence(rlut2, gold))

        # Re-batch the single example and decode one position at a time.
        hypothesis = np.zeros((1, mxlen))
        example['tgt'] = hypothesis
        example[feature_key] = src_seq[np.newaxis, :]
        example[length_key] = np.array([src_length])

        token = Offsets.GO
        for step in range(mxlen):
            hypothesis[0, step] = token
            example['tgt_lengths'] = np.array([step + 1])
            scores = model.step(example).squeeze()[step]
            if sample is False:
                token = np.argmax(scores)
            else:
                # This is going to zero out low prob. events so they are not
                # sampled from
                token = beam_multinomial(prob_clip, scores)

            if token == Offsets.EOS:
                break

        print('Guess: %s' % lookup_sentence(rlut2, hypothesis.squeeze()))
        print('------------------------------------------------------------------------')
        row += 1
        if row == max_examples:
            return
Code example #7
0
File: torchy.py  Project: LGDkobe24/baseline
def show_examples_pytorch(model, es, rlut1, rlut2, embed2, mxlen, sample,
                          prob_clip, max_examples, reverse):
    """Beam-decode and print a few examples from a random batch (PyTorch).

    For each row of one randomly chosen batch, prints the source sentence,
    the gold target, and the top beam-search hypothesis from ``beam_decode``.

    :param model: seq2seq model passed through to ``beam_decode``
    :param es: indexable collection of batch dicts with 'src', 'dst', 'src_len'
    :param rlut1: reverse lookup table (index -> token) for the source side
    :param rlut2: reverse lookup table (index -> token) for the target side
    :param embed2: unused here; kept for interface parity with other backends
    :param mxlen: unused here; kept for interface parity
    :param sample: unused here; kept for interface parity
    :param prob_clip: unused here; kept for interface parity
    :param max_examples: print at most this many rows (<= 0 means all)
    :param reverse: if True, reverse the source tokens when printing
    """
    si = np.random.randint(0, len(es))

    batch_dict = es[si]

    src_array = batch_dict['src']
    tgt_array = batch_dict['dst']
    src_len = batch_dict['src_len']

    if max_examples > 0:
        max_examples = min(max_examples, src_array.size(0))
        src_array = src_array[0:max_examples]
        tgt_array = tgt_array[0:max_examples]
        src_len = src_len[0:max_examples]

    # Only move the batch to the GPU when one is present; the previous
    # unconditional .cuda() call crashed on CPU-only machines.
    if torch.cuda.is_available():
        src_array = src_array.cuda()

    # NOTE: the loop variable used to shadow the batch tensor `src_len`;
    # renamed to `src_len_i` (it is unused inside the loop).
    for src_len_i, src_i, tgt_i in zip(src_len, src_array, tgt_array):

        print(
            '========================================================================'
        )

        sent = lookup_sentence(rlut1, src_i.cpu().numpy(), reverse=reverse)
        print('[OP] %s' % sent)
        sent = lookup_sentence(rlut2, tgt_i)
        print('[Actual] %s' % sent)

        dst_i, scores = beam_decode(
            model,
            torch.autograd.Variable(src_i.view(1, -1), requires_grad=False), 1)
        sent = lookup_sentence(rlut2, dst_i[0])
        print('Guess: %s' % sent)
        print(
            '------------------------------------------------------------------------'
        )
Code example #8
0
File: services.py  Project: wenshuoliu/baseline
 def format_output(self, predicted, K=1, **kwargs):
     """Convert predicted index sequences into lists of token lists.

     With ``K == 1`` returns a flat list with one token list per input;
     with ``K > 1`` returns, per input, a list of up to K n-best token
     lists. Extra ``kwargs`` are accepted and ignored.
     """
     results = []
     for beams in predicted:
         n_best_result = []
         for n in range(min(K, len(beams))):
             tokens = lookup_sentence(self.tgt_idx_to_token, beams[n]).split()
             if K == 1:
                 results.append(tokens)
             else:
                 n_best_result.append(tokens)
         if K > 1:
             results.append(n_best_result)
     return results
Code example #9
0
    def predict(self, tokens, K=1, **kwargs):
        """Run seq2seq prediction and return decoded token lists.

        With ``K == 1`` returns one token list per input; with ``K > 1``
        returns, per input, a list of up to K n-best token lists. The beam
        width defaults to ``K`` and is coerced to int.
        """
        tokens_seq, mxlen, mxwlen = self.batch_input(tokens)
        self.set_vectorizer_lens(mxlen, mxwlen)
        examples = self.vectorize(tokens_seq)

        kwargs['beam'] = int(kwargs.get('beam', K))
        outcomes = self.model.predict(examples, **kwargs)

        results = []
        for hyps in outcomes:
            n_best_result = []
            for n in range(min(K, len(hyps))):
                words = lookup_sentence(self.tgt_idx_to_token, hyps[n]).split()
                if K == 1:
                    results.append(words)
                else:
                    n_best_result.append(words)
            if K > 1:
                results.append(n_best_result)
        return results
Code example #10
0
File: services.py  Project: dpressel/baseline
    def predict(self, tokens, K=1, **kwargs):
        """Run seq2seq prediction and return decoded token lists.

        With ``K == 1`` returns one token list per input; with ``K > 1``
        returns, per input, a list of up to K n-best token lists.

        :param tokens: input tokens, batched via ``self.batch_input``
        :param K: number of n-best hypotheses to keep per input
        :param kwargs: forwarded to the model; ``beam`` defaults to ``K``
        """
        tokens_seq, mxlen, mxwlen = self.batch_input(tokens)
        self.set_vectorizer_lens(mxlen, mxwlen)
        examples = self.vectorize(tokens_seq)

        # Coerce to int for consistency with the sibling implementation:
        # `beam` may arrive as a string (e.g. parsed from a config or query).
        kwargs['beam'] = int(kwargs.get('beam', K))
        outcomes = self.model.predict(examples, **kwargs)

        results = []
        B = len(outcomes)
        for i in range(B):
            N = len(outcomes[i])
            n_best_result = []
            for n in range(min(K, N)):
                n_best = outcomes[i][n]
                out = lookup_sentence(self.tgt_idx_to_token, n_best).split()
                if K == 1:
                    results += [out]
                else:
                    n_best_result += [out]
            if K > 1:
                results.append(n_best_result)
        return results