Example #1
def model_perplexity(
    model,
    src,
    src_test,
    trg,
    trg_test,
    config,
    loss_criterion,
    src_valid=None,
    trg_valid=None,
    verbose=False,
):
    """Compute model perplexity."""
    # Get source minibatch
    losses = []
    for j in xrange(0,
                    len(src_test['data']) // 100,
                    config['data']['batch_size']):
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'],
            src['word2id'],
            j,
            config['data']['batch_size'],
            config['data']['max_src_length'],
            add_start=True,
            add_end=True)
        input_lines_src = Variable(input_lines_src.data, volatile=True)
        output_lines_src = Variable(output_lines_src.data, volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(trg_test['data'],
                          trg['word2id'],
                          j,
                          config['data']['batch_size'],
                          config['data']['max_trg_length'],
                          add_start=True,
                          add_end=True))
        input_lines_trg_gold = Variable(input_lines_trg_gold.data,
                                        volatile=True)
        output_lines_trg_gold = Variable(output_lines_trg_gold.data,
                                         volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        decoder_logit = model(input_lines_src, input_lines_trg_gold)

        loss = loss_criterion(
            decoder_logit.contiguous().view(-1, decoder_logit.size(2)),
            output_lines_trg_gold.view(-1))

        losses.append(loss.data[0])

    return np.exp(np.mean(losses))
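
Every example on this page relies on a project-specific `get_minibatch` helper whose source is not shown. Below is a minimal sketch reconstructed from how the examples call it (and from the commented-out block in Example #5); the padding and masking details are assumptions, and extra keyword arguments some examples pass (`is_gui`, `use_cuda`, `line_types`) are omitted.

import torch

# Hypothetical reconstruction of get_minibatch -- NOT the original implementation.
def get_minibatch(lines, word2id, index, batch_size, max_len,
                  add_start=True, add_end=True):
    batch = lines[index:index + batch_size]
    if add_start:
        batch = [['<s>'] + line for line in batch]
    if add_end:
        batch = [line + ['</s>'] for line in batch]
    batch = [line[:max_len] for line in batch]
    lens = [len(line) for line in batch]
    pad_to = max(lens)
    ids = [
        [word2id.get(w, word2id['<unk>']) for w in line]
        + [word2id['<pad>']] * (pad_to - len(line))
        for line in batch
    ]
    ids = torch.LongTensor(ids)
    # Decoder inputs drop the final token, decoder targets drop the first one.
    input_lines, output_lines = ids[:, :-1].contiguous(), ids[:, 1:].contiguous()
    mask = (output_lines != word2id['<pad>']).float()
    return input_lines, output_lines, lens, mask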
Example #2
    def translate(self):
        """Translate the whole dataset."""
        trg_preds = []
        trg_gold = []
        output_res = open(self.output,'w')
        for j in xrange(
            0, len(self.src['data']),
            self.config['data']['batch_size']
        ):
            """Decode a single minibatch."""
            print('Decoding %d out of %d ' % (j, len(self.src['data'])))
            hypotheses, scores = decoder.decode_batch(j)
            all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
            all_preds = [
                ' '.join([trg['id2word'][x] for x in hyp[:-1]])
                for hyp in all_hyp_inds
            ]



            # Get target minibatch
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(
                    self.trg['data'], self.tgt_dict, j,
                    self.config['data']['batch_size'],
                    self.config['data']['max_trg_length'],
                    is_gui=False, add_start=True, add_end=True
                )
            )

            output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
            all_gold_inds = [[x for x in hyp] for hyp in output_lines_trg_gold]
            all_gold = [
                ' '.join([trg['id2word'][x] for x in hyp[:-1]])
                for hyp in all_gold_inds
            ]

            trg_preds += all_preds
            trg_gold += all_gold

        output_res.writelines('\n'.join(trg_preds))
        bleu_score = get_bleu(trg_preds, trg_gold)
        output_res.close()
        print('BLEU : %.5f ' % (bleu_score))
Example #3
    def translate(self):
        """Translate the whole dataset."""
        trg_preds = []
        trg_gold = []
        for j in range(
            0, len(self.src['data']),
            self.config['data']['batch_size']
        ):
            """Decode a single minibatch."""
            print('Decoding %d out of %d ' % (j, len(self.src['data'])))
            hypotheses, scores = self.decode_batch(j)
            all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
            all_preds = [
                ' '.join([self.trg['id2word'][x] for x in hyp])
                for hyp in all_hyp_inds
            ]

            # Get target minibatch
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(
                    self.trg['data'], self.tgt_dict, j,
                    self.config['data']['batch_size'],
                    self.config['data']['max_trg_length'],
                    add_start=True, add_end=True, use_cuda=self.use_cuda
                )
            )

            output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
            all_gold_inds = [[x for x in hyp] for hyp in output_lines_trg_gold]
            all_gold = [
                ' '.join([self.trg['id2word'][x] for x in hyp])
                for hyp in all_gold_inds
            ]

            trg_preds += all_preds
            trg_gold += all_gold
        print("investigate some preds and golds.....")
        print("trg_preds: ", trg_preds[0])
        print("trg_gold: ", trg_gold[0])
        bleu_score = get_bleu(trg_preds, trg_gold)

        # print('BLEU : %.5f ' % (bleu_score))
        return bleu_score
Example #4
    def translate(self):
        """Translate the whole dataset."""
        trg_preds = []
        trg_gold = []
        for j in xrange(
            0, len(self.src['data']),
            self.config['data']['batch_size']
        ):
            """Decode a single minibatch."""
            #print 'Decoding %d out of %d ' % (j, len(self.src['data']))
            hypotheses, scores = decoder.decode_batch(j)
            all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
            all_preds = [
                ' '.join([trg['id2word'][x.item()] for x in hyp])
                for hyp in all_hyp_inds
            ]

            # Get target minibatch
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(
                    self.trg['data'], self.tgt_dict, j,
                    self.config['data']['batch_size'],
                    self.config['data']['max_trg_length'],
                    add_start=True, add_end=True
                )
            )

            output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
            all_gold_inds = [[x for x in hyp] for hyp in output_lines_trg_gold]
            all_gold = [
                ' '.join([trg['id2word'][x] for x in hyp])
                for hyp in all_gold_inds
            ]

            trg_preds += all_preds
            trg_gold += all_gold
            for p in all_preds:
                print p.replace('<s>', '').replace('</s>', '').strip()
Example #5
    def decode_batch(self, idx):
        """Decode a minibatch."""
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            self.src['data'],
            self.src_dict,
            idx,
            self.config['data']['batch_size'],
            self.config['data']['max_src_length'],
            add_start=True,
            add_end=True)
        #print(self.src_dict)
        '''
        lines = [
                ['<s>'] + line + ['</s>']
                for line in self.src['data'][idx:idx + self.config['data']['max_src_length']]
                ]
        lines = [line[:self.config['data']['max_src_length']] for line in lines]
        lens = [len(line) for line in lines]
        max_len = max(lens)
        word2ind = self.src_dict
        input_lines = [
                [word2ind[w] if w in word2ind else word2ind['<unk>'] for w in line[:-1]] +
                [word2ind['<pad>']] * (max_len - len(line))
                for line in lines
                ]
        #print(len(input_lines))
        #print(input_lines_src[0])
        '''
        #id2word_src = {v: k for k, v in self.src_dict.iteritems()}
        #inp = input_lines_src[0].data.cpu().numpy().tolist()
        #print([inv_dict[a] for a in inp])
        beam_size = self.beam_size

        #  (1) run the encoder on the src

        context_h, (
            context_h_t,
            context_c_t) = self.get_hidden_representation(input_lines_src)

        context_h = context_h.transpose(0, 1)  # Make things sequence first.

        #  (3) run the decoder to generate sentences, using beam search

        batch_size = context_h.size(1)

        # Expand tensors for each beam.
        context = Variable(context_h.data.repeat(1, beam_size, 1))
        #print context.size()
        dec_states = [
            Variable(context_h_t.data.repeat(1, beam_size, 1)),
            Variable(context_c_t.data.repeat(1, beam_size, 1))
        ]

        finite_state_machines = [
            FSMBeamSearch(input_lines_src[k][1:]) for k in range(batch_size)
        ]

        beam = [
            Beam(beam_size,
                 self.tgt_dict,
                 self.id2word_src,
                 trg['id2word'],
                 cuda=True) for k in range(batch_size)
        ]
        '''fsms = []
        for k in range(batch_size):
            tuple = input_lines_src.data[k][1:]
            constraint_ids = filter(lambda s: s != self.src_dict["EmptyParameter"], tuple)
            fsm = FSMBeamSearch(tuple, constraint_ids, beam_size, self.tgt_dict, 
                self.id2word_src, trg['id2word'], cuda=True)
            fsms.append(fsm)'''

        dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
        dec_states[0] = dec_out
        #print(dec_states[0].size())

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(batch_size):
            # initial state is 0000
            # and if some parameters are EmptyParameter
            # if tuple is (xxx, yyy, EmptyParameter, EmptyParameter)
            # then the state will be 1100
            state = 0
            input_event = [
                self.id2word_src[e]
                for e in input_lines_src[i].data.cpu().numpy().tolist()
            ][1:]
            #print(input_event)
            for j, evt in enumerate(input_event):
                if evt == "EmptyParameter" or evt == "<unk>":
                    state = state | (1 << j)
            #this state will be the initial state
            # print(i, state, input_event)
            finite_state_machines[i].beams[state] = beam[i]
            finite_state_machines[i].evt_tokens = input_event
            finite_state_machines[i].num_states += 1
            finite_state_machines[i].dec_states[state] = [
                dec_states[0][i * beam_size:(i + 1) * beam_size, :].clone(),
                dec_states[1][:, i * beam_size:(i + 1) * beam_size, :].clone()
            ]
            finite_state_machines[i].context[state] = context[0][
                i * beam_size:(i + 1) * beam_size, :].clone()

        for i in range(self.config['data']['max_trg_length']):
            #print(i)
            current_states = []
            for b in finite_state_machines:
                if not b.done:
                    current_states += b.get_current_state()
            input = torch.stack(current_states).t().contiguous().view(1, -1)

            trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
            #print trg_emb.size()
            #print dec_states[0].size(), dec_states[1].size()
            #print context.size()
            trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
                trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
                context)

            dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))

            dec_out = trg_h_t.squeeze(1).view(-1, self.model.trg_hidden_dim)
            #print dec_out.size()
            out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)

            word_lk = out.view(beam_size, len(current_states),
                               -1).transpose(0, 1).contiguous()

            active = []
            cur = 0
            for b in range(batch_size):
                if finite_state_machines[b].done:
                    continue

                idx = batch_idx[b]
                #print(idx, len(lines), input_lines_src.size())
                cur_state_size = finite_state_machines[b].num_states
                # print(b, cur_state_size)
                # print(dec_states[0][:,cur*self.beam_size:(cur+cur_state_size)*self.beam_size,:].size(), dec_states[1][:,cur*self.beam_size:(cur+cur_state_size)*self.beam_size,:].size())
                # print(context[:,cur*self.beam_size:(cur+cur_state_size)*self.beam_size,:].size())
                if not finite_state_machines[b].advance(
                        word_lk.data[cur:(cur + cur_state_size)],
                        input_lines_src[idx],
                    [
                        dec_states[0][:, cur *
                                      self.beam_size:(cur + cur_state_size) *
                                      self.beam_size, :].clone(),
                        dec_states[1][:, cur *
                                      self.beam_size:(cur + cur_state_size) *
                                      self.beam_size, :].clone()
                    ], context[:, cur * self.beam_size:(cur + cur_state_size) *
                               self.beam_size, :].clone()):
                    active += [b]

                cur += cur_state_size

                # for dec_state in dec_states:  # iterate over h, c
                #     # layers x beam*sent x dim
                #     #print dec_state.size(1), dec_state.size(2), dec_state.size(3)
                #     state_size = dec_state.size(1) * dec_state.size(3) if self.model.nlayers_trg > 1 else dec_state.size(2)
                #     sent_states = dec_state.view(
                #         -1, beam_size, len(current_states), state_size
                #     )[:, :, idx]
                #     sent_states.data.copy_(
                #         sent_states.data.index_select(
                #             1,
                #             beam[b].get_current_origin()
                #         )
                #     )

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            # def update_active(t):
            #     # select only the remaining active sentences
            #     view = t.data.view(
            #         -1, remaining_sents,
            #         self.model.decoder.hidden_size
            #     )
            #     new_size = list(t.size())
            #     new_size[-2] = new_size[-2] * len(active_idx) \
            #         // remaining_sents
            #     return Variable(view.index_select(
            #         1, active_idx
            #     ).view(*new_size))

            new_dec_states0 = None
            new_dec_states1 = None
            new_context = None
            #update active dec_states
            for k in range(batch_size):
                if not finite_state_machines[k].done:
                    fsm_dec_states0, fsm_dec_states1 = finite_state_machines[
                        k].get_dec_states()
                    if new_dec_states0 is None:
                        new_dec_states0 = fsm_dec_states0.clone()
                    else:
                        new_dec_states0 = torch.cat(
                            (new_dec_states0, fsm_dec_states0.clone()), 1)

                    if new_dec_states1 is None:
                        new_dec_states1 = fsm_dec_states1.clone()
                    else:
                        new_dec_states1 = torch.cat(
                            (new_dec_states1.clone(), fsm_dec_states1.clone()),
                            1)

                    fsm_context = finite_state_machines[k].get_context()
                    if new_context is None:
                        new_context = fsm_context.clone()
                    else:
                        new_context = torch.cat(
                            (new_context, fsm_context.clone()), 1)

            dec_states = (new_dec_states0, new_dec_states1)
            context = new_context

            remaining_sents = len(active)

        #  (4) package everything up

        allHyp, allScores = [], []
        n_best = 1

        for b in range(batch_size):
            hyps, scores = finite_state_machines[b].get_hyp()
            hyps = [(hyp[0].item(), ) for hyp in hyps]
            allScores += [scores]
            allHyp += [hyps]

        return allHyp, allScores
Example #6
    def translate(self):
        """Evaluate model."""
        preds = []
        ground_truths = []
        for j in xrange(0, len(self.src['data']),
                        self.config['data']['batch_size']):

            #print 'Decoding : %d out of %d ' % (j, len(self.src['data']))
            # Get source minibatch
            input_lines_src, output_lines_src, lens_src, mask_src = (
                get_minibatch(self.src['data'],
                              self.src['word2id'],
                              j,
                              self.config['data']['batch_size'],
                              self.config['data']['max_src_length'],
                              add_start=True,
                              add_end=True))

            input_lines_src = Variable(input_lines_src.data, volatile=True)
            output_lines_src = Variable(output_lines_src.data, volatile=True)
            mask_src = Variable(mask_src.data, volatile=True)

            # Get target minibatch
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(self.trg['data'],
                              self.trg['word2id'],
                              j,
                              self.config['data']['batch_size'],
                              self.config['data']['max_trg_length'],
                              add_start=True,
                              add_end=True))

            input_lines_trg_gold = Variable(input_lines_trg_gold.data,
                                            volatile=True)
            output_lines_trg_gold = Variable(output_lines_trg_gold.data,
                                             volatile=True)
            mask_src = Variable(mask_src.data, volatile=True)

            # Initialize target with <s> for every sentence
            input_lines_trg = Variable(torch.LongTensor(
                [[trg['word2id']['<s>']]
                 for i in xrange(input_lines_src.size(0))]),
                                       volatile=True).cuda()

            # Decode a minibatch greedily add beam search decoding
            input_lines_trg = self.decode_minibatch(input_lines_src,
                                                    input_lines_trg,
                                                    output_lines_trg_gold)

            # Copy minibatch outputs to cpu and convert ids to words
            input_lines_trg = input_lines_trg.data.cpu().numpy()
            input_lines_trg = [[self.trg['id2word'][x] for x in line]
                               for line in input_lines_trg]

            # Do the same for gold sentences
            output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
            output_lines_trg_gold = [[self.trg['id2word'][x] for x in line]
                                     for line in output_lines_trg_gold]

            # Process outputs
            for sentence_pred, sentence_real, sentence_real_src in zip(
                    input_lines_trg, output_lines_trg_gold, output_lines_src):
                if '</s>' in sentence_pred:
                    index = sentence_pred.index('</s>')
                else:
                    index = len(sentence_pred)
                preds.append(['<s>'] + sentence_pred[:index + 1])

                if '</s>' in sentence_real:
                    index = sentence_real.index('</s>')
                else:
                    index = len(sentence_real)

                ground_truths.append(['<s>'] + sentence_real[:index + 1])
                #trg_preds += preds
                #trg_gold += all_gold
                for p in preds:
                    print " ".join(p).replace('</s>', '').strip()
Example #7
    def predict(self, semantic_frame):

        intent = semantic_frame['diaact']
        if 'request' in semantic_frame['diaact']:
            slots = [intent] + [
                k for k in list(semantic_frame['request_slots'].keys())
            ]
            slot2word = semantic_frame['request_slots']
        else:
            slots = [intent] + [
                k for k in list(semantic_frame['inform_slots'].keys())
            ]
            slot2word = semantic_frame['inform_slots']

        src_new = {
            'data': [slots],
            'word2id': self.src_word2id,
            'id2word': self.src_id2word
        }
        trg_new = {
            'data': [['人生', '好', '困難', '到底', '該', '怎麼辦', '呢']],
            'word2id': self.trg_word2id,
            'id2word': self.trg_id2word
        }

        preds = []

        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_new['data'],
            src_new['word2id'],
            0,
            1,
            10,
            add_start=True,
            add_end=True)
        input_lines_trg_gold, output_lines_trg_gold, lens_trg, mask_trg = get_minibatch(
            trg_new['data'],
            trg_new['word2id'],
            0,
            1,
            20,
            add_start=True,
            add_end=True)

        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(
            torch.LongTensor([[self.trg_word2id['<s>']]
                              for i in range(input_lines_src.size(0))]))

        input_lines_trg = decode_minibatch(self.config, self.model,
                                           input_lines_src, input_lines_trg,
                                           output_lines_trg_gold)

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [[self.trg_id2word[x] for x in line]
                           for line in input_lines_trg]

        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [[self.trg_id2word[x] for x in line]
                                 for line in output_lines_trg_gold]

        # Process outputs
        for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg, output_lines_trg_gold, output_lines_src):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)

            preds.append([
                slot2word[word] if word in slot2word.keys() else word
                for word in sentence_pred[:index + 1]
            ])
            """
            print("Predict: {}".format(' '.join(sentence_pred[:index + 1])))
            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            print("RealAns: {}".format(' '.join(['<s>'] + sentence_real[:index + 1])))
            print('===========================================')
            """

        return ''.join(preds[0][1:-1])
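
A minimal usage sketch for the predict method above; the frame contents and the `nlg` instance name are purely illustrative assumptions.

# Hypothetical call; the slot names and values below are made up for illustration.
frame = {
    'diaact': 'inform',
    'inform_slots': {'genre': '動作片', 'date': '明天'},
}
sentence = nlg.predict(frame)  # `nlg` is assumed to be an instance of the class above
print(sentence)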
Example #8
    def decode_batch(self, idx):
        """Decode a minibatch."""
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            self.src['data'],
            self.src_dict,
            idx,
            self.config['data']['batch_size'],
            self.config['data']['max_src_length'],
            add_start=True,
            add_end=True)

        beam_size = self.beam_size

        #  (1) run the encoder on the src

        context_h, (
            context_h_t,
            context_c_t) = self.get_hidden_representation(input_lines_src)

        context_h = context_h.transpose(0, 1)  # Make things sequence first.

        #  (3) run the decoder to generate sentences, using beam search

        batch_size = context_h.size(1)

        # Expand tensors for each beam.
        context = Variable(context_h.data.repeat(1, beam_size, 1))
        dec_states = [
            Variable(context_h_t.data.repeat(1, beam_size, 1)),
            Variable(context_c_t.data.repeat(1, beam_size, 1))
        ]

        beam = [
            Beam(beam_size, self.tgt_dict, cuda=True)
            for k in range(batch_size)
        ]

        dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
        dec_states[0] = dec_out

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(self.config['data']['max_trg_length']):

            input = torch.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
            trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
                trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
                context)

            dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))

            dec_out = trg_h_t.squeeze(1)
            out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                if not beam[b].advance(word_lk.data[idx]):
                    active += [b]

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 dec_state.size(2))[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.model.decoder.hidden_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            context = update_active(context)

            remaining_sents = len(active)

        #  (4) package everything up

        allHyp, allScores = [], []
        n_best = 1

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()

            allScores += [scores[:n_best]]
            hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
            allHyp += [hyps]

        return allHyp, allScores
Example #9
    def translate(self):
        """Evaluate model."""
        preds = []
        ground_truths = []
        out_put = open(self.output,'w')

        for j in xrange(0, len(self.src['data']),self.config['data']['batch_size']):

            print('Decoding : %d out of %d ' % (j, len(self.src['data'])))
            # Get source minibatch
            input_lines_src, output_lines_src, lens_src, mask_src = (
                get_minibatch(
                    self.src['data'], self.src['word2id'], j,
                    self.config['data']['batch_size'],
                    self.config['data']['max_src_length'],
                    is_gui=False, add_start=True, add_end=True
                )
            )
            #if input_lines_src.size(0) != self.config['data']['batch_size']:
            #    break

            input_lines_src = Variable(input_lines_src.data, volatile=True)
            output_lines_src = Variable(output_lines_src.data, volatile=True)
            mask_src = Variable(mask_src.data, volatile=True)

            # Get target minibatch
            input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
                get_minibatch(
                    self.trg['data'], self.trg['word2id'], j,
                    self.config['data']['batch_size'],
                    self.config['data']['max_trg_length'],
                    is_gui=False, add_start=True, add_end=True
                )
            )

            input_lines_trg_gold = Variable(input_lines_trg_gold.data, volatile=True)
            output_lines_trg_gold = Variable(output_lines_trg_gold.data, volatile=True)
            mask_src = Variable(mask_src.data, volatile=True)

            input_lines_gui, output_lines_gui, lens_gui, mask_gui, input_type_gui = get_minibatch(
                self.gui['data'], self.gui['word2id'], j, self.config['data']['batch_size'],
                self.config['data']['max_gui_length'], is_gui=True, add_start=True, add_end=True, line_types=self.gui['type']
            )

            input_lines_gui = Variable(input_lines_gui.data, volatile=True)
            output_lines_gui = Variable(output_lines_gui.data, volatile=True)
            input_type_gui = Variable(input_type_gui.data, volatile=True)
            mask_gui = Variable(mask_gui.data, volatile=True)


            # Initialize target with <s> for every sentence
            input_lines_trg = Variable(torch.LongTensor(
                [
                    [trg['word2id']['<s>']]
                    for i in xrange(input_lines_src.size(0))
                ]
            ), volatile=True).cuda()

            # Decode a minibatch greedily __TODO__ add beam search decoding
            input_lines_trg = self.decode_minibatch(
                input_lines_src, input_lines_trg,
                input_lines_gui,
                input_type_gui,
                output_lines_trg_gold
            )

            # Copy minibatch outputs to cpu and convert ids to words
            input_lines_trg = input_lines_trg.data.cpu().numpy()
            input_lines_trg = [
                [self.trg['id2word'][x] for x in line]
                for line in input_lines_trg
            ]

            # Do the same for gold sentences
            output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
            output_lines_trg_gold = [
                [self.trg['id2word'][x] for x in line]
                for line in output_lines_trg_gold
            ]

            # Process outputs
            for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg,
                output_lines_trg_gold,
                output_lines_src
            ):
                if '</s>' in sentence_pred:
                    index = sentence_pred.index('</s>')
                else:
                    index = len(sentence_pred)

                preds.append(sentence_pred[:index + 1])

                out_put.writelines(' '.join(sentence_pred[1:index]) + '\n')
                print('Predicted : %s ' % (' '.join(sentence_pred[1:index])))


                if '</s>' in sentence_real:
                    index = sentence_real.index('</s>')
                else:
                    index = len(sentence_real)

                ground_truths.append(['<s>'] + sentence_real[:index + 1])

                print('-----------------------------------------------')
                print('Real : %s ' % (' '.join(sentence_real[:index])))

                print('===============================================')

            #print(preds)

        bleu_score = get_bleu(preds, ground_truths)
        print('BLEU score : %.5f ' % (bleu_score))
        out_put.close()
Example #10
def main(args):
    data_utils.download_and_extract_tar(DATA_URL, args.data_dir)
    data_utils.download_and_extract_tar(tf_utils.INCEPTION_URL, args.model_dir)

    dataset = data_utils.build_dataset_object(os.path.join(
        args.data_dir, 'Images'),
                                              test_percent=0.1,
                                              training_percent=0.1,
                                              force_rebuild=args.force_rebuild)

    model_path = (os.path.join(args.model_dir, 'classify_image_graph_def.pb'))
    pretrained_model = tf_utils.PretrainedModel(model_path)

    features_size = pretrained_model.features_tensor.get_shape()[0]
    num_labels = len(dataset['label_to_index'])

    input_tensor, label_tensor, train_step, mean_loss, accuracy = \
        add_new_layer(features_size, num_labels)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        checkpoint_state = tf.train.get_checkpoint_state(args.model_dir)
        if checkpoint_state and checkpoint_state.model_checkpoint_path:
            print('Loading checkpoint')
            saver.restore(sess, checkpoint_state.model_checkpoint_path)

        print('Setting up validation')
        images_validation, labels_validation = \
            pretrained_model.run(sess, dataset['validation'], dataset['label_to_index'], args.model_dir)

        for i in range(args.steps):
            images = data_utils.get_minibatch(dataset['train'],
                                              args.batch_size)
            features, labels = \
                pretrained_model.run(sess, images, dataset['label_to_index'], args.model_dir)
            loss, _ = sess.run([mean_loss, train_step],
                               feed_dict={
                                   input_tensor: features,
                                   label_tensor: labels
                               })

            if args.verbose:
                sys.stdout.write('\rStep: %i - Loss: %f' % (i + 1, loss))
                sys.stdout.flush()

            if (i + 1) % args.checkpoint_interval == 0:
                print()
                print('Saving checkpoint - Step: %i' % (i + 1))
                checkpoint_path = os.path.join(args.model_dir,
                                               'model.checkpoint')
                saver.save(sess, checkpoint_path, global_step=i + 1)

                print('Running validation')
                validation_accuracy = sess.run(accuracy,
                                               feed_dict={
                                                   input_tensor:
                                                   images_validation,
                                                   label_tensor:
                                                   labels_validation
                                               })
                print('Validation accuracy: %f%%' %
                      (float(validation_accuracy) * 100.0))

        print()
        print('Saving model')
        saver.save(sess, os.path.join(args.model_dir, 'model.graph'))

        print('Running test')
        images_test, labels_test = \
            pretrained_model.run(sess, dataset['test'], dataset['label_to_index'], args.model_dir)
        test_accuracy = sess.run(accuracy,
                                 feed_dict={
                                     input_tensor: images_test,
                                     label_tensor: labels_test
                                 })
        print('Test accuracy: %f%%' % (float(test_accuracy) * 100.0))
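
The example above reads every hyperparameter from an `args` namespace. A hedged sketch of a matching argparse entry point is shown below; the flag names mirror the attributes accessed in `main`, while the defaults are assumptions.

import argparse

if __name__ == '__main__':
    # Hypothetical CLI; only the attribute names are taken from main() above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data')
    parser.add_argument('--model_dir', default='model')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--steps', type=int, default=1000)
    parser.add_argument('--checkpoint_interval', type=int, default=100)
    parser.add_argument('--force_rebuild', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    main(parser.parse_args())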
Example #11
    def decode_batch(self, idx):
        """Decode a minibatch."""
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            self.src['data'],
            self.src_dict,
            idx,
            self.config['data']['batch_size'],
            self.config['data']['max_src_length'],
            add_start=True,
            add_end=True)
        #print(self.src_dict)
        '''
        lines = [
                ['<s>'] + line + ['</s>']
                for line in self.src['data'][idx:idx + self.config['data']['max_src_length']]
                ]
        lines = [line[:self.config['data']['max_src_length']] for line in lines]
        lens = [len(line) for line in lines]
        max_len = max(lens)
        word2ind = self.src_dict
        input_lines = [
                [word2ind[w] if w in word2ind else word2ind['<unk>'] for w in line[:-1]] +
                [word2ind['<pad>']] * (max_len - len(line))
                for line in lines
                ]
        #print(len(input_lines))
        #print(input_lines_src[0])
        '''
        #id2word_src = {v: k for k, v in self.src_dict.iteritems()}
        #inp = input_lines_src[0].data.cpu().numpy().tolist()
        #print([inv_dict[a] for a in inp])
        beam_size = self.beam_size

        #  (1) run the encoder on the src

        context_h, (
            context_h_t,
            context_c_t) = self.get_hidden_representation(input_lines_src)

        context_h = context_h.transpose(0, 1)  # Make things sequence first.

        #  (3) run the decoder to generate sentences, using beam search

        batch_size = context_h.size(1)

        # Expand tensors for each beam.
        context = Variable(context_h.data.repeat(1, beam_size, 1))
        #print context.size()
        dec_states = [
            Variable(context_h_t.data.repeat(1, beam_size, 1)),
            Variable(context_c_t.data.repeat(1, beam_size, 1))
        ]

        beam = [
            MonteCarlo(beam_size,
                       self.tgt_dict,
                       self.id2word_src,
                       trg['id2word'],
                       self.config['data']['max_trg_length'],
                       self.model,
                       cuda=True) for k in range(batch_size)
        ]

        dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
        dec_states[0] = dec_out
        #print(dec_states[0].size())

        batch_idx = list(range(batch_size))
        remaining_sents = batch_size

        for i in range(self.config['data']['max_trg_length']):
            #print(i)
            input = torch.stack([
                b.get_current_state() for b in beam if not b.done
            ]).t().contiguous().view(1, -1)

            trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
            #print trg_emb.size()
            #print dec_states[0].size(), dec_states[1].size()
            #print context.size()
            trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
                trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
                context)

            dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))

            dec_out = trg_h_t.squeeze(1).view(-1, self.model.trg_hidden_dim)
            #print dec_out.size()
            out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)

            word_lk = out.view(beam_size, remaining_sents,
                               -1).transpose(0, 1).contiguous()

            active = []
            cur = 0
            for b in range(batch_size):
                if beam[b].done:
                    continue

                idx = batch_idx[b]
                #print(idx, len(lines), input_lines_src.size())
                if not beam[b].advance(
                        word_lk.data[idx], input_lines_src[idx], [
                            dec_states[0][:, cur * self.beam_size:(cur + 1) *
                                          self.beam_size, :],
                            dec_states[1][:, cur * self.beam_size:(cur + 1) *
                                          self.beam_size, :]
                        ], context[:, cur * self.beam_size:(cur + 1) *
                                   self.beam_size, :]):
                    active += [b]
                    cur += 1

                for dec_state in dec_states:  # iterate over h, c
                    # layers x beam*sent x dim
                    #print dec_state.size(1), dec_state.size(2), dec_state.size(3)
                    state_size = dec_state.size(1) * dec_state.size(
                        3) if self.model.nlayers_trg > 1 else dec_state.size(2)
                    sent_states = dec_state.view(-1, beam_size,
                                                 remaining_sents,
                                                 state_size)[:, :, idx]
                    sent_states.data.copy_(
                        sent_states.data.index_select(
                            1, beam[b].get_current_origin()))

            if not active:
                break

            # in this section, the sentences that are still active are
            # compacted so that the decoder is not run on completed sentences
            active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
            batch_idx = {beam: idx for idx, beam in enumerate(active)}

            def update_active(t):
                # select only the remaining active sentences
                view = t.data.view(-1, remaining_sents,
                                   self.model.decoder.hidden_size)
                new_size = list(t.size())
                new_size[-2] = new_size[-2] * len(active_idx) \
                    // remaining_sents
                return Variable(
                    view.index_select(1, active_idx).view(*new_size))

            dec_states = (update_active(dec_states[0]),
                          update_active(dec_states[1]))
            dec_out = update_active(dec_out)
            context = update_active(context)

            remaining_sents = len(active)

        #  (4) package everything up

        allHyp, allScores = [], []
        n_best = 1

        for b in range(batch_size):
            scores, ks = beam[b].sort_best()
            #print(ks)

            allScores += [scores[:n_best]]
            hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
            #print "Element in batch " + str(hyps)
            #print(hyps)
            allHyp += [hyps]

        return allHyp, allScores
Example #12
def evaluate_model(
    model, src, src_test, trg,
    trg_test, config, src_valid=None, trg_valid=None,
    verbose=True, metric='bleu', use_cuda=False
):
    """Evaluate model.
    :param model: the model object
    :param src:
    :param src_test:
    :param trg:
    :param trg_test:
    :param config: the config object
    :param src_valid:
    :param trg_valid:
    :param verbose:
    :param metric:
    :param use_cuda:
    :return:
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, _ = get_minibatch(
            src_test['data'], src['word2id'], j, config['data']['batch_size'],
            config['data']['max_src_length'], add_start=True, add_end=True,
            use_cuda=use_cuda)

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, _ = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'], config['data']['max_trg_length'],
                add_start=True, add_end=True, use_cuda=use_cuda
            ))

        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(torch.LongTensor(
            [
                [trg['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        ))
        if use_cuda:
            input_lines_trg = input_lines_trg.cuda()

        # print("input_lines_src: ", input_lines_src.size(), "input_lines_trg: ", input_lines_trg.size())
        # input_lines_src: [80, 49],   "input_lines_trg: " [80, 1]
        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = decode_minibatch(
            config, model, input_lines_src,
            input_lines_trg, output_lines_trg_gold,
            use_cuda=use_cuda
        )
        # save gpu memory(in vain)
        input_lines_src = input_lines_src.data.cpu().numpy()
        del input_lines_src
        output_lines_src = output_lines_src.data.cpu().numpy()
        input_lines_trg_gold = input_lines_trg_gold.data.cpu().numpy()
        del input_lines_trg_gold

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]
        print("input_lines_trg: ", input_lines_trg[0])
        print("the length  of a sent", len(input_lines_trg[0]))
        # Process outputs
        for sentence_pred, sentence_real, sentence_real_src in zip(
            input_lines_trg,
            output_lines_trg_gold,
            output_lines_src
        ):
            # Strip the <s> and </s> tokens to rebuild the full sentence before computing BLEU
            if '<s>' in sentence_pred:
                index = sentence_pred.index('<s>')
                sentence_pred = sentence_pred[index+1:]
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
                sentence_pred = sentence_pred[:index]
            preds.append(sentence_pred)

            if '<s>' in sentence_real:
                index = sentence_real.index('<s>')
                sentence_real = sentence_real[index+1:]
            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
                sentence_real = sentence_real[: index]
            ground_truths.append(sentence_real)

    print("call the get_bleu method to calc bleu score.....")
    print("preds: ", preds[0])
    print("ground_truths: ", ground_truths[0])
    return get_bleu(preds, ground_truths)
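
A minimal sketch of how this evaluate_model variant might be called once the data dictionaries and model exist; everything except the function signature itself is assumed.

# Hypothetical call site; model, src/src_test/trg/trg_test and config are assumed
# to be prepared exactly as in the examples on this page.
bleu = evaluate_model(
    model, src, src_test, trg, trg_test, config,
    verbose=False, metric='bleu', use_cuda=torch.cuda.is_available()
)
print('Test BLEU: %.5f' % bleu)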
Example #13
File: dialog.py  Project: morning-dews/ABQG
batch_index = range(0, len(src['data']), batch_size)
for i in xrange(1000):
    losses = []
    #flag_continue = False

    shuffle(batch_index)
    # flag_continue = False
    for j in batch_index:

        #---------------------------------------------------------------
        # get mini batch
        input_lines_src, _, lens_src, mask_src = get_minibatch(src['data'],
                                                               src['word2id'],
                                                               j,
                                                               batch_size,
                                                               max_length_src,
                                                               is_gui=False,
                                                               add_start=True,
                                                               add_end=False)

        #if input_lines_src.size(0) != batch_size:
        #flag_continue = True
        #break

        input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch(
            trg['data'],
            trg['word2id'],
            j,
            batch_size,
            max_length_trg,
            is_gui=False,
Example #14
def evaluate_model(
    model, src, src_test, trg,
    trg_test, config, src_valid=None, trg_valid=None,
    verbose=True, metric='bleu'
):
    """Evaluate model."""
    preds = []
    ground_truths = []
    for j in xrange(0, len(src_test['data']), config['data']['batch_size']):

        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'], src['word2id'], j, config['data']['batch_size'],
            config['data']['max_src_length'], add_start=True, add_end=True
        )

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'], config['data']['max_trg_length'],
                add_start=True, add_end=True
            )
        )

        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(torch.LongTensor(
            [
                [trg['word2id']['<s>']]
                for i in xrange(input_lines_src.size(0))
            ]
        ))

        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = decode_minibatch(
            config, model, input_lines_src,
            input_lines_trg, output_lines_trg_gold
        )

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]

        # Process outputs
        for sentence_pred, sentence_real, sentence_real_src in zip(
            input_lines_trg,
            output_lines_trg_gold,
            output_lines_src
        ):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(['<s>'] + sentence_pred[:index + 1])

            if verbose:
                print ' '.join(['<s>'] + sentence_pred[:index + 1])

            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            if verbose:
                print ' '.join(['<s>'] + sentence_real[:index + 1])
            if verbose:
                print '--------------------------------------'
            ground_truths.append(['<s>'] + sentence_real[:index + 1])

    return get_bleu(preds, ground_truths)
Example #15
def evaluate_model(model,
                   src,
                   src_test,
                   trg,
                   trg_test,
                   config,
                   src_valid=None,
                   trg_valid=None,
                   verbose=True,
                   metric='bleu'):
    """Evaluate model."""
    preds = []
    ground_truths = []
    for j in xrange(0, len(src_test['data']), config['data']['batch_size']):

        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'],
            src['word2id'],
            j,
            config['data']['batch_size'],
            config['data']['max_src_length'],
            add_start=True,
            add_end=True)

        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = get_minibatch(
            trg_test['data'],
            trg['word2id'],
            j,
            config['data']['batch_size'],
            config['data']['max_src_length'],
            add_start=True,
            add_end=True)

        input_lines_trg = Variable(
            torch.LongTensor([[trg['word2id']['<s>']]
                              for i in xrange(input_lines_src.size(0))
                              ])).cuda()

        for i in xrange(config['data']['max_src_length']):

            decoder_logit = model(input_lines_src, input_lines_trg)
            word_probs = model.decode(decoder_logit)
            decoder_argmax = word_probs.data.cpu().numpy().argmax(axis=-1)
            next_preds = Variable(torch.from_numpy(decoder_argmax[:,
                                                                  -1])).cuda()

            input_lines_trg = torch.cat(
                (input_lines_trg, next_preds.unsqueeze(1)), 1)

        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [[trg['id2word'][x] for x in line]
                           for line in input_lines_trg]

        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [[trg['id2word'][x] for x in line]
                                 for line in output_lines_trg_gold]

        for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg, output_lines_trg_gold, output_lines_src):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(['<s>'] + sentence_pred[:index + 1])

            if verbose:
                print ' '.join(['<s>'] + sentence_pred[:index + 1])

            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            if verbose:
                print ' '.join(['<s>'] + sentence_real[:index + 1])
            if verbose:
                print '--------------------------------------'
            ground_truths.append(['<s>'] + sentence_real[:index + 1])

            if '</s>' in sentence_real_src:
                index = sentence_real_src.index('</s>')
            else:
                index = len(sentence_real_src)

    return get_bleu(preds, ground_truths)
Example #16
if config['training']['optimizer'] == 'adam':
    lr = config['training']['lrate']
    optimizer = optim.Adam(model.parameters(), lr=lr)
elif config['training']['optimizer'] == 'adadelta':
    optimizer = optim.Adadelta(model.parameters())
elif config['training']['optimizer'] == 'sgd':
    lr = config['training']['lrate']
    optimizer = optim.SGD(model.parameters(), lr=lr)
else:
    raise NotImplementedError("Learning method not recommended for task")

for i in range(1000):
    losses = []
    for j in range(0, len(src['data']), batch_size):

        input_lines_src, _, lens_src, mask_src = get_minibatch(
            src['data'], src['word2id'], j,
            batch_size, max_length_src, add_start=True, add_end=False
        )

        input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch(
            trg['data'], trg['word2id'], j,
            batch_size, max_length_trg, add_start=True, add_end=True
        )

        decoder_logit = model(input_lines_src, input_lines_trg)
        optimizer.zero_grad()

        loss = loss_criterion(
            decoder_logit.contiguous().view(-1, vocab_size),
            output_lines_trg.view(-1)
        )
        losses.append(loss.data[0])
Example #17
"""

optimizer = optim.Adam(model.parameters(), lr=config['training']['lrate'])


epoch = 1000
for i in range(epoch):

    losses = []

    for j in range(0, len(src_train['data']), batch_size):
        slots = src_train['data'][j][1:]
        shuffle(slots)
        src_train['data'][j][1:] = slots

        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(src_train['data'], src_train['word2id'], j, batch_size, config['data']['max_src_length'])
        input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch(trg_train['data'], trg_train['word2id'], j, batch_size, config['data']['max_trg_length'])


        decoder_logit = model(input_lines_src, input_lines_trg)
        optimizer.zero_grad()


        loss = loss_criterion(
                decoder_logit.contiguous().view(-1, trg_vocab_size),
                output_lines_trg.view(-1)
        )
        losses.append(loss.data[0])
        loss.backward()
        optimizer.step()