def model_perplexity(
        model, src, src_test, trg, trg_test, config, loss_criterion,
        src_valid=None, trg_valid=None, verbose=False):
    """Compute model perplexity, exp of the mean per-minibatch loss.

    :param model: seq2seq model mapping (src input, trg input) -> logits
    :param src: dict with at least 'word2id' for the source vocabulary
    :param src_test: dict with 'data' holding source token sequences
    :param trg: dict with at least 'word2id' for the target vocabulary
    :param trg_test: dict with 'data' holding target token sequences
    :param config: configuration dict; only the 'data' section is read
    :param loss_criterion: loss over flattened logits vs. gold target ids
    :param src_valid: unused, kept for interface compatibility
    :param trg_valid: unused, kept for interface compatibility
    :param verbose: unused, kept for interface compatibility
    :return: perplexity as a float
    """
    losses = []
    # NOTE(review): only the first 1% of the test set is scored
    # (len // 100) — presumably for speed; confirm this is intended.
    for j in xrange(0, len(src_test['data']) // 100,
                    config['data']['batch_size']):
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'],
            config['data']['max_src_length'], add_start=True, add_end=True)
        input_lines_src = Variable(input_lines_src.data, volatile=True)
        # BUG FIX: the original rebuilt output_lines_src from
        # input_lines_src.data, silently discarding the real outputs.
        output_lines_src = Variable(output_lines_src.data, volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(trg_test['data'], trg['word2id'], j,
                          config['data']['batch_size'],
                          config['data']['max_trg_length'],
                          add_start=True, add_end=True))
        input_lines_trg_gold = Variable(input_lines_trg_gold.data,
                                        volatile=True)
        output_lines_trg_gold = Variable(output_lines_trg_gold.data,
                                         volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        decoder_logit = model(input_lines_src, input_lines_trg_gold)

        # Cross-entropy over the flattened (batch * time, vocab) logits.
        loss = loss_criterion(
            decoder_logit.contiguous().view(-1, decoder_logit.size(2)),
            output_lines_trg_gold.view(-1))
        losses.append(loss.data[0])
    return np.exp(np.mean(losses))
def translate(self):
    """Translate the whole dataset and report corpus BLEU.

    Writes the predicted sentences (newline-joined) to ``self.output``
    and prints the BLEU score against the gold target side.
    """
    trg_preds = []
    trg_gold = []
    output_res = open(self.output, 'w')
    for j in xrange(
        0, len(self.src['data']),
        self.config['data']['batch_size']
    ):
        # Decode a single minibatch.
        print('Decoding %d out of %d ' % (j, len(self.src['data'])))
        # NOTE(review): `decoder` and `trg` are read from the enclosing
        # (module) scope here, not from self — confirm they are globals.
        hypotheses, scores = decoder.decode_batch(j)
        # Keep the best (first) candidate at each step of every hypothesis.
        all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
        all_preds = [
            ' '.join([trg['id2word'][x] for x in hyp[:-1]])
            for hyp in all_hyp_inds
        ]
        # Get target minibatch (gold references for BLEU).
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(
                self.trg['data'], self.tgt_dict, j,
                self.config['data']['batch_size'],
                self.config['data']['max_trg_length'],
                is_gui=False, add_start=True, add_end=True
            )
        )
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        all_gold_inds = [[x for x in hyp] for hyp in output_lines_trg_gold]
        all_gold = [
            ' '.join([trg['id2word'][x] for x in hyp[:-1]])
            for hyp in all_gold_inds
        ]
        trg_preds += all_preds
        trg_gold += all_gold
    # BUG FIX: the original called writelines('\n'.join(trg_preds)) inside
    # the loop, re-writing every previously accumulated prediction once per
    # minibatch (duplicated, ever-growing output). Write once, after.
    output_res.writelines('\n'.join(trg_preds))
    bleu_score = get_bleu(trg_preds, trg_gold)
    output_res.close()
    print('BLEU : %.5f ' % (bleu_score))
def translate(self):
    """Decode every source sentence and return the corpus BLEU score."""
    predictions = []
    references = []
    batch_size = self.config['data']['batch_size']
    n_examples = len(self.src['data'])
    for start in range(0, n_examples, batch_size):
        # Decode one minibatch.
        print('Decoding %d out of %d ' % (start, n_examples))
        hypotheses, scores = self.decode_batch(start)
        # The first candidate at each step forms the best hypothesis.
        for hyp in hypotheses:
            best_ids = [step[0] for step in hyp]
            predictions.append(
                ' '.join(self.trg['id2word'][tok] for tok in best_ids))
        # Fetch the gold target minibatch to build reference strings.
        _, gold_out, _, _ = get_minibatch(
            self.trg['data'], self.tgt_dict, start,
            batch_size,
            self.config['data']['max_trg_length'],
            add_start=True, add_end=True, use_cuda=self.use_cuda
        )
        gold_out = gold_out.data.cpu().numpy()
        for row in gold_out:
            references.append(
                ' '.join(self.trg['id2word'][tok] for tok in row))
    # Spot-check the first prediction/reference pair.
    print("investigate some preds and golds.....")
    print("trg_preds: ", predictions[0])
    print("trg_gold: ", references[0])
    return get_bleu(predictions, references)
def translate(self):
    """Translate the whole dataset and print cleaned predictions (Python 2).

    Accumulates predictions and gold references per minibatch; each batch's
    predictions are printed with the <s>/</s> markers stripped.
    """
    trg_preds = []
    trg_gold = []
    for j in xrange(
        0, len(self.src['data']),
        self.config['data']['batch_size']
    ):
        """Decode a single minibatch."""
        #print 'Decoding %d out of %d ' % (j, len(self.src['data']))
        # NOTE(review): `decoder` and `trg` are looked up in the enclosing
        # (module) scope, not on self — confirm they exist as globals.
        hypotheses, scores = decoder.decode_batch(j)
        # Keep the best (first) candidate at each step of every hypothesis.
        all_hyp_inds = [[x[0] for x in hyp] for hyp in hypotheses]
        all_preds = [
            ' '.join([trg['id2word'][x.item()] for x in hyp])
            for hyp in all_hyp_inds
        ]
        # Get target minibatch (gold references).
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(
                self.trg['data'], self.tgt_dict, j,
                self.config['data']['batch_size'],
                self.config['data']['max_trg_length'],
                add_start=True, add_end=True
            )
        )
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        all_gold_inds = [[x for x in hyp] for hyp in output_lines_trg_gold]
        all_gold = [
            ' '.join([trg['id2word'][x] for x in hyp])
            for hyp in all_gold_inds
        ]
        trg_preds += all_preds
        trg_gold += all_gold
        # Print this batch's predictions without sentence-boundary markers.
        for p in all_preds:
            print p.replace('<s>', '').replace('</s>', '').strip()
def decode_batch(self, idx):
    """Beam-search decode one minibatch under FSM coverage constraints.

    Each source sentence gets an FSMBeamSearch whose bitmask states record
    which source parameters are still uncovered; beams advance per-state
    and completed sentences are compacted out of the decoder batch.

    :param idx: start offset of the minibatch in self.src['data'].
    :return: (allHyp, allScores) — per-sentence hypothesis token-id tuples
        and their scores.
    """
    # Get source minibatch
    input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
        self.src['data'], self.src_dict, idx,
        self.config['data']['batch_size'],
        self.config['data']['max_src_length'],
        add_start=True, add_end=True)
    #print(self.src_dict)
    '''
    lines = [
        ['<s>'] + line + ['</s>']
        for line in self.src['data'][idx:idx + self.config['data']['max_src_length']]
    ]
    lines = [line[:self.config['data']['max_src_length']] for line in lines]
    lens = [len(line) for line in lines]
    max_len = max(lens)
    word2ind = self.src_dict
    input_lines = [
        [word2ind[w] if w in word2ind else word2ind['<unk>'] for w in line[:-1]] +
        [word2ind['<pad>']] * (max_len - len(line))
        for line in lines
    ]
    #print(len(input_lines))
    #print(input_lines_src[0])
    '''
    #id2word_src = {v: k for k, v in self.src_dict.iteritems()}
    #inp = input_lines_src[0].data.cpu().numpy().tolist()
    #print([inv_dict[a] for a in inp])
    beam_size = self.beam_size

    # (1) run the encoder on the src
    context_h, (
        context_h_t,
        context_c_t) = self.get_hidden_representation(input_lines_src)
    context_h = context_h.transpose(0, 1)  # Make things sequence first.

    # (3) run the decoder to generate sentences, using beam search
    batch_size = context_h.size(1)

    # Expand tensors for each beam.
    context = Variable(context_h.data.repeat(1, beam_size, 1))
    #print context.size()
    dec_states = [
        Variable(context_h_t.data.repeat(1, beam_size, 1)),
        Variable(context_c_t.data.repeat(1, beam_size, 1))
    ]
    # One FSM per sentence; input_lines_src[k][1:] drops the <s> token.
    finite_state_machines = [
        FSMBeamSearch(input_lines_src[k][1:]) for k in range(batch_size)
    ]
    # NOTE(review): `trg` is read from module scope here, not from self —
    # confirm it is defined as a global.
    beam = [
        Beam(beam_size, self.tgt_dict, self.id2word_src, trg['id2word'],
             cuda=True) for k in range(batch_size)
    ]
    '''fsms = []
    for k in range(batch_size):
        tuple = input_lines_src.data[k][1:]
        constraint_ids = filter(lambda s: s != self.src_dict["EmptyParameter"], tuple)
        fsm = FSMBeamSearch(tuple, constraint_ids, beam_size, self.tgt_dict,
                            self.id2word_src, trg['id2word'], cuda=True)
        fsms.append(fsm)'''
    dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
    dec_states[0] = dec_out
    #print(dec_states[0].size())
    batch_idx = list(range(batch_size))
    remaining_sents = batch_size

    for i in range(batch_size):
        # initial state is 0000
        # and if some parameters are EmptyParameter
        # if tuple is (xxx, yyy, EmptyParameter, EmptyParameter)
        # then the state will be 1100
        state = 0
        input_event = [
            self.id2word_src[e]
            for e in input_lines_src[i].data.cpu().numpy().tolist()
        ][1:]
        #print(input_event)
        for j, evt in enumerate(input_event):
            if evt == "EmptyParameter" or evt == "<unk>":
                state = state | (1 << j)
        # this state will be the initial state
        # print(i, state, input_event)
        finite_state_machines[i].beams[state] = beam[i]
        finite_state_machines[i].evt_tokens = input_event
        finite_state_machines[i].num_states += 1
        # Seed the FSM's per-state decoder hidden/cell slices for this
        # sentence's beam_size rows.
        finite_state_machines[i].dec_states[state] = [
            dec_states[0][i * beam_size:(i + 1) * beam_size, :].clone(),
            dec_states[1][:, i * beam_size:(i + 1) * beam_size, :].clone()
        ]
        finite_state_machines[i].context[state] = context[0][
            i * beam_size:(i + 1) * beam_size, :].clone()

    for i in range(self.config['data']['max_trg_length']):
        #print(i)
        # Gather the current beam rows of every unfinished FSM.
        current_states = []
        for b in finite_state_machines:
            if not b.done:
                current_states += b.get_current_state()
        input = torch.stack(current_states).t().contiguous().view(1, -1)
        trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
        #print trg_emb.size()
        #print dec_states[0].size(), dec_states[1].size()
        #print context.size()
        trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
            trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
            context)
        dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))
        dec_out = trg_h_t.squeeze(1).view(-1, self.model.trg_hidden_dim)
        #print dec_out.size()
        out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)
        word_lk = out.view(beam_size, len(current_states),
                           -1).transpose(0, 1).contiguous()
        active = []
        cur = 0
        for b in range(batch_size):
            if finite_state_machines[b].done:
                continue
            idx = batch_idx[b]
            #print(idx, len(lines), input_lines_src.size())
            cur_state_size = finite_state_machines[b].num_states
            # print(b, cur_state_size)
            # Advance this sentence's FSM with its slice of word
            # likelihoods and decoder state; keep it in `active` while it
            # reports unfinished (advance(...) falsy).
            if not finite_state_machines[b].advance(
                    word_lk.data[cur:(cur + cur_state_size)],
                    input_lines_src[idx],
                    [
                        dec_states[0][:, cur * self.beam_size:
                                      (cur + cur_state_size) *
                                      self.beam_size, :].clone(),
                        dec_states[1][:, cur * self.beam_size:
                                      (cur + cur_state_size) *
                                      self.beam_size, :].clone()
                    ],
                    context[:, cur * self.beam_size:
                            (cur + cur_state_size) *
                            self.beam_size, :].clone()):
                active += [b]
            cur += cur_state_size
            # (commented-out per-beam state reshuffling kept for reference)
            # for dec_state in dec_states:  # iterate over h, c
            #     # layers x beam*sent x dim
            #     state_size = dec_state.size(1) * dec_state.size(3) if self.model.nlayers_trg > 1 else dec_state.size(2)
            #     sent_states = dec_state.view(
            #         -1, beam_size, len(current_states), state_size
            #     )[:, :, idx]
            #     sent_states.data.copy_(
            #         sent_states.data.index_select(
            #             1,
            #             beam[b].get_current_origin()
            #         )
            #     )
        if not active:
            break

        # in this section, the sentences that are still active are
        # compacted so that the decoder is not run on completed sentences
        active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
        batch_idx = {beam: idx for idx, beam in enumerate(active)}
        # (commented-out generic update_active helper kept for reference)
        # def update_active(t):
        #     # select only the remaining active sentences
        #     view = t.data.view(
        #         -1, remaining_sents,
        #         self.model.decoder.hidden_size
        #     )
        #     new_size = list(t.size())
        #     new_size[-2] = new_size[-2] * len(active_idx) \
        #         // remaining_sents
        #     return Variable(view.index_select(
        #         1, active_idx
        #     ).view(*new_size))
        new_dec_states0 = None
        new_dec_states1 = None
        new_context = None
        #update active dec_states: re-gather h, c and context from every
        # unfinished FSM, concatenating along the beam dimension.
        for k in range(batch_size):
            if not finite_state_machines[k].done:
                fsm_dec_states0, fsm_dec_states1 = finite_state_machines[
                    k].get_dec_states()
                if new_dec_states0 is None:
                    new_dec_states0 = fsm_dec_states0.clone()
                else:
                    new_dec_states0 = torch.cat(
                        (new_dec_states0, fsm_dec_states0.clone()), 1)
                if new_dec_states1 is None:
                    new_dec_states1 = fsm_dec_states1.clone()
                else:
                    new_dec_states1 = torch.cat(
                        (new_dec_states1.clone(), fsm_dec_states1.clone()), 1)
                fsm_context = finite_state_machines[k].get_context()
                if new_context is None:
                    new_context = fsm_context.clone()
                else:
                    new_context = torch.cat(
                        (new_context, fsm_context.clone()), 1)
        dec_states = (new_dec_states0, new_dec_states1)
        context = new_context
        remaining_sents = len(active)

    # (4) package everything up
    allHyp, allScores = [], []
    n_best = 1
    for b in range(batch_size):
        hyps, scores = finite_state_machines[b].get_hyp()
        hyps = [(hyp[0].item(), ) for hyp in hyps]
        allScores += [scores]
        allHyp += [hyps]
    return allHyp, allScores
def translate(self): """Evaluate model.""" preds = [] ground_truths = [] for j in xrange(0, len(self.src['data']), self.config['data']['batch_size']): #print 'Decoding : %d out of %d ' % (j, len(self.src['data'])) # Get source minibatch input_lines_src, output_lines_src, lens_src, mask_src = ( get_minibatch(self.src['data'], self.src['word2id'], j, self.config['data']['batch_size'], self.config['data']['max_src_length'], add_start=True, add_end=True)) input_lines_src = Variable(input_lines_src.data, volatile=True) output_lines_src = Variable(output_lines_src.data, volatile=True) mask_src = Variable(mask_src.data, volatile=True) # Get target minibatch input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = ( get_minibatch(self.trg['data'], self.trg['word2id'], j, self.config['data']['batch_size'], self.config['data']['max_trg_length'], add_start=True, add_end=True)) input_lines_trg_gold = Variable(input_lines_trg_gold.data, volatile=True) output_lines_trg_gold = Variable(output_lines_trg_gold.data, volatile=True) mask_src = Variable(mask_src.data, volatile=True) # Initialize target with <s> for every sentence input_lines_trg = Variable(torch.LongTensor( [[trg['word2id']['<s>']] for i in xrange(input_lines_src.size(0))]), volatile=True).cuda() # Decode a minibatch greedily add beam search decoding input_lines_trg = self.decode_minibatch(input_lines_src, input_lines_trg, output_lines_trg_gold) # Copy minibatch outputs to cpu and convert ids to words input_lines_trg = input_lines_trg.data.cpu().numpy() input_lines_trg = [[self.trg['id2word'][x] for x in line] for line in input_lines_trg] # Do the same for gold sentences output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy() output_lines_trg_gold = [[self.trg['id2word'][x] for x in line] for line in output_lines_trg_gold] # Process outputs for sentence_pred, sentence_real, sentence_real_src in zip( input_lines_trg, output_lines_trg_gold, output_lines_src): if '</s>' in sentence_pred: index = 
sentence_pred.index('</s>') else: index = len(sentence_pred) preds.append(['<s>'] + sentence_pred[:index + 1]) if '</s>' in sentence_real: index = sentence_real.index('</s>') else: index = len(sentence_real) ground_truths.append(['<s>'] + sentence_real[:index + 1]) #trg_preds += preds #trg_gold += all_gold for p in preds: print " ".join(p).replace('</s>', '').strip()
def predict(self, semantic_frame):
    """Generate a natural-language sentence from a dialogue semantic frame.

    :param semantic_frame: dict with 'diaact' (intent string) and either
        'request_slots' or 'inform_slots' (slot-name -> surface-word map).
    :return: the decoded sentence with the leading <s> and trailing </s>
        dropped, slot names replaced by their surface words, joined with
        no separator.
    """
    intent = semantic_frame['diaact']
    # Choose the slot set matching the dialogue-act family; the source
    # sequence is the intent followed by the slot names.
    if 'request' in semantic_frame['diaact']:
        slots = [intent] + [
            k for k in list(semantic_frame['request_slots'].keys())
        ]
        slot2word = semantic_frame['request_slots']
    else:
        slots = [intent] + [
            k for k in list(semantic_frame['inform_slots'].keys())
        ]
        slot2word = semantic_frame['inform_slots']
    src_new = {
        'data': [slots],
        'word2id': self.src_word2id,
        'id2word': self.src_id2word
    }
    # Placeholder target sentence — presumably only its ids/length drive
    # decode_minibatch below, not its content; TODO confirm.
    trg_new = {
        'data': [['人生', '好', '困難', '到底', '該', '怎麼辦', '呢']],
        'word2id': self.trg_word2id,
        'id2word': self.trg_id2word
    }
    preds = []
    # Single-example minibatches: batch size 1, max src len 10, trg len 20.
    input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
        src_new['data'], src_new['word2id'], 0, 1, 10,
        add_start=True, add_end=True)
    input_lines_trg_gold, output_lines_trg_gold, lens_trg, mask_trg = get_minibatch(
        trg_new['data'], trg_new['word2id'], 0, 1, 20,
        add_start=True, add_end=True)
    # Initialize target with <s> for every sentence
    input_lines_trg = Variable(
        torch.LongTensor([[self.trg_word2id['<s>']]
                          for i in range(input_lines_src.size(0))]))
    input_lines_trg = decode_minibatch(self.config, self.model,
                                       input_lines_src, input_lines_trg,
                                       output_lines_trg_gold)
    # Copy minibatch outputs to cpu and convert ids to words
    input_lines_trg = input_lines_trg.data.cpu().numpy()
    input_lines_trg = [[self.trg_id2word[x] for x in line]
                       for line in input_lines_trg]
    output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
    output_lines_trg_gold = [[self.trg_id2word[x] for x in line]
                             for line in output_lines_trg_gold]
    # Process outputs: cut at </s>, then map slot names back to words.
    for sentence_pred, sentence_real, sentence_real_src in zip(
            input_lines_trg, output_lines_trg_gold, output_lines_src):
        if '</s>' in sentence_pred:
            index = sentence_pred.index('</s>')
        else:
            index = len(sentence_pred)
        preds.append([
            slot2word[word] if word in slot2word.keys() else word
            for word in sentence_pred[:index + 1]
        ])
        """
        print("Predict: {}".format(' '.join(sentence_pred[:index + 1])))
        if '</s>' in sentence_real:
            index = sentence_real.index('</s>')
        else:
            index = len(sentence_real)
        print("RealAns: {}".format(' '.join(['<s>'] + sentence_real[:index + 1])))
        print('===========================================')
        """
    # Strip the <s>/</s> boundary tokens and join without spaces.
    return ''.join(preds[0][1:-1])
def decode_batch(self, idx):
    """Beam-search decode one minibatch.

    Runs the encoder once, expands encoder states beam_size times, then
    steps the decoder, advancing each sentence's beam and compacting out
    finished sentences every step.

    :param idx: start offset of the minibatch in self.src['data'].
    :return: (allHyp, allScores) — n-best hypotheses and scores per
        sentence.
    """
    # Get source minibatch
    input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
        self.src['data'], self.src_dict, idx,
        self.config['data']['batch_size'],
        self.config['data']['max_src_length'],
        add_start=True, add_end=True)
    beam_size = self.beam_size

    # (1) run the encoder on the src
    context_h, (
        context_h_t,
        context_c_t) = self.get_hidden_representation(input_lines_src)
    context_h = context_h.transpose(0, 1)  # Make things sequence first.

    # (3) run the decoder to generate sentences, using beam search
    batch_size = context_h.size(1)

    # Expand tensors for each beam.
    context = Variable(context_h.data.repeat(1, beam_size, 1))
    dec_states = [
        Variable(context_h_t.data.repeat(1, beam_size, 1)),
        Variable(context_c_t.data.repeat(1, beam_size, 1))
    ]
    beam = [
        Beam(beam_size, self.tgt_dict, cuda=True)
        for k in range(batch_size)
    ]
    dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
    dec_states[0] = dec_out
    batch_idx = list(range(batch_size))
    remaining_sents = batch_size

    for i in range(self.config['data']['max_trg_length']):
        # Stack the current top tokens of every unfinished beam.
        input = torch.stack([
            b.get_current_state() for b in beam if not b.done
        ]).t().contiguous().view(1, -1)
        trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
        trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
            trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
            context)
        dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))
        dec_out = trg_h_t.squeeze(1)
        out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)
        # (sent, beam, vocab) word likelihoods for the beam update.
        word_lk = out.view(beam_size, remaining_sents,
                           -1).transpose(0, 1).contiguous()
        active = []
        for b in range(batch_size):
            if beam[b].done:
                continue
            idx = batch_idx[b]
            # advance() returns truthy when the beam finished this step.
            if not beam[b].advance(word_lk.data[idx]):
                active += [b]
            # Reorder this sentence's decoder h/c rows to follow the
            # surviving beam back-pointers.
            for dec_state in dec_states:  # iterate over h, c
                # layers x beam*sent x dim
                sent_states = dec_state.view(-1, beam_size, remaining_sents,
                                             dec_state.size(2))[:, :, idx]
                sent_states.data.copy_(
                    sent_states.data.index_select(
                        1,
                        beam[b].get_current_origin()))
        if not active:
            break

        # in this section, the sentences that are still active are
        # compacted so that the decoder is not run on completed sentences
        active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
        batch_idx = {beam: idx for idx, beam in enumerate(active)}

        def update_active(t):
            # select only the remaining active sentences
            view = t.data.view(-1, remaining_sents,
                               self.model.decoder.hidden_size)
            new_size = list(t.size())
            new_size[-2] = new_size[-2] * len(active_idx) \
                // remaining_sents
            return Variable(
                view.index_select(1, active_idx).view(*new_size))

        dec_states = (update_active(dec_states[0]),
                      update_active(dec_states[1]))
        dec_out = update_active(dec_out)
        context = update_active(context)
        remaining_sents = len(active)

    # (4) package everything up
    allHyp, allScores = [], []
    n_best = 1
    for b in range(batch_size):
        scores, ks = beam[b].sort_best()
        allScores += [scores[:n_best]]
        hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
        allHyp += [hyps]
    return allHyp, allScores
def translate(self):
    """Greedily decode the dataset (source + GUI inputs).

    Writes one predicted sentence per line to ``self.output`` and prints
    predictions, gold references, and the final corpus BLEU score.
    """
    preds = []
    ground_truths = []
    out_put = open(self.output, 'w')
    for j in xrange(0, len(self.src['data']),
                    self.config['data']['batch_size']):
        print('Decoding : %d out of %d ' % (j, len(self.src['data'])))
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, mask_src = (
            get_minibatch(
                self.src['data'], self.src['word2id'], j,
                self.config['data']['batch_size'],
                self.config['data']['max_src_length'],
                is_gui=False, add_start=True, add_end=True
            )
        )
        #if input_lines_src.size(0) != self.config['data']['batch_size']:
        #    break
        input_lines_src = Variable(input_lines_src.data, volatile=True)
        output_lines_src = Variable(output_lines_src.data, volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, mask_src = (
            get_minibatch(
                self.trg['data'], self.trg['word2id'], j,
                self.config['data']['batch_size'],
                self.config['data']['max_trg_length'],
                is_gui=False, add_start=True, add_end=True
            )
        )
        input_lines_trg_gold = Variable(input_lines_trg_gold.data,
                                        volatile=True)
        output_lines_trg_gold = Variable(output_lines_trg_gold.data,
                                         volatile=True)
        mask_src = Variable(mask_src.data, volatile=True)

        # Get GUI minibatch (typed layout tokens).
        input_lines_gui, output_lines_gui, lens_gui, mask_gui, input_type_gui = get_minibatch(
            self.gui['data'], self.gui['word2id'], j,
            self.config['data']['batch_size'],
            self.config['data']['max_gui_length'],
            is_gui=True, add_start=True, add_end=True,
            line_types=self.gui['type']
        )
        input_lines_gui = Variable(input_lines_gui.data, volatile=True)
        output_lines_gui = Variable(output_lines_gui.data, volatile=True)
        input_type_gui = Variable(input_type_gui.data, volatile=True)
        mask_gui = Variable(mask_gui.data, volatile=True)

        # Initialize target with <s> for every sentence.
        # BUG FIX: the original read the global `trg['word2id']` here while
        # every other vocabulary lookup in this method uses self.trg.
        input_lines_trg = Variable(torch.LongTensor(
            [
                [self.trg['word2id']['<s>']]
                for i in xrange(input_lines_src.size(0))
            ]
        ), volatile=True).cuda()

        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = self.decode_minibatch(
            input_lines_src, input_lines_trg,
            input_lines_gui, input_type_gui,
            output_lines_trg_gold
        )

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [self.trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [self.trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]

        # Process outputs: truncate at </s> and write/print each pair.
        for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg, output_lines_trg_gold, output_lines_src
        ):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(sentence_pred[:index + 1])
            out_put.writelines(' '.join(sentence_pred[1:index]) + '\n')
            print('Predicted : %s ' % (' '.join(sentence_pred[1:index])))
            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            ground_truths.append(['<s>'] + sentence_real[:index + 1])
            print('-----------------------------------------------')
            print('Real : %s ' % (' '.join(sentence_real[:index])))
            print('===============================================')
    #print(preds)
    bleu_score = get_bleu(preds, ground_truths)
    print('BLEU score : %.5f ' % (bleu_score))
    out_put.close()
def main(args):
    """Fine-tune a new classification layer on a pretrained Inception graph.

    Downloads the data and pretrained model, extracts bottleneck features,
    trains the new layer with periodic checkpoints and validation runs,
    then saves the final graph and reports test accuracy.

    :param args: parsed CLI namespace with data_dir, model_dir, steps,
        batch_size, checkpoint_interval, force_rebuild, verbose.
    """
    data_utils.download_and_extract_tar(DATA_URL, args.data_dir)
    data_utils.download_and_extract_tar(tf_utils.INCEPTION_URL,
                                        args.model_dir)
    dataset = data_utils.build_dataset_object(
        os.path.join(args.data_dir, 'Images'),
        test_percent=0.1,
        training_percent=0.1,
        force_rebuild=args.force_rebuild)
    model_path = (os.path.join(args.model_dir,
                               'classify_image_graph_def.pb'))
    pretrained_model = tf_utils.PretrainedModel(model_path)
    # New softmax layer sized to the pretrained feature vector and labels.
    features_size = pretrained_model.features_tensor.get_shape()[0]
    num_labels = len(dataset['label_to_index'])
    input_tensor, label_tensor, train_step, mean_loss, accuracy = \
        add_new_layer(features_size, num_labels)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        # Resume from the latest checkpoint if one exists.
        checkpoint_state = tf.train.get_checkpoint_state(args.model_dir)
        if checkpoint_state and checkpoint_state.model_checkpoint_path:
            print('Loading checkpoint')
            saver.restore(sess, checkpoint_state.model_checkpoint_path)
        print('Setting up validation')
        # Validation features are extracted once, up front.
        images_validation, labels_validation = \
            pretrained_model.run(sess, dataset['validation'],
                                 dataset['label_to_index'], args.model_dir)
        for i in range(args.steps):
            images = data_utils.get_minibatch(dataset['train'],
                                              args.batch_size)
            features, labels = \
                pretrained_model.run(sess, images,
                                     dataset['label_to_index'],
                                     args.model_dir)
            loss, _ = sess.run([mean_loss, train_step], feed_dict={
                input_tensor: features,
                label_tensor: labels
            })
            if args.verbose:
                # \r keeps the progress line in place.
                sys.stdout.write('\rStep: %i - Loss: %f' % (i + 1, loss))
                sys.stdout.flush()
            if (i + 1) % args.checkpoint_interval == 0:
                print()
                print('Saving checkpoint - Step: %i' % (i + 1))
                checkpoint_path = os.path.join(args.model_dir,
                                               'model.checkpoint')
                saver.save(sess, checkpoint_path, global_step=i + 1)
                print('Running validation')
                validation_accuracy = sess.run(accuracy, feed_dict={
                    input_tensor: images_validation,
                    label_tensor: labels_validation
                })
                print('Validation accuracy: %f%%' %
                      (float(validation_accuracy) * 100.0))
        print()
        print('Saving model')
        saver.save(sess, os.path.join(args.model_dir, 'model.graph'))
        print('Running test')
        images_test, labels_test = \
            pretrained_model.run(sess, dataset['test'],
                                 dataset['label_to_index'], args.model_dir)
        test_accuracy = sess.run(accuracy, feed_dict={
            input_tensor: images_test,
            label_tensor: labels_test
        })
        print('Test accuracy: %f%%' % (float(test_accuracy) * 100.0))
def decode_batch(self, idx):
    """Decode one minibatch using MonteCarlo beam objects.

    Same overall structure as the plain beam-search decoder: encode once,
    expand states per beam, step the decoder, advance each sentence's
    MonteCarlo searcher, compact finished sentences.

    :param idx: start offset of the minibatch in self.src['data'].
    :return: (allHyp, allScores) — n-best hypotheses and scores per
        sentence.
    """
    # Get source minibatch
    input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
        self.src['data'], self.src_dict, idx,
        self.config['data']['batch_size'],
        self.config['data']['max_src_length'],
        add_start=True, add_end=True)
    #print(self.src_dict)
    '''
    lines = [
        ['<s>'] + line + ['</s>']
        for line in self.src['data'][idx:idx + self.config['data']['max_src_length']]
    ]
    lines = [line[:self.config['data']['max_src_length']] for line in lines]
    lens = [len(line) for line in lines]
    max_len = max(lens)
    word2ind = self.src_dict
    input_lines = [
        [word2ind[w] if w in word2ind else word2ind['<unk>'] for w in line[:-1]] +
        [word2ind['<pad>']] * (max_len - len(line))
        for line in lines
    ]
    #print(len(input_lines))
    #print(input_lines_src[0])
    '''
    #id2word_src = {v: k for k, v in self.src_dict.iteritems()}
    #inp = input_lines_src[0].data.cpu().numpy().tolist()
    #print([inv_dict[a] for a in inp])
    beam_size = self.beam_size

    # (1) run the encoder on the src
    context_h, (
        context_h_t,
        context_c_t) = self.get_hidden_representation(input_lines_src)
    context_h = context_h.transpose(0, 1)  # Make things sequence first.

    # (3) run the decoder to generate sentences, using beam search
    batch_size = context_h.size(1)

    # Expand tensors for each beam.
    context = Variable(context_h.data.repeat(1, beam_size, 1))
    #print context.size()
    dec_states = [
        Variable(context_h_t.data.repeat(1, beam_size, 1)),
        Variable(context_c_t.data.repeat(1, beam_size, 1))
    ]
    # NOTE(review): `trg` is read from module scope here, not from self —
    # confirm it is defined as a global.
    beam = [
        MonteCarlo(beam_size, self.tgt_dict, self.id2word_src,
                   trg['id2word'], self.config['data']['max_trg_length'],
                   self.model, cuda=True) for k in range(batch_size)
    ]
    dec_out = self.get_init_state_decoder(dec_states[0].squeeze(0))
    dec_states[0] = dec_out
    #print(dec_states[0].size())
    batch_idx = list(range(batch_size))
    remaining_sents = batch_size

    for i in range(self.config['data']['max_trg_length']):
        #print(i)
        # Stack the current top tokens of every unfinished beam.
        input = torch.stack([
            b.get_current_state() for b in beam if not b.done
        ]).t().contiguous().view(1, -1)
        trg_emb = self.model.trg_embedding(Variable(input).transpose(1, 0))
        #print trg_emb.size()
        #print dec_states[0].size(), dec_states[1].size()
        #print context.size()
        trg_h, (trg_h_t, trg_c_t) = self.model.decoder(
            trg_emb, (dec_states[0].squeeze(0), dec_states[1].squeeze(0)),
            context)
        dec_states = (trg_h_t.unsqueeze(0), trg_c_t.unsqueeze(0))
        dec_out = trg_h_t.squeeze(1).view(-1, self.model.trg_hidden_dim)
        #print dec_out.size()
        out = F.softmax(self.model.decoder2vocab(dec_out)).unsqueeze(0)
        word_lk = out.view(beam_size, remaining_sents,
                           -1).transpose(0, 1).contiguous()
        active = []
        cur = 0
        for b in range(batch_size):
            if beam[b].done:
                continue
            idx = batch_idx[b]
            #print(idx, len(lines), input_lines_src.size())
            # Advance with this sentence's likelihood slice plus its
            # beam-sized slices of decoder state and context.
            if not beam[b].advance(
                    word_lk.data[idx],
                    input_lines_src[idx],
                    [
                        dec_states[0][:, cur * self.beam_size:(cur + 1) *
                                      self.beam_size, :],
                        dec_states[1][:, cur * self.beam_size:(cur + 1) *
                                      self.beam_size, :]
                    ],
                    context[:, cur * self.beam_size:(cur + 1) *
                            self.beam_size, :]):
                active += [b]
            cur += 1
            # Reorder this sentence's decoder h/c rows to follow the
            # surviving beam back-pointers.
            for dec_state in dec_states:  # iterate over h, c
                # layers x beam*sent x dim
                #print dec_state.size(1), dec_state.size(2), dec_state.size(3)
                state_size = dec_state.size(1) * dec_state.size(
                    3) if self.model.nlayers_trg > 1 else dec_state.size(2)
                sent_states = dec_state.view(-1, beam_size, remaining_sents,
                                             state_size)[:, :, idx]
                sent_states.data.copy_(
                    sent_states.data.index_select(
                        1,
                        beam[b].get_current_origin()))
        if not active:
            break

        # in this section, the sentences that are still active are
        # compacted so that the decoder is not run on completed sentences
        active_idx = torch.cuda.LongTensor([batch_idx[k] for k in active])
        batch_idx = {beam: idx for idx, beam in enumerate(active)}

        def update_active(t):
            # select only the remaining active sentences
            view = t.data.view(-1, remaining_sents,
                               self.model.decoder.hidden_size)
            new_size = list(t.size())
            new_size[-2] = new_size[-2] * len(active_idx) \
                // remaining_sents
            return Variable(
                view.index_select(1, active_idx).view(*new_size))

        dec_states = (update_active(dec_states[0]),
                      update_active(dec_states[1]))
        dec_out = update_active(dec_out)
        context = update_active(context)
        remaining_sents = len(active)

    # (4) package everything up
    allHyp, allScores = [], []
    n_best = 1
    for b in range(batch_size):
        scores, ks = beam[b].sort_best()
        #print(ks)
        allScores += [scores[:n_best]]
        hyps = zip(*[beam[b].get_hyp(k) for k in ks[:n_best]])
        #print "Element in batch " + str(hyps)
        #print(hyps)
        allHyp += [hyps]
    return allHyp, allScores
def evaluate_model(
        model, src, src_test, trg, trg_test, config,
        src_valid=None, trg_valid=None, verbose=True,
        metric='bleu', use_cuda=False
):
    """Greedy-decode the test set and return the corpus BLEU score.

    :param model: the seq2seq model object
    :param src: dict with the source 'word2id' vocabulary
    :param src_test: dict with 'data' holding source token sequences
    :param trg: dict with the target 'word2id'/'id2word' vocabularies
    :param trg_test: dict with 'data' holding target token sequences
    :param config: the config object; only the 'data' section is read
    :param src_valid: unused, kept for interface compatibility
    :param trg_valid: unused, kept for interface compatibility
    :param verbose: unused in this body, kept for interface compatibility
    :param metric: unused in this body; BLEU is always computed
    :param use_cuda: move the decoder-input tensor to GPU when True
    :return: BLEU score from get_bleu(preds, ground_truths)
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Get source minibatch
        input_lines_src, output_lines_src, lens_src, _ = get_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'],
            config['data']['max_src_length'],
            add_start=True, add_end=True, use_cuda=use_cuda)

        # Get target minibatch
        input_lines_trg_gold, output_lines_trg_gold, lens_src, _ = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'],
                config['data']['max_trg_length'],
                add_start=True, add_end=True, use_cuda=use_cuda
            ))

        # Initialize target with <s> for every sentence
        input_lines_trg = Variable(torch.LongTensor(
            [
                [trg['word2id']['<s>']]
                for i in range(input_lines_src.size(0))
            ]
        ))
        if use_cuda:
            input_lines_trg = input_lines_trg.cuda()

        # print("input_lines_src: ", input_lines_src.size(), "input_lines_trg: ", input_lines_trg.size())
        # input_lines_src: [80, 49], "input_lines_trg: " [80, 1]

        # Decode a minibatch greedily __TODO__ add beam search decoding
        input_lines_trg = decode_minibatch(
            config, model,
            input_lines_src, input_lines_trg,
            output_lines_trg_gold,
            use_cuda=use_cuda
        )

        # save gpu memory (in vain)
        input_lines_src = input_lines_src.data.cpu().numpy()
        del input_lines_src
        output_lines_src = output_lines_src.data.cpu().numpy()
        input_lines_trg_gold = input_lines_trg_gold.data.cpu().numpy()
        del input_lines_trg_gold

        # Copy minibatch outputs to cpu and convert ids to words
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line]
            for line in input_lines_trg
        ]

        # Do the same for gold sentences
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]

        print("input_lines_trg: ", input_lines_trg[0])
        print("the length of a sent", len(input_lines_trg[0]))

        # Process outputs
        for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg, output_lines_trg_gold, output_lines_src
        ):
            # Strip the <s>/</s> markers to build the complete sentence
            # for BLEU scoring.
            if '<s>' in sentence_pred:
                index = sentence_pred.index('<s>')
                sentence_pred = sentence_pred[index+1:]
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
                sentence_pred = sentence_pred[:index]
            preds.append(sentence_pred)
            if '<s>' in sentence_real:
                index = sentence_real.index('<s>')
                sentence_real = sentence_real[index+1:]
            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
                sentence_real = sentence_real[: index]
            ground_truths.append(sentence_real)
    print("call the get_bleu method to calc bleu score.....")
    print("preds: ", preds[0])
    print("ground_truths: ", ground_truths[0])
    return get_bleu(preds, ground_truths)
batch_index = range(0, len(src['data']), batch_size) for i in xrange(1000): losses = [] #flag_continue = False shuffle(batch_index) # flag_continue = False for j in batch_index: #--------------------------------------------------------------- # get mini batch input_lines_src, _, lens_src, mask_src = get_minibatch(src['data'], src['word2id'], j, batch_size, max_length_src, is_gui=False, add_start=True, add_end=False) #if input_lines_src.size(0) != batch_size: #flag_continue = True #break input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch( trg['data'], trg['word2id'], j, batch_size, max_length_trg, is_gui=False,
def evaluate_model(
    model, src, src_test, trg, trg_test, config,
    src_valid=None, trg_valid=None, verbose=True,
    metric='bleu'
):
    """Greedily decode the test set and return its corpus BLEU score.

    Modernized from Python-2-only syntax (``xrange``, print statements) to
    match the Python 3 style used elsewhere in this file; logic is unchanged.

    :param model: seq2seq model callable as ``model(src_batch, trg_batch)``
    :param src: source vocabulary dict with a 'word2id' mapping
    :param src_test: source test set; dict with a 'data' list of sentences
    :param trg: target vocabulary dict with 'word2id' and 'id2word' mappings
    :param trg_test: target test set; dict with a 'data' list of sentences
    :param config: config dict; reads batch size and max lengths under 'data'
    :param src_valid: unused; kept for interface compatibility
    :param trg_valid: unused; kept for interface compatibility
    :param verbose: if True, print each decoded and gold sentence
    :param metric: unused; kept for interface compatibility
    :return: corpus-level BLEU of greedy decodes against gold references
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Get source minibatch.
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'], config['data']['max_src_length'],
            add_start=True, add_end=True
        )

        # Get gold target minibatch (lens/mask unused).
        input_lines_trg_gold, output_lines_trg_gold, _, _ = (
            get_minibatch(
                trg_test['data'], trg['word2id'], j,
                config['data']['batch_size'],
                config['data']['max_trg_length'],
                add_start=True, add_end=True
            )
        )

        # Initialize target with <s> for every sentence in the batch.
        input_lines_trg = Variable(torch.LongTensor(
            [[trg['word2id']['<s>']] for _ in range(input_lines_src.size(0))]
        ))

        # Decode a minibatch greedily __TODO__ add beam search decoding.
        input_lines_trg = decode_minibatch(
            config, model, input_lines_src,
            input_lines_trg, output_lines_trg_gold
        )

        # Copy minibatch outputs to cpu and convert ids to words.
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [
            [trg['id2word'][x] for x in line] for line in input_lines_trg
        ]

        # Do the same for gold sentences.
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [
            [trg['id2word'][x] for x in line]
            for line in output_lines_trg_gold
        ]

        # Truncate each sentence at its first '</s>' (kept inclusive) and
        # re-prepend '<s>', matching the formatting get_bleu expects here.
        for sentence_pred, sentence_real, sentence_real_src in zip(
            input_lines_trg, output_lines_trg_gold, output_lines_src
        ):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(['<s>'] + sentence_pred[:index + 1])
            if verbose:
                print(' '.join(['<s>'] + sentence_pred[:index + 1]))

            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            ground_truths.append(['<s>'] + sentence_real[:index + 1])
            if verbose:
                print(' '.join(['<s>'] + sentence_real[:index + 1]))
                print('--------------------------------------')

    return get_bleu(preds, ground_truths)
def evaluate_model(model, src, src_test, trg, trg_test, config,
                   src_valid=None, trg_valid=None, verbose=True,
                   metric='bleu'):
    """Greedy-decode the test set on GPU and return its corpus BLEU score.

    Bug fixed: the gold-target minibatch was clipped with
    ``config['data']['max_src_length']``; it now uses ``max_trg_length`` as
    the other evaluation paths in this file do. Also removes a dead,
    unused '</s>'-index computation on the source sentence, and updates
    Python-2 print statements / ``xrange`` to Python 3 equivalents.

    :param model: seq2seq model with ``model(src, trg)`` and ``model.decode``
    :param src: source vocabulary dict with a 'word2id' mapping
    :param src_test: source test set; dict with a 'data' list of sentences
    :param trg: target vocabulary dict with 'word2id' and 'id2word' mappings
    :param trg_test: target test set; dict with a 'data' list of sentences
    :param config: config dict; reads batch size and max lengths under 'data'
    :param src_valid: unused; kept for interface compatibility
    :param trg_valid: unused; kept for interface compatibility
    :param verbose: if True, print each decoded and gold sentence
    :param metric: unused; kept for interface compatibility
    :return: corpus-level BLEU of greedy decodes against gold references
    """
    preds = []
    ground_truths = []
    for j in range(0, len(src_test['data']), config['data']['batch_size']):
        # Source minibatch.
        input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(
            src_test['data'], src['word2id'], j,
            config['data']['batch_size'], config['data']['max_src_length'],
            add_start=True, add_end=True)

        # Gold target minibatch — clipped to the *target* length limit.
        input_lines_trg_gold, output_lines_trg_gold, _, _ = get_minibatch(
            trg_test['data'], trg['word2id'], j,
            config['data']['batch_size'], config['data']['max_trg_length'],
            add_start=True, add_end=True)

        # Seed the decoder with <s> for every sentence in the batch.
        input_lines_trg = Variable(torch.LongTensor(
            [[trg['word2id']['<s>']]
             for _ in range(input_lines_src.size(0))])).cuda()

        # Greedy decoding: repeatedly feed the growing prefix and append the
        # argmax token of the last position.
        for _ in range(config['data']['max_src_length']):
            decoder_logit = model(input_lines_src, input_lines_trg)
            word_probs = model.decode(decoder_logit)
            decoder_argmax = word_probs.data.cpu().numpy().argmax(axis=-1)
            next_preds = Variable(
                torch.from_numpy(decoder_argmax[:, -1])).cuda()
            input_lines_trg = torch.cat(
                (input_lines_trg, next_preds.unsqueeze(1)), 1)

        # Convert prediction ids to words.
        input_lines_trg = input_lines_trg.data.cpu().numpy()
        input_lines_trg = [[trg['id2word'][x] for x in line]
                           for line in input_lines_trg]

        # Do the same for gold sentences.
        output_lines_trg_gold = output_lines_trg_gold.data.cpu().numpy()
        output_lines_trg_gold = [[trg['id2word'][x] for x in line]
                                 for line in output_lines_trg_gold]

        # Truncate at the first '</s>' (kept inclusive) and re-prepend '<s>'.
        for sentence_pred, sentence_real, sentence_real_src in zip(
                input_lines_trg, output_lines_trg_gold, output_lines_src):
            if '</s>' in sentence_pred:
                index = sentence_pred.index('</s>')
            else:
                index = len(sentence_pred)
            preds.append(['<s>'] + sentence_pred[:index + 1])
            if verbose:
                print(' '.join(['<s>'] + sentence_pred[:index + 1]))

            if '</s>' in sentence_real:
                index = sentence_real.index('</s>')
            else:
                index = len(sentence_real)
            ground_truths.append(['<s>'] + sentence_real[:index + 1])
            if verbose:
                print(' '.join(['<s>'] + sentence_real[:index + 1]))
                print('--------------------------------------')

    return get_bleu(preds, ground_truths)
# NOTE(review): fragment — the opening `if config['training']['optimizer'] ==
# ...:` branch header lies before this chunk, and the training loop below is
# cut off before any loss.backward()/optimizer.step() call.

# First (truncated) branch: Adam with the configured learning rate.
lr = config['training']['lrate']
optimizer = optim.Adam(model.parameters(), lr=lr)
elif config['training']['optimizer'] == 'adadelta':
    # Adadelta is used without an explicit learning rate here.
    optimizer = optim.Adadelta(model.parameters())
elif config['training']['optimizer'] == 'sgd':
    lr = config['training']['lrate']
    optimizer = optim.SGD(model.parameters(), lr=lr)
else:
    raise NotImplementedError("Learning method not recommend for task")

for i in range(1000):  # epoch count is hard-coded
    losses = []
    for j in range(0, len(src['data']), batch_size):
        # Source side: <s> prepended, no </s> appended.
        input_lines_src, _, lens_src, mask_src = get_minibatch(
            src['data'], src['word2id'], j, batch_size, max_length_src,
            add_start=True, add_end=False
        )
        # Target side: both <s> and </s> markers.
        input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch(
            trg['data'], trg['word2id'], j, batch_size, max_length_trg,
            add_start=True, add_end=True
        )
        # Teacher forcing: gold target prefix in, next-token logits out.
        decoder_logit = model(input_lines_src, input_lines_trg)
        optimizer.zero_grad()
        # Flatten (batch, seq, vocab) logits against flattened gold ids.
        loss = loss_criterion(
            decoder_logit.contiguous().view(-1, vocab_size),
            output_lines_trg.view(-1)
        )
        losses.append(loss.data[0])
""" optimizer = optim.Adam(model.parameters(), lr=config['training']['lrate']) epoch = 1000 for i in range(epoch): losses = [] for j in range(0, len(src_train['data']), batch_size): slots = src_train['data'][j][1:] shuffle(slots) src_train['data'][j][1:] = slots input_lines_src, output_lines_src, lens_src, mask_src = get_minibatch(src_train['data'], src_train['word2id'], j, batch_size, config['data']['max_src_length']) input_lines_trg, output_lines_trg, lens_trg, mask_trg = get_minibatch(trg_train['data'], trg_train['word2id'], j, batch_size, config['data']['max_trg_length']) decoder_logit = model(input_lines_src, input_lines_trg) optimizer.zero_grad() loss = loss_criterion( decoder_logit.contiguous().view(-1, trg_vocab_size), output_lines_trg.view(-1) ) losses.append(loss.data[0]) loss.backward() optimizer.step()