# Switch `net` to evaluation mode before running generation.
net.eval()

# Load the parse generator network: build it from the `config_args` stored in
# `parse_model`, move it to the GPU when one was requested (args.gpu >= 0),
# then restore the weights from `state_dict` and switch it to eval mode.
parse_args = parse_model['config_args']
parse_net = ParseNet(parse_args.d_nt, parse_args.d_hid, len(parse_gen_voc))
if args.gpu >= 0:
    parse_net.cuda()
parse_net.load_state_dict(parse_model['state_dict'])
parse_net.eval()

# Encode templates: each template is a whitespace-separated token string.
# Tokens are mapped through `parse_gen_voc` into one row of an int32 matrix
# whose width is the longest template; shorter rows keep their trailing zeros.
# Raises ValueError (from max) if `templates` is empty and KeyError if a
# token is missing from `parse_gen_voc`, exactly as before.
template_tokens = [template.split() for template in templates]
template_lens = [len(tokens) for tokens in template_tokens]
np_templates = np.zeros((len(templates), max(template_lens)), dtype='int32')
for z, tokens in enumerate(template_tokens):
    np_templates[z, :len(tokens)] = [parse_gen_voc[w] for w in tokens]

# Build the tensors once on the CPU and move them to the GPU only when
# requested, instead of duplicating the whole expression per device.
tp_templates = torch.from_numpy(np_templates).long()
tp_template_lens = torch.from_numpy(
    np.array(template_lens, dtype='int32')).long()
if args.gpu >= 0:
    tp_templates = tp_templates.cuda()
    tp_template_lens = tp_template_lens.cuda()
tp_templates = Variable(tp_templates)

# Instantiate the BPE segmenter. The codes and vocabulary files are opened in
# a `with` block so both handles are closed once the segmenter is built (the
# previous code leaked them).
# NOTE(review): this relies on read_vocabulary() and BPE() consuming their
# streams during the call, as subword-nmt's apply_bpe does -- confirm for the
# version bundled with this project.
with codecs.open(args.bpe_codes, encoding='utf-8') as bpe_codes, \
        codecs.open(args.bpe_vocab, encoding='utf-8') as bpe_vocab_file:
    bpe_vocab = read_vocabulary(bpe_vocab_file, args.bpe_vocab_thresh)
    bpe = BPE(bpe_codes, '@@', bpe_vocab, None)

# paraphrase the sst!
encode_data(out_file=args.out_file)
parse_net.eval() # encode templates template_lens = [len(x.split()) for x in templates] np_templates = np.zeros((len(templates), max(template_lens)), dtype='int32') for z, template in enumerate(templates): np_templates[z, :template_lens[z]] = [ parse_gen_voc[w] for w in templates[z].split() ] tp_templates = Variable(torch.from_numpy(np_templates).long().cuda()) tp_template_lens = torch.from_numpy(np.array(template_lens, dtype='int32')).long().cuda() # instantiate BPE segmenter bpe_codes = codecs.open(bpe_codes, encoding='utf-8') bpe_vocab = codecs.open(bpe_vocab, encoding='utf-8') bpe_vocab = read_vocabulary(bpe_vocab, bpe_vocab_thresh) bpe = BPE(bpe_codes, '@@', bpe_vocab, None) def reverse_bpe(sent): x = [] cache = '' for w in sent: if w.endswith('@@'): cache += w.replace('@@', '') elif cache != '': x.append(cache + w) cache = '' else: x.append(w)