# Grow the lemma vocabulary on first sight of each lemma.
if lemma not in lemma_to_ix:
    lemma_to_ix[lemma] = len(lemma_to_ix)
    ix_to_lemma.append(lemma)

#############################################
## tags
tags_info = Tag(tag_info_file, ix_to_lemma)
SOS = tags_info.SOS
EOS = tags_info.EOS
mask_pool = StructuredMask(tags_info)
##############################################
##
#mask_info = Mask(tags)
#############################################

# Pretrained embeddings: index 0 is reserved for UNK with an all-zero vector.
pretrain_to_ix = {UNK: 0}
pretrain_embeddings = [[0. for i in range(100)]]  # zero vector for UNK
pretrain_data = readpretrain(pretrain_file)
for one in pretrain_data:
    pretrain_to_ix[one[0]] = len(pretrain_to_ix)
    pretrain_embeddings.append([float(a) for a in one[1:]])
print "pretrain dict size:", len(pretrain_to_ix)

dev_data = readfile(dev_file)
tst_data = readfile(tst_file)
print "word dict size: ", len(word_to_ix)
print "lemma dict size: ", len(lemma_to_ix)
print "global tag dict size: ", tags_info.tag_size

# Model hyperparameters.
WORD_EMBEDDING_DIM = 64
PRETRAIN_EMBEDDING_DIM = 100
LEMMA_EMBEDDING_DIM = 32
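
# A minimal sketch of the readpretrain helper used above: the real
# implementation lives elsewhere in the repo and is not shown in this
# section, so this version is an assumption inferred from the call sites,
# which treat each row of sskip.100.vectors as a token followed by its
# 100 embedding values, whitespace-separated.
def readpretrain(filename):
    data = []
    for line in open(filename):
        parts = line.strip().split()
        if len(parts) == 101:  # token + 100-dim vector; skip malformed rows
            data.append(parts)
    return data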
def load_model(self, data_dir):
    pretrain_file = data_dir + "/sskip.100.vectors"
    tag_info_file = data_dir + "/tag.info"
    word_list_file = data_dir + "/word.list"
    lemma_list_file = data_dir + "/lemma.list"
    model_file = data_dir + "/model"
    UNK = "<UNK>"

    # Rebuild the word and lemma vocabularies from the saved lists so that
    # indices match the ones used at training time.
    self.word_to_ix = {}
    self.lemma_to_ix = {}
    self.ix_to_lemma = []
    ix_to_word = []
    for line in open(word_list_file):
        line = line.strip()
        self.word_to_ix[line] = len(ix_to_word)
        ix_to_word.append(line)
    for line in open(lemma_list_file):
        line = line.strip()
        self.lemma_to_ix[line] = len(self.ix_to_lemma)
        self.ix_to_lemma.append(line)

    #############################################
    ## tags
    self.tags_info = Tag(tag_info_file, self.ix_to_lemma)
    SOS = self.tags_info.SOS
    EOS = self.tags_info.EOS
    self.outer_mask_pool = OuterMask(self.tags_info)
    self.rel_mask_pool = RelationMask(self.tags_info)
    self.var_mask_pool = VariableMask(self.tags_info)
    ##############################################
    ##
    #mask_info = Mask(tags)
    #############################################

    # Pretrained embeddings: index 0 is reserved for UNK with a zero vector.
    self.pretrain_to_ix = {UNK: 0}
    self.pretrain_embeddings = [[0. for i in range(100)]]  # for UNK
    pretrain_data = readpretrain(pretrain_file)
    for one in pretrain_data:
        self.pretrain_to_ix[one[0]] = len(self.pretrain_to_ix)
        self.pretrain_embeddings.append([float(a) for a in one[1:]])
    print "pretrain dict size:", len(self.pretrain_to_ix)
    print "word dict size: ", len(self.word_to_ix)
    print "lemma dict size: ", len(self.lemma_to_ix)
    print "global tag (w/o variables) dict size: ", self.tags_info.k_rel_start
    print "global tag (w variables) dict size: ", self.tags_info.tag_size

    # Model hyperparameters; these must match the values used at training
    # time or load_state_dict will fail on shape mismatches.
    WORD_EMBEDDING_DIM = 64
    PRETRAIN_EMBEDDING_DIM = 100
    LEMMA_EMBEDDING_DIM = 32
    TAG_DIM = 128
    INPUT_DIM = 100
    ENCODER_HIDDEN_DIM = 256
    DECODER_INPUT_DIM = 128
    ATTENTION_HIDDEN_DIM = 256

    self.encoder = EncoderRNN(len(self.word_to_ix), WORD_EMBEDDING_DIM,
                              len(self.pretrain_to_ix), PRETRAIN_EMBEDDING_DIM,
                              torch.FloatTensor(self.pretrain_embeddings),
                              len(self.lemma_to_ix), LEMMA_EMBEDDING_DIM,
                              INPUT_DIM, ENCODER_HIDDEN_DIM,
                              n_layers=2, dropout_p=0.1)
    self.decoder = AttnDecoderRNN(self.outer_mask_pool, self.rel_mask_pool,
                                  self.var_mask_pool, self.tags_info,
                                  TAG_DIM, DECODER_INPUT_DIM,
                                  ENCODER_HIDDEN_DIM, ATTENTION_HIDDEN_DIM,
                                  n_layers=1, dropout_p=0.1)

    # Restore trained weights from the checkpoint.
    check_point = torch.load(model_file)
    self.encoder.load_state_dict(check_point["encoder"])
    self.decoder.load_state_dict(check_point["decoder"])

    if use_cuda:
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
    print "GPU", use_cuda
    return
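
# A minimal sketch of the training-side counterpart: the checkpoint format is
# not shown in this section, so this is an assumption based only on the keys
# load_model reads back ("encoder" and "decoder"). Saving state_dicts rather
# than whole module objects keeps the checkpoint independent of the class
# definitions.
check_point = {"encoder": encoder.state_dict(),
               "decoder": decoder.state_dict()}
torch.save(check_point, data_dir + "/model")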