if lemma not in lemma_to_ix:
    lemma_to_ix[lemma] = len(lemma_to_ix)
    ix_to_lemma.append(lemma)
#############################################
## tags
tags_info = Tag(tag_info_file, ix_to_lemma)
SOS = tags_info.SOS
EOS = tags_info.EOS
mask_pool = StructuredMask(tags_info)
##############################################
##
#mask_info = Mask(tags)
#############################################
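# `readpretrain` is defined elsewhere in this project; the following is only a
# minimal sketch of the assumed behaviour (each line of the sskip vector file
# holding a token followed by its 100 vector components, whitespace-separated),
# not the project's confirmed implementation:
def readpretrain(filename):
    data = []
    for line in open(filename):
        data.append(line.strip().split())
    return data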
pretrain_to_ix = {UNK: 0}
pretrain_embeddings = [[0. for i in range(100)]]  # for UNK
pretrain_data = readpretrain(pretrain_file)
for one in pretrain_data:
    pretrain_to_ix[one[0]] = len(pretrain_to_ix)
    pretrain_embeddings.append([float(a) for a in one[1:]])
print "pretrain dict size:", len(pretrain_to_ix)

dev_data = readfile(dev_file)
tst_data = readfile(tst_file)

print "word dict size: ", len(word_to_ix)
print "lemma dict size: ", len(lemma_to_ix)
print "global tag dict size: ", tags_info.tag_size

WORD_EMBEDDING_DIM = 64
PRETRAIN_EMBEDDING_DIM = 100
LEMMA_EMBEDDING_DIM = 32
Example #2
    def load_model(self, data_dir):

        pretrain_file = data_dir+"/sskip.100.vectors"
        tag_info_file = data_dir+"/tag.info"
        word_list_file = data_dir+"/word.list"
        lemma_list_file = data_dir+"/lemma.list"
        model_file = data_dir+"/model"
        UNK = "<UNK>"

        self.word_to_ix = {}
        self.lemma_to_ix = {}
        self.ix_to_lemma = []
        ix_to_word = []
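        # word.list and lemma.list hold one entry per line; the 0-based line
        # number becomes the entry's index in word_to_ix / lemma_to_ix.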

        for line in open(word_list_file):
            line = line.strip()
            self.word_to_ix[line] = len(ix_to_word)
            ix_to_word.append(line)

        for line in open(lemma_list_file):
            line = line.strip()
            self.lemma_to_ix[line] = len(self.ix_to_lemma)
            self.ix_to_lemma.append(line)


#############################################
## tags
        self.tags_info = Tag(tag_info_file, self.ix_to_lemma)
        SOS = self.tags_info.SOS
        EOS = self.tags_info.EOS
        self.outer_mask_pool = OuterMask(self.tags_info)
        self.rel_mask_pool = RelationMask(self.tags_info)
        self.var_mask_pool = VariableMask(self.tags_info)
##############################################
##
#mask_info = Mask(tags)
#############################################
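        # Index 0 of the pretrained sskip vocabulary is reserved for <UNK> and
        # is paired with an all-zero 100-dimensional embedding.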
        self.pretrain_to_ix = {UNK:0}
        self.pretrain_embeddings = [ [0. for i in range(100)] ] # for UNK 
        pretrain_data = readpretrain(pretrain_file)
        for one in pretrain_data:
            self.pretrain_to_ix[one[0]] = len(self.pretrain_to_ix)
            self.pretrain_embeddings.append([float(a) for a in one[1:]])
    
	print "pretrain dict size:", len(self.pretrain_to_ix)


    	print "word dict size: ", len(self.word_to_ix)
    	print "lemma dict size: ", len(self.lemma_to_ix)
    	print "global tag (w/o variables) dict size: ", self.tags_info.k_rel_start
    	print "global tag (w variables) dict size: ", self.tags_info.tag_size

    	WORD_EMBEDDING_DIM = 64
    	PRETRAIN_EMBEDDING_DIM = 100
    	LEMMA_EMBEDDING_DIM = 32
    	TAG_DIM = 128
    	INPUT_DIM = 100
    	ENCODER_HIDDEN_DIM = 256
    	DECODER_INPUT_DIM = 128
    	ATTENTION_HIDDEN_DIM = 256

        # Rebuild the encoder and decoder with the training-time hyper-parameters;
        # their weights are restored from the saved checkpoint below.
        self.encoder = EncoderRNN(len(self.word_to_ix), WORD_EMBEDDING_DIM, len(self.pretrain_to_ix), PRETRAIN_EMBEDDING_DIM, torch.FloatTensor(self.pretrain_embeddings),
                                  len(self.lemma_to_ix), LEMMA_EMBEDDING_DIM, INPUT_DIM, ENCODER_HIDDEN_DIM, n_layers=2, dropout_p=0.1)
        self.decoder = AttnDecoderRNN(self.outer_mask_pool, self.rel_mask_pool, self.var_mask_pool, self.tags_info, TAG_DIM, DECODER_INPUT_DIM,
                                      ENCODER_HIDDEN_DIM, ATTENTION_HIDDEN_DIM, n_layers=1, dropout_p=0.1)
    
        check_point = torch.load(model_file)
        self.encoder.load_state_dict(check_point["encoder"])
        self.decoder.load_state_dict(check_point["decoder"])
        if use_cuda:
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()

    	print "GPU", use_cuda

    	return
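
# Usage sketch: `load_model` is a method of a parser class whose name is not
# shown in this snippet; the class name `Parser` and the data directory path
# below are placeholders for illustration only.
parser = Parser()
parser.load_model("path/to/data_dir")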