Example #1 (score: 0)
File: lookuptable.py — Project: tamhd/corelm
	def initialize(self, emb_path, vocab_path):
		"""Seed the embedding matrix with pretrained word vectors.

		For every row (word id) of self.emb_matrix, look the word up in the
		vocabulary and, when the pretrained reader has a vector for it, copy
		that vector into the row. Rows without a pretrained vector keep their
		current (random) initialization.

		emb_path   -- path to the pretrained word-embeddings file
		vocab_path -- path to the vocabulary file
		"""
		L.info('Initializing lookup table')
		vm = VocabManager(vocab_path)
		w2v = W2VEmbReader(emb_path)
		# The pretrained dimension must match the table's column count.
		U.xassert(w2v.get_emb_dim() == self.emb_matrix.shape[1], 'The embeddings dimension does not match with the given word embeddings')
		for i in range(self.emb_matrix.shape[0]):
			# presumably returns None for out-of-vocabulary words — TODO confirm
			vec = w2v.get_emb_given_word(vm.get_word_given_id(i))
			# Fix: explicit None check. `if vec:` raises ValueError when vec is
			# a numpy array, and would silently skip a legitimate all-zero vector.
			if vec is not None:
				self.emb_matrix[i] = vec
Example #2 (score: 0)
 def initialize(self, emb_path, vocab_path):
     """Seed the embedding matrix with pretrained word vectors.

     For every row (word id) of self.emb_matrix, look the word up in the
     vocabulary and, when the pretrained reader has a vector for it, copy
     that vector into the row. Rows without a pretrained vector keep their
     current (random) initialization.

     emb_path   -- path to the pretrained word-embeddings file
     vocab_path -- path to the vocabulary file
     """
     L.info('Initializing lookup table')
     vm = VocabManager(vocab_path)
     w2v = W2VEmbReader(emb_path)
     # The pretrained dimension must match the table's column count.
     U.xassert(
         w2v.get_emb_dim() == self.emb_matrix.shape[1],
         'The embeddings dimension does not match with the given word embeddings'
     )
     for i in range(self.emb_matrix.shape[0]):
         # presumably returns None for out-of-vocabulary words — TODO confirm
         vec = w2v.get_emb_given_word(vm.get_word_given_id(i))
         # Fix: explicit None check. `if vec:` raises ValueError when vec is
         # a numpy array, and would silently skip a legitimate all-zero vector.
         if vec is not None:
             self.emb_matrix[i] = vec
Example #3 (score: 0)
#

from dlm.io.ngramsReader import NgramsReader
from dlm.io.vocabReader import VocabManager

# Build the n-gram test set and its vocabulary from the command-line args.
testset = NgramsReader(
    dataset_path=args.input_path,
    ngram_size=classifier.ngram_size,
    vocab_path=args.vocab_path,
)
vocab = VocabManager(args.vocab_path)

## Loading restricted vocab
# When a restricted-vocabulary file is given, read one word per line and
# resolve the words to ids; otherwise both lists stay empty (no restriction).
restricted_ids = []
restricted_vocab = []
if args.restricted_vocab_path:
    with open(args.restricted_vocab_path) as restricted_vocab_file:
        restricted_vocab = [line.strip() for line in restricted_vocab_file]
    restricted_ids = vocab.get_ids_given_word_list(restricted_vocab)


#########################
## Compiling theano function
#

evaluator = eval.Evaluator(testset, classifier)


# Classify every test sample and write the predicted word, one per line.
if args.output_path:
    with open(args.output_path, "w") as output:
        num_samples = testset._get_num_samples()
        for sample_index in xrange(num_samples):
            predicted_id = evaluator.get_class(sample_index, restricted_ids)
            output.write(vocab.get_word_given_id(predicted_id) + "\n")
Example #4 (score: 0)
#

from dlm.io.ngramsReader import NgramsReader
from dlm.io.vocabReader import VocabManager

# Build the n-gram test set and its vocabulary from the command-line args.
testset = NgramsReader(dataset_path=args.input_path,
                       ngram_size=classifier.ngram_size,
                       vocab_path=args.vocab_path)
vocab = VocabManager(args.vocab_path)

## Loading restricted vocab
# When a restricted-vocabulary file is given, read one word per line and
# resolve the words to ids; otherwise both lists stay empty (no restriction).
restricted_ids = []
restricted_vocab = []
if args.restricted_vocab_path:
    with open(args.restricted_vocab_path) as restricted_vocab_file:
        for line in restricted_vocab_file:
            restricted_vocab.append(line.strip())
    restricted_ids = vocab.get_ids_given_word_list(restricted_vocab)

#########################
## Compiling theano function
#

evaluator = eval.Evaluator(testset, classifier)

# Classify every test sample and write the predicted word, one per line.
# NOTE(review): `xrange` means this script targets Python 2.
if args.output_path:
    with open(args.output_path, 'w') as output:
        for i in xrange(testset._get_num_samples()):
            out = evaluator.get_class(i, restricted_ids)
            output.write(vocab.get_word_given_id(out) + '\n')