def initialize(self, emb_path, vocab_path):
    """Initialize the lookup-table rows from pre-trained word embeddings.

    For every row index i of self.emb_matrix, looks up the corresponding
    vocabulary word and, if the embedding reader has a vector for it,
    overwrites that row with the pre-trained vector. Rows for words not
    covered by the embeddings file are left untouched.

    Args:
        emb_path: path to the pre-trained word-embeddings file.
        vocab_path: path to the vocabulary file (id -> word mapping).
    """
    L.info('Initializing lookup table')
    vm = VocabManager(vocab_path)
    w2v = W2VEmbReader(emb_path)
    U.xassert(w2v.get_emb_dim() == self.emb_matrix.shape[1], 'The embeddings dimension does not match with the given word embeddings')
    for i in range(self.emb_matrix.shape[0]):
        vec = w2v.get_emb_given_word(vm.get_word_given_id(i))
        # BUGFIX: was `if vec:`. Truth-testing is wrong here — it raises
        # ValueError if the reader returns a numpy array, and it silently
        # skips a legitimate all-zero embedding. Only a missing word
        # (presumably signalled by None — confirm against W2VEmbReader)
        # should be skipped.
        if vec is not None:
            self.emb_matrix[i] = vec
def initialize(self, emb_path, vocab_path):
    """Load pre-trained word embeddings into this lookup table.

    Each row of self.emb_matrix whose vocabulary word has a pre-trained
    vector is replaced by that vector; all other rows keep their current
    (random) initialization.

    Args:
        emb_path: path to the pre-trained word-embeddings file.
        vocab_path: path to the vocabulary file (id -> word mapping).
    """
    L.info('Initializing lookup table')
    vocab = VocabManager(vocab_path)
    reader = W2VEmbReader(emb_path)
    U.xassert(
        reader.get_emb_dim() == self.emb_matrix.shape[1],
        'The embeddings dimension does not match with the given word embeddings'
    )
    for row in range(self.emb_matrix.shape[0]):
        vec = reader.get_emb_given_word(vocab.get_word_given_id(row))
        # BUGFIX: replaced `if vec:` with an explicit None check.
        # `if vec:` raises ValueError for a numpy-array return and would
        # also drop a valid all-zero vector; only a missing embedding
        # (None return — TODO confirm against W2VEmbReader) is skipped.
        if vec is not None:
            self.emb_matrix[row] = vec
#
from dlm.io.ngramsReader import NgramsReader
from dlm.io.vocabReader import VocabManager

# Build the test set of n-grams and the vocabulary used to map between
# word ids and surface forms.
testset = NgramsReader(dataset_path=args.input_path, ngram_size=classifier.ngram_size, vocab_path=args.vocab_path)
vocab = VocabManager(args.vocab_path)

## Loading restricted vocab
# If a restricted vocabulary file is given, classification is limited to
# the ids of the words it lists (one word per line).
restricted_ids = []
restricted_vocab = []
if args.restricted_vocab_path:
    with open(args.restricted_vocab_path) as restricted_vocab_file:
        # Idiom: build the word list with a comprehension instead of an
        # explicit append loop.
        restricted_vocab = [line.strip() for line in restricted_vocab_file]
    restricted_ids = vocab.get_ids_given_word_list(restricted_vocab)

#########################
## Compiling theano function
#

evaluator = eval.Evaluator(testset, classifier)

# Classify every sample and write the predicted word, one per line.
# NOTE: xrange — this file targets Python 2.
if args.output_path:
    with open(args.output_path, "w") as output:
        for i in xrange(testset._get_num_samples()):
            out = evaluator.get_class(i, restricted_ids)
            output.write(vocab.get_word_given_id(out) + "\n")
#
from dlm.io.ngramsReader import NgramsReader
from dlm.io.vocabReader import VocabManager

# Test n-grams plus the id<->word vocabulary for decoding predictions.
testset = NgramsReader(dataset_path=args.input_path, ngram_size=classifier.ngram_size, vocab_path=args.vocab_path)
vocab = VocabManager(args.vocab_path)

## Loading restricted vocab
# Optionally restrict classification to the ids of the words listed
# (one per line) in the restricted-vocab file.
restricted_ids = []
restricted_vocab = []
if args.restricted_vocab_path:
    with open(args.restricted_vocab_path) as restricted_vocab_file:
        # Idiom: comprehension replaces the manual append loop.
        restricted_vocab = [line.strip() for line in restricted_vocab_file]
    restricted_ids = vocab.get_ids_given_word_list(restricted_vocab)

#########################
## Compiling theano function
#

evaluator = eval.Evaluator(testset, classifier)

# Emit the predicted word for every test sample, one per line.
# NOTE: xrange — Python 2 codebase.
if args.output_path:
    with open(args.output_path, 'w') as output:
        for i in xrange(testset._get_num_samples()):
            out = evaluator.get_class(i, restricted_ids)
            output.write(vocab.get_word_given_id(out) + '\n')