def load_data(self, debug=False):
    """Load the PTB train/valid/test splits and integer-encode them.

    Builds ``self.vocab`` from the training split, then stores
    ``self.encoded_train`` / ``self.encoded_valid`` / ``self.encoded_test``
    as int32 numpy arrays of word indices.

    Args:
        debug: if True, truncate every split to the first 1024 tokens
            so experiments run quickly.
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    # BUG FIX: the valid split was never loaded, yet the debug branch
    # below slices self.encoded_valid -> AttributeError when debug=True.
    # Load it the same way the other splits are loaded (matches the
    # sibling load_data implementations in this file).
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]
def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))

    # Encode one named split as an int32 array of vocab indices.
    def _encode(split):
        return np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset(split)],
            dtype=np.int32)

    self.encoded_train = _encode('train')
    self.encoded_valid = _encode('valid')
    self.encoded_test = _encode('test')

    if debug:
        # Keep only a small prefix of each split for fast iteration.
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]
def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    # Vocabulary is built from the training split only, so valid/test
    # words outside it are handled by Vocab.encode's own policy.
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    # Each split becomes a flat int32 array of word indices.
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
        # Truncate every split to a small prefix for quick debugging runs.
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]
def load_data(debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    vocab = Vocab()
    vocab.construct(get_ptb_dataset('train'))

    # Encode the three splits in order; keyed storage keeps the loop flat.
    encoded = {}
    for split in ('train', 'valid', 'test'):
        encoded[split] = np.array(
            [vocab.encode(word) for word in get_ptb_dataset(split)],
            dtype=np.int32)

    if debug:
        # Shrink every split to its first 1024 tokens for fast runs.
        num_debug = 1024
        encoded = {name: arr[:num_debug] for name, arr in encoded.items()}

    return encoded['train'], encoded['valid'], encoded['test'], vocab
def load_data(self, debug=False):
    """Loads starter word-vectors and train/dev/test data.

    Builds ``self.vocab`` from the PTB training split, encodes the three
    splits as int32 index arrays, and assembles the embedding matrix
    ``self.L`` (a tf.float32 constant of shape
    ``(len(vocab), config.embed_size)``) from pretrained word vectors.
    """
    self.vocab = Vocab()
    self.vocab.construct(get_ptb_dataset('train'))
    self.encoded_train = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    self.encoded_valid = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    self.encoded_test = np.array(
        [self.vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
        num_debug = 1024
        self.encoded_train = self.encoded_train[:num_debug]
        self.encoded_valid = self.encoded_valid[:num_debug]
        self.encoded_test = self.encoded_test[:num_debug]

    # Load pretrained word vectors (one row per word in data/ner/vocab.txt).
    all_embeddings = np.loadtxt("data/ner/wordVectors.txt")
    all_words = np.genfromtxt("data/ner/vocab.txt", dtype='str')

    # PERF: the original ran np.where(all_words == word) for every vocab
    # entry — an O(V * N) linear scan.  A one-time dict gives O(1) lookups.
    row_of = {word: i for i, word in enumerate(all_words)}

    # L is the embedding matrix.
    L = np.zeros((len(self.vocab), self.config.embed_size))
    num_missing = 0
    for i in range(len(self.vocab)):
        word = self.vocab.index_to_word[i]
        row = row_of.get(word)
        if row is None:
            num_missing += 1
            # Fallback: reuse embedding row 0 for out-of-vocabulary words
            # (presumably the <unk>/UUUNKKK row — TODO confirm against
            # data/ner/vocab.txt ordering).
            L[i, :] = all_embeddings[0, :]
        else:
            # BUG FIX: the original used np.asscalar(), which was
            # deprecated in NumPy 1.16 and removed in 1.23; plain int
            # indexing via the dict avoids it entirely.
            L[i, :] = all_embeddings[row, :]
    self.L = tf.constant(L, dtype=tf.float32)
# Module-level script setup: imports, data loading, and the start of the
# hyper-parameter section.  NOTE(review): Python 2 syntax (`print` statement)
# and `tensorflow.contrib` imply TF 1.x on Python 2.
import sys
import time

import numpy as np
from copy import deepcopy

from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
from model import LanguageModel

import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

# Run in debug mode (tiny data slices) by default.
debug = True

"""Loads starter word-vectors and train/dev/test data."""
# Vocabulary is built from the training split only.
vocab = Vocab()
vocab.construct(get_ptb_dataset('train'))
# Each split becomes a flat int32 array of vocabulary indices.
encoded_train = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('train')],
    dtype=np.int32)
encoded_valid = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('valid')],
    dtype=np.int32)
encoded_test = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('test')],
    dtype=np.int32)
if debug:
    # Keep only the first 1024 tokens of each split for quick runs.
    num_debug = 1024
    encoded_train = encoded_train[:num_debug]
    encoded_valid = encoded_valid[:num_debug]
    encoded_test = encoded_test[:num_debug]
print '****** LOADED DATA'

'''**********************************************************************************************************'''

# Hyper Parameters
# Module-level script setup: imports, run flags, data loading, and
# hyper-parameters.  NOTE(review): Python 2 syntax (`print` statement)
# and `tensorflow.contrib` imply TF 1.x on Python 2.
import sys
import time

import numpy as np
from copy import deepcopy

from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
from model import LanguageModel

import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

# Run flags: evaluate on the PTB test set, generate samples, debug-size data.
test_on_ptb = True
generate_fun = True
debug = True

"""Loads starter word-vectors and train/dev/test data."""
# Vocabulary is built from the training split only.
vocab = Vocab()
vocab.construct(get_ptb_dataset('train'))
# Each split becomes a flat int32 array of vocabulary indices.
encoded_train = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('train')],
    dtype=np.int32)
encoded_valid = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('valid')],
    dtype=np.int32)
encoded_test = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('test')],
    dtype=np.int32)
if debug:
    # Keep only the first 1024 tokens of each split for quick runs.
    num_debug = 1024
    encoded_train = encoded_train[:num_debug]
    encoded_valid = encoded_valid[:num_debug]
    encoded_test = encoded_test[:num_debug]
print '****** LOADED DATA'

# Hyper-parameters for the language model.
lr = 0.01            # learning rate
batch_size = 1
embed_size = 50      # word-embedding dimensionality
hidden_size = 100    # RNN hidden-state size