def __init__(self, d, V):
    # d and V are inherited from the text layer.
    # Define the embeddings: A (input memory) and C (output memory).
    self.d = d
    self.V = V
    self.A = glorot_uniform((d, V))
    self.C = glorot_uniform((d, V))
    self.params = [self.A, self.C]
    self.updates = []
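# A minimal sketch (not from the original source) of how A and C are
# typically used in an end-to-end memory network: assuming each sentence
# is a bag-of-words vector of size V stacked into x (n_sentences, V),
# and u (d,) is the question embedding. memory_hop is a hypothetical name.
import numpy as np

def memory_hop(A, C, x, u):
    """One memory hop: attention over input memories, weighted output memories."""
    m = A.dot(x.T).T             # input memories,  (n_sentences, d)
    c = C.dot(x.T).T             # output memories, (n_sentences, d)
    scores = m.dot(u)            # match each memory against the question
    p = np.exp(scores - scores.max())
    p /= p.sum()                 # softmax attention weights
    return p.dot(c) + u          # output vector plus the query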
def positional_encoding(self, xi):
    """We therefore propose a second representation that encodes the
    position of words within the sentence. This takes the form:
    m_i = SUM_j l_j * A x_ij, where * is an element-wise multiplication.
    l_j is a column vector with the structure
    l_kj = (1 - j/J) - (k/d)(1 - 2j/J) (assuming 1-based indexing),
    with J being the number of words in the sentence, and d is the
    dimension of the embedding. This sentence representation, which we
    call position encoding (PE), means that the order of the words now
    affects m_i. The same representation is used for questions, memory
    inputs and memory outputs.
    """
    # Build l from position/dimension index grids (per the formula in the
    # docstring), not from random initial weights. Returns one (J, d)
    # weight matrix per sentence.
    if isinstance(xi, (np.ndarray, list)):
        lengths = [len(sentence) for sentence in xi]
    else:
        lengths = [sentence.shape[0] for sentence in xi]
    encodings = []
    for J in lengths:
        j = np.arange(1, J + 1, dtype=np.float32)[:, None]       # word position, (J, 1)
        k = np.arange(1, self.d + 1, dtype=np.float32)[None, :]  # embedding index, (1, d)
        encodings.append((1 - j / J) - (k / self.d) * (1 - 2 * j / J))
    return encodings
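# Illustrative usage (layer, sentence_tokens, and emb are hypothetical
# names): weight each word's embedding row by its positional factor,
# then sum over words to get the PE sentence representation.
l = layer.positional_encoding([sentence_tokens])[0]  # (J, d) for the first sentence
m_i = (l * emb).sum(axis=0)                          # emb is (J, d); m_i is (d,)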
def __init__(self, name, latent_dim, depth, k, hidden_dim,
             exploration_probability, exploration_decay_rate):
    """
    z = input (n, latent_dim)
    o = hidden representation (n, depth, hidden_dim)
    x = output (n, depth) (int)
    h = hidden input representation: z*W, o*U, x*V
    """
    self.latent_dim = latent_dim
    self.depth = depth
    self.k = k
    self.hidden_dim = hidden_dim
    # z = T.fmatrix("z")  # input latent samples (n, latent_dim)
    self.exploration_probability = theano.shared(
        np.float32(exploration_probability),
        "{}_exploration_probability".format(name))
    self.exploration_decay_rate = np.float32(exploration_decay_rate)
    # Hidden representation
    self.W_h = glorot_uniform((latent_dim, hidden_dim), "{}_W_h".format(name))  # z, (latent_dim, hidden_dim)
    self.U_h = glorot_uniform((hidden_dim, hidden_dim), "{}_U_h".format(name))  # h, (hidden_dim, hidden_dim)
    self.V_h = glorot_uniform((k + 2, hidden_dim), "{}_V_h".format(name))       # x, (k+2, hidden_dim)
    self.b_h = zero((hidden_dim,), "{}_b_h".format(name))
    # Forget gate
    self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))
    self.b_f = zero((hidden_dim,), "{}_b_f".format(name))
    # Input gate
    self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))
    self.b_i = zero((hidden_dim,), "{}_b_i".format(name))
    # Write gate
    self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))
    self.b_w = zero((hidden_dim,), "{}_b_w".format(name))
    # Output gate
    self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))
    self.b_o = zero((hidden_dim,), "{}_b_o".format(name))
    # Hidden state
    self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))
    self.b_j = zero((hidden_dim,), "{}_b_j".format(name))
    # Value predictions
    self.W_v = glorot_uniform((hidden_dim, k + 1), "{}_W_v".format(name))  # (hidden_dim, k+1)
    self.b_v = zero((k + 1,), "{}_b_v".format(name))                       # (k+1,)
    self.params = [self.W_h, self.U_h, self.V_h, self.b_h,
                   self.W_f, self.b_f,
                   self.W_i, self.b_i,
                   self.W_w, self.b_w,
                   self.W_o, self.b_o,
                   self.W_j, self.b_j,
                   self.W_v, self.b_v]
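# A hedged sketch of one gated step these shapes admit; the actual wiring
# is not in this snippet, so the gate arrangement, the argument names
# (z, h_prev, c_prev, x_onehot), and the omitted write gate's role are all
# assumptions. T is theano.tensor.
def step(self, z, h_prev, c_prev, x_onehot):
    a = T.tanh(T.dot(z, self.W_h) + T.dot(h_prev, self.U_h) +
               T.dot(x_onehot, self.V_h) + self.b_h)        # docstring: z*W, o*U, x*V
    f = T.nnet.sigmoid(T.dot(a, self.W_f) + self.b_f)       # forget gate
    i = T.nnet.sigmoid(T.dot(a, self.W_i) + self.b_i)       # input gate
    o = T.nnet.sigmoid(T.dot(a, self.W_o) + self.b_o)       # output gate
    j = T.tanh(T.dot(a, self.W_j) + self.b_j)               # candidate state
    c = f * c_prev + i * j                                  # LSTM-style cell update (assumed)
    h = o * T.tanh(c)
    v = T.dot(h, self.W_v) + self.b_v                       # value head, (k+1,)
    return h, c, v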
def __init__(self, y_vocab, dim_word, dim, dim_ctx):
    self.y_vocab = y_vocab    # e.g. 430
    self.dim_word = dim_word  # e.g. 1024
    self.dim = dim            # e.g. 512
    self.dim_ctx = dim_ctx    # e.g. 512
    self.emb_dim = 512

    # Image and frame embeddings
    self.W_img_emb = initializations.glorot_uniform((self.dim_ctx, self.emb_dim))
    self.b_img_emb = initializations.zero((self.emb_dim,))
    self.W_fr_emb = initializations.glorot_uniform((self.dim_word, self.emb_dim))
    self.b_fr_emb = initializations.zero((self.emb_dim,))

    # Encoder forward GRU: reset and update gates share W_gru/U_gru/b_gru
    self.W_gru = initializations.glorot_uniform((self.emb_dim, self.dim * 2))
    self.U_gru = initializations.glorot_uniform((self.dim, self.dim * 2))
    self.b_gru = initializations.zero((self.dim * 2,))
    self.W_gru_cdd = initializations.glorot_uniform((self.emb_dim, self.dim))  # cdd: candidate
    self.U_gru_cdd = initializations.glorot_uniform((self.dim, self.dim))
    self.b_gru_cdd = initializations.zero((self.dim,))

    # Prediction
    self.W_pred = initializations.glorot_uniform((self.dim * 2, self.y_vocab))
    self.b_pred = initializations.zero((self.y_vocab,))

    self.params = [self.W_img_emb, self.W_fr_emb, self.b_img_emb, self.b_fr_emb,
                   self.W_gru, self.U_gru, self.b_gru,
                   self.W_gru_cdd, self.U_gru_cdd, self.b_gru_cdd,
                   self.W_pred, self.b_pred]
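# A hedged sketch of the GRU step these shapes imply (theano.tensor as T);
# emb is the batched frame embedding at time t and h_prev the previous
# hidden state (n, dim). The method name, the gate slicing order, and the
# update convention are assumptions, not taken from the original code.
def gru_step(self, emb, h_prev):
    gates = T.nnet.sigmoid(T.dot(emb, self.W_gru) + T.dot(h_prev, self.U_gru) + self.b_gru)
    r = gates[:, :self.dim]   # reset gate
    u = gates[:, self.dim:]   # update gate
    h_cdd = T.tanh(T.dot(emb, self.W_gru_cdd) + T.dot(r * h_prev, self.U_gru_cdd) + self.b_gru_cdd)
    return u * h_prev + (1. - u) * h_cdd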
def get_weights(shape, token2idx, token2embedding):
    # Start from a Glorot-uniform matrix, then overwrite the rows of
    # tokens that have a pretrained embedding.
    weights = glorot_uniform(shape).get_value()
    if token2embedding is not None:
        vocabulary, tokens = set(token2idx.keys()), set(token2embedding.keys())
        for token_to_initialize in vocabulary.intersection(tokens):
            idx = token2idx[token_to_initialize]
            if idx < weights.shape[0]:
                weights[idx, :] = token2embedding[token_to_initialize]
    return weights
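# Example usage with hypothetical data: the row for "cat" comes from the
# pretrained table, the remaining rows stay Glorot-uniform.
token2idx = {"cat": 0, "dog": 1, "fish": 2}
token2embedding = {"cat": np.full(50, 0.1, dtype=np.float32)}
W = get_weights((3, 50), token2idx, token2embedding)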
def unitary_ASB2016_init(shape, name=None):
    # Initialize the factors of an ASB2016-style parameterized unitary
    # matrix (diagonal phases, reflections, and a fixed permutation), then
    # multiply the identity (in augmented real form) through them.
    assert shape[0] == shape[1]
    N = shape[1]
    theta = initializations.uniform((3, N), scale=np.pi,
                                    name='{}_theta'.format(name))
    reflection = initializations.glorot_uniform((2, 2 * N),
                                                name='{}_reflection'.format(name))
    idxperm = np.random.permutation(N)
    idxpermaug = np.concatenate((idxperm, N + idxperm))
    Iaug = augLeft(np.concatenate((np.eye(N), np.zeros((N, N))), axis=0),
                   module=np).astype(np.float32)
    Uaug = times_unitary_ASB2016(Iaug, N, [theta, reflection, idxpermaug])
    return Uaug, theta, reflection, idxpermaug
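# Hypothetical usage: parameterize a 64-dimensional unitary recurrence.
# theta and reflection are the trainable shared variables; idxpermaug is a
# fixed permutation, and Uaug is the matrix in the augmented real format.
Uaug, theta, reflection, idxpermaug = unitary_ASB2016_init((64, 64), name='urnn')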
def from_vocab(igor, vocab):
    print("using vocab and glove file to generate embedding matrix")
    remaining_vocab = set(vocab.keys())
    embeddings = np.zeros((len(vocab), igor.embedding_size))
    print("{} words to convert".format(len(remaining_vocab)))

    if igor.save_dir[-1] != "/":
        igor.save_dir += "/"
    if not path.exists(igor.save_dir):
        makedirs(igor.save_dir)

    if igor.from_url:
        assert hasattr(glove_urls, igor.target_glove), "You need to specify one of the glove variables"
        url = urlopen(getattr(glove_urls, igor.target_glove))
        zipfile = ZipFile(StringIO(url.read()))
        # Assumes the GloVe vectors are the first member of the zip archive.
        fileiter = zipfile.open(zipfile.namelist()[0]).readlines()
    else:
        assert path.exists(igor.target_glove), "You need to specify a real file"
        fileiter = open(igor.target_glove).readlines()

    for line in tqdm(fileiter):
        line = line.replace("\n", "").split(" ")
        try:
            word, nums = line[0], [float(x.strip()) for x in line[1:]]
            if word in remaining_vocab:
                embeddings[vocab[word]] = np.array(nums)
                remaining_vocab.remove(word)
        except Exception as e:
            print("{} broke. exception: {}. line: {}.".format(line[0], e, line))

    print("{} words were not in glove; saving to oov.txt".format(len(remaining_vocab)))
    with open(path.join(igor.save_dir, "oov.txt"), "w") as fp:
        fp.write("\n".join(remaining_vocab))

    # Out-of-vocabulary words get a random Glorot-uniform vector.
    for word in tqdm(remaining_vocab):
        embeddings[vocab[word]] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())

    vocab.save('embedding.vocab')
    with open(path.join(igor.save_dir, "embedding.npy"), "wb") as fp:
        np.save(fp, embeddings)
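# A follow-on sketch (hypothetical usage, not from the original source):
# reload the saved matrix and hand it to an embedding layer as its
# initial weights.
embeddings = np.load(path.join(igor.save_dir, "embedding.npy"))
# e.g. Embedding(input_dim=len(vocab), output_dim=igor.embedding_size,
#                weights=[embeddings])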
def __init__(self, test_file, train_file, d=30):
    # Houses embedding B; deals with text.
    # d = 30 to limit the size of the vectors.
    # Potentially add word2vec vectors.
    self.d = d
    self.train_lines, self.test_lines = self.get_lines(train_file), self.get_lines(test_file)
    lines = np.concatenate([self.train_lines, self.test_lines], axis=0)
    self.vectorizer = CountVectorizer(lowercase=False)
    self.vectorizer.fit([x['text'] + ' ' + x['answer'] if 'answer' in x else x['text']
                         for x in lines])
    self.analyze = self.vectorizer.build_analyzer()
    self.V = len(self.vectorizer.vocabulary_)
    # self.B = theano.shared((np.random.uniform(-0.1, 0.1, (self.d, self.V))).astype(np.float32))
    self.B = glorot_uniform((self.d, self.V))
    self.params = [self.B]
    self.updates = []
def __init__(self, name, depth, k, hidden_dim):
    self.depth = depth
    self.k = k
    self.hidden_dim = hidden_dim
    # Hidden representation
    self.W_h = glorot_uniform((hidden_dim, hidden_dim), "{}_W_h".format(name))  # h, (hidden_dim, hidden_dim)
    self.U_h = glorot_uniform((k + 1, hidden_dim), "{}_U_h".format(name))       # x, (k+1, hidden_dim)
    self.b_h = zero((hidden_dim,), "{}_b_h".format(name))
    # Forget gate
    self.W_f = glorot_uniform((hidden_dim, hidden_dim), "{}_W_f".format(name))
    self.b_f = zero((hidden_dim,), "{}_b_f".format(name))
    # Input gate
    self.W_i = glorot_uniform((hidden_dim, hidden_dim), "{}_W_i".format(name))
    self.b_i = zero((hidden_dim,), "{}_b_i".format(name))
    # Write gate
    self.W_w = glorot_uniform((hidden_dim, hidden_dim), "{}_W_w".format(name))
    self.b_w = zero((hidden_dim,), "{}_b_w".format(name))
    # Output gate
    self.W_o = glorot_uniform((hidden_dim, hidden_dim), "{}_W_o".format(name))
    self.b_o = zero((hidden_dim,), "{}_b_o".format(name))
    # Hidden state
    self.W_j = glorot_uniform((hidden_dim, hidden_dim), "{}_W_j".format(name))
    self.b_j = zero((hidden_dim,), "{}_b_j".format(name))
    # y predictions
    self.W_y = glorot_uniform((hidden_dim, 1), "{}_W_y".format(name))
    self.b_y = zero((1,), "{}_b_y".format(name))
    self.clip_params = [self.W_h, self.U_h, self.W_f, self.W_i, self.W_w,
                        self.W_o, self.W_j, self.W_y]
    self.params = [self.W_h, self.U_h, self.b_h,
                   self.W_f, self.b_f,
                   self.W_i, self.b_i,
                   self.W_w, self.b_w,
                   self.W_o, self.b_o,
                   self.W_j, self.b_j,
                   self.W_y, self.b_y]
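# clip_params hints at hard weight clipping after each gradient update; a
# minimal hedged sketch (the threshold c, the name `model`, and appending
# to the optimizer's update list are assumptions), with theano.tensor as T:
c = np.float32(1.0)
clip_updates = [(p, T.clip(p, -c, c)) for p in model.clip_params]
# append clip_updates to the optimizer's update list when compiling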
def make_hash_embeddings(igor, vocab):
    assert os.path.exists(igor.target_glove), "You need to specify a real file"
    fileiter = open(igor.target_glove).readlines()

    hash_vocab = Vocabulary()
    hash_vocab.use_mask = True
    hash_vocab.add(hash_vocab.mask_symbol)
    hash_vocab.add(hash_vocab.unk_symbol)
    word2hash = {}
    for word, v_id in vocab.items():
        ids = hash_vocab.add_many(hash_word(word))
        word2hash[v_id] = ids

    embeddings = np.zeros((len(hash_vocab), igor.embedding_size))
    remaining_vocab = set(vocab.keys())
    remaining_hashes = set(hash_vocab.values())
    for line in tqdm(fileiter):
        line = line.replace("\n", "").split(" ")
        word, nums = line[0], [float(x.strip()) for x in line[1:]]
        if word in remaining_vocab:
            # Spread the pretrained vector evenly over the word's hash buckets.
            hash_ids = word2hash[vocab[word]]
            remaining_vocab.remove(word)
            remaining_hashes.difference_update(hash_ids)
            embeddings[hash_ids] += np.array(nums) / len(hash_ids)

    print("{} words were not seen. {} hashes were not seen".format(len(remaining_vocab),
                                                                   len(remaining_hashes)))
    # Unseen hash buckets get a random Glorot-uniform vector.
    for hash_id in remaining_hashes:
        embeddings[hash_id] = np.asarray(glorot_uniform((igor.embedding_size,)).eval())

    glove_name = igor.target_glove[igor.target_glove.find("glove"):].replace("/", "")
    hash_vocab.save('hash_embedding_{}.vocab'.format(glove_name))
    with open(path.join(igor.save_dir, "hash_embedding_{}.npy".format(glove_name)), "wb") as fp:
        np.save(fp, embeddings)
    with open(path.join(igor.save_dir, "word2hash.json"), "w") as fp:
        json.dump(word2hash, fp)
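# A hedged sketch of the lookup this table encodes: since each pretrained
# vector was split evenly across its buckets, summing a word's bucket rows
# recovers (approximately, given collisions) its embedding. The function
# name is hypothetical.
def word_embedding(word, vocab, word2hash, embeddings):
    hash_ids = word2hash[vocab[word]]
    return embeddings[hash_ids].sum(axis=0)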
def __init__(self, n_words=1000, n_embedding=100, lr=0.01,
             margin=0.1, momentum=0.9, word_to_id=None):
    self.n_embedding = n_embedding
    self.n_lstm_embed = n_embedding
    self.word_embed = n_embedding
    self.lr = lr
    self.momentum = momentum
    self.margin = margin
    self.n_words = n_words
    self.n_D = 3 * self.n_words + 3
    self.word_to_id = word_to_id
    self.id_to_word = dict((v, k) for k, v in word_to_id.iteritems())

    # Question
    x = T.vector('x')
    phi_x = T.vector('phi_x')
    # True statements
    phi_f1_1 = T.vector('phi_f1_1')
    phi_f2_1 = T.vector('phi_f2_1')
    # False statements
    phi_f1_2 = T.vector('phi_f1_2')
    phi_f2_2 = T.vector('phi_f2_2')
    # Supporting memories
    m0 = T.vector('m0')
    m1 = T.vector('m1')
    phi_m0 = T.vector('phi_m0')
    phi_m1 = T.vector('phi_m1')
    # True word
    r = T.vector('r')
    # Word sequence
    words = T.ivector('words')

    # Scoring function
    self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

    # Word embeddings
    self.L = glorot_uniform((self.n_words, self.word_embed))
    self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

    # LSTM gates: input (i), forget (f), cell (c), output (o)
    self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_i = shared_zeros((self.n_lstm_embed))
    self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_f = shared_zeros((self.n_lstm_embed))
    self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_c = shared_zeros((self.n_lstm_embed))
    self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
    self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
    self.b_o = shared_zeros((self.n_lstm_embed))

    mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0)

    lstm_output = self.lstm_cost(words)
    self.predict_function_r = theano.function(inputs=[words], outputs=lstm_output,
                                              allow_input_downcast=True)
    lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

    cost = mem_cost + lstm_cost

    params = [
        self.U_O,
        self.W_i, self.U_i, self.b_i,
        self.W_f, self.U_f, self.b_f,
        self.W_c, self.U_c, self.b_c,
        self.W_o, self.U_o, self.b_o,
        self.L, self.Lprime,
    ]
    grads = T.grad(cost, params)

    # Parameter updates
    updates = self.get_updates(params, grads, method='adagrad')

    l_rate = T.scalar('l_rate')

    # Theano functions
    self.train_function = theano.function(
        inputs=[phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2,
                phi_m0, r, words,
                theano.Param(l_rate, default=self.lr)],
        outputs=cost,
        updates=updates,
        on_unused_input='warn',
        allow_input_downcast=True,
    )
    # mode='FAST_COMPILE')
    # mode='DebugMode')
    # mode=theano.compile.MonitorMode(pre_func=inspect_inputs, post_func=inspect_outputs))

    # Candidate statement for prediction
    phi_f = T.vector('phi_f')
    score_o = self.calc_score_o(phi_x, phi_f)
    self.predict_function_o = theano.function(inputs=[phi_x, phi_f], outputs=score_o)
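# calc_score_o is referenced above but not shown in this snippet. In the
# Memory Networks formulation (Weston et al., 2015) the match score is
# s_O(x, y) = Phi(x)^T U_O^T U_O Phi(y); a hedged sketch of that form,
# with U_O of shape (n_embedding, n_D) and theano.tensor as T:
def calc_score_o(self, phi_x, phi_y):
    return T.dot(T.dot(self.U_O, phi_x), T.dot(self.U_O, phi_y))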
from keras import backend as K
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
from keras.metrics import categorical_accuracy
from keras.initializations import glorot_uniform, zero
import numpy as np

# Inputs and targets are placeholders
input_dim = 28 * 28
output_dim = 10
x = K.placeholder(name="x", shape=(None, input_dim))
ytrue = K.placeholder(name="y", shape=(None, output_dim))

# Model parameters are variables
hidden_dim = 128
W1 = glorot_uniform((input_dim, hidden_dim))
b1 = zero((hidden_dim,))
W2 = glorot_uniform((hidden_dim, output_dim))
b2 = zero((output_dim,))
params = [W1, b1, W2, b2]

# Two-layer model
hidden = K.sigmoid(K.dot(x, W1) + b1)
ypred = K.softmax(K.dot(hidden, W2) + b2)

# Categorical cross-entropy loss
loss = K.mean(K.categorical_crossentropy(ytrue, ypred), axis=None)

# Categorical accuracy
accuracy = categorical_accuracy(ytrue, ypred)
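# A hedged continuation (not in the original snippet): wire up plain SGD
# updates and a compiled training function with the same backend API. The
# learning rate and batch slice are arbitrary choices for illustration.
lr = 0.1
grads = K.gradients(loss, params)
updates = [(p, p - lr * g) for p, g in zip(params, grads)]
train = K.function([x, ytrue], [loss, accuracy], updates=updates)

(x_train, y_train), _ = mnist.load_data()
x_train = x_train.reshape(-1, input_dim).astype("float32") / 255.0
y_train = to_categorical(y_train, output_dim)
batch_loss, batch_acc = train([x_train[:128], y_train[:128]])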
def __init__(self, d, V):
    self.W = glorot_uniform((V, d))
    self.params = [self.W]
def build(self, input_shape):
    input_dim = input_shape[1]
    # initial_weight_value = glorot_uniform((input_dim, self.n_output_channels))
    # self.W = K.variable(initial_weight_value)
    self.W = glorot_uniform((input_dim, self.n_output_channels), name='W')
    self.trainable_weights = [self.W]
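# A hedged companion sketch: the layer's forward pass presumably projects
# the input with W. The method name `call` follows the old Keras Layer
# API; the absence of a bias term is an assumption from build() above.
def call(self, x, mask=None):
    return K.dot(x, self.W)  # (batch, input_dim) -> (batch, n_output_channels)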