def logits_and_state(self):
    """Creates a block that goes from tokens to (logits, state) tuples."""
    unknown_idx = len(self.vocab)

    def lookup_word(word):
        return self.vocab.get(word, unknown_idx)

    # (GetItem(key) >> block).eval(inp) => block.eval(inp[key])
    # InputTransform(fn): a Python function, lifted to a block.
    # Scalar: a scalar input, converted to a scalar tensor.
    word2vec = (td.GetItem(0) >> td.InputTransform(lookup_word) >>
                td.Scalar('int32') >> self.word_embedding)

    pair2vec = (self.embed_subtree(), self.embed_subtree())

    # Trees are binary, so the tree layer takes two states as its
    # input_state.
    zero_state = td.Zeros((self.tree_lstm_cell.state_size,) * 2)
    # Input is a word vector.
    zero_inp = td.Zeros(self.word_embedding.output_type.shape[0])

    # AllOf(a, b, c).eval(inp) => (a.eval(inp), b.eval(inp), c.eval(inp))
    word_case = td.AllOf(word2vec, zero_state)
    pair_case = td.AllOf(zero_inp, pair2vec)

    # OneOf(fn, [(key, block), (key, block)]), where fn(input) => key and
    # OneOf dispatches the input to the block registered under that key.
    tree2vec = td.OneOf(len, [(1, word_case), (2, pair_case)])

    return tree2vec >> self.tree_lstm_cell
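The combinator identities noted in the comments above can be checked interactively: Fold blocks have an eval method that runs a single input through the block. A minimal sketch with toy blocks (illustrative only, not part of the model):

import tensorflow_fold as td

# (GetItem(key) >> block).eval(inp) => block.eval(inp[key])
first_as_int = td.GetItem(0) >> td.InputTransform(int) >> td.Scalar('int32')
print(first_as_int.eval(('42', 'ignored')))  # => 42

# AllOf(a, b).eval(inp) => (a.eval(inp), b.eval(inp))
both = td.AllOf(td.Scalar(), td.Scalar())
print(both.eval(3.0))  # => (3.0, 3.0)

# OneOf dispatches on a key function; here, on sequence length. All
# cases must share an output type (a scalar here).
first_or_second = td.OneOf(len, [(1, td.GetItem(0) >> td.Scalar()),
                                 (2, td.GetItem(1) >> td.Scalar())])
print(first_or_second.eval([1.0, 5.0]))  # => 5.0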
def logits_and_state():
    """Creates a block that goes from tokens to (logits, state) tuples."""
    unknown_idx = len(word_idx)
    # unknown_idx is the default value for out-of-vocabulary words.
    lookup_word = lambda word: word_idx.get(word, unknown_idx)

    # <td.Pipe>: None -> TensorType((200,), 'float32')
    word2vec = (td.GetItem(0) >> td.GetItem(0) >>
                td.InputTransform(lookup_word) >>
                td.Scalar('int32') >>
                word_embedding)

    context2vec1 = (td.GetItem(1) >> td.InputTransform(makeContextMat) >>
                    td.Vector(10))
    context2vec2 = (td.GetItem(1) >> td.InputTransform(makeContextMat) >>
                    td.Vector(10))
    ent1posit1 = (td.GetItem(2) >> td.InputTransform(makeEntPositMat) >>
                  td.Vector(10))
    ent1posit2 = (td.GetItem(2) >> td.InputTransform(makeEntPositMat) >>
                  td.Vector(10))
    ent2posit1 = (td.GetItem(3) >> td.InputTransform(makeEntPositMat) >>
                  td.Vector(10))
    ent2posit2 = (td.GetItem(3) >> td.InputTransform(makeEntPositMat) >>
                  td.Vector(10))

    pairs2vec = td.GetItem(0) >> (embed_subtree(), embed_subtree())

    # The tree is binary, so the zero state holds two child states.
    zero_state = td.Zeros((tree_lstm.state_size,) * 2)
    # Input is a word vector; word_embedding.output_type.shape[0] == 200.
    zero_inp = td.Zeros(word_embedding.output_type.shape[0])

    word_case = td.AllOf(word2vec, zero_state, context2vec1, ent1posit1,
                         ent2posit1)
    children_case = td.AllOf(zero_inp, pairs2vec, context2vec2, ent1posit2,
                             ent2posit2)

    # Leaf nodes (a single token) go to the word case; internal nodes go
    # to the children case.
    tree2vec = td.OneOf(lambda x: 1 if len(x[0]) == 1 else 2,
                        [(1, word_case), (2, children_case)])
    # Alternative dispatch on the raw length:
    # tree2vec = td.OneOf(lambda pair: len(pair[0]),
    #                     [(1, word_case), (2, children_case)])

    # Logits and LSTM states.
    return tree2vec >> tree_lstm >> (output_layer, td.Identity())
def logits_and_state():
    """Creates a block that goes from tokens to (logits, state) tuples."""
    unknown_idx = len(word_idx)
    lookup_word = lambda word: word_idx.get(word, unknown_idx)

    word2vec = (td.GetItem(0) >> td.InputTransform(lookup_word) >>
                td.Scalar('int32') >> word_embedding)

    pair2vec = (embed_subtree(), embed_subtree())

    # Trees are binary, so the tree layer takes two states as its
    # input_state.
    zero_state = td.Zeros((tree_lstm.state_size,) * 2)
    # Input is a word vector.
    zero_inp = td.Zeros(word_embedding.output_type.shape[0])

    word_case = td.AllOf(word2vec, zero_state)
    pair_case = td.AllOf(zero_inp, pair2vec)

    tree2vec = td.OneOf(len, [(1, word_case), (2, pair_case)])

    return tree2vec >> tree_lstm >> (output_layer, td.Identity())
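logits_and_state() refers to itself through embed_subtree(); in the TensorFlow Fold sentiment example this recursion is tied off with td.ForwardDeclaration. A sketch of that wiring, following the tutorial (embed_tree is the tutorial's wrapper that adds input preprocessing and metrics around the block; names come from that example, not necessarily this code base):

embed_subtree = td.ForwardDeclaration(name='embed_subtree')

model = embed_tree(logits_and_state(), is_root=True)
# Resolve the forward declaration now that the full expression exists.
embed_subtree.resolve_to(embed_tree(logits_and_state(), is_root=False))

compiler = td.Compiler.create(model)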
def build_program_decoder_for_analysis(token_emb_size, rnn_cell):
    """Does the same as build_program_decoder, but also returns the final
    hidden state of the decoder."""
    decoder_rnn = td.ScopedLayer(rnn_cell, 'decoder')
    decoder_rnn_output = td.RNN(decoder_rnn,
                                initial_state_from_input=True) >> td.GetItem(0)
    fc_layer = td.FC(token_emb_size,
                     activation=tf.nn.relu,
                     initializer=tf.contrib.layers.xavier_initializer(),
                     name='encoder_fc')
    un_normalised_token_probs = td.Map(fc_layer)
    return decoder_rnn_output >> td.AllOf(un_normalised_token_probs,
                                          td.Identity())
def build_program_decoder(token_emb_size, rnn_cell, just_tokens=False):
    """Used for blind or 'look-behind' decoders."""
    decoder_rnn = td.ScopedLayer(rnn_cell, 'decoder')
    decoder_rnn_output = td.RNN(decoder_rnn,
                                initial_state_from_input=True) >> td.GetItem(0)
    fc_layer = td.FC(token_emb_size,
                     activation=tf.nn.relu,
                     initializer=tf.contrib.layers.xavier_initializer(),
                     name='encoder_fc')
    if just_tokens:
        return decoder_rnn_output >> td.Map(fc_layer)
    return decoder_rnn_output >> td.AllOf(td.Map(fc_layer), td.Identity())
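Both decoder builders return an unconnected Fold block. A hedged usage sketch (the 128-unit GRU cell, the sizes, and the explicit input typing are assumptions for illustration, not part of the original code):

# The decoder block expects a (token_embedding_sequence, initial_state)
# pair because of initial_state_from_input=True.
cell = tf.contrib.rnn.GRUCell(num_units=128)
decoder = build_program_decoder(token_emb_size=128, rnn_cell=cell,
                                just_tokens=True)

# Typing the inputs explicitly lets the compiler infer the rest.
typed_inputs = td.Record((td.Map(td.Vector(128)), td.Vector(128)))
compiler = td.Compiler.create(typed_inputs >> decoder)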
def _compile(self):
    with self.sess.as_default():
        import tensorflow_fold as td

        output_size = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),
                                                     shape=None)

        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.embedding_size)
        char_cell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'char_cell')

        char_lstm = (td.InputTransform(lambda s: [ord(c) for c in s]) >>
                     td.Map(td.Scalar('int32') >> char_emb) >>
                     td.RNN(char_cell) >> td.GetItem(1) >> td.GetItem(1))

        rnn_fwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)

        rnn_bwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        bwdlayer = (td.Slice(step=-1) >> td.RNN(rnn_bwdcell) >>
                    td.GetItem(0) >> td.Slice(step=-1))

        pos_emb = td.Embedding(
            num_buckets=300, num_units_out=32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        pos_x = (td.InputTransform(lambda x: x + 150) >>
                 td.Scalar(dtype='int32') >> pos_emb)
        pos_y = (td.InputTransform(lambda x: x + 150) >>
                 td.Scalar(dtype='int32') >> pos_emb)

        input_layer = td.Map(td.Record((char_lstm, pos_x, pos_y)) >>
                             td.Concat())

        maxlayer = (td.AllOf(fwdlayer, bwdlayer) >>
                    td.ZipWith(td.Concat()) >>
                    td.Max())

        output_layer = (input_layer >> maxlayer >>
                        td.FC(output_size,
                              input_keep_prob=self.keep_prob,
                              activation=None))

        self.compiler = td.Compiler.create(
            (output_layer, td.Vector(output_size, dtype=tf.int32)))
        self.y_out, self.y_true = self.compiler.output_tensors
        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.y_out,
                                                    labels=self.y_true))
        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=1)
        self.y_pred = tf.one_hot(self.y_pred_idx, depth=output_size,
                                 dtype=tf.int32)

        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)
        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step,
                                                 1, self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(
                learning_rate=lrate_decay, initial_accumulator_value=1e-08)
        elif self.optimizer in ('rmsprop', 'default'):
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in the list of '
                             'available optimizers: default, adam, adagrad, '
                             'rmsprop.').format(self.optimizer))

        # Apply the learning-rate multiplier to the embedding gradients.
        embeds = [pos_emb.weights, char_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)
        assert found == len(embeds)  # internal consistency check

        self.train_step = self.opt.apply_gradients(grads_and_vars)
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
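Once _compile has run, inference goes through the compiler's feed-dict builder. A minimal sketch, assuming a model instance and examples shaped like the (sequence of (char_string, x, y) records, one-hot label) pairs the compiler was created from (both names are hypothetical):

# keep_prob stays at its default of 1.0 for inference.
fd = model.compiler.build_feed_dict(examples)
probs, pred_idx = model.sess.run([model.y_prob, model.y_pred_idx],
                                 feed_dict=fd)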
def _compile(self):
    with self.sess.as_default():
        import tensorflow_fold as td

        output_size = len(self.labels)
        self.keep_prob = tf.placeholder_with_default(tf.constant(1.0),
                                                     shape=None)

        fshape = (self.window_size * (self.char_embedding_size +
                                      self.char_feature_embedding_size),
                  self.num_filters)
        filt_w3 = tf.Variable(tf.random_normal(fshape, stddev=0.05))

        def CNN_Window3(filters):
            return td.Function(lambda a, b, c: cnn_operation([a, b, c],
                                                             filters))

        def cnn_operation(window_sequences, filters):
            windows = tf.concat(window_sequences, axis=-1)
            products = tf.multiply(tf.expand_dims(windows, axis=-1), filters)
            return tf.reduce_sum(products, axis=-2)

        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.char_embedding_size)

        cnn_layer = (td.NGrams(self.window_size) >>
                     td.Map(CNN_Window3(filt_w3)) >>
                     td.Max())

        # --------- character features

        def charfeature_lookup(c):
            if c in string.lowercase:
                return 0
            elif c in string.uppercase:
                return 1
            elif c in string.punctuation:
                return 2
            else:
                return 3

        char_input = td.Map(td.InputTransform(lambda c: ord(c.lower())) >>
                            td.Scalar('int32') >> char_emb)

        char_features = td.Map(
            td.InputTransform(charfeature_lookup) >>
            td.Scalar(dtype='int32') >>
            td.Embedding(num_buckets=4,
                         num_units_out=self.char_feature_embedding_size))

        charlevel = (td.InputTransform(lambda s: ['~'] + [c for c in s] +
                                       ['~']) >>
                     td.AllOf(char_input, char_features) >>
                     td.ZipWith(td.Concat()) >>
                     cnn_layer)

        # --------- word features

        word_emb = td.Embedding(num_buckets=len(self.word_vocab),
                                num_units_out=self.embedding_size,
                                initializer=self.word_embeddings)

        wordlookup = lambda w: (self.word_vocab.index(w.lower())
                                if w.lower() in self.word_vocab else 0)

        wordinput = (td.InputTransform(wordlookup) >>
                     td.Scalar(dtype='int32') >>
                     word_emb)

        def wordfeature_lookup(w):
            if re.match('^[a-z]+$', w):
                return 0
            elif re.match('^[A-Z][a-z]+$', w):
                return 1
            elif re.match('^[A-Z]+$', w):
                return 2
            elif re.match('^[A-Za-z]+$', w):
                return 3
            else:
                return 4

        wordfeature = (td.InputTransform(wordfeature_lookup) >>
                       td.Scalar(dtype='int32') >>
                       td.Embedding(num_buckets=5, num_units_out=32))

        # ---------

        rnn_fwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)

        rnn_bwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        bwdlayer = (td.Slice(step=-1) >> td.RNN(rnn_bwdcell) >>
                    td.GetItem(0) >> td.Slice(step=-1))

        rnn_layer = td.AllOf(fwdlayer, bwdlayer) >> td.ZipWith(td.Concat())

        output_layer = td.FC(output_size,
                             input_keep_prob=self.keep_prob,
                             activation=None)

        wordlevel = td.AllOf(wordinput, wordfeature) >> td.Concat()

        network = (td.Map(td.AllOf(wordlevel, charlevel) >> td.Concat()) >>
                   rnn_layer >>
                   td.Map(output_layer) >>
                   td.Map(td.Metric('y_out'))) >> td.Void()

        groundlabels = td.Map(td.Vector(output_size, dtype=tf.int32) >>
                              td.Metric('y_true')) >> td.Void()

        self.compiler = td.Compiler.create((network, groundlabels))
        self.y_out = self.compiler.metric_tensors['y_out']
        self.y_true = self.compiler.metric_tensors['y_true']

        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.y_out,
                                                    labels=self.y_true))
        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=-1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=-1)
        self.y_pred = tf.one_hot(self.y_pred_idx, depth=output_size,
                                 dtype=tf.int32)

        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)
        lrate_decay = tf.train.exponential_decay(self.lrate, epoch_step,
                                                 1, self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(
                learning_rate=lrate_decay, initial_accumulator_value=1e-08)
        elif self.optimizer in ('rmsprop', 'default'):
            self.opt = tf.train.RMSPropOptimizer(learning_rate=lrate_decay,
                                                 epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in the list of '
                             'available optimizers: default, adam, adagrad, '
                             'rmsprop.').format(self.optimizer))

        # Apply the learning-rate multiplier to the embedding gradients.
        embeds = [word_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)
        assert found == len(embeds)  # internal consistency check

        self.train_step = self.opt.apply_gradients(grads_and_vars)
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
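Training against either compiled graph follows the usual Fold pattern. A minimal sketch, assuming a model instance and a batch of (token sequence, per-token one-hot label sequence) pairs (both names hypothetical):

fd = model.compiler.build_feed_dict(batch)
fd[model.keep_prob] = 0.5  # enable dropout only while training
_, loss = model.sess.run([model.train_step, model.y_loss], feed_dict=fd)
model.sess.run(model.epoch_step_op)  # advance the decay step once per epoch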