def build_train_graph_for_RVAE(rvae_block, look_behind_length=0):
    """Wrap an RVAE block in a composition that publishes its training losses.

    Every element of the composition's input is read through a ``td.Vector``
    whose size comes from ``get_size_of_input_vecotrs(rvae_block)``, and the
    resulting sequence is fed to ``rvae_block``. Item 0 of the block's output
    is zipped with the input sequence (minus its first ``look_behind_length``
    elements), scored with ``softmax_crossentropy`` and averaged; item 1 is
    passed to ``kl_divergence``. The two results are exposed as the metrics
    ``'cross_entropy_loss'`` and ``'kl_loss'``.

    Args:
        rvae_block: block whose output is indexable with items 0 and 1.
        look_behind_length: number of leading sequence elements sliced off
            before the sequence is used as the cross-entropy target.

    Returns:
        The assembled ``td.Composition``; its output is wired to ``td.Void()``.
    """
    vector_size = get_size_of_input_vecotrs(rvae_block)

    graph = td.Composition()
    with graph.scope():
        # Read every element of the raw input as a fixed-size vector.
        padded_sequence = td.Map(td.Vector(vector_size)).reads(graph.input)

        # The model consumes the full sequence, including the leading prefix.
        rvae_block.reads(padded_sequence)
        token_scores = td.GetItem(0).reads(rvae_block)
        latent_params = td.GetItem(1).reads(rvae_block)

        # Targets are the input sequence without its first
        # `look_behind_length` elements.
        target_sequence = td.Slice(start=look_behind_length).reads(
            padded_sequence)

        # TODO: metric that output of rnn is the same as input sequence
        reconstruction_loss = (
            td.ZipWith(td.Function(softmax_crossentropy)) >> td.Mean())
        reconstruction_loss.reads(token_scores, target_sequence)

        divergence_loss = td.Function(kl_divergence)
        divergence_loss.reads(latent_params)

        td.Metric('cross_entropy_loss').reads(reconstruction_loss)
        td.Metric('kl_loss').reads(divergence_loss)

        # Results are exposed only through the two metrics above.
        graph.output.reads(td.Void())
    return graph
def add_metrics(is_root):
    """Build a block that publishes loss, logits, prediction and label metrics.

    The block's input is ``(labels, (logits, state))``. Four functions are
    evaluated: ``tf_node_loss`` on ``(logits, labels)``, ``tf_logits`` and
    ``tf_pred`` on ``logits``, and ``tf_label`` on ``labels``. Each result is
    reported under an ``all_*`` metric and, when ``is_root`` is truthy, also
    under the corresponding ``root_*`` metric.

    Args:
        is_root: whether the ``root_*`` metrics are emitted as well.

    Returns:
        A ``td.Composition`` whose output is ``state``.
    """
    block = td.Composition(name='predict(is_root=%s)' % is_root)
    with block.scope():
        # Destructure the input: (labels, (logits, state)).
        labels = block.input[0]
        logits = td.GetItem(0).reads(block.input[1])
        state = td.GetItem(1).reads(block.input[1])

        # (function, metric for every node, metric for the root, inputs)
        metric_specs = (
            (tf_node_loss, 'all_loss', 'root_loss', (logits, labels)),
            (tf_logits, 'all_logits', 'root_logits', (logits,)),
            # reserve pred and labels
            (tf_pred, 'all_pred', 'root_pred', (logits,)),
            (tf_label, 'all_labels', 'root_label', (labels,)),
        )
        for fn, all_name, root_name, inputs in metric_specs:
            evaluated = td.Function(fn)
            td.Metric(all_name).reads(evaluated.reads(*inputs))
            if is_root:
                td.Metric(root_name).reads(evaluated)

        block.output.reads(state)
    return block
def set_metrics(self, train=True):
    """Build a block that records root loss and fine-grained hit metrics.

    The block's input is a ``(labels, logits)`` pair. ``self.tf_node_loss``
    and ``self.tf_fine_grained_hits`` are both applied to
    ``(logits, labels)`` and reported as the ``'root_loss'`` and
    ``'root_hits'`` metrics.

    Args:
        train: accepted for interface compatibility; not used here.

    Returns:
        A ``td.Composition`` named ``'predict'`` whose output is ``logits``.
    """
    block = td.Composition(name='predict')
    with block.scope():
        # Destructure the input: (labels, logits).
        labels, logits = block.input[0], block.input[1]

        # Loss metric.
        loss_fn = td.Function(self.tf_node_loss)
        td.Metric('root_loss').reads(loss_fn.reads(logits, labels))

        # Fine-grained hits metric.
        hits_fn = td.Function(self.tf_fine_grained_hits)
        td.Metric('root_hits').reads(hits_fn.reads(logits, labels))

        block.output.reads(logits)
    return block
def build_token_level_RVAE(z_size, token_emb_size, look_behind_length):
    """Assemble the token-level RVAE as a single composition.

    The composition takes a sequence of float32 tensors of shape
    ``[token_emb_size]``. The sequence minus its first
    ``look_behind_length`` elements goes through the encoder RNN, whose last
    output is handed to ``resampling_block(z_size)``. The decoder built by
    ``build_program_decoder`` reads the decoder input sequence together with
    the resampled value. The lengths of the encoder and decoder input
    sequences are published as the metrics ``'encoder_sequence_length'`` and
    ``'decoder_sequence_length'``.

    Args:
        z_size: size passed to ``resampling_block`` and to the decoder GRU
            cell; the encoder GRU cell is built with ``2 * z_size``
            (presumably means and log-sigmas side by side -- confirm against
            ``resampling_block``).
        token_emb_size: length of each input vector; also forwarded to
            ``build_program_decoder``.
        look_behind_length: when positive, the decoder reads concatenated
            n-grams of this length taken from the input without its last
            element; otherwise each decoder input is an empty tensor.

    Returns:
        A ``td.Composition`` whose output is the pair
        ``(decoder output, last encoder output)``.
    """
    model = td.Composition()
    element_type = td.TensorType(([token_emb_size]), 'float32')
    model.set_input_type(td.SequenceType(element_type))

    with model.scope():
        padded_tokens = model.input

        # build encoder block
        encoder_cell = build_program_encoder(default_gru_cell(2 * z_size))
        encoder_outputs = td.RNN(encoder_cell) >> td.GetItem(0)
        latent_params = encoder_outputs >> td.GetItem(-1)
        sampled_z = resampling_block(z_size)

        # Decoder inputs: look-behind n-grams when requested, otherwise a
        # zero-length tensor per input element.
        decoder_inputs = (
            (td.Slice(stop=-1)
             >> td.NGrams(look_behind_length)
             >> td.Map(td.Concat()))
            if look_behind_length > 0 else
            td.Map(td.Void() >> td.FromTensor(tf.zeros((0, ))))
        )

        # build decoder block
        token_scores = build_program_decoder(
            token_emb_size, default_gru_cell(z_size), just_tokens=True)

        # remove padding for input sequence
        unpadded_tokens = td.Slice(start=look_behind_length)
        unpadded_tokens.reads(padded_tokens)

        latent_params.reads(unpadded_tokens)
        sampled_z.reads(latent_params)
        decoder_inputs.reads(padded_tokens)

        td.Metric('encoder_sequence_length').reads(
            td.Length().reads(unpadded_tokens))
        td.Metric('decoder_sequence_length').reads(
            td.Length().reads(decoder_inputs))

        token_scores.reads(decoder_inputs, sampled_z)
        model.output.reads(token_scores, latent_params)
    return model
def bidirectional_dynamic_FC(fw_cell, bw_cell, hidden):
    """Build a bidirectional RNN with a one-unit linear layer per step.

    The composition's input is indexable: item 0 is the sequence to process
    and item 1 is published element by element as the ``"labels"`` metric.
    The sequence is run through ``fw_cell`` as given and through ``bw_cell``
    in reverse; the backward outputs are reversed again, then both output
    sequences are zipped, concatenated per step, passed through
    ``td.FC(1, activation=None)`` and published as the ``'logits'`` metric.

    Args:
        fw_cell: cell handed to ``td.RNN`` for the forward direction.
        bw_cell: cell handed to ``td.RNN`` for the backward direction.
        hidden: not used by this function.

    Returns:
        The assembled ``td.Composition``; its output is the zipped
        per-step pipeline that ends in the ``'logits'`` metric.
    """
    net = td.Composition()
    with net.scope():
        forward_input = td.Identity().reads(net.input[0])

        # Side branch: report item 1 of the input as "labels" and discard it.
        label_sink = td.GetItem(1) >> td.Map(td.Metric("labels")) >> td.Void()
        label_sink.reads(net.input)

        reversed_input = td.Slice(step=-1).reads(forward_input)

        forward_pass = td.RNN(fw_cell) >> td.GetItem(0)
        forward_pass.reads(forward_input)

        backward_pass = td.RNN(bw_cell) >> td.GetItem(0)
        backward_pass.reads(reversed_input)

        # Flip the backward outputs again so both directions share one order.
        backward_realigned = td.Slice(step=-1).reads(backward_pass)

        projection = td.FC(1, activation=None)
        per_step = td.Concat() >> projection >> td.Metric('logits')
        combined = td.ZipWith(per_step)
        combined.reads(forward_pass, backward_realigned)

        net.output.reads(combined)
    return net
def add_metrics(is_root, is_neutral):
    """Build a block that publishes loss and hit metrics and passes on the state.

    The block's input is ``(labels, (logits, state))``. ``tf_node_loss`` and
    ``tf_fine_grained_hits`` are applied to ``(logits, labels)`` and reported
    as ``all_loss`` / ``all_hits``; ``tf_binary_hits`` is reported as
    ``all_binary_hits`` only when ``is_neutral`` is falsy. When ``is_root``
    is truthy each reported value is also published under the matching
    ``root_*`` metric.

    Args:
        is_root: whether the ``root_*`` metrics are emitted as well.
        is_neutral: when truthy, the binary-hits metrics are skipped.

    Returns:
        A ``td.Composition`` whose output is ``state``.
    """
    block = td.Composition(
        name='predict(is_root=%s, is_neutral=%s)' % (is_root, is_neutral))
    with block.scope():
        # Destructure the input: (labels, (logits, state)).
        labels = block.input[0]
        logits = td.GetItem(0).reads(block.input[1])
        state = td.GetItem(1).reads(block.input[1])

        # Loss first, then fine-grained hits; both read (logits, labels).
        always_reported = (
            (tf_node_loss, 'all_loss', 'root_loss'),
            (tf_fine_grained_hits, 'all_hits', 'root_hits'),
        )
        for fn, all_name, root_name in always_reported:
            evaluated = td.Function(fn)
            td.Metric(all_name).reads(evaluated.reads(logits, labels))
            if is_root:
                td.Metric(root_name).reads(evaluated)

        # Binary hits are only calculated when the label is not neutral.
        if not is_neutral:
            binary = td.Function(tf_binary_hits)
            binary.reads(logits, labels)
            td.Metric('all_binary_hits').reads(binary)
            if is_root:
                td.Metric('root_binary_hits').reads(binary)

        # Output the state, which will be read by our parent's LSTM cell.
        block.output.reads(state)
    return block
def _compile(self):
    """Build the Fold network, loss, optimizer and saver, then init variables.

    Everything runs with ``self.sess`` as the default session.

    Reads from ``self``: ``sess``, ``labels``, ``window_size``,
    ``char_embedding_size``, ``char_feature_embedding_size``,
    ``num_filters``, ``char_buckets``, ``word_vocab``, ``embedding_size``,
    ``word_embeddings``, ``rnn_dim``, ``lrate``, ``decay``, ``optimizer``
    and ``embedding_factor``.

    Sets on ``self``: ``keep_prob``, ``compiler``, ``y_out``, ``y_true``,
    ``y_loss``, ``y_prob``, ``y_true_idx``, ``y_pred_idx``, ``y_pred``,
    ``epoch_step_op``, ``opt``, ``train_step`` and ``saver``.

    Raises:
        Exception: if ``self.optimizer`` is not ``'adam'``, ``'adagrad'``
            or ``'rmsprop'``.
        AssertionError: if the word-embedding variable does not appear in
            the optimizer's gradient list.
    """
    with self.sess.as_default():
        import tensorflow_fold as td

        output_size = len(self.labels)
        # Defaults to 1.0 unless a different value is fed at run time.
        self.keep_prob = tf.placeholder_with_default(
            tf.constant(1.0), shape=None)

        # One filter row per feature of a flattened character window.
        fshape = (
            self.window_size
            * (self.char_embedding_size + self.char_feature_embedding_size),
            self.num_filters,
        )
        filt_w3 = tf.Variable(tf.random_normal(fshape, stddev=0.05))

        def CNN_Window3(filters):
            # NOTE: the lambda takes exactly three windows, so this only
            # matches td.NGrams(self.window_size) when window_size == 3.
            return td.Function(
                lambda a, b, c: cnn_operation([a, b, c], filters))

        def cnn_operation(window_sequences, filters):
            # Concatenate the windows on the last axis, multiply by the
            # filters via broadcasting and sum over the feature axis.
            # Presumably equivalent to `windows @ filters` for 2-D input
            # -- TODO confirm the batched shapes.
            windows = tf.concat(window_sequences, axis=-1)
            products = tf.multiply(tf.expand_dims(windows, axis=-1), filters)
            return tf.reduce_sum(products, axis=-2)

        char_emb = td.Embedding(num_buckets=self.char_buckets,
                                num_units_out=self.char_embedding_size)
        cnn_layer = (td.NGrams(self.window_size)
                     >> td.Map(CNN_Window3(filt_w3))
                     >> td.Max())

        # --------- char features
        def charfeature_lookup(c):
            # Character class: 0 lowercase, 1 uppercase, 2 punctuation,
            # 3 anything else. The ascii_* constants exist on both Python 2
            # and 3 and are locale-independent; string.lowercase and
            # string.uppercase were removed in Python 3.
            if c in string.ascii_lowercase:
                return 0
            elif c in string.ascii_uppercase:
                return 1
            elif c in string.punctuation:
                return 2
            else:
                return 3

        char_input = td.Map(td.InputTransform(lambda c: ord(c.lower()))
                            >> td.Scalar('int32')
                            >> char_emb)
        char_features = td.Map(
            td.InputTransform(charfeature_lookup)
            >> td.Scalar(dtype='int32')
            >> td.Embedding(num_buckets=4,
                            num_units_out=self.char_feature_embedding_size))
        # Each word is wrapped in '~' on both sides before the n-gram CNN.
        charlevel = (td.InputTransform(lambda s: ['~'] + [c for c in s] + ['~'])
                     >> td.AllOf(char_input, char_features)
                     >> td.ZipWith(td.Concat())
                     >> cnn_layer)

        # --------- word features
        word_emb = td.Embedding(num_buckets=len(self.word_vocab),
                                num_units_out=self.embedding_size,
                                initializer=self.word_embeddings)

        def wordlookup(w):
            # Index of the lowercased word in the vocabulary; words that are
            # not in the vocabulary map to index 0.
            lowered = w.lower()
            if lowered in self.word_vocab:
                return self.word_vocab.index(lowered)
            return 0

        wordinput = (td.InputTransform(wordlookup)
                     >> td.Scalar(dtype='int32')
                     >> word_emb)

        def wordfeature_lookup(w):
            # Casing class of the word: 0 all lowercase, 1 capitalised,
            # 2 all uppercase, 3 other letters-only mix, 4 anything else.
            if re.match('^[a-z]+$', w):
                return 0
            elif re.match('^[A-Z][a-z]+$', w):
                return 1
            elif re.match('^[A-Z]+$', w):
                return 2
            elif re.match('^[A-Za-z]+$', w):
                return 3
            else:
                return 4

        wordfeature = (td.InputTransform(wordfeature_lookup)
                       >> td.Scalar(dtype='int32')
                       >> td.Embedding(num_buckets=5, num_units_out=32))

        # ----------- bidirectional LSTM over the word sequence
        rnn_fwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_fwd')
        fwdlayer = td.RNN(rnn_fwdcell) >> td.GetItem(0)
        rnn_bwdcell = td.ScopedLayer(
            tf.contrib.rnn.LSTMCell(num_units=self.rnn_dim), 'lstm_bwd')
        # Reverse, run the RNN, then reverse the outputs back.
        bwdlayer = (td.Slice(step=-1)
                    >> td.RNN(rnn_bwdcell)
                    >> td.GetItem(0)
                    >> td.Slice(step=-1))
        rnn_layer = td.AllOf(fwdlayer, bwdlayer) >> td.ZipWith(td.Concat())

        output_layer = td.FC(output_size,
                             input_keep_prob=self.keep_prob,
                             activation=None)

        wordlevel = td.AllOf(wordinput, wordfeature) >> td.Concat()

        # Both branches end in td.Void(): their results are exposed only
        # through the 'y_out' and 'y_true' metrics.
        network = (td.Map(td.AllOf(wordlevel, charlevel) >> td.Concat())
                   >> rnn_layer
                   >> td.Map(output_layer)
                   >> td.Map(td.Metric('y_out'))) >> td.Void()
        groundlabels = td.Map(td.Vector(output_size, dtype=tf.int32)
                              >> td.Metric('y_true')) >> td.Void()

        self.compiler = td.Compiler.create((network, groundlabels))

        self.y_out = self.compiler.metric_tensors['y_out']
        self.y_true = self.compiler.metric_tensors['y_true']
        self.y_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=self.y_out, labels=self.y_true))
        self.y_prob = tf.nn.softmax(self.y_out)
        self.y_true_idx = tf.argmax(self.y_true, axis=-1)
        self.y_pred_idx = tf.argmax(self.y_prob, axis=-1)
        self.y_pred = tf.one_hot(self.y_pred_idx,
                                 depth=output_size,
                                 dtype=tf.int32)

        # The learning rate decays by a factor of self.decay each time
        # epoch_step_op is run (decay_steps is 1).
        epoch_step = tf.Variable(0, trainable=False)
        self.epoch_step_op = tf.assign(epoch_step, epoch_step + 1)
        lrate_decay = tf.train.exponential_decay(
            self.lrate, epoch_step, 1, self.decay)

        if self.optimizer == 'adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=lrate_decay)
        elif self.optimizer == 'adagrad':
            self.opt = tf.train.AdagradOptimizer(
                learning_rate=lrate_decay, initial_accumulator_value=1e-08)
        elif self.optimizer == 'rmsprop':
            self.opt = tf.train.RMSPropOptimizer(
                learning_rate=lrate_decay, epsilon=1e-08)
        else:
            raise Exception(('The optimizer {} is not in list of available '
                             + 'optimizers: default, adam, adagrad, rmsprop.')
                            .format(self.optimizer))

        # Apply the learning-rate multiplier to the word-embedding gradient.
        embeds = [word_emb.weights]
        grads_and_vars = self.opt.compute_gradients(self.y_loss)
        found = 0
        for i, (grad, var) in enumerate(grads_and_vars):
            if var in embeds:
                found += 1
                grad = tf.scalar_mul(self.embedding_factor, grad)
                grads_and_vars[i] = (grad, var)
        assert found == len(embeds)  # internal consistency check
        self.train_step = self.opt.apply_gradients(grads_and_vars)

        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=100)
# NOTE(review): `c` and `input_sequence` are bound outside this fragment; the
# wiring statements below presumably belong inside a `with c.scope():` block
# -- confirm against the full file before re-indenting.

# Build the VAE block and feed it the input sequence.
# NOTE(review): 54 is presumably the token vocabulary size (the example ids
# below range over 0..53) -- confirm against build_VAE's signature.
network_output = build_VAE(Z_SIZE, 54)
network_output.reads(input_sequence)

# Item 0 and item 1 of the VAE output feed the two losses below.
un_normalised_token_probs = td.GetItem(0).reads(network_output)
mus_and_log_sigs = td.GetItem(1).reads(network_output)

# softmax_crossentropy is applied pairwise to (item 0 of the output, input
# element) and the per-element results are averaged.
cross_entropy_loss = td.ZipWith(td.Function(softmax_crossentropy)) >> td.Mean()
cross_entropy_loss.reads(
    un_normalised_token_probs,
    input_sequence
)

# kl_divergence is applied to item 1 of the VAE output.
kl_loss = td.Function(kl_divergence)
kl_loss.reads(mus_and_log_sigs)

# Both losses are exposed as metrics; the composition output itself is Void.
td.Metric('cross_entropy_loss').reads(cross_entropy_loss)
td.Metric('kl_loss').reads(kl_loss)
c.output.reads(td.Void())

# Tokenised version of my code
example_input = np.array([
    1, 2, 51, 16, 4, 17, 52, 3, 53, 16, 5, 38, 6, 37, 6, 37, 6, 37, 6, 38, 6,
    37, 6, 37, 6, 38, 53, 16, 8, 9, 10, 11, 12, 13, 14, 7, 51, 17, 11, 48, 11,
    8, 52, 53, 17, 5, 37, 6, 38, 6, 37, 6, 38, 6, 38, 53, 17, 8, 9, 10, 11, 7,
    51, 9, 26, 51, 20, 9, 9, 52, 52, 0
])