def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, max_length, feature_length)
    # TASK = primary task, CONTEXT = secondary task
    # Create the LSTM cell for the shared layer
    body_lstm_cell, _ = createLSTMCell(self.batch_size, self.body_lstm_size,
                                       self.body_n_layer, forget_bias=0.0)
    # Keep both costs float32 so they can be combined into a joint objective
    context_cost = tf.constant(0.0, dtype=tf.float32)
    task_cost = tf.constant(0.0, dtype=tf.float32)
    if not self.is_multi_task:
        context_output = tf.constant(0)

    with tf.variable_scope("shared_lstm"):
        body_cell_output, last_body_state = tf.nn.dynamic_rnn(
            cell=body_lstm_cell, dtype=tf.float32,
            sequence_length=self.length(x), inputs=x)

    if self.is_multi_task:
        with tf.variable_scope("context_branch"):
            if self.secondary_task == "missing word":
                # Select the last output that is not generated by zero vectors
                last_body_output = self.last_relevant(
                    body_cell_output, self.length(body_cell_output))
                # Feed the last output to the fc layer and make a prediction
                with tf.variable_scope("context_fc"):
                    context_fc_out = fcLayer(
                        x=last_body_output, in_shape=self.body_lstm_size,
                        out_shape=self.context_branch_fc,
                        activation=self.fc_activation, dropout=dropout,
                        is_train=is_train, scope="fc1")
                with tf.variable_scope("context_pred"):
                    context_output, context_logits = predictionLayer(
                        x=context_fc_out, y=y_context,
                        in_shape=self.context_branch_fc,
                        out_shape=y_context.get_shape()[-1].value,
                        activation=self.context_output_activation)
                    context_cost = compute_cost(
                        logit=context_logits, y=y_context, out_type="last_only",
                        max_length=self.max_length, batch_size=self.batch_size,
                        embed_dim=self.feature_length,
                        activation=self.context_output_activation)

            if self.secondary_task == "word generation":
                # Unstack the shared outputs into max_length tensors of shape
                # (batch_size, body_lstm_size) and run the fc layer per step
                context_input = tf.transpose(body_cell_output, [1, 0, 2])
                context_input = tf.reshape(context_input, [-1, self.body_lstm_size])
                context_input_list = tf.split(context_input, self.max_length, 0)
                fc_output_list = []
                with tf.variable_scope("context_fc"):
                    for step in range(self.max_length):
                        if step > 0:
                            tf.get_variable_scope().reuse_variables()
                        fc_out = fcLayer(
                            x=context_input_list[step], in_shape=self.body_lstm_size,
                            out_shape=self.context_branch_fc,
                            activation=self.fc_activation, dropout=dropout,
                            is_train=is_train, scope="fc1")
                        fc_output_list.append(tf.expand_dims(fc_out, axis=1))
                    context_fc_out = tf.concat(fc_output_list, axis=1)
                with tf.variable_scope("context_pred"):
                    context_output, context_logits = predictionLayer(
                        x=context_fc_out, y=y_context,
                        in_shape=self.context_branch_fc,
                        out_shape=y_context.get_shape()[-1].value,
                        activation=self.context_output_activation)
                    context_cost = compute_cost(
                        logit=context_logits, y=y_context, out_type="sequential",
                        max_length=self.max_length, batch_size=self.batch_size,
                        embed_dim=self.feature_length,
                        activation=self.context_output_activation)
                    print "Context cost shape: ", context_cost.get_shape()

    with tf.variable_scope("task_branch"):
        with tf.variable_scope("task_fc"):
            # Select the last output that is not generated by zero vectors
            last_body_output = self.last_relevant(
                body_cell_output, self.length(body_cell_output))
            # Feed the last output to the fc layer and make a prediction
            task_fc_out = fcLayer(
                x=last_body_output, in_shape=self.body_lstm_size,
                out_shape=self.task_branch_fc, activation=self.fc_activation,
                dropout=dropout, is_train=is_train, scope="fc2")
            task_output, task_logits = predictionLayer(
                x=task_fc_out, y=y_task, in_shape=self.task_branch_fc,
                out_shape=y_task.get_shape()[-1].value,
                activation=self.task_output_activation)
            print "Task output shape: ", task_output.get_shape()
            task_cost = compute_cost(
                logit=task_logits, y=y_task, out_type="last_only",
                max_length=self.max_length, batch_size=self.batch_size,
                embed_dim=self.n_classes,
                activation=self.task_output_activation)
    return context_cost, task_cost, task_output, context_output
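# The method above assumes self.length and self.last_relevant for handling
# zero-padded batches; neither is defined in this file. Below is a minimal
# sketch of the standard TF 1.x pattern they appear to follow -- an assumed
# implementation, not the repo's own code.
def length(self, sequence):
    # A time step counts as real input if any feature in its vector is non-zero
    used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=2))
    return tf.cast(tf.reduce_sum(used, axis=1), tf.int32)

def last_relevant(self, output, length):
    # Gather, for each batch element, the output at position length - 1,
    # i.e. the last step that was not produced by zero padding
    batch_size = tf.shape(output)[0]
    max_length = tf.shape(output)[1]
    out_size = int(output.get_shape()[2])
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, out_size])
    return tf.gather(flat, index)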
def buildModel(x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, n_steps, feature_length)
    # TASK = primary task, CONTEXT = secondary task
    # Permute batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of n_steps tensors of shape (batch_size, feature_length)
    x = tf.split(x, n_steps, 0)

    # Create the LSTM cell for the shared layer
    body_lstm_cell, body_state = createLSTMCell(
        batch_size, body_lstm_size, body_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 1
    context_lstm_cell, context_state = createLSTMCell(
        batch_size, context_lstm_size, context_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 2
    task_lstm_cell, task_state = createLSTMCell(
        batch_size, task_lstm_size, task_n_layer, forget_bias=0.0)

    context_cost = tf.constant(0)
    task_cost = tf.constant(0)

    for time_step in range(n_steps):
        with tf.variable_scope("shared_lstm"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (body_cell_output, body_state) = body_lstm_cell(x[time_step], body_state)
        with tf.variable_scope("context_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (context_cell_output, context_state) = context_lstm_cell(
                body_cell_output, context_state)
            with tf.variable_scope("context_fc"):
                if time_step == n_steps - 1:
                    context_fc_out = fcLayer(
                        x=context_cell_output, in_shape=context_lstm_size,
                        out_shape=context_branch_fc, activation=fc_activation,
                        dropout=dropout, is_train=is_train, scope="fc1")
                    context_cost, context_output = predictionLayer(
                        x=context_fc_out, y=y_context, in_shape=context_branch_fc,
                        out_shape=y_context.get_shape()[-1].value,
                        activation=output_activation)
        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (task_cell_output, task_state) = task_lstm_cell(body_cell_output, task_state)
            with tf.variable_scope("task_fc"):
                if time_step == n_steps - 1:
                    task_fc_out = fcLayer(
                        x=task_cell_output, in_shape=task_lstm_size,
                        out_shape=task_branch_fc, activation=fc_activation,
                        dropout=dropout, is_train=is_train, scope="fc2")
                    # in_shape must match the fc output size (was context_branch_fc)
                    task_cost, task_output = predictionLayer(
                        x=task_fc_out, y=y_task, in_shape=task_branch_fc,
                        out_shape=y_task.get_shape()[-1].value,
                        activation=output_activation)
    return context_cost, task_cost, task_output, context_output
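# createLSTMCell is used by every variant in this file but not defined here.
# A minimal sketch, assuming it stacks n_layer basic LSTM cells and returns
# the cell together with its zero state; the signature is inferred from the
# call sites above, not confirmed by the source:
def createLSTMCell(batch_size, lstm_size, n_layer, forget_bias=0.0):
    cells = [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=forget_bias)
             for _ in range(n_layer)]
    cell = tf.contrib.rnn.MultiRNNCell(cells) if n_layer > 1 else cells[0]
    # The initial state is consumed by the statically unrolled variants
    state = cell.zero_state(batch_size, tf.float32)
    return cell, state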
def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, max_length, feature_length)
    # TASK = primary task, CONTEXT = secondary task
    # Create the LSTM cell for the shared layer
    body_lstm_cell, _ = createLSTMCell(self.batch_size, self.body_lstm_size,
                                       self.body_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 1
    context_lstm_cell, _ = createLSTMCell(self.batch_size, self.context_lstm_size,
                                          self.context_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 2
    task_lstm_cell, _ = createLSTMCell(self.batch_size, self.task_lstm_size,
                                       self.task_n_layer, forget_bias=0.0)

    context_cost = tf.constant(0)
    task_cost = tf.constant(0)

    with tf.variable_scope("shared_lstm"):
        body_cell_output, last_body_state = tf.nn.dynamic_rnn(
            cell=body_lstm_cell, dtype=tf.float32,
            sequence_length=self.length(x), inputs=x)

    with tf.variable_scope("context_branch"):
        context_cell_output, last_context_state = tf.nn.dynamic_rnn(
            cell=context_lstm_cell, dtype=tf.float32,
            sequence_length=self.length(body_cell_output), inputs=body_cell_output)
        # The output from the LSTM is (batch_size, max_length, out_size)
        with tf.variable_scope("context_fc"):
            # Select the last output that is not generated by zero vectors
            last_context_output = self.last_relevant(
                context_cell_output, self.length(context_cell_output))
            # Feed the last output to the fc layer and make a prediction;
            # use the dropout argument passed in (was self.dropout)
            context_fc_out = fcLayer(
                x=last_context_output, in_shape=self.context_lstm_size,
                out_shape=self.context_branch_fc, activation=self.fc_activation,
                dropout=dropout, is_train=is_train, scope="fc1")
            context_cost, context_output = predictionLayer(
                x=context_fc_out, y=y_context, in_shape=self.context_branch_fc,
                out_shape=y_context.get_shape()[-1].value,
                activation=self.output_activation)

    with tf.variable_scope("task_branch"):
        task_cell_output, last_task_state = tf.nn.dynamic_rnn(
            cell=task_lstm_cell, dtype=tf.float32,
            sequence_length=self.length(body_cell_output), inputs=body_cell_output)
        with tf.variable_scope("task_fc"):
            # Select the last output that is not generated by zero vectors
            last_task_output = self.last_relevant(
                task_cell_output, self.length(task_cell_output))
            # Feed the last output to the fc layer and make a prediction
            task_fc_out = fcLayer(
                x=last_task_output, in_shape=self.task_lstm_size,
                out_shape=self.task_branch_fc, activation=self.fc_activation,
                dropout=dropout, is_train=is_train, scope="fc2")
            # in_shape must match the fc output size (was self.context_branch_fc)
            task_cost, task_output = predictionLayer(
                x=task_fc_out, y=y_task, in_shape=self.task_branch_fc,
                out_shape=y_task.get_shape()[-1].value,
                activation=self.output_activation)
    return context_cost, task_cost, task_output, context_output
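# fcLayer is assumed to be a fully connected layer with train-time dropout.
# A sketch consistent with the call sites above; the variable names, Xavier
# initialization, and the tf.cond dropout gating are assumptions, and is_train
# is assumed to be a boolean tensor:
def fcLayer(x, in_shape, out_shape, activation, dropout, is_train, scope):
    with tf.variable_scope(scope):
        W = tf.get_variable("W", [in_shape, out_shape],
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable("b", [out_shape], initializer=tf.zeros_initializer())
        out = activation(tf.matmul(x, W) + b)
        # Apply dropout only while training
        out = tf.cond(is_train,
                      lambda: tf.nn.dropout(out, keep_prob=1.0 - dropout),
                      lambda: out)
    return out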
def buildModel(x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, n_steps, feature_length)
    # TASK = primary task, CONTEXT = secondary task
    # Permute batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of n_steps tensors of shape (batch_size, feature_length)
    x = tf.split(x, n_steps, 0)

    # Create the LSTM cell for the shared layer
    body_lstm_cell, body_state = createLSTMCell(
        batch_size, body_lstm_size, body_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 1
    context_lstm_cell, context_state = createLSTMCell(
        batch_size, context_lstm_size, context_n_layer, forget_bias=0.0)
    # Create the LSTM cell for branch 2
    task_lstm_cell, task_state = createLSTMCell(
        batch_size, task_lstm_size, task_n_layer, forget_bias=0.0)

    context_cost = tf.constant(0)
    task_cost = tf.constant(0)

    # IMPLEMENTATION NOTES
    # - The indentation in the mcrnn reference code is unclear.
    # - context_output cannot be fed to the next layer directly, so separate
    #   for-loops would be needed.
    # - Values need to be cast to float32.
    # - We should try both the top and bottom outputs as our targets.
    for time_step in range(n_steps):
        # First, construct the context LSTM
        with tf.variable_scope("context_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (context_cell_output, context_state) = context_lstm_cell(
                x[time_step], context_state)
            with tf.variable_scope("context_fc"):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                context_fc_out = fcLayer(
                    x=context_cell_output, in_shape=context_lstm_size,
                    out_shape=context_branch_fc, activation=fc_activation,
                    dropout=dropout, is_train=is_train, scope="fc1")
                context_cost, context_output = predictionLayer(
                    x=context_fc_out, y=y_context, in_shape=context_branch_fc,
                    out_shape=y_context.get_shape()[-1].value,
                    activation=output_activation)
        # Then build the body; its input could alternatively be the
        # concatenation of the text and the context output
        with tf.variable_scope("body_lstm"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            # body_input = tf.concat([x[time_step], context_cell_output], 1)
            (body_cell_output, body_state) = body_lstm_cell(x[time_step], body_state)
        # Finally, build the output task cell
        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            task_input = tf.concat([body_cell_output, context_cell_output], 1)
            (task_cell_output, task_state) = task_lstm_cell(task_input, task_state)
            with tf.variable_scope("task_fc"):
                if time_step == n_steps - 1:
                    task_fc_out = fcLayer(
                        x=task_cell_output, in_shape=task_lstm_size,
                        out_shape=task_branch_fc, activation=fc_activation,
                        dropout=dropout, is_train=is_train, scope="fc2")
                    # in_shape must match the fc output size (was context_branch_fc)
                    task_cost, task_output = predictionLayer(
                        x=task_fc_out, y=y_task, in_shape=task_branch_fc,
                        out_shape=y_task.get_shape()[-1].value,
                        activation=output_activation)
    return context_cost, task_cost, task_output, context_output
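# predictionLayer in these older variants returns (cost, output) in one call.
# A minimal sketch, assuming a single affine layer with a softmax cross-entropy
# loss; the actual loss presumably depends on output_activation, which this
# sketch ignores:
def predictionLayer(x, y, in_shape, out_shape, activation):
    W = tf.get_variable("pred_W", [in_shape, out_shape],
                        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.get_variable("pred_b", [out_shape], initializer=tf.zeros_initializer())
    logits = tf.matmul(x, W) + b
    output = activation(logits)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    return cost, output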
def buildModel(x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, n_steps, feature_length)
    # Permute batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of n_steps tensors of shape (batch_size, feature_length)
    x = tf.split(x, n_steps, 0)

    # Create the LSTM cell for the shared layer
    lstm_cell_0, state_0 = createLSTMCell(batch_size, lstm_size_0, n_layer_0,
                                          forget_bias=0.0)
    # Create the LSTM cell for branch 1 (unused in this task-only variant)
    lstm_cell_1, state_1 = createLSTMCell(batch_size, lstm_size_1, n_layer_1,
                                          forget_bias=0.0)
    # Create the LSTM cell for branch 2
    task_lstm_cell, task_state = createLSTMCell(batch_size, lstm_size_2, n_layer_2,
                                                forget_bias=0.0)

    cost2 = tf.constant(0)

    for time_step in range(n_steps):
        with tf.variable_scope("SharedLSTM"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            # Batch-normalize the input before the shared LSTM
            shared_lstm_bn = tf.contrib.layers.batch_norm(
                x[time_step], center=True, scale=True, is_training=True, scope='bn1')
            (cell_output_0, state_0) = lstm_cell_0(shared_lstm_bn, state_0)
        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            task_lstm_bn = tf.contrib.layers.batch_norm(
                cell_output_0, center=True, scale=True, is_training=True, scope='bn2')
            (task_cell_output, task_state) = task_lstm_cell(task_lstm_bn, task_state)
            with tf.variable_scope("Branch_task_fc"):
                if time_step == n_steps - 1:
                    task_fc_bn = tf.contrib.layers.batch_norm(
                        task_cell_output, center=True, scale=True,
                        is_training=True, scope='bn3')
                    fc_out2 = fcLayer(x=task_fc_bn, in_shape=lstm_size_2,
                                      out_shape=branch2_fc, activation=fc_activation,
                                      dropout=dropout, is_train=is_train, scope="fc2")
                    task_pred_bn = tf.contrib.layers.batch_norm(
                        fc_out2, center=True, scale=True, is_training=True, scope='bn4')
                    # in_shape must match the fc output size (was branch1_fc)
                    cost2, output2 = predictionLayer(
                        x=task_pred_bn, y=y_task, in_shape=branch2_fc,
                        out_shape=y_task.get_shape()[-1].value,
                        activation=output_activation)
    return cost2, output2
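# A caveat on the batch_norm calls above: is_training is hard-coded to True,
# and tf.contrib.layers.batch_norm registers its moving-average updates in
# tf.GraphKeys.UPDATE_OPS, which must be run alongside the train step or the
# inference-time statistics never update. A sketch of the usual wiring, with
# is_training as a placeholder (an assumption, not in the source):
is_training = tf.placeholder(tf.bool, name="is_training")
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost2)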
def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, max_length, feature_length)
    # TASK = primary task, CONTEXT = secondary task
    context_cost = tf.constant(0.0, dtype=tf.float32)
    task_cost = tf.constant(0.0, dtype=tf.float32)

    # Create the forward cell for all LSTMs
    body_lstm_cell, _ = createLSTMCell(self.batch_size, self.body_lstm_size,
                                       self.body_n_layer, forget_bias=0.0)
    context_lstm_cell, _ = createLSTMCell(self.batch_size, self.context_lstm_size,
                                          self.context_n_layer, forget_bias=0.0)
    task_lstm_cell, _ = createLSTMCell(self.batch_size, self.task_lstm_size,
                                       self.task_n_layer, forget_bias=0.0)

    # Default the backward cells to None so the unidirectional path does not
    # reference undefined names
    body_lstm_cell_bw = context_lstm_cell_bw = task_lstm_cell_bw = None
    if self.is_bidirectional:
        # Create the backward cell for each LSTM (only the shared layer's is
        # consumed below; the branch RNNs run forward-only)
        body_lstm_cell_bw, _ = createLSTMCell(self.batch_size, self.body_lstm_size,
                                              self.body_n_layer, forget_bias=0.0)
        context_lstm_cell_bw, _ = createLSTMCell(self.batch_size, self.context_lstm_size,
                                                 self.context_n_layer, forget_bias=0.0)
        task_lstm_cell_bw, _ = createLSTMCell(self.batch_size, self.task_lstm_size,
                                              self.task_n_layer, forget_bias=0.0)

    if not self.is_multi_task:
        context_output = tf.constant(0)

    with tf.variable_scope("shared_lstm"):
        body_cell_output, last_body_state = self.rnn_layer(
            fw_cell=body_lstm_cell, bw_cell=body_lstm_cell_bw,
            rnn_inputs=x, is_bidirectional=self.is_bidirectional)

    if self.is_multi_task:
        with tf.variable_scope("context_branch"):
            # The output of a bidirectional LSTM is a list [fw_output, bw_output],
            # each of size (batch_size, max_length, out_size)
            if self.is_bidirectional:
                # Concatenate both directions along feature axis 2 (e.g. 300 -> 600)
                context_input = tf.concat(body_cell_output, axis=2)
            else:
                context_input = body_cell_output
            context_cell_output, last_context_state = self.rnn_layer(
                fw_cell=context_lstm_cell, bw_cell=None, rnn_inputs=context_input)
            # The output from the LSTM is (batch_size, max_length, out_size)

            if self.secondary_task == "missing word":
                # Select the last output that is not generated by zero vectors
                last_context_output = self.last_relevant(
                    context_cell_output, self.length(context_cell_output))
                # Feed the last output to the fc layer and make a prediction
                with tf.variable_scope("context_fc"):
                    context_fc_out = fcLayer(
                        x=last_context_output,
                        in_shape=self.context_fc_input_size,
                        out_shape=self.context_branch_fc,
                        activation=self.fc_activation,
                        dropout=dropout, is_train=is_train, scope="fc1")
                with tf.variable_scope("context_pred"):
                    context_output, context_logits = predictionLayer(
                        x=context_fc_out, y=y_context,
                        in_shape=self.context_branch_fc,
                        out_shape=y_context.get_shape()[-1].value,
                        activation=self.context_output_activation)
                    context_cost = compute_cost(
                        logit=context_logits, y=y_context, out_type="last_only",
                        max_length=self.max_length, batch_size=self.batch_size,
                        embed_dim=self.feature_length,
                        activation=self.context_output_activation)

            if self.secondary_task == "word generation":
                context_cell_output = tf.transpose(context_cell_output, [1, 0, 2])
                context_cell_output = tf.reshape(context_cell_output,
                                                 [-1, self.context_lstm_size])
                context_output_list = tf.split(context_cell_output, self.max_length, 0)
                fc_output_list = []
                with tf.variable_scope("context_fc"):
                    for step in range(self.max_length):
                        if step > 0:
                            tf.get_variable_scope().reuse_variables()
                        fc_out = fcLayer(
                            x=context_output_list[step],
                            in_shape=self.context_fc_input_size,
                            out_shape=self.context_branch_fc,
                            activation=self.fc_activation,
                            dropout=dropout, is_train=is_train, scope="fc1")
                        fc_output_list.append(tf.expand_dims(fc_out, axis=1))
                    context_fc_out = tf.concat(fc_output_list, axis=1)
                    print "context fc output shape before transpose: ", context_fc_out.get_shape()
                with tf.variable_scope("context_pred"):
                    context_output, context_logits = predictionLayer(
                        x=context_fc_out, y=y_context,
                        in_shape=self.context_branch_fc,
                        out_shape=y_context.get_shape()[-1].value,
                        activation=self.context_output_activation)
                    print "Context prediction output shape: ", context_output.get_shape()
                    context_cost = compute_cost(
                        logit=context_logits, y=y_context, out_type="sequential",
                        max_length=self.max_length, batch_size=self.batch_size,
                        embed_dim=self.feature_length,
                        activation=self.context_output_activation)
                    print "Context cost shape: ", context_cost.get_shape()

    with tf.variable_scope("task_branch"):
        if self.is_bidirectional:
            task_input = tf.concat(body_cell_output, axis=2)
        else:
            task_input = body_cell_output
        task_cell_output, last_task_state = self.rnn_layer(
            fw_cell=task_lstm_cell, bw_cell=None, rnn_inputs=task_input)
        with tf.variable_scope("task_fc"):
            # Select the last output that is not generated by zero vectors
            last_task_output = self.last_relevant(
                task_cell_output, self.length(task_cell_output))
            # Feed the last output to the fc layer and make a prediction
            task_fc_out = fcLayer(
                x=last_task_output, in_shape=self.task_fc_input_size,
                out_shape=self.task_branch_fc, activation=self.fc_activation,
                dropout=dropout, is_train=is_train, scope="fc2")
            # in_shape must match the fc output size (was self.context_branch_fc)
            task_output, task_logits = predictionLayer(
                x=task_fc_out, y=y_task, in_shape=self.task_branch_fc,
                out_shape=y_task.get_shape()[-1].value,
                activation=self.task_output_activation)
            task_cost = compute_cost(
                logit=task_logits, y=y_task, out_type="last_only",
                max_length=self.max_length, batch_size=self.batch_size,
                embed_dim=self.feature_length,
                activation=self.task_output_activation)
    return context_cost, task_cost, task_output, context_output
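# self.rnn_layer wraps the uni/bidirectional dynamic RNN call used throughout
# the bidirectional variant. A minimal sketch inferred from the call sites;
# the exact signature and defaults are assumptions:
def rnn_layer(self, fw_cell, bw_cell, rnn_inputs, is_bidirectional=False):
    if is_bidirectional:
        # Returns (outputs, states), where outputs is the (fw_output, bw_output)
        # pair that callers concatenate along axis 2
        return tf.nn.bidirectional_dynamic_rnn(
            cell_fw=fw_cell, cell_bw=bw_cell, inputs=rnn_inputs,
            sequence_length=self.length(rnn_inputs), dtype=tf.float32)
    return tf.nn.dynamic_rnn(cell=fw_cell, inputs=rnn_inputs,
                             sequence_length=self.length(rnn_inputs),
                             dtype=tf.float32)

# A caller would presumably combine the two returned costs into one objective,
# e.g. with a weighting hyperparameter alpha (alpha and the optimizer choice
# are assumptions, not part of the source):
context_cost, task_cost, task_output, context_output = model.buildModel(
    x, y_context, y_task, is_train, dropout)
joint_cost = alpha * task_cost + (1.0 - alpha) * context_cost
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(joint_cost)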