Example 1
def buildModel(x, y_context, y_task, is_train, dropout, scope="multiTask"):
    # Assume the input shape is (batch_size, n_steps, feature_length)

    #TASK = primary task, CONTEXT = secondary task

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    print(x.get_shape())
    # Reshaping to (n_steps*batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of "n_steps" tensors of shape (batch_size, feature_length)
    print(x.get_shape())
    x = tf.split(x, n_steps, 0)
    print(x)

    # Create lstm cell for the shared layer
    body_lstm_cell, body_state = createLSTMCell(batch_size, body_lstm_size, body_n_layer, forget_bias=0.0)
    # Create lstm cell for branch 1
    context_lstm_cell, context_state = createLSTMCell(batch_size, context_lstm_size, context_n_layer, forget_bias=0.0)
    # Create lstm cells for branch 2
    task_lstm_cell, task_state = createLSTMCell(batch_size, task_lstm_size, task_n_layer, forget_bias=0.0)

    #combined_cost = tf.constant(0)
    context_cost = tf.constant(0)
    task_cost = tf.constant(0)

    for time_step in range(n_steps):
        with tf.variable_scope("shared_lstm"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (body_cell_output, body_state) = body_lstm_cell(x[time_step], body_state)

        with tf.variable_scope("context_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (context_cell_output, context_state) = context_lstm_cell(body_cell_output, context_state)

        with tf.variable_scope("context_fc"):
            if time_step == n_steps - 1:
                context_fc_out = fcLayer(x=context_cell_output, in_shape=context_lstm_size, out_shape=context_branch_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc1")
                context_cost, context_output = predictionLayer(x=context_fc_out, y=y_context, in_shape=context_branch_fc, out_shape=y_context.get_shape()[-1].value, activation=output_activation)

        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (task_cell_output, task_state) = task_lstm_cell(body_cell_output, task_state)
        with tf.variable_scope("task_fc"):
            if time_step == n_steps - 1:
                task_fc_out = fcLayer(x=task_cell_output, in_shape=task_lstm_size, out_shape=task_branch_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc2")
                task_cost, task_output = predictionLayer(x=task_fc_out, y=y_task, in_shape=task_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=output_activation)

    return context_cost, task_cost, task_output, context_output
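
The helper createLSTMCell is used by all six examples but defined in none of them; it is expected to return both the cell and a zero-filled initial state. A minimal sketch of what it might look like (TF 1.x; the BasicLSTMCell stacking is an assumption, not the original implementation):

import tensorflow as tf

def createLSTMCell(batch_size, lstm_size, n_layer, forget_bias=0.0):
    # Stack n_layer LSTM cells and build the matching zero-filled initial state.
    cells = [tf.nn.rnn_cell.BasicLSTMCell(lstm_size, forget_bias=forget_bias)
             for _ in range(n_layer)]
    cell = tf.nn.rnn_cell.MultiRNNCell(cells) if n_layer > 1 else cells[0]
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    return cell, init_state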
Example 2
    def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask"):

        # Assume the input shape is (batch_size, max_length, feature_length)

        # TASK = primary task, CONTEXT = secondary task

        # Create lstm cell for the shared layer
        body_lstm_cell, _ = createLSTMCell(self.batch_size, self.body_lstm_size, self.body_n_layer, forget_bias=0.0)

        context_cost = tf.constant(0.0, dtype=tf.float32)
        task_cost = tf.constant(0.0, dtype=tf.float32)

        if not self.is_multi_task:
            context_output = tf.constant(0)

        with tf.variable_scope("shared_lstm"):
            body_cell_output, last_body_state = tf.nn.dynamic_rnn(cell=body_lstm_cell, dtype=tf.float32, sequence_length=self.length(x), inputs=x)

        if self.is_multi_task:
            with tf.variable_scope("context_branch"):
                # Select the last output that is not generated by zero vectors
                if self.secondary_task == "missing word":
                    last_body_output = self.last_relevant(body_cell_output, self.length(body_cell_output))
                    # feed the last output to the fc layer and make prediction
                    with tf.variable_scope("context_fc"):
                        context_fc_out = fcLayer(x=last_body_output, in_shape=self.body_lstm_size, out_shape=self.context_branch_fc, activation=self.fc_activation, dropout=dropout, is_train=is_train, scope="fc1")
                    with tf.variable_scope("context_pred"):
                        context_output, context_logits = predictionLayer(x=context_fc_out, y=y_context, in_shape=self.context_branch_fc, out_shape=y_context.get_shape()[-1].value, activation=self.context_output_activation)
                        context_cost = compute_cost(logit=context_logits, y=y_context, out_type="last_only", max_length=self.max_length, batch_size=self.batch_size, embed_dim=self.feature_length, activation=self.context_output_activation)

                if self.secondary_task == "word generation":
                    context_input = tf.transpose(body_cell_output, [1, 0, 2])
                    context_input = tf.reshape(context_input, [-1, self.body_lstm_size])
                    context_input_list = tf.split(context_input, self.max_length, 0)
                    fc_output_list = []
                    with tf.variable_scope("context_fc"):
                        for step in range(self.max_length):
                            if step > 0:
                                tf.get_variable_scope().reuse_variables()
                            fc_out = fcLayer(x=context_input_list[step], in_shape=self.body_lstm_size, out_shape=self.context_branch_fc, activation=self.fc_activation, dropout=dropout, is_train=is_train, scope="fc1")
                            fc_output_list.append(tf.expand_dims(fc_out, axis=1))
                        context_fc_out = tf.concat(fc_output_list, axis=1)
                    with tf.variable_scope("context_pred"):
                        context_output, context_logits = predictionLayer(x=context_fc_out, y=y_context, in_shape=self.context_branch_fc, out_shape=y_context.get_shape()[-1].value, activation=self.context_output_activation)
                        context_cost = compute_cost(logit=context_logits, y=y_context, out_type="sequential", max_length=self.max_length, batch_size=self.batch_size, embed_dim=self.feature_length, activation=self.context_output_activation)

                print("Context cost shape: ", context_cost.get_shape())

        with tf.variable_scope("task_branch"):
            with tf.variable_scope("task_fc"):
                # Select the last output that is not generated by zero vectors
                last_body_output = self.last_relevant(body_cell_output, self.length(body_cell_output))
                # feed the last output to the fc layer and make prediction
                task_fc_out = fcLayer(x=last_body_output, in_shape=self.body_lstm_size, out_shape=self.task_branch_fc, activation=self.fc_activation, dropout=dropout, is_train=is_train, scope="fc2")
                task_output, task_logits = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.task_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.task_output_activation)
                print("Task output shape: ", task_output.get_shape())
                task_cost = compute_cost(logit=task_logits, y=y_task, out_type="last_only", max_length=self.max_length, batch_size=self.batch_size, embed_dim=self.n_classes, activation=self.task_output_activation)

        return context_cost, task_cost, task_output, context_output
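
Examples 2, 3 and 6 depend on self.length and self.last_relevant to cope with zero-padded batches in tf.nn.dynamic_rnn. Neither is defined here; a sketch following the common masking idiom (the exact bodies are assumptions):

def length(self, sequence):
    # A time step counts as real data if any feature in it is non-zero.
    used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=2))
    return tf.cast(tf.reduce_sum(used, axis=1), tf.int32)

def last_relevant(self, output, length):
    # Gather the RNN output at the last non-padded step of each sequence.
    batch_size = tf.shape(output)[0]
    max_length = tf.shape(output)[1]
    out_size = int(output.get_shape()[2])
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, out_size])
    return tf.gather(flat, index)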
Example 3
    def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask"):

        # Assume the input shape is (batch_size, max_length, feature_length)

        # TASK = primary task, CONTEXT = secondary task

        # Create lstm cell for the shared layer
        body_lstm_cell, _ = createLSTMCell(self.batch_size, self.body_lstm_size, self.body_n_layer, forget_bias=0.0)
        # Create lstm cell for branch 1
        context_lstm_cell, _ = createLSTMCell(self.batch_size, self.context_lstm_size, self.context_n_layer, forget_bias=0.0)
        # Create lstm cells for branch 2
        task_lstm_cell, _ = createLSTMCell(self.batch_size, self.task_lstm_size, self.task_n_layer, forget_bias=0.0)

        context_cost = tf.constant(0)
        task_cost = tf.constant(0)

        with tf.variable_scope("shared_lstm"):
            body_cell_output, last_body_state = tf.nn.dynamic_rnn(cell=body_lstm_cell, dtype=tf.float32, sequence_length=self.length(x), inputs=x)

        with tf.variable_scope("context_branch"):
            context_cell_output, last_context_state = tf.nn.dynamic_rnn(cell=context_lstm_cell, dtype=tf.float32, sequence_length=self.length(body_cell_output), inputs=body_cell_output)

        # The output from LSTMs will be (batch_size, max_length, out_size)
        with tf.variable_scope("context_fc"):
            # Select the last output that is not generated by zero vectors
            last_context_output = self.last_relevant(context_cell_output, self.length(context_cell_output))
            # feed the last output to the fc layer and make prediction
            context_fc_out = fcLayer(x=last_context_output, in_shape=self.context_lstm_size, out_shape=self.context_branch_fc, activation=self.fc_activation, dropout=self.dropout, is_train=is_train, scope="fc1")
            context_cost, context_output = predictionLayer(x=context_fc_out, y=y_context, in_shape=self.context_branch_fc, out_shape=y_context.get_shape()[-1].value, activation=self.output_activation)

        with tf.variable_scope("task_branch"):
            task_cell_output, last_task_state = tf.nn.dynamic_rnn(cell=task_lstm_cell, dtype=tf.float32, sequence_length=self.length(body_cell_output), inputs=body_cell_output)

        with tf.variable_scope("task_fc"):
            # Select the last output that is not generated by zero vectors
            last_task_output = self.last_relevant(task_cell_output, self.length(task_cell_output))
            # feed the last output to the fc layer and make prediction
            task_fc_out = fcLayer(x=last_task_output, in_shape=self.task_lstm_size, out_shape=self.task_branch_fc, activation=self.fc_activation, dropout=self.dropout, is_train=is_train, scope="fc2")
            task_cost, task_output = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.task_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.output_activation)

        return context_cost, task_cost, task_output, context_output
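
fcLayer and predictionLayer are also external helpers. In examples 1, 3, 4 and 5 predictionLayer returns (cost, output); in examples 2 and 6 it returns (output, logits) and the cost comes from a separate compute_cost call. A minimal sketch of the first variant (the initializers, the dropout handling, and the mean-squared-error loss are all assumptions):

def fcLayer(x, in_shape, out_shape, activation, dropout, is_train, scope):
    # Fully connected layer; dropout is applied only while training.
    with tf.variable_scope(scope):
        w = tf.get_variable("w", [in_shape, out_shape])
        b = tf.get_variable("b", [out_shape], initializer=tf.zeros_initializer())
        out = activation(tf.matmul(x, w) + b)
        if is_train:
            out = tf.nn.dropout(out, keep_prob=1.0 - dropout)
        return out

def predictionLayer(x, y, in_shape, out_shape, activation):
    # Linear projection plus output activation; returns (cost, prediction).
    w = tf.get_variable("w_pred", [in_shape, out_shape])
    b = tf.get_variable("b_pred", [out_shape], initializer=tf.zeros_initializer())
    prediction = activation(tf.matmul(x, w) + b)
    cost = tf.reduce_mean(tf.square(prediction - y))  # MSE loss is an assumption
    return cost, prediction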
Example 4
def buildModel(x, y_context, y_task, is_train, scope="multiTask"):

    # Assume the input shape is (batch_size, n_steps, feature_length)

    #TASK = primary task, CONTEXT = secondary task

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of "n_steps" tensors of shape (batch_size, feature_length)
    x = tf.split(x, n_steps, 0)

    # Create lstm cell for the shared layer
    body_lstm_cell, body_state = createLSTMCell(batch_size,
                                                body_lstm_size,
                                                body_n_layer,
                                                forget_bias=0.0)
    # Create lstm cell for branch 1
    context_lstm_cell, context_state = createLSTMCell(batch_size,
                                                      context_lstm_size,
                                                      context_n_layer,
                                                      forget_bias=0.0)
    # Create lstm cells for branch 2
    task_lstm_cell, task_state = createLSTMCell(batch_size,
                                                task_lstm_size,
                                                task_n_layer,
                                                forget_bias=0.0)

    #combined_cost = tf.constant(0)
    context_cost = tf.constant(0)
    task_cost = tf.constant(0)

    # IMPLEMENTATION NOTES
    # It is unclear how the original mcrnn code compiles; its indentation is inconsistent.
    # context_output cannot be passed on to the next layer, so separate unrolling loops are needed.
    # Values need to be cast to float32.
    # Both the top and bottom outputs should be tried as targets.
    for time_step in range(n_steps):
        # first, we construct the context lstm
        print(time_step)
        with tf.variable_scope("context_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (context_cell_output,
             context_state) = context_lstm_cell(x[time_step], context_state)
        with tf.variable_scope("context_fc"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            context_fc_out = fcLayer(x=context_cell_output,
                                     in_shape=context_lstm_size,
                                     out_shape=context_branch_fc,
                                     activation=fc_activation,
                                     dropout=dropout,
                                     is_train=is_train,
                                     scope="fc1")
            context_cost, context_output = predictionLayer(
                x=context_fc_out,
                y=y_context,
                in_shape=context_branch_fc,
                out_shape=y_context.get_shape()[-1].value,
                activation=output_activation)

        # then make the body where the input is the concatenation of both text and context_output
        with tf.variable_scope("body_lstm"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            #body_input = tf.concat([x[time_step],context_cell_output], 1)
            (body_cell_output,
             body_state) = body_lstm_cell(x[time_step], body_state)

        # finally make the output task cell.
        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()

            task_input = tf.concat([body_cell_output, context_cell_output], 1)
            (task_cell_output,
             task_state) = task_lstm_cell(task_input, task_state)

        with tf.variable_scope("task_fc"):
            if time_step == n_steps - 1:
                task_fc_out = fcLayer(x=task_cell_output,
                                      in_shape=task_lstm_size,
                                      out_shape=task_branch_fc,
                                      activation=fc_activation,
                                      dropout=dropout,
                                      is_train=is_train,
                                      scope="fc2")
                task_cost, task_output = predictionLayer(
                    x=task_fc_out,
                    y=y_task,
                    in_shape=task_branch_fc,
                    out_shape=y_task.get_shape()[-1].value,
                    activation=output_activation)

    return context_cost, task_cost, task_output, context_output
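
None of the examples show how buildModel is wired into a training graph. A hypothetical driver for this graph-level variant (the placeholder shapes, context_dim, n_classes, the 0.5 loss weight, and the optimizer are all assumptions, not part of the original code):

x = tf.placeholder(tf.float32, [batch_size, n_steps, feature_length])
y_context = tf.placeholder(tf.float32, [batch_size, context_dim])  # context_dim is hypothetical
y_task = tf.placeholder(tf.float32, [batch_size, n_classes])       # n_classes is hypothetical

context_cost, task_cost, task_output, context_output = buildModel(
    x, y_context, y_task, is_train=True)

# Sum the two losses; the 0.5 weight on the secondary task is an assumption.
total_cost = tf.cast(task_cost, tf.float32) + 0.5 * tf.cast(context_cost, tf.float32)
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(total_cost)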
Example 5
def buildModel(x, y_context, y_task, is_train, scope="multiTask"):

    # Assume the input shape is (batch_size, n_steps, feature_length)

    # Permuting batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    print(x.get_shape())
    # Reshaping to (n_steps*batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    # Split to get a list of "n_steps" tensors of shape (batch_size, feature_length)
    print(x.get_shape())
    x = tf.split(x, n_steps, 0)

    # Create lstm cell for the shared layer
    lstm_cell_0, state_0 = createLSTMCell(batch_size,
                                          lstm_size_0,
                                          n_layer_0,
                                          forget_bias=0.0)
    # Create lstm cell for branch 1
    lstm_cell_1, state_1 = createLSTMCell(batch_size,
                                          lstm_size_1,
                                          n_layer_1,
                                          forget_bias=0.0)
    # Create lstm cells for branch 2
    task_lstm_cell, task_state = createLSTMCell(batch_size,
                                                lstm_size_2,
                                                n_layer_2,
                                                forget_bias=0.0)

    combined_cost = tf.constant(0)
    cost1 = tf.constant(0)
    cost2 = tf.constant(0)

    for time_step in range(n_steps):
        with tf.variable_scope("SharedLSTM"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            shared_lstm_bn = tf.contrib.layers.batch_norm(x[time_step],
                                                          center=True,
                                                          scale=True,
                                                          is_training=True,
                                                          scope='bn1')
            (cell_output_0, state_0) = lstm_cell_0(shared_lstm_bn, state_0)

        with tf.variable_scope("task_branch"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            task_lstm_bn = tf.contrib.layers.batch_norm(cell_output_0,
                                                        center=True,
                                                        scale=True,
                                                        is_training=True,
                                                        scope='bn2')
            (task_cell_output,
             task_state) = task_lstm_cell(task_lstm_bn, task_state)

        with tf.variable_scope("Branch_task_fc"):
            if time_step == n_steps - 1:
                task_fc_bn = tf.contrib.layers.batch_norm(task_cell_output,
                                                          center=True,
                                                          scale=True,
                                                          is_training=True,
                                                          scope='bn3')
                fc_out2 = fcLayer(x=task_fc_bn,
                                  in_shape=lstm_size_2,
                                  out_shape=branch2_fc,
                                  activation=fc_activation,
                                  dropout=dropout,
                                  is_train=is_train,
                                  scope="fc2")
                task_pred_bn = tf.contrib.layers.batch_norm(fc_out2,
                                                            center=True,
                                                            scale=True,
                                                            is_training=True,
                                                            scope='bn4')
                cost2, output2 = predictionLayer(
                    x=task_pred_bn,
                    y=y_task,
                    in_shape=branch2_fc,
                    out_shape=y_task.get_shape()[-1].value,
                    activation=output_activation)

    return cost2, output2
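
Note that every tf.contrib.layers.batch_norm call in this example hard-codes is_training=True, so batch statistics stay active even at evaluation time. A common remedy (a sketch, not part of the original code; x_step stands for whichever tensor is being normalized) is to thread a boolean placeholder through instead:

is_training_ph = tf.placeholder(tf.bool, name="is_training")
normed = tf.contrib.layers.batch_norm(x_step,  # x_step is a stand-in name
                                      center=True,
                                      scale=True,
                                      is_training=is_training_ph,
                                      scope="bn1")
# feed_dict={is_training_ph: True} during training, False at evaluation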
Example 6
    def buildModel(self,
                   x,
                   y_context,
                   y_task,
                   is_train,
                   dropout,
                   scope="multiTask"):

        context_cost = tf.constant(0.0, dtype=tf.float32)
        task_cost = tf.constant(0.0, dtype=tf.float32)

        # Assume the input shape is (batch_size, max_length, feature_length)
        # TASK = primary task, CONTEXT = secondary task

        # Create the forward cell for all LSTMs
        body_lstm_cell, _ = createLSTMCell(self.batch_size,
                                           self.body_lstm_size,
                                           self.body_n_layer,
                                           forget_bias=0.0)
        context_lstm_cell, _ = createLSTMCell(self.batch_size,
                                              self.context_lstm_size,
                                              self.context_n_layer,
                                              forget_bias=0.0)
        task_lstm_cell, _ = createLSTMCell(self.batch_size,
                                           self.task_lstm_size,
                                           self.task_n_layer,
                                           forget_bias=0.0)

        # The backward cells are defined only for the bidirectional case
        body_lstm_cell_bw = None
        if self.is_bidirectional:
            # Create the backward cell for LSTM
            body_lstm_cell_bw, _ = createLSTMCell(self.batch_size,
                                                  self.body_lstm_size,
                                                  self.body_n_layer,
                                                  forget_bias=0.0)
            context_lstm_cell_bw, _ = createLSTMCell(self.batch_size,
                                                     self.context_lstm_size,
                                                     self.context_n_layer,
                                                     forget_bias=0.0)
            task_lstm_cell_bw, _ = createLSTMCell(self.batch_size,
                                                  self.task_lstm_size,
                                                  self.task_n_layer,
                                                  forget_bias=0.0)

        if not self.is_multi_task:
            context_output = tf.constant(0)

        with tf.variable_scope("shared_lstm"):
            body_cell_output, last_body_state = self.rnn_layer(
                fw_cell=body_lstm_cell,
                bw_cell=body_lstm_cell_bw,
                rnn_inputs=x,
                is_bidirectional=self.is_bidirectional)

        if self.is_multi_task:
            with tf.variable_scope("context_branch"):
                # The output from bidirectional LSTM is a list = [fw_output, bw_output], each of size (batch_size, max_length, out_size)
                if self.is_bidirectional:
                    # Concatenate the input of both directions along the feature dimension axis 2(300 -> 600)
                    context_input = tf.concat(body_cell_output, axis=2)
                else:
                    context_input = body_cell_output
                context_cell_output, last_context_state = self.rnn_layer(
                    fw_cell=context_lstm_cell,
                    bw_cell=None,
                    rnn_inputs=context_input)

                # The output from LSTMs will be (batch_size, max_length, out_size)

                # Select the last output that is not generated by zero vectors
                if self.secondary_task == "missing word":
                    last_context_output = self.last_relevant(
                        context_cell_output, self.length(context_cell_output))
                    # feed the last output to the fc layer and make prediction
                    with tf.variable_scope("context_fc"):
                        context_fc_out = fcLayer(
                            x=last_context_output,
                            in_shape=self.context_fc_input_size,
                            out_shape=self.context_branch_fc,
                            activation=self.fc_activation,
                            dropout=self.dropout,
                            is_train=is_train,
                            scope="fc1")
                    with tf.variable_scope("context_pred"):
                        context_output, context_logits = predictionLayer(
                            x=context_fc_out,
                            y=y_context,
                            in_shape=self.context_branch_fc,
                            out_shape=y_context.get_shape()[-1].value,
                            activation=self.context_output_activation)
                        context_cost = compute_cost(
                            logit=context_logits,
                            y=y_context,
                            out_type="last_only",
                            max_length=self.max_length,
                            batch_size=self.batch_size,
                            embed_dim=self.feature_length,
                            activation=self.context_output_activation)

                if self.secondary_task == "word generation":
                    context_cell_output = tf.transpose(context_cell_output,
                                                       [1, 0, 2])
                    context_cell_output = tf.reshape(
                        context_cell_output, [-1, self.context_lstm_size])
                    context_output_list = tf.split(context_cell_output,
                                                   self.max_length, 0)

                    fc_output_list = []
                    with tf.variable_scope("context_fc"):
                        for step in range(self.max_length):
                            if step > 0:
                                tf.get_variable_scope().reuse_variables()
                            fc_out = fcLayer(
                                x=context_output_list[step],
                                in_shape=self.context_fc_input_size,
                                out_shape=self.context_branch_fc,
                                activation=self.fc_activation,
                                dropout=self.dropout,
                                is_train=is_train,
                                scope="fc1")
                            fc_output_list.append(
                                tf.expand_dims(fc_out, axis=1))
                        context_fc_out = tf.concat(fc_output_list, axis=1)
                        print "context fc output shape before transpose: ", context_fc_out.get_shape(
                        )

                    with tf.variable_scope("context_pred"):
                        context_output, context_logits = predictionLayer(
                            x=context_fc_out,
                            y=y_context,
                            in_shape=self.context_branch_fc,
                            out_shape=y_context.get_shape()[-1].value,
                            activation=self.context_output_activation)
                        print "Context prediction output shape: ", context_output.get_shape(
                        )
                        context_cost = compute_cost(
                            logit=context_logits,
                            y=y_context,
                            out_type="sequential",
                            max_length=self.max_length,
                            batch_size=self.batch_size,
                            embed_dim=self.feature_length,
                            activation=self.context_output_activation)

                print "Context cost shape: ", context_cost.get_shape()

        with tf.variable_scope("task_branch"):
            if self.is_bidirectional:
                task_input = tf.concat(body_cell_output, axis=2)
            else:
                task_input = body_cell_output
            task_cell_output, last_task_state = self.rnn_layer(
                fw_cell=task_lstm_cell, bw_cell=None, rnn_inputs=task_input)

            with tf.variable_scope("task_fc"):
                # Select the last output that is not generated by zero vectors
                last_task_output = self.last_relevant(
                    task_cell_output, self.length(task_cell_output))
                # feed the last output to the fc layer and make prediction
                task_fc_out = fcLayer(x=last_task_output,
                                      in_shape=self.task_fc_input_size,
                                      out_shape=self.task_branch_fc,
                                      activation=self.fc_activation,
                                      dropout=self.dropout,
                                      is_train=is_train,
                                      scope="fc2")
                task_output, task_logits = predictionLayer(
                    x=task_fc_out,
                    y=y_task,
                    in_shape=self.task_branch_fc,
                    out_shape=y_task.get_shape()[-1].value,
                    activation=self.task_output_activation)
                task_cost = compute_cost(
                    logit=task_logits,
                    y=y_task,
                    out_type="last_only",
                    max_length=self.max_length,
                    batch_size=self.batch_size,
                    embed_dim=self.n_classes,
                    activation=self.task_output_activation)

        return context_cost, task_cost, task_output, context_output
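
Example 6 hides the recurrence behind a self.rnn_layer wrapper that switches between the uni- and bidirectional dynamic RNNs. Its body is not shown; a plausible sketch (the sequence-length handling, mirroring the other examples, is an assumption):

def rnn_layer(self, fw_cell, bw_cell, rnn_inputs, is_bidirectional=False):
    # Dispatch to the bidirectional RNN only when requested; bw_cell may be
    # None in the unidirectional case.
    if is_bidirectional:
        # Returns a pair (fw_output, bw_output), matching the comment in the
        # context branch above.
        return tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell,
                                               cell_bw=bw_cell,
                                               inputs=rnn_inputs,
                                               sequence_length=self.length(rnn_inputs),
                                               dtype=tf.float32)
    return tf.nn.dynamic_rnn(cell=fw_cell,
                             inputs=rnn_inputs,
                             sequence_length=self.length(rnn_inputs),
                             dtype=tf.float32)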