    def double_predict_classification(self, response, confound_input, x_input):
        """Predicts a categorical outcome twice.

        First, predict the outcome from the confound.
        Second, concatenate the text input and the residuals from step (1),
        and use the resulting vector to generate better predictions of the
        outcome.

        Args:
          response: dict, an element of config.variable_spec. This is the
            response variable we are predicting.
          confound_input: tensor [batch, num confounds], the confounds, all
            stacked up into vectors.
          x_input: tensor [batch, hidden], an encoded version of the text
            input.

        Returns:
          confound_preds: tensor [batch, num classes], the predictions from
            the confound.
          confound_loss: tensor [batch], the cross-entropy loss between the
            confound predictions and the targets.
          final_preds: tensor [batch, num classes], the final predictions
            from the confound residuals + text input.
          final_loss: tensor [batch], the cross-entropy loss between the
            final predictions and the targets.
        """
        with tf.variable_scope('control_pred'):
            confound_preds, confound_loss = tf_utils.classifier(
                inputs=confound_input,
                labels=self.iter[response['name']],
                layers=self.params['classification_layers_1'],
                num_classes=self.dataset.num_levels(response['name']),
                hidden=self.params['classification_hidden_1'],
                dropout=self.dropout,
                sparse_labels=True)

        if self.params['ablate_confounds']:
            confound_preds = tf.zeros_like(confound_preds)

        final_input = tf.concat([x_input, confound_preds], axis=1)

        with tf.variable_scope('final_pred'):
            final_preds, final_loss = tf_utils.classifier(
                bias=False,
                inputs=final_input,
                labels=self.iter[response['name']],
                layers=self.params['classification_layers_2'],
                num_classes=self.dataset.num_levels(response['name']),
                hidden=self.params['classification_hidden_2'],
                dropout=self.dropout,
                sparse_labels=True)

        return confound_preds, confound_loss, final_preds, final_loss
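# A minimal sketch of the tf_utils.classifier interface assumed by the call
# sites in this file (the real implementation lives in tf_utils and is not
# shown). It stacks `layers - 1` hidden layers of size `hidden` plus a final
# linear projection scoped 'prediction_head', which is what would make the
# get_tensor_by_name('.../prediction_head/weights:0') lookups below work.
# TF 1.x; assumes `import tensorflow as tf`, as in the surrounding examples.
def classifier_sketch(inputs, labels, layers, num_classes, hidden=128,
                      dropout=0.0, sparse_labels=True, bias=True):
    x = inputs
    for i in range(layers - 1):
        x = tf.contrib.layers.fully_connected(
            x, hidden, activation_fn=tf.nn.relu, scope='layer_%d' % i)
        x = tf.nn.dropout(x, keep_prob=1.0 - dropout)  # assumed: drop prob
    logits = tf.contrib.layers.fully_connected(
        x, num_classes, activation_fn=None,
        biases_initializer=tf.zeros_initializer() if bias else None,
        scope='prediction_head')
    # All call sites in this file pass sparse_labels=True (integer class ids).
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.nn.softmax(logits), tf.reduce_mean(losses)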
Example #2
    def __init__(self, config, params, dataset, iterators):
        """Constructs the graph and training/summary ops."""
        self.iter = iterators
        self.config = config
        self.params = params
        self.dataset = dataset
        self.filter_sizes = [
            int(x) for x in self.params['filter_size'].split(',')
        ]

        tf_graph = tf.get_default_graph()

        self.learning_rate = tf.constant(params['learning_rate'])
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        self.global_step = tf.Variable(0, trainable=False)

        source_name = dataset.input_varname()
        self.input_text, self.input_ids, self.input_lens = self.iter[
            source_name]

        # Use a CNN to encode the source.
        conv, src_encoded = self.cnn_encoder()

        # Now build all the prediction heads (one per non-input variable).
        self.step_output = defaultdict(dict)
        for variable in self.config.data_spec[1:]:
            if variable['skip']:
                continue

            with tf.variable_scope(variable['name']):
                if variable['control']:
                    prediction_input = self.reverse(src_encoded)
                else:
                    prediction_input = tf.identity(src_encoded)

                # Each prediction head is a single fully-connected layer without
                # activation functions or bias. This makes it a simple linear projection
                # into the output space.
                if variable['type'] == utils.CATEGORICAL:
                    preds, mean_loss = tf_utils.classifier(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=1,
                        num_classes=self.dataset.num_levels(variable['name']),
                        dropout=self.dropout,
                        sparse_labels=True,
                        bias=False)
                elif variable['type'] == utils.CONTINUOUS:
                    preds, mean_loss = tf_utils.regressor(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=1,
                        dropout=self.dropout,
                        bias=False)
                else:
                    raise ValueError('Unknown type %s for variable %s' %
                                     (variable['type'], variable['name']))

                prediction_head_weights = tf_graph.get_tensor_by_name(
                    '%s/prediction_head/weights:0' % variable['name'])

                mean_loss = variable['weight'] * mean_loss
                # The user is allowed to specify a "rho" term which is a dampening
                # factor on the adversarial signal. This helps the model achieve a
                # balance between the losses of the prediction head and encoder.
                if variable['control']:
                    mean_loss = self.params['rho'] * mean_loss

            tf.summary.scalar('%s_loss' % variable['name'], mean_loss)

            # Save everything you need for inference: the input, loss, the
            # convolutional feature maps, the output projection weights, and the
            # model's predictions.
            self.step_output[variable['name']]['input'] = self.iter[
                variable['name']]
            self.step_output[variable['name']]['loss'] = mean_loss
            self.step_output[variable['name']]['conv'] = conv
            self.step_output[
                variable['name']]['weights'] = prediction_head_weights
            self.step_output[variable['name']]['pred'] = preds

        # Optimization and summary writing.
        self.loss = tf.reduce_sum(
            [x['loss'] for x in self.step_output.values()])
        tf.summary.scalar('global_loss', self.loss)

        self.train_step = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            clip_gradients=self.params['gradient_clip'],
            optimizer='Adam',
            summaries=['gradient_norm'])

        # Savers, summaries, etc.
        self.trainable_variable_names = [
            v.name for v in tf.trainable_variables()
        ]
        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver(tf.global_variables())
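# The adversarial 'control' heads above rely on self.reverse, which is a
# gradient-reversal layer in the style of Ganin & Lempitsky's DANN. A sketch
# of the standard construction (assumed; the project's own implementation is
# not shown): the forward pass is the identity, but the gradient is negated
# on the way back, so training a control head to predict a confound
# simultaneously pushes the encoder to discard that confound.
def reverse_sketch(tensor):
    return tf.stop_gradient(2.0 * tensor) - tensor  # forward: x, gradient: -1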
Example #3
    def __init__(self, config, params, dataset, iterators):
        self.iter = iterators
        self.config = config
        self.params = params
        self.dataset = dataset

        self.learning_rate = tf.constant(params['learning_rate'])
        self.global_step = tf.Variable(0, trainable=False)

        source_name = dataset.input_varname()
        self.input_text, input_ids, input_lens = self.iter[source_name]

        # transform input text into big BOW vector
        with tf.variable_scope('input'):
            input_vector = tf.map_fn(
                lambda seq: self._to_dense_vector(
                    seq, self.dataset.vocab_size),
                self.iter[dataset.input_varname()][1])
            input_encoded = tf_utils.fc_tube(
                inputs=tf.cast(input_vector, tf.float32),
                num_outputs=self.params['encoder_layers'],
                layers=self.params['encoder_layers'])
        # TODO: grabbing these tensors by name is PAINFULLY hacky!!!
        cur_graph = tf.get_default_graph()
        self.feature_weights = cur_graph.get_tensor_by_name(
            'input/layer_0/weights:0')
        self.feature_intercept = cur_graph.get_tensor_by_name(
            'input/layer_0/biases:0')

        # now build all the prediction heads
        self.step_output = defaultdict(dict)
        for variable in self.config.data_spec[1:]:
            if variable['skip']:
                continue

            with tf.variable_scope(variable['name'] + '_prediction_head'):
                if variable['control']:
                    prediction_input = self.reverse(input_encoded)
                else:
                    prediction_input = tf.identity(input_encoded)

                if variable['type'] == 'categorical':
                    preds, mean_loss = tf_utils.classifier(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['classifier_layers'],
                        num_classes=self.dataset.num_classes(variable['name']),
                        hidden=self.params['classifier_units'],
                        dropout=0.0,
                        sparse_labels=True)
                elif variable['type'] == 'continuous':
                    preds, mean_loss = tf_utils.regressor(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['regressor_layers'],
                        hidden=self.params['regressor_units'],
                        dropout=0.0)
                else:
                    raise ValueError('Unknown type %s for variable %s' %
                                     (variable['type'], variable['name']))

                mean_loss = tf.scalar_mul(variable['weight'], mean_loss)

            tf.summary.scalar('%s_loss' % variable['name'], mean_loss)
            self.step_output[variable['name']]['loss'] = mean_loss
            self.step_output[variable['name']]['pred'] = preds

        # regularize if need be
        if self.params['lambda'] > 0:
            if self.params['regularizor'] == 'l2':
                reg = tf.contrib.layers.l2_regularizer(self.params['lambda'])
            else:
                reg = tf.contrib.layers.l1_regularizer(self.params['lambda'])
            reg_weights = (tf.trainable_variables()
                           if self.params['reg_type'] == 'all'
                           else [self.feature_weights])
            reg_term = tf.contrib.layers.apply_regularization(reg, reg_weights)
        else:
            reg_term = 0
        tf.summary.scalar('regularization_loss', reg_term)

        # now optimize
        self.loss = tf.reduce_sum(
            [x['loss'] for x in self.step_output.values()])
        self.loss += reg_term
        tf.summary.scalar('global_loss', self.loss)

        self.train_step = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            clip_gradients=self.params['gradient_clip'],
            optimizer='Adam',
            summaries=["gradient_norm"])

        # savers, summaries, etc
        self.trainable_variable_names = [
            v.name for v in tf.trainable_variables()
        ]
        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver(tf.global_variables())
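# A plausible sketch of the _to_dense_vector helper used above (assumed from
# its call site): it turns a 1-D sequence of token ids into a dense
# vocab-sized bag-of-words count vector, and tf.map_fn applies it to each
# example in the batch. tf_utils.fc_tube (also not shown) is presumably a
# stack of fully-connected layers scoped 'layer_0', 'layer_1', ..., which is
# what makes the get_tensor_by_name('input/layer_0/weights:0') lookup work.
def to_dense_vector_sketch(token_ids, vocab_size):
    one_hot = tf.one_hot(token_ids, depth=vocab_size, dtype=tf.int32)
    return tf.reduce_sum(one_hot, axis=0)  # [vocab_size] token counts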
Example #4
    def __init__(self, config, params, dataset, iterators):
        self.iter = iterators
        self.config = config
        self.params = params
        self.dataset = dataset

        self.learning_rate = tf.constant(params['learning_rate'])
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        self.global_step = tf.Variable(0, trainable=False)

        source_name = dataset.input_varname()
        self.input_text, input_ids, input_lens = self.iter[source_name]

        # use attention to encode the source
        with tf.variable_scope('encoder'):
            rnn_outputs, source_embeddings = tf_utils.rnn_encode(
                source=input_ids,
                source_len=input_lens,
                vocab_size=self.dataset.vocab_size,
                embedding_size=self.params['embedding_size'],
                layers=self.params['encoder_layers'],
                units=self.params['encoder_units'],
                dropout=self.dropout,
                glove_matrix=(tf_utils.get_glove(dataset)
                              if self.params['use_glove'] else None))

        with tf.variable_scope('attention'):
            self.attn_scores, attn_context = tf_utils.attention(
                states=rnn_outputs,
                seq_lens=input_lens,
                layers=self.params['attn_layers'],
                units=self.params['attn_units'],
                dropout=self.dropout)

        # now build all the prediction heads
        self.step_output = defaultdict(dict)
        for variable in self.config.data_spec[1:]:
            if variable['skip']:
                continue

            with tf.variable_scope(variable['name'] + '_prediction_head'):
                if variable['control']:
                    prediction_input = self.reverse(attn_context)
                else:
                    prediction_input = tf.identity(attn_context)

                if variable['type'] == 'categorical':
                    preds, mean_loss = tf_utils.classifier(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['classifier_layers'],
                        num_classes=self.dataset.num_classes(variable['name']),
                        hidden=self.params['classifier_units'],
                        dropout=self.dropout,
                        sparse_labels=True)
                elif variable['type'] == 'continuous':
                    preds, mean_loss = tf_utils.regressor(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['regressor_layers'],
                        hidden=self.params['regressor_units'],
                        dropout=self.dropout)
                else:
                    raise ValueError('Unknown type %s for variable %s' % (
                        variable['type'], variable['name']))

                mean_loss = tf.scalar_mul(variable['weight'], mean_loss)

            tf.summary.scalar('%s_loss' % variable['name'], mean_loss)
            self.step_output[variable['name']]['loss'] = mean_loss
            self.step_output[variable['name']]['pred'] = preds

        # now optimize
        self.loss = tf.reduce_sum([x['loss'] for x in self.step_output.values()])
        tf.summary.scalar('global_loss', self.loss)

        self.train_step = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            clip_gradients=self.params['gradient_clip'],
            optimizer='Adam',
            summaries=["gradient_norm"])

        # savers, summaries, etc
        self.trainable_variable_names = [v.name for v in tf.trainable_variables()]
        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver(tf.global_variables())
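# A minimal sketch of what tf_utils.attention above might compute (assumed
# from its call site; the `layers` knob is ignored for brevity): an MLP
# scores every RNN state, padding positions are masked out, and the context
# vector is the softmax-weighted sum of the states.
def attention_sketch(states, seq_lens, units, dropout):
    # states: [batch, time, size]; seq_lens: [batch]
    hidden = tf.contrib.layers.fully_connected(
        states, units, activation_fn=tf.tanh)
    hidden = tf.nn.dropout(hidden, keep_prob=1.0 - dropout)
    scores = tf.squeeze(tf.contrib.layers.fully_connected(
        hidden, 1, activation_fn=None), axis=-1)  # [batch, time]
    mask = tf.sequence_mask(
        seq_lens, maxlen=tf.shape(states)[1], dtype=tf.float32)
    scores += (1.0 - mask) * -1e9  # exclude padding from the softmax
    weights = tf.nn.softmax(scores)  # [batch, time]
    context = tf.reduce_sum(states * tf.expand_dims(weights, -1), axis=1)
    return weights, context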
Example #5
    def __init__(self, config, params, dataset, iterators):
        """Constructs the graph and training/summary ops."""
        self.iter = iterators
        self.config = config
        self.params = params
        self.dataset = dataset

        self.learning_rate = tf.constant(params['learning_rate'])
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        self.global_step = tf.Variable(0, trainable=False)

        source_name = dataset.input_varname()
        self.input_text, _, _ = self.iter[source_name]

        # Transform the input text into a big bag of words vector.
        with tf.variable_scope('input'):
            input_vector = tf.map_fn(
                lambda seq: tf_utils.sparse_to_dense_vector(  # pylint: disable=g-long-lambda
                    seq, self.dataset.vocab_size),
                self.iter[dataset.input_varname()][1])
            input_encoded = tf_utils.fc_tube(
                inputs=tf.cast(input_vector, tf.float32),
                num_outputs=self.params['encoder_layers'],
                layers=self.params['encoder_layers'])

        # Pull out the vector of weights which dots the input vector.
        # TODO(rpryzant) -- there must be a more elegant way to do this in TF?
        cur_graph = tf.get_default_graph()
        self.feature_weights = cur_graph.get_tensor_by_name(
            'input/layer_0/weights:0')
        self.feature_intercept = cur_graph.get_tensor_by_name(
            'input/layer_0/biases:0')

        # Now build all the prediction heads, one for each non-input variable.
        self.step_output = defaultdict(dict)
        for variable in self.config.data_spec[1:]:
            if variable['skip']:
                continue

            with tf.variable_scope(variable['name'] + '_prediction_head'):
                if variable['control']:
                    prediction_input = self.reverse(input_encoded)
                else:
                    prediction_input = tf.identity(input_encoded)

                if variable['type'] == utils.CATEGORICAL:
                    preds, mean_loss = tf_utils.classifier(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['classifier_layers'],
                        num_classes=self.dataset.num_levels(variable['name']),
                        hidden=self.params['classifier_units'],
                        dropout=self.dropout,
                        sparse_labels=True)
                elif variable['type'] == utils.CONTINUOUS:
                    preds, mean_loss = tf_utils.regressor(
                        inputs=prediction_input,
                        labels=self.iter[variable['name']],
                        layers=self.params['regressor_layers'],
                        hidden=self.params['regressor_units'],
                        dropout=self.dropout)
                else:
                    raise ValueError('Unknown type %s for variable %s' %
                                     (variable['type'], variable['name']))

                mean_loss = tf.scalar_mul(variable['weight'], mean_loss)

            tf.summary.scalar('%s_loss' % variable['name'], mean_loss)
            self.step_output[variable['name']]['input'] = self.iter[
                variable['name']]
            self.step_output[variable['name']]['loss'] = mean_loss
            self.step_output[variable['name']]['pred'] = preds

        # Regularize the parameters.
        if self.params['lambda'] > 0:
            if self.params['regularizer'] == 'l2':
                regularizer = tf.contrib.layers.l2_regularizer(
                    self.params['lambda'])
            else:
                regularizer = tf.contrib.layers.l1_regularizer(
                    self.params['lambda'])

            if self.params['reg_type'] == 'all':
                regularization_weights = tf.trainable_variables()
            else:
                regularization_weights = [self.feature_weights]

            regularization_term = tf.contrib.layers.apply_regularization(
                regularizer, regularization_weights)
        else:
            regularization_term = 0

        tf.summary.scalar('regularization_loss', regularization_term)

        # Optimization ops.
        self.loss = tf.reduce_sum(
            [x['loss'] for x in self.step_output.values()])
        self.loss += regularization_term
        tf.summary.scalar('global_loss', self.loss)

        self.train_step = tf.contrib.layers.optimize_loss(
            loss=self.loss,
            global_step=self.global_step,
            learning_rate=self.learning_rate,
            clip_gradients=self.params['gradient_clip'],
            optimizer='Adam',
            summaries=['gradient_norm'])

        # Savers, summaries, etc.
        self.trainable_variable_names = [
            v.name for v in tf.trainable_variables()
        ]
        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver(tf.global_variables())
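# A hypothetical driver loop for the models above (assumed usage; the
# project's actual training script is not shown). It initializes variables,
# feeds the dropout placeholder on models that define one, runs the train
# op, and writes the merged summaries each step.
import tensorflow as tf

def train(model, num_steps, log_dir='/tmp/logs'):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())  # assumed: iterators may use vocab lookup tables
        writer = tf.summary.FileWriter(log_dir, sess.graph)
        for _ in range(num_steps):
            _, step, summaries = sess.run(
                [model.train_step, model.global_step, model.summaries],
                feed_dict={model.dropout: 0.2})  # assumed: drop prob at train time
            writer.add_summary(summaries, step)
        model.saver.save(sess, log_dir + '/model.ckpt', global_step=step)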