import functools

import tensorflow as tf

# embed, block and recompute_grad are transformer internals assumed to be
# provided elsewhere in the library; sketches of the smaller helpers used
# throughout this file follow below.


def regressor(hidden, targets, n_targets, config, train=False, reuse=None, **kwargs):
    """
    A simple linear regressor.

    :param hidden: The output of the featurizer. [batch_size, embed_dim]
    :param targets: The placeholder representing the regression targets. [batch_size]
    :param n_targets: A python int containing the number of outputs the model
        should learn to predict.
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Whether reuse should be set within this scope.
    :param kwargs: Spare arguments.
    :return: dict containing:
        logits: The regression outputs.
        losses: L2 loss for the regression targets.
    """
    with tf.variable_scope('regressor', reuse=reuse):
        hidden = dropout(hidden, config.clf_p_drop, train)
        outputs = perceptron(hidden, n_targets, config)

        if targets is None:
            loss = None
        else:
            loss = tf.nn.l2_loss(outputs - targets)

        return {
            'logits': outputs,
            'losses': loss
        }
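# The target models in this file lean on a handful of small helpers defined
# elsewhere in the library. Minimal sketches of three of them follow, as a
# reading aid only -- these are hypothetical reconstructions inferred from the
# call sites in this file, not the library's exact implementations.

def shape_list(x):
    # Return the shape of x, preferring static dims and falling back to
    # dynamic ones where the static shape is unknown.
    static = x.get_shape().as_list()
    dynamic = tf.shape(x)
    return [dynamic[i] if dim is None else dim for i, dim in enumerate(static)]


def dropout(x, pdrop, train, dropout_placeholder=None):
    # Apply dropout only when training; when a 0/1 placeholder is supplied,
    # it additionally gates dropout at session-run time.
    if train and pdrop > 0:
        keep_prob = 1.0 - pdrop
        if dropout_placeholder is not None:
            keep_prob = 1.0 - pdrop * dropout_placeholder
        x = tf.nn.dropout(x, keep_prob=keep_prob)
    return x


def perceptron(x, ny, config):
    # A single dense projection: [batch, embed] -> [batch, ny].
    with tf.variable_scope('perceptron'):
        nx = shape_list(x)[-1]
        w = tf.get_variable(
            "w", [nx, ny],
            initializer=tf.random_normal_initializer(stddev=config.weight_stddev))
        b = tf.get_variable("b", [ny], initializer=tf.constant_initializer(0))
        return tf.matmul(x, w) + b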
def classifier(hidden, targets, n_classes, dropout_placeholder, config, train=False, reuse=None, **kwargs):
    """
    A simple linear classifier.

    :param hidden: The output of the featurizer. [batch_size, embed_dim]
    :param targets: The placeholder representing the one-hot target probabilities.
        [batch_size, n_classes]
    :param n_classes: A python int containing the number of classes that the model
        should be learning to predict over.
    :param dropout_placeholder: A placeholder, 1 when dropout is on, 0 when it is off.
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Whether reuse should be set within this scope.
    :param kwargs: Spare arguments.
    :return: dict containing:
        logits: The unnormalised log probabilities of each class.
        losses: The loss for the classifier.
    """
    with tf.variable_scope('model', reuse=reuse):
        hidden = dropout(hidden, config.clf_p_drop, train, dropout_placeholder)
        clf_logits = perceptron(hidden, n_classes, config)
        clf_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=clf_logits,
            labels=tf.stop_gradient(targets)
        )
        return {
            'logits': clf_logits,
            'losses': clf_losses
        }
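# Note: tf.nn.softmax_cross_entropy_with_logits_v2 expects dense targets
# (e.g. one-hot rows of shape [batch, n_classes]), unlike the sparse variant
# used by some of the multi_choice_question versions below. A toy example of
# preparing such targets (illustrative values only):
#
#     label_ids = tf.constant([2, 0, 1])        # sparse class ids, [batch]
#     targets = tf.one_hot(label_ids, depth=3)  # one-hot, [batch, n_classes]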
def featurizer(X, encoder, dropout_placeholder, config, train=False, reuse=None, max_length=None):
    """
    The transformer element of the finetuning model. Maps from token ids to a
    dense embedding of the sequence.

    :param X: A tensor of token indexes with shape [batch_size, sequence_length, token_idx]
    :param encoder: A TextEncoder object.
    :param dropout_placeholder: A placeholder, 1 when dropout is on, 0 when it is off.
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Whether reuse should be set within this scope.
    :param max_length: Maximum sequence length.
    :return: A dict containing:
        embed_weights: The word embedding matrix.
        features: The output of the featurizer's final state.
        sequence_features: The output of the featurizer at each timestep.
    """
    max_length = max_length or config.max_length
    with tf.variable_scope('model', reuse=reuse):
        embed_weights = tf.get_variable(
            "we",
            [encoder.vocab_size + max_length, config.n_embed],
            initializer=tf.random_normal_initializer(stddev=config.weight_stddev)
        )
        embed_weights = dropout(embed_weights, config.embed_p_drop, train, dropout_placeholder)

        X = tf.reshape(X, [-1, max_length, 2])
        h = embed(X, embed_weights)
        for layer in range(config.n_layer):
            h = block(h, config.n_heads, config.act_fn, config.resid_p_drop,
                      config.attn_p_drop, 'h%d' % layer, dropout_placeholder,
                      train=train, scale=True)

        # Use hidden state at classifier token as input to final proj. + softmax
        clf_h = tf.reshape(h, [-1, config.n_embed])  # [batch * seq_len, embed]
        clf_token = encoder['_classify_']
        pool_idx = tf.cast(
            tf.argmax(tf.cast(tf.equal(X[:, :, 0], clf_token), tf.float32), 1),
            tf.int32
        )
        clf_h = tf.gather(
            clf_h,
            tf.range(shape_list(X)[0], dtype=tf.int32) * max_length + pool_idx
        )
        clf_h = tf.reshape(clf_h, [-1, config.n_embed])  # [batch, embed]

        return {
            'embed_weights': embed_weights,
            'features': clf_h,
            'sequence_features': h
        }
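# How the pooling above works: tf.equal marks the positions holding the
# '_classify_' token, and tf.argmax over the resulting 0/1 mask returns the
# index of the first match in each row (this relies on TF's argmax picking
# the earliest maximum, which it does in practice). Toy illustration with
# made-up token ids:
#
#     X_ids = [[15, 8, 40478, 0],      # clf token (40478 here) at index 2
#              [7, 40478, 0, 0]]       # clf token at index 1
#     mask = tf.cast(tf.equal(X_ids, 40478), tf.float32)
#     pool_idx = tf.argmax(mask, 1)    # -> [2, 1]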
def featurizer(X, encoder, config, train=False, reuse=None):
    """
    The transformer element of the finetuning model. Maps from token ids to a
    dense embedding of the sequence.

    :param X: A tensor of token indexes with shape [batch_size, sequence_length, token_idx]
    :param encoder: A TextEncoder object.
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Whether reuse should be set within this scope.
    :return: A dict containing:
        embed_weights: The word embedding matrix.
        features: The output of the featurizer's final state.
        sequence_features: The output of the featurizer at each timestep.
    """
    initial_shape = [a or -1 for a in X.get_shape().as_list()]
    X = tf.reshape(X, shape=[-1] + initial_shape[-2:])

    with tf.variable_scope('model/featurizer', reuse=reuse):
        embed_weights = tf.get_variable(
            "we",
            [encoder.vocab_size + config.max_length, config.n_embed],
            initializer=tf.random_normal_initializer(stddev=config.weight_stddev)
        )
        if config.train_embeddings:
            embed_weights = dropout(embed_weights, config.embed_p_drop, train)
        else:
            embed_weights = tf.stop_gradient(embed_weights)

        X = tf.reshape(X, [-1, config.max_length, 2])
        h = embed(X, embed_weights)
        for layer in range(config.n_layer):
            # Stop gradients below the lowest trained layer, so only the top
            # `num_layers_trained` blocks are updated (unless all layers train).
            if (config.n_layer - layer) == config.num_layers_trained and config.num_layers_trained != config.n_layer:
                h = tf.stop_gradient(h)
                train_layer = False
            else:
                train_layer = train

            with tf.variable_scope('h%d_' % layer):
                block_fn = functools.partial(
                    block, n_head=config.n_heads, act_fn=config.act_fn,
                    resid_pdrop=config.resid_p_drop, attn_pdrop=config.attn_p_drop,
                    scope='h%d' % layer, train=train_layer, scale=True
                )
                if config.low_memory_mode and train_layer:
                    block_fn = recompute_grad(block_fn, use_entire_scope=True)
                h = block_fn(h)

        # Use hidden state at classifier token as input to final proj. + softmax
        clf_h = tf.reshape(h, [-1, config.n_embed])  # [batch * seq_len, embed]
        clf_token = encoder['_classify_']
        pool_idx = tf.cast(tf.argmax(tf.cast(tf.equal(X[:, :, 0], clf_token), tf.float32), 1), tf.int32)
        clf_h = tf.gather(clf_h, tf.range(shape_list(X)[0], dtype=tf.int32) * config.max_length + pool_idx)
        clf_h = tf.reshape(clf_h, shape=initial_shape[:-2] + [config.n_embed])
        seq_feats = tf.reshape(h, shape=initial_shape[:-1] + [config.n_embed])

        return {
            'embed_weights': embed_weights,
            'features': clf_h,
            'sequence_features': seq_feats
        }
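# A sketch of how the featurizer and classifier presumably fit together when
# building a classification graph. Hypothetical driver code: `encoder`,
# `config` and the `config.lr` learning-rate attribute are stand-ins, not
# part of this module.

def _build_classification_graph(encoder, config, n_classes):
    X = tf.placeholder(tf.int32, [None, config.max_length, 2])  # token + position ids
    Y = tf.placeholder(tf.float32, [None, n_classes])           # one-hot targets
    feats = featurizer(X, encoder=encoder, config=config, train=True)
    clf = classifier(feats['features'], Y, n_classes, dropout_placeholder=None,
                     config=config, train=True)
    train_op = tf.train.AdamOptimizer(config.lr).minimize(tf.reduce_mean(clf['losses']))
    return X, Y, clf['logits'], train_op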
def multi_choice_question(hidden, targets, n_targets, dropout_placeholder, config, train=False, reuse=None, **kwargs):
    with tf.variable_scope("model", reuse=reuse):
        hidden = dropout(hidden, config.clf_p_drop, train, dropout_placeholder)
        # Score each answer option with a shared single-output perceptron.
        hidden = tf.unstack(hidden, num=n_targets, axis=1)
        hidden = tf.concat(hidden, axis=0)
        clf_out = perceptron(hidden, 1, config)
        clf_out = tf.split(clf_out, n_targets, axis=0)
        clf_out = tf.concat(clf_out, 1)
        clf_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=clf_out,
            labels=tf.stop_gradient(targets)
        )
        return {
            'logits': clf_out,
            'losses': clf_losses
        }
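# Shape walkthrough for the unstack/concat trick above, assuming hidden is
# [batch, n_targets, embed]:
#
#     tf.unstack(hidden, num=n_targets, axis=1)  # n_targets tensors of [batch, embed]
#     tf.concat(..., axis=0)                     # [n_targets * batch, embed], grouped by option
#     perceptron(..., 1, config)                 # [n_targets * batch, 1], one score per option
#     tf.split / tf.concat(..., 1)               # back to [batch, n_targets] logits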
def multi_choice_question(hidden, targets, n_targets, dropout_placeholder, config, train=False, reuse=None, **kwargs):
    with tf.variable_scope("model", reuse=reuse):
        initial_shape = shape_list(hidden)  # [batch, n_targets, embed]
        hidden = dropout(hidden, config.clf_p_drop, train, dropout_placeholder)
        # Score each answer option with a shared single-output perceptron,
        # then reshape the flat scores back to [batch, n_targets].
        clf_out = perceptron(merge_leading_dims(hidden, 2), 1, config)
        clf_logits = tf.reshape(clf_out, shape=initial_shape[:1] + [n_targets])
        clf_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=clf_logits,
            labels=tf.stop_gradient(targets)
        )
        return {
            'logits': clf_logits,
            'losses': clf_losses
        }
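# merge_leading_dims is another library helper; a minimal sketch of the
# assumed behaviour (hypothetical reconstruction, not the library's code):

def merge_leading_dims(x, target_rank):
    # Collapse leading dimensions so x ends up with `target_rank` dims, e.g.
    # [batch, n_targets, embed] -> [batch * n_targets, embed] for target_rank=2.
    shape = shape_list(x)
    return tf.reshape(x, [-1] + shape[len(shape) - target_rank + 1:])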
def multi_choice_question(hidden, targets, n_targets, config, train=False, reuse=None, **kwargs):
    with tf.variable_scope("model", reuse=reuse):
        hidden = dropout(hidden, config.clf_p_drop, train)
        hidden = tf.unstack(hidden, num=n_targets, axis=1)
        hidden = tf.concat(hidden, axis=0)
        clf_out = perceptron(hidden, 1, config)
        clf_out = tf.split(clf_out, n_targets, axis=0)
        clf_out = tf.concat(clf_out, 1)

        if targets is None:
            clf_losses = None
        else:
            clf_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=clf_out,
                labels=tf.stop_gradient(targets)
            )
            clf_losses = _apply_class_weight(clf_losses, targets, kwargs.get('class_weights'))

        return {
            'logits': clf_out,
            'losses': clf_losses
        }
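# _apply_class_weight is assumed to rescale each example's loss by a
# user-supplied weight for its target class. A minimal sketch under that
# assumption (hypothetical reconstruction, not the library's code):

def _apply_class_weight(losses, targets, class_weights=None):
    if class_weights is None or losses is None:
        return losses
    # Look up each example's class weight from its sparse target id.
    weights = tf.gather(tf.constant(class_weights, dtype=tf.float32), targets)
    return losses * weights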