Example #1
0
    def model_fn_body(self, features):
        """Build the model body from three parallel streams of modules.

        All three streams start from ``features["inputs"]``. In each hidden
        layer every stream goes through a unary step (norm, activation and
        conv modules, each applied via ``run_unary_modules``), and the
        streams are then recombined pairwise via ``run_binary_modules``.

        Args:
            features: mapping that provides the input tensor under "inputs".

        Returns:
            A pair ``(output, extra_loss)``. ``output`` is the first stream
            after the last layer. ``extra_loss`` is the ``batch_deviation``
            of the unary outputs, summed over the three streams, averaged
            over the layers and scaled by
            ``hparams.batch_deviation_loss_factor`` times
            ``common_layers.inverse_exp_decay(hparams.anneal_until)``.
        """
        hparams = self._hparams
        # TODO(rshin): Give identity_module lower weight by default.
        multi_conv = multi_conv_module(kernel_sizes=[(3, 3), (5, 5), (7, 7)],
                                       seps=[0, 1])
        conv_modules = [multi_conv, identity_module]
        # Modules are called as module(x, hparams); the plain activations
        # ignore the second argument.
        activation_modules = [
            identity_module, lambda x, _: tf.nn.relu(x),
            lambda x, _: tf.nn.elu(x), lambda x, _: tf.tanh(x)
        ]
        norm_modules = [identity_module, layernorm_module, noamnorm_module]
        binary_modules = [
            first_binary_module, second_binary_module, sum_binary_module,
            shakeshake_binary_module
        ]
        inputs = features["inputs"]

        def run_unary(x, name):
            """A single step of unary modules.

            Applies norm, activation and conv modules in that order, each
            in its own variable scope under `name`. Returns the result
            after dropout together with the batch deviation of the
            pre-dropout result.
            """
            x_shape = x.get_shape()
            with tf.variable_scope(name):
                with tf.variable_scope("norm"):
                    x = run_unary_modules(norm_modules, x, hparams)
                    # Re-assert the static shape after each module stage.
                    x.set_shape(x_shape)
                with tf.variable_scope("activation"):
                    x = run_unary_modules(activation_modules, x, hparams)
                    x.set_shape(x_shape)
                with tf.variable_scope("conv"):
                    x = run_unary_modules(conv_modules, x, hparams)
                    x.set_shape(x_shape)
            return tf.nn.dropout(x, 1.0 - hparams.dropout), batch_deviation(x)

        cur1, cur2, cur3, extra_loss = inputs, inputs, inputs, 0.0
        cur_shape = inputs.get_shape()
        for i in xrange(hparams.num_hidden_layers):
            with tf.variable_scope("layer_%d" % i):
                cur1, loss1 = run_unary(cur1, "unary1")
                cur2, loss2 = run_unary(cur2, "unary2")
                # The third stream must be fed from cur3. Feeding cur2 here
                # (as this code previously did) discards the "binary3"
                # output of the previous layer, leaving that branch dead.
                cur3, loss3 = run_unary(cur3, "unary3")
                extra_loss += (loss1 + loss2 + loss3) / float(
                    hparams.num_hidden_layers)
                # Recombine every pair of streams.
                with tf.variable_scope("binary1"):
                    next1 = run_binary_modules(binary_modules, cur1, cur2,
                                               hparams)
                    next1.set_shape(cur_shape)
                with tf.variable_scope("binary2"):
                    next2 = run_binary_modules(binary_modules, cur1, cur3,
                                               hparams)
                    next2.set_shape(cur_shape)
                with tf.variable_scope("binary3"):
                    next3 = run_binary_modules(binary_modules, cur2, cur3,
                                               hparams)
                    next3.set_shape(cur_shape)
                cur1, cur2, cur3 = next1, next2, next3

        anneal = common_layers.inverse_exp_decay(hparams.anneal_until)
        extra_loss *= hparams.batch_deviation_loss_factor * anneal
        return cur1, extra_loss
Example #2
0
def run_unary_modules_basic(modules, cur, hparams):
    """Apply every module to `cur` and blend the results with softmax weights.

    The blend weights are the softmax of a trainable "selection" vector (one
    entry per module, initialised to zero) multiplied by an inverse
    temperature derived from `common_layers.inverse_exp_decay`.

    Args:
        modules: sequence of callables invoked as ``module(cur, hparams)``.
        cur: tensor handed to every module.
        hparams: passed through to every module.

    Returns:
        The weighted sum of all module outputs.
    """
    logits = tf.get_variable(
        "selection", [len(modules)], initializer=tf.zeros_initializer())
    decay = common_layers.inverse_exp_decay(100000, min_value=0.01)
    inverse_temperature = 100.0 * decay
    weights = tf.nn.softmax(logits * inverse_temperature)

    outputs = [module(cur, hparams) for module in modules]
    # Stack the outputs along a new leading axis, one slice per module.
    stacked = tf.concat(
        [tf.expand_dims(output, axis=0) for output in outputs], axis=0)
    # One weight per module, broadcast over the four trailing axes
    # (assumes the module outputs are 4-D -- TODO confirm).
    weighted = stacked * tf.reshape(weights, [-1, 1, 1, 1, 1])
    return tf.reduce_sum(weighted, axis=0)
Example #3
0
def run_unary_modules_basic(modules, cur, hparams):
    """Apply every module to `cur` and blend the results with softmax weights.

    The weights come from ``create_selection_weights`` in "softmax" mode,
    with an inverse temperature derived from
    ``common_layers.inverse_exp_decay(hparams.anneal_until, ...)``.

    Args:
        modules: sequence of callables invoked as ``module(cur, hparams)``.
        cur: tensor handed to every module.
        hparams: supplies `anneal_until`; also passed to every module.

    Returns:
        The weighted sum of all module outputs.
    """
    decay = common_layers.inverse_exp_decay(
        hparams.anneal_until, min_value=0.01)
    weights = create_selection_weights(
        "selection", "softmax", shape=[len(modules)], inv_t=100.0 * decay)

    outputs = [module(cur, hparams) for module in modules]
    # Stack the outputs along a new leading axis, one slice per module.
    stacked = tf.concat(
        [tf.expand_dims(output, axis=0) for output in outputs], axis=0)
    # One weight per module, broadcast over the four trailing axes
    # (assumes the module outputs are 4-D -- TODO confirm).
    weighted = stacked * tf.reshape(weights.normalized, [-1, 1, 1, 1, 1])
    return tf.reduce_sum(weighted, axis=0)
Example #4
0
    def model_fn_body(self, features):
        """Build the model body from two parallel streams of modules.

        Both streams start from ``features["inputs"]``. In each hidden layer
        every stream goes through a unary step (norm, activation and conv
        modules, each applied via ``run_unary_modules``), after which two
        separately scoped ``run_binary_modules`` calls recombine the pair
        into the next pair of streams.

        Args:
            features: mapping that provides the input tensor under "inputs".

        Returns:
            A pair ``(output, extra_loss)``. ``output`` is the first stream
            after the last layer. ``extra_loss`` is the ``batch_deviation``
            of the unary outputs, summed over both streams, averaged over
            the layers and scaled by ``hparams.batch_deviation_loss_factor``
            times ``common_layers.inverse_exp_decay(hparams.anneal_until)``.
        """
        hparams = self._hparams

        # Candidate modules for each stage of a unary step.
        conv_modules = []
        for size in (3, 5, 7):
            for sep in (0, 1):
                conv_modules.append(conv_module(size, size, sep, 1))
        conv_modules.append(identity_module)

        # Modules are called as module(x, hparams); the plain activations
        # ignore the second argument.
        def relu_module(x, _):
            return tf.nn.relu(x)

        def elu_module(x, _):
            return tf.nn.elu(x)

        def tanh_module(x, _):
            return tf.tanh(x)

        activation_modules = [
            identity_module, relu_module, elu_module, tanh_module
        ]
        norm_modules = [identity_module, layernorm_module, noamnorm_module]
        binary_modules = [
            first_binary_module,
            second_binary_module,
            sum_binary_module,
            shakeshake_binary_module,
        ]
        unary_stages = (
            ("norm", norm_modules),
            ("activation", activation_modules),
            ("conv", conv_modules),
        )
        inputs = features["inputs"]

        def run_unary(x, name):
            """Run norm, activation and conv stages under scope `name`."""
            static_shape = x.get_shape()
            with tf.variable_scope(name):
                for stage_name, stage_modules in unary_stages:
                    with tf.variable_scope(stage_name):
                        x = run_unary_modules(stage_modules, x, hparams)
                        # Re-assert the static shape after each stage.
                        x.set_shape(static_shape)
            dropped = tf.nn.dropout(x, 1.0 - hparams.dropout)
            # The deviation is measured on the pre-dropout activations.
            return dropped, batch_deviation(x)

        stream_a, stream_b = inputs, inputs
        extra_loss = 0.0
        full_shape = inputs.get_shape()
        for layer in xrange(hparams.num_hidden_layers):
            with tf.variable_scope("layer_%d" % layer):
                stream_a, loss_a = run_unary(stream_a, "unary1")
                stream_b, loss_b = run_unary(stream_b, "unary2")
                extra_loss += (loss_a + loss_b) / float(
                    hparams.num_hidden_layers)
                # Two independently scoped combinations of the same pair
                # become the next layer's streams.
                combined = []
                for scope_name in ("binary1", "binary2"):
                    with tf.variable_scope(scope_name):
                        mixed = run_binary_modules(binary_modules, stream_a,
                                                   stream_b, hparams)
                        mixed.set_shape(full_shape)
                    combined.append(mixed)
                stream_a, stream_b = combined

        anneal = common_layers.inverse_exp_decay(hparams.anneal_until)
        loss_scale = hparams.batch_deviation_loss_factor * anneal
        return stream_a, extra_loss * loss_scale
Example #5
0
def run_unary_modules_sample(modules, cur, hparams, k):
    """Blend module outputs, skipping modules whose weight is negligible.

    Weights come from ``create_selection_weights`` in ``("softmax_topk", k)``
    mode. A module whose normalized weight is below 1e-6 is not evaluated;
    a zero tensor shaped like `cur` stands in for its output.

    Args:
        modules: sequence of callables invoked as ``module(cur, hparams)``.
        cur: tensor handed to every evaluated module.
        hparams: supplies `anneal_until`; also passed to every module.
        k: forwarded to ``create_selection_weights`` with "softmax_topk".

    Returns:
        The weighted sum of the (possibly zeroed) module outputs.
    """
    decay = common_layers.inverse_exp_decay(
        hparams.anneal_until, min_value=0.01)
    weights = create_selection_weights(
        "selection", ("softmax_topk", k),
        shape=[len(modules)],
        inv_t=100.0 * decay)

    def gated_output(index, module):
        """Evaluate `module` only when its weight is at least 1e-6."""
        negligible = tf.less(weights.normalized[index], 1e-6)
        return tf.cond(negligible,
                       lambda: tf.zeros_like(cur),
                       lambda: module(cur, hparams))

    outputs = [
        gated_output(index, module) for index, module in enumerate(modules)
    ]
    # Stack the outputs along a new leading axis, one slice per module.
    stacked = tf.concat(
        [tf.expand_dims(output, axis=0) for output in outputs], axis=0)
    # One weight per module, broadcast over the four trailing axes
    # (assumes the module outputs are 4-D -- TODO confirm).
    weighted = stacked * tf.reshape(weights.normalized, [-1, 1, 1, 1, 1])
    return tf.reduce_sum(weighted, axis=0)
Example #6
0
def run_unary_modules_sample(modules, cur, hparams, k):
    """Run modules, sampling k.

    Draws `k` module indices with ``tf.multinomial``, using the trainable
    "selection" vector as logits. Only sampled modules are evaluated; the
    others contribute a zero tensor shaped like `cur`. The outputs are then
    blended with a softmax over the tempered selection vector in which
    unsampled modules are masked out.

    Args:
        modules: sequence of callables invoked as ``module(cur, hparams)``.
        cur: tensor handed to every evaluated module.
        hparams: passed through to every module.
        k: number of samples to draw.

    Returns:
        The weighted sum of the (possibly zeroed) module outputs.
    """
    selection_var = tf.get_variable("selection", [len(modules)],
                                    initializer=tf.zeros_initializer())
    selection = tf.multinomial(tf.expand_dims(selection_var, axis=0), k)
    selection = tf.squeeze(selection, axis=0)  # [k] selected classes.
    to_run = tf.one_hot(selection, len(modules))  # [k x nmodules] one-hot.
    to_run = tf.reduce_sum(to_run, axis=0)  # [nmodules] sample counts.
    # Samples are drawn independently, so one module can be picked more than
    # once. Clamp the counts to a 0/1 indicator: with a count of 2 the mask
    # term below would turn into +1e9 and swamp the learned weights.
    to_run = tf.minimum(to_run, 1.0)  # [nmodules], 0=not run, 1=run.
    all_res = [
        tf.cond(tf.less(to_run[n], 0.1),
                lambda: tf.zeros_like(cur),
                # Bind n now; a plain closure would see only the last n.
                lambda i=n: modules[i](cur, hparams))
        for n in xrange(len(modules))
    ]
    inv_t = 100.0 * common_layers.inverse_exp_decay(100000, min_value=0.01)
    # Subtracting 1e9 from the logits of unsampled modules drives their
    # softmax weight to (numerically) zero.
    selected_weights = tf.nn.softmax(selection_var * inv_t - 1e9 *
                                     (1.0 - to_run))
    all_res = tf.concat([tf.expand_dims(r, axis=0) for r in all_res], axis=0)
    # One weight per module, broadcast over the four trailing axes
    # (assumes the module outputs are 4-D -- TODO confirm).
    res = all_res * tf.reshape(selected_weights, [-1, 1, 1, 1, 1])
    return tf.reduce_sum(res, axis=0)