Example #1
def quantize_post(x, name, tag):
    # Quantize activations on the output side of a block: e6f7 on the forward
    # pass, e5f2 (stochastic) on the backward pass.  Only rank 0 attaches
    # logfiles so stats are written once rather than once per process.
    if tag != "none":
        if mpi_rank == 0:
            qspec_f = QuantizeSpec(copy=qspec_e6f7, logfile="qspec_e6f07.f.%s.txt" % tag)
            qspec_b = QuantizeSpec(copy=qspec_e5f2, logfile="qspec_e5f02.b.%s.txt" % tag)
        else:
            qspec_f = qspec_e6f7
            qspec_b = qspec_e5f2
        return quantize(x, qspec_f, qspec_b, name=name)
    return x
Example #2
def quantize_pre(x, name, tag):
    # Quantize activations on the input side of a block: e4f3 on the forward
    # pass, e6f7 on the backward pass.  As in quantize_post, only rank 0 logs.
    if tag != "none":
        if mpi_rank == 0:
            qspec_f = QuantizeSpec(copy=qspec_e4f3, logfile="qspec_e4f03.f.%s.txt" % tag)
            qspec_b = QuantizeSpec(copy=qspec_e6f7, logfile="qspec_e6f07.b.%s.txt" % tag)
        else:
            qspec_f = qspec_e4f3
            qspec_b = qspec_e6f7
        return quantize(x, qspec_f, qspec_b, name=name)
    return x
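
Taken together, the two helpers bracket an op: quantize_pre narrows what flows into it, quantize_post narrows what flows out, and both collapse to the identity when tag is "none". Below is a minimal sketch of a hypothetical call site, assuming the qspec_* globals and mpi_rank from Example #4 are in scope; the tensor and tag names are placeholders, not from the original file:

import tensorflow as tf

h      = tf.random_normal([64, 256])            # placeholder activation
w_proj = tf.get_variable("w_proj", [256, 256])  # placeholder weight
tag    = "test"                                 # any tag other than "none" enables quantization

h = quantize_pre(h, name="proj_in", tag=tag)
h = tf.matmul(h, w_proj)
h = quantize_post(h, name="proj_out", tag=tag)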
Example #3
import numpy as np
import tensorflow as tf
# mpi_rank/mpi_size and the blocksparse ops come from the setup in Example #4.

def model(X, Y, hps):

    # tf Variable of random ints of size (3 * GPU_SMs * 1024); 80 SMs here.
    # tf doesn't seem to support int32 variables, so store the bits through a float32 view.
    entropy_init = np.random.randint(-(1<<31), (1<<31), size=80*3*1024, dtype=np.int32).view(np.float32)

    if hps.tag != "none":
        qspec_e4f11 = QuantizeSpec(
            ebits      = 4,
            fbits      = 11,
            stochastic = 2,
            denorm     = True,
            frequency  = 512,
            bias_pad   = 1,
            logfile="qspec_e4f11.%s.b.txt" % hps.tag,
        )
        qspec_e5f10 = QuantizeSpec(
            ebits      = 5,
            fbits      = 10,
            stochastic = 2,
            denorm     = True,
            frequency  = 512,
            bias_pad   = 4,
            logfile="qspec_e5f10.%s.b.txt" % hps.tag,
        )
    else:
        qspec_e4f11 = None
        qspec_e5f10 = None
    xs = tf.split(X, mpi_size, 0)
    ys = tf.split(Y, mpi_size, 0)

    with tf.device("/gpu:0"), tf.variable_scope("model"):

        # Seed the GPU-side entropy pool the quantize ops draw on for
        # stochastic rounding.
        entropy = tf.get_variable("entropy", initializer=entropy_init, trainable=False)
        set_entropy(entropy)

        h = embed_input(xs[mpi_rank], hps)
        for l in range(hps.n_layer):
            h = transformer_block(h, 'layer_%d' % l, hps.n_head)
        logits = output(h, hps)

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=ys[mpi_rank])
        loss = tf.reduce_mean(loss)

        params = tf.trainable_variables()
        grads  = tf.gradients(loss, params)

        # for p in params:
        #     print(p.op.name + "_" + "_".join(str(x) for x in p.shape.as_list()))

        # count of correct predictions in this rank's shard of the batch
        test = tf.reduce_sum(tf.cast(tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), ys[mpi_rank]), tf.float32))

        grad_scale = 1.0 / mpi_size

        # all reduce grads
        if mpi_size > 1:
            group_allreduce(grads, params, search_strings=["classifier"] + ["layer_%d" % l for l in range(hps.n_layer-1, -1, -1)])

            loss = allreduce(loss) * grad_scale
            test = allreduce(test)

        # Adam stores the params quantized to e4f11 and its mean/var moments to e5f10.
        train = Adam(grads, params, grad_scale=grad_scale, param_qspec=qspec_e4f11, mean_qspec=qspec_e5f10, var_qspec=qspec_e5f10)

    return loss, train, test
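
For context, a minimal driver sketch for this model (not from the original file): it assumes one GPU per MPI rank, that hps carries the fields used above, that each rank feeds the full batch and the graph takes its own shard, and that sync_variables_op(mpi_rank) broadcasts rank 0's initial weights as in the blocksparse examples. The placeholder shapes and the batches iterable are hypothetical:

comm = MPI.COMM_WORLD
mpi_size, mpi_rank = comm.Get_size(), comm.Get_rank()

X = tf.placeholder(tf.int32, [None, 784])   # assumed shape for flattened MNIST pixels
Y = tf.placeholder(tf.int32, [None])
loss, train, test = model(X, Y, hps)

config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(mpi_rank)  # one GPU per rank
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    if mpi_size > 1:
        sess.run(sync_variables_op(mpi_rank))  # start every rank from rank 0's weights
    for xb, yb in batches:                     # `batches` is a placeholder data source
        loss_np, _ = sess.run([loss, train], feed_dict={X: xb, Y: yb})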
Example #4
from tqdm   import tqdm
from mpi4py import MPI
from tensorflow.examples.tutorials.mnist import input_data

from blocksparse.transformer import transpose_0213, masked_softmax
from blocksparse.norms       import layer_norm
from blocksparse.optimize    import Adam
from blocksparse.embed       import embedding_lookup
from blocksparse.quantize    import QuantizeSpec, quantize, set_entropy
from blocksparse.ewops       import bias_relu
from blocksparse.nccl        import allreduce, group_allreduce, sync_variables_op

qspec_e4f3 = QuantizeSpec(
    ebits      = 4,
    fbits      = 3,
    denorm     = True,
    frequency  = 512,
    bias_pad   = 1,
)
qspec_e5f2 = QuantizeSpec(
    ebits      = 5,
    fbits      = 2,
    stochastic = 2,
    denorm     = True,
    frequency  = 512,
    bias_pad   = 8,
)
qspec_e6f7 = QuantizeSpec(
    ebits      = 6,
    fbits      = 7,
    stochastic = 0,