Example #1
def mgpu_predict(*xs):
    gpu_ops = []
    # Shard each input tensor along the batch dimension, one slice per GPU.
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        # Place this tower on GPU i and reuse the variables built at train time.
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=True):
            lm_logits, lm_losses = model(*xs,
                                         units=units,
                                         n_vocab=n_vocab,
                                         n_special=n_special,
                                         n_embd=n_embd,
                                         embd_pdrop=embd_pdrop,
                                         train=False,
                                         reuse=True)
            gpu_ops.append([lm_logits, lm_losses])
    # Reassemble per-GPU outputs into full-batch tensors.
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    return ops
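All five examples call an assign_to_gpu helper that is not shown. A minimal sketch, assuming it follows the common TF1 device-function pattern (the signature is inferred from the calls above; variable ops are pinned to ps_dev, everything else lands on the chosen GPU):

def assign_to_gpu(gpu=0, ps_dev="/device:CPU:0"):
    # Returns a device function usable with tf.device: variables go to
    # ps_dev, all other ops to the given GPU. The examples above pass
    # "/gpu:0" as ps_dev, so the shared parameters live on the first GPU.
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op in ("Variable", "VariableV2", "VarHandleOp"):
            return ps_dev
        return "/gpu:%d" % gpu
    return _assign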
Example #2
def mgpu_train(*xs):
    gpu_ops = []
    gpu_grads = []
    # Shard each input tensor along the batch dimension, one slice per GPU.
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        # The first tower creates the variables; every later tower reuses them.
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            clf_logits, clf_losses, lm_losses = model(*xs,
                                                      clf_token=clf_token,
                                                      n_vocab=n_vocab,
                                                      n_class=n_class,
                                                      n_special=n_special,
                                                      n_ctx=n_ctx,
                                                      n_embd=n_embd,
                                                      embd_pdrop=embd_pdrop,
                                                      n_layer=n_layer,
                                                      n_head=n_head,
                                                      attn_pdrop=attn_pdrop,
                                                      resid_pdrop=resid_pdrop,
                                                      clf_pdrop=clf_pdrop,
                                                      train=True,
                                                      reuse=do_reuse)
            # Mix in the auxiliary language-modeling loss when enabled.
            if lm_coef > 0:
                train_loss = tf.reduce_mean(
                    clf_losses) + lm_coef * tf.reduce_mean(lm_losses)
            else:
                train_loss = tf.reduce_mean(clf_losses)
            # Compute this tower's gradients w.r.t. the shared parameters.
            params = find_trainable_variables("model")
            grads = tf.gradients(train_loss, params)
            grads = list(zip(grads, params))
            gpu_grads.append(grads)
            gpu_ops.append([clf_logits, clf_losses, lm_losses])
    # Reassemble per-GPU outputs, then average gradients across towers and
    # keep only the gradient tensors (dropping the paired variables).
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    grads = average_grads(gpu_grads)
    grads = [g for g, p in grads]
    train = adam(params,
                 grads,
                 lr,
                 lr_schedule,
                 n_updates_total,
                 warmup=lr_warmup,
                 l2=l2,
                 max_grad_norm=max_grad_norm,
                 vector_l2=vector_l2,
                 b1=b1,
                 b2=b2,
                 e=e)
    return [train] + ops
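mgpu_train depends on an average_grads helper that is also not shown. A sketch in the style of the classic TF1 multi-GPU (CIFAR-10) tutorial, assuming every gradient is a dense tensor; the original projects may treat tf.IndexedSlices from embedding lookups differently:

def average_grads(tower_grads):
    # tower_grads: one list of (grad, var) pairs per GPU, all in the same
    # variable order. Returns a single averaged list of (grad, var) pairs.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        _, var = grad_and_vars[0]
        averaged.append((grad, var))
    return averaged

Example #2 then strips the variables back out with [g for g, p in grads] because its adam helper takes params and grads as two separate lists.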
Example #3
def mgpu_train(*xs):
    gpu_ops = []
    gpu_grads = []
    # Shard inputs across GPUs; the first tower creates the variables.
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            s_preds, e_preds, qa_losses = model(*xs,
                                                n_word=n_word,
                                                n_char=n_char,
                                                n_pred=n_pred,
                                                n_wembd=n_wembd,
                                                n_cembd=n_cembd,
                                                units=units,
                                                embd_pdrop=embd_pdrop,
                                                n_head=n_head,
                                                attn_pdrop=attn_pdrop,
                                                resid_pdrop=resid_pdrop,
                                                train=True,
                                                reuse=do_reuse)
            # QA variant: a single span-prediction loss, no auxiliary LM term.
            train_loss = tf.reduce_mean(qa_losses)
            params = find_trainable_variables("model")
            grads = tf.gradients(train_loss, params)
            grads = list(zip(grads, params))
            gpu_grads.append(grads)
            gpu_ops.append([s_preds, e_preds, qa_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    grads = average_grads(gpu_grads)
    grads = [g for g, p in grads]
    train = adam(params,
                 grads,
                 lr,
                 lr_schedule,
                 n_updates_total,
                 warmup=lr_warmup,
                 l2=l2,
                 max_grad_norm=max_grad_norm,
                 vector_l2=vector_l2,
                 b1=b1,
                 b2=b2,
                 e=e)
    return [train] + ops
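Both training examples fetch their parameter list through find_trainable_variables("model"), another unshown helper. A plausible one-liner, assuming it simply filters the trainable-variables collection by scope name (the scope argument of tf.get_collection is treated as a regex in TF1):

def find_trainable_variables(key):
    # Collect trainable variables whose scope name matches the given key.
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                             scope=".*{}.*".format(key))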
Example #4
def mgpu_predict(*xs):
    gpu_ops = []
    # Shard inputs across GPUs; all towers reuse the training variables.
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=True):
            s_preds, e_preds, qa_losses = model(*xs,
                                                n_word=n_word,
                                                n_char=n_char,
                                                n_pred=n_pred,
                                                n_wembd=n_wembd,
                                                n_cembd=n_cembd,
                                                units=units,
                                                embd_pdrop=embd_pdrop,
                                                n_head=n_head,
                                                attn_pdrop=attn_pdrop,
                                                resid_pdrop=resid_pdrop,
                                                train=False,
                                                reuse=True)
            gpu_ops.append([s_preds, e_preds, qa_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    return ops
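One practical constraint worth noting: tf.split(x, n_gpu, 0) requires the batch size to divide evenly by n_gpu, so the feeding code must pad or trim batches accordingly. A hypothetical driver (placeholder names and shapes are illustrative, not from the original projects):

# Build the graph once; evaluate a batch whose size is a multiple of n_gpu.
# Assumes mgpu_train(...) was already called so the reused variables exist.
X = tf.placeholder(tf.int32, [None, n_ctx])    # illustrative input
eval_ops = mgpu_predict(X)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    preds = sess.run(eval_ops, {X: x_batch})   # len(x_batch) % n_gpu == 0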
Example #5
def mgpu_predict(*xs):
    gpu_ops = []
    # Shard inputs across GPUs; reuse=True because mgpu_train built the
    # variables first.
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=True):
            clf_logits, clf_losses, lm_losses = model(*xs,
                                                      clf_token=clf_token,
                                                      n_vocab=n_vocab,
                                                      n_class=n_class,
                                                      n_special=n_special,
                                                      n_ctx=n_ctx,
                                                      n_embd=n_embd,
                                                      embd_pdrop=embd_pdrop,
                                                      n_layer=n_layer,
                                                      n_head=n_head,
                                                      attn_pdrop=attn_pdrop,
                                                      resid_pdrop=resid_pdrop,
                                                      clf_pdrop=clf_pdrop,
                                                      train=False,
                                                      reuse=True)
            gpu_ops.append([clf_logits, clf_losses, lm_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    return ops
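Because every mgpu_predict tower opens its variable scope with reuse=True, the matching mgpu_train must be built first so those variables actually exist; otherwise TF1 raises a "Variable ... does not exist" error. A sketch of the expected call order (argument names are illustrative):

train_ops = mgpu_train(X, M, Y)   # first call: creates the shared variables
eval_ops = mgpu_predict(X, M)     # later call: reuses those same variables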