Example #1
def sself_att(x,
              num_heads,
              num_per_heads,
              ac=None,
              dropout_rate=0.0,
              mode='train'):
    '''
        x shape: [batch_size, seq_length, hidden_size/emb_size]
    '''
    # Dropout only applies while training.
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('S-SELFATT DROPOUT:%.2f' % dropout_rate)
    xshape = tf.shape(x)
    # Flatten to 2-D so the dense projections apply per token.
    x = tf.reshape(x, [-1, xshape[-1]])
    que = dense(num_heads * num_per_heads, ac=ac)(x)
    key = dense(num_heads * num_per_heads, ac=ac)(x)
    val = dense(num_heads * num_per_heads, ac=ac)(x)
    # [batch*seq, H*D] -> [batch, seq, H, D] -> [batch, H, seq, D];
    # reshaping straight to [batch, H, seq, D] would scramble the head axis.
    que = tf.transpose(
        tf.reshape(que, [xshape[0], xshape[1], num_heads, num_per_heads]),
        [0, 2, 1, 3])
    key = tf.transpose(
        tf.reshape(key, [xshape[0], xshape[1], num_heads, num_per_heads]),
        [0, 2, 1, 3])
    # Scaled dot-product attention: scores are [batch, H, seq, seq].
    score = tf.multiply(tf.matmul(que, key, transpose_b=True),
                        1.0 / math.sqrt(float(num_per_heads)))
    score = softmax(score)
    # [batch, seq, H, D] -> [batch, H, seq, D] to line up with the scores.
    val = tf.reshape(val, [xshape[0], xshape[1], num_heads, num_per_heads])
    val = tf.transpose(val, [0, 2, 1, 3])
    # Merge heads back: [batch, seq, H*D].
    context = tf.transpose(tf.matmul(score, val), [0, 2, 1, 3])
    context = tf.reshape(context, [xshape[0], xshape[1], -1])
    if (dropout_rate > 0.0):
        context = dropout(dropout_rate)(context)
    return context
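A minimal usage sketch (TF 1.x), assuming the module's own dense/dropout helpers are in scope; the shapes and hyperparameters are illustrative only:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128, 512])  # [batch, seq, hidden]
# 8 heads x 64 dims per head == 512, so the context keeps the input width.
context = sself_att(x, num_heads=8, num_per_heads=64,
                    dropout_rate=0.1, mode='train')  # [batch, 128, 512]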
Example #2
def flist():
    '''
    List all functions and annotations within this module.
    '''
    global __anno__, __all__
    g.infor(__anno__)
    # __all__ holds (name, annotation) pairs here, not the usual name list.
    for k, v in __all__:
        g.normal(k)
        g.normal('ANO => ' + v + '\n')
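For that loop to work, the module presumably defines __anno__ and __all__ along these lines (a hypothetical layout, not from the source):

__anno__ = 'gdu: common TensorFlow building blocks'
__all__ = [('sself_att', 'static multi-head self-attention'),
           ('gru', 'GRU cell factory with dropout/residual wrappers')]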
Example #3
File: traint.py  Project: Raven-D/gdu
def check_value(fname):
    value = 0.0
    try:
        # co is presumably the codecs module; the file holds a bare float.
        with co.open(fname, 'r', 'utf-8') as rf:
            value = float(rf.read())
    except Exception:
        g.error('NO CONF VALUE FOUND, %s' % fname)
    value = value if value is not None else 0.0
    g.infor('DATA AUGMENT : %f .' % value)
    return value
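For example, an ./augment file containing just "0.3" yields 0.3, while a missing or unreadable file falls back to 0.0:

# echo 0.3 > augment
# check_value('./augment')  ->  0.3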
Example #4
def gru(num_units, ac=tanh, dropout=0.0, mode='train', res=False):
    # Dropout only applies while training.
    if (mode != 'train'):
        dropout = 0.0
    gru = tf.contrib.rnn.GRUCell(num_units, activation=ac)
    if (dropout > 0.0 and dropout < 1.0):
        # DropoutWrapper takes a keep probability, not a drop rate.
        gru = tf.contrib.rnn.DropoutWrapper(cell=gru,
                                            input_keep_prob=(1.0 - dropout))
    if (res):
        gru = tf.contrib.rnn.ResidualWrapper(gru)
    g.infor('|  CREATE GRU UNITS:%d DROPOUT:%.2f RESIDUAL:%r  |' %
            (num_units, dropout, res))
    return gru
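A sketch of stacking two of these cells with the same TF 1.x contrib API (input shape and sizes are illustrative):

inputs = tf.placeholder(tf.float32, [None, 50, 256])  # [batch, time, features]
cells = [gru(256, dropout=0.1, mode='train') for _ in range(2)]
stacked = tf.contrib.rnn.MultiRNNCell(cells)
outputs, state = tf.nn.dynamic_rnn(stacked, inputs, dtype=tf.float32)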
Example #5
def load_model(model, model_dir, session):
    '''
    model => model class
    model_dir => model dir string path
    session => tensorflow.Session
    '''
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    gi = tf.global_variables_initializer()
    if latest_ckpt:
        # Resume from the newest checkpoint in model_dir.
        model.saver.restore(session, latest_ckpt)
    else:
        # No checkpoint yet: start from freshly initialized variables.
        session.run(gi)
        g.infor('CREATE NEW MODEL...')

    global_step = model.global_step.eval(session=session)
    return model, global_step
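Both train() and infer() in the later examples resume through this helper (imported there as tc.load_model) before touching the graph.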
Example #6
def dselfatt(x, att_size, dropout_rate=0.0, ac=None, res=True, mode='train'):
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('D-SELFATT DROPOUT:%.2f' % dropout_rate)
    q = dense(att_size, ac=ac)(x)
    k = dense(att_size, ac=ac)(x)
    v = dense(att_size, ac=ac)(x)
    # Scaled dot-product attention over the sequence axis.
    s = tf.matmul(q, k, transpose_b=True)
    s = tf.multiply(s, 1.0 / tf.sqrt(tf.cast(att_size, tf.float32)))
    s = tf.nn.softmax(s, -1)
    result = dropout(rate=dropout_rate)(tf.matmul(s, v))
    if (res):
        # Residual path: att_size must match x's last dimension.
        return result + x
    else:
        return result
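A usage sketch with the residual path on; att_size must equal the input's last dimension for the addition to be valid:

x = tf.placeholder(tf.float32, [None, 64, 512])
y = dselfatt(x, att_size=512, dropout_rate=0.1, res=True, mode='train')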
Example #7
File: traint.py  Project: Raven-D/gdu
def infer():
    tf.reset_default_graph()
    with tf.Graph().as_default() as global_graph:
        model = Transfer(param=PARAM, mode='infer')
        sess_conf = tf.ConfigProto(intra_op_parallelism_threads=8,
                                   inter_op_parallelism_threads=8)
        sess_conf.gpu_options.allow_growth = True
        with tf.Session(graph=global_graph, config=sess_conf) as sess:
            model, global_step = tc.load_model(model, MODEL_DIR, sess)
            vocab = g.read_vocab(VOCAB)
            rvocab = g.reverse_vocab(vocab)
            g.infor('          |begin global step : %d|          ' %
                    global_step)
            g.infor(TAB + '|stage-%d|' % PARAM['stage'] + TAB)
            source_label = g.read_contents(TEST, split='=')
            source, label = g.unzip_tuple(source_label)
            index = 0
            # save graph pb file
            # tf.train.write_graph(sess.graph_def, MODEL_DIR, 'transfer-infer.pbtxt')
            texts = ''
            CACHE = 512  # flush decoded text to disk every CACHE samples
            infer_len = len(source_label)
            for i in range(TRAIN_STEP):
                sid, sil, teach, dod, dol, need_shuffle, index = g.get_seq2seq_batch(
                    source=source,
                    label=label,
                    batch_size=1,
                    index=index,
                    vocab=vocab)
                if (need_shuffle):
                    # Data wrapped around: one full pass is done.
                    break
                sample_id = model.infer(sess, [sid, sil])
                # batch_size is 1, so take the first (and only) sample.
                sample_id = sample_id[0][0]
                answer = g.convert_ids_to_string(sample_id, rvocab)
                texts += answer
                # g.normal('\n')
                # g.normal(g.convert_ids_to_string(sid[0], rvocab))
                # g.normal(answer)
                # g.normal(g.convert_ids_to_string(dod[0], rvocab))
                # g.normal('\n')
                if (i > 0 and i % CACHE == 0):
                    with co.open('infer-t.txt', 'a', 'utf-8') as wf:
                        wf.write(texts)
                        texts = ''
                    g.infor(TAB + 'progress:%.4f' %
                            (float(i + 1) / infer_len * 100.) + TAB)
            if (texts != ''):
                with co.open('infer-t.txt', 'a', 'utf-8') as wf:
                    wf.write(texts)
                    texts = ''
            g.infor(TAB + 'task infer finished' + TAB)
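Judging from read_contents(TEST, split='=') and unzip_tuple, the test file presumably holds one '='-separated source/target pair per line; the exact token format is an assumption:

# assumed TEST layout:
#   <source sentence>=<target sentence>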
Example #8
def transformer_block(x,
                      layers,
                      num_heads,
                      num_per_heads,
                      ac=gelu,
                      dropout_rate=0.0,
                      mode='train'):
    '''
        Transformer block; x must have a static (explicit) seq_length and
        hidden_size/emb_size: x.shape = [batch_size, seq_length, hidden_size].
    '''
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('TRANSFORMER BLOCK DROPOUT:%.2f' % dropout_rate)
    all_layers_outputs = []
    xshape = x.shape
    layer_input = x
    for layer_idx in range(layers):
        with tf.variable_scope('layer_%d' % layer_idx):
            with tf.variable_scope("attention"):
                with tf.variable_scope("self"):
                    cxt = sself_att(layer_input, num_heads, num_per_heads,
                                    None, dropout_rate, mode)
                with tf.variable_scope("output"):
                    # linear projection
                    cxt = dense(xshape[-1].value, ac, use_bias=False)(cxt)
                    cxt = dropout(dropout_rate)(cxt)
                    cxt = norm(cxt + layer_input)
            with tf.variable_scope('intermediate'):
                itm = dense(xshape[-1].value * 4, ac)(cxt)
            with tf.variable_scope("output"):
                fout = dense(xshape[-1].value, None)(itm)
                fout = dropout(dropout_rate)(fout)
                fout = norm(fout + cxt)
                layer_input = fout
                all_layers_outputs.append(fout)
    return all_layers_outputs
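A usage sketch; the static-shape requirement means the placeholder needs fixed seq and hidden dimensions (the values here are illustrative):

x = tf.placeholder(tf.float32, [None, 128, 512])
outs = transformer_block(x, layers=6, num_heads=8, num_per_heads=64,
                         dropout_rate=0.1, mode='train')
final = outs[-1]  # [batch, 128, 512]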
Example #9
File: traint.py  Project: Raven-D/gdu
def train():
    tf.reset_default_graph()
    with tf.Graph().as_default() as global_graph:
        model = Transfer(param=PARAM, mode='train')
        sess_conf = tf.ConfigProto(intra_op_parallelism_threads=8,
                                   inter_op_parallelism_threads=8)
        sess_conf.gpu_options.allow_growth = True
        with tf.Session(graph=global_graph, config=sess_conf) as sess:
            model, global_step = tc.load_model(model, MODEL_DIR, sess)
            sess.graph.finalize()
            vocab = g.read_vocab(VOCAB)
            rvocab = g.reverse_vocab(vocab)
            g.infor(TAB + '|begin global step : %d|' % global_step + TAB)
            g.infor(TAB + '|stage-%d|' % PARAM['stage'] + TAB)
            source_label = g.read_contents(
                STAGE1_DATA if PARAM['stage'] == 1 else STAGE2_DATA, split='=')
            source_label = g.shuffle(source_label)
            source, label = g.unzip_tuple(source_label)
            index = 0
            augment = check_value('./augment')
            # reinit the projection layer before train stage 2.
            # sess.run(model.reinit)
            for i in range(TRAIN_STEP):
                sid, sil, teach, dod, dol, need_shuffle, index = g.get_seq2seq_batch(
                    source=source,
                    label=label,
                    batch_size=BATCH_SIZE,
                    index=index,
                    vocab=vocab,
                    augment=augment)

                _, loss, global_step, lr, tn = model.train(
                    sess, [sid, sil, teach, dod, dol])
                g.normal(
                    'step:%d loss:%.4f lr:%.6f TN:%.6f    ' %
                    (global_step, loss, lr, tn), 'fg_green')
                if (need_shuffle):
                    # End of epoch: reshuffle, checkpoint, and re-read the
                    # augment value.
                    source, label = g.unzip_tuple(g.shuffle(source_label))
                    g.infor('\n' + TAB + '|shuffle data|' + TAB + '\n')
                    g.record('loss', loss, limit=-1, force_write=True)
                    model.saver.save(sess,
                                     MODEL_DIR + '/transfer.ckpt',
                                     global_step=global_step)
                    g.infor('save model when step to :%d' % global_step)
                    augment = check_value('./augment')
                else:
                    g.record('loss', loss, limit=-1)
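Because check_value('./augment') runs again at every shuffle point, the augmentation strength can be adjusted mid-run by editing that file between epochs.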
Example #10
File: traint.py  Project: Raven-D/gdu


if __name__ == '__main__':
    argv = sys.argv
    if (len(argv) == 2):
        mode = argv[1]
        g.infor('mode : %s' % mode)
        if (mode == 'infer'):
            g.infor('infer...')
            infer()
        else:
            # Any argument other than 'infer' falls through to training.
            g.infor('train...')
            train()
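Invocation, as implied by the argv check above (with no argument the script exits silently):

# python traint.py train
# python traint.py infer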