def sself_att(x, num_heads, num_per_heads, ac=None, dropout_rate=0.0, mode='train'):
    '''
    Multi-head self-attention.
    x-shape: batch_size, seq_length, hidden_size/emb_size;
    '''
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('S-SELFATT DROPOUT:%.2f' % dropout_rate)
    xshape = tf.shape(x)
    x = tf.reshape(x, [-1, xshape[-1]])
    que = dense(num_heads * num_per_heads, ac=ac)(x)
    key = dense(num_heads * num_per_heads, ac=ac)(x)
    val = dense(num_heads * num_per_heads, ac=ac)(x)
    # [batch*seq, heads*depth] -> [batch, seq, heads, depth] -> [batch, heads, seq, depth]
    que = tf.reshape(que, [xshape[0], xshape[1], num_heads, num_per_heads])
    que = tf.transpose(que, [0, 2, 1, 3])
    key = tf.reshape(key, [xshape[0], xshape[1], num_heads, num_per_heads])
    key = tf.transpose(key, [0, 2, 1, 3])
    # scaled dot-product scores: [batch, heads, seq, seq]
    score = tf.multiply(tf.matmul(que, key, transpose_b=True),
                        1.0 / math.sqrt(float(num_per_heads)))
    score = tf.nn.softmax(score, -1)
    val = tf.reshape(val, [xshape[0], xshape[1], num_heads, num_per_heads])
    val = tf.transpose(val, [0, 2, 1, 3])
    # merge the heads back: [batch, seq, heads*depth]
    context = tf.transpose(tf.matmul(score, val), [0, 2, 1, 3])
    context = tf.reshape(context, [xshape[0], xshape[1], -1])
    if (dropout_rate > 0.0):
        context = dropout(dropout_rate)(context)
    return context
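# Hedged usage sketch for sself_att: builds a toy TF 1.x graph. The 512-dim
# input and the 8-head x 64-dim split are illustrative assumptions, not
# values taken from this repo.
def _demo_sself_att():
    x = tf.placeholder(tf.float32, [None, None, 512])
    cxt = sself_att(x, num_heads=8, num_per_heads=64, dropout_rate=0.1, mode='train')
    return cxt  # [batch_size, seq_length, 8 * 64]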
def flist():
    '''
    List all functions and annotations within this module.
    '''
    global __anno__, __all__
    g.infor(__anno__)
    # __all__ holds (name, annotation) pairs in this module
    for k, v in __all__:
        g.normal(k)
        g.normal('ANO => ' + v + '\n')
def check_value(fname):
    value = 0.0
    try:
        with co.open(fname, 'r', 'utf-8') as rf:
            value = float(rf.read())
    except Exception:
        g.error('NO CONF VALUE FOUND, %s' % fname)
    finally:
        # fall back to 0.0 if no value was read
        value = value if (value is not None) else 0.0
    g.infor('DATA AUGMENT : %f .' % value)
    return value
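# Hedged usage sketch: check_value expects a file holding a single float.
# The './augment' path matches what train() reads below; the 0.15 content
# is an illustrative assumption.
#
#   $ echo 0.15 > ./augment
#   augment = check_value('./augment')  # logs and returns 0.15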
def gru(num_units, ac=tanh, dropout=0.0, mode='train', res=False):
    if (mode != 'train'):
        dropout = 0.0
    cell = tf.contrib.rnn.GRUCell(num_units, activation=ac)
    if (dropout > 0.0 and dropout < 1.0):
        cell = tf.contrib.rnn.DropoutWrapper(cell=cell, input_keep_prob=(1.0 - dropout))
    if (res):
        cell = tf.contrib.rnn.ResidualWrapper(cell)
    g.infor('| CREATE GRU UNITS:%d DROPOUT:%.2f RESIDUAL:%r |' % (num_units, dropout, res))
    return cell
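# Hedged usage sketch: stacking two of these cells and unrolling them with
# dynamic_rnn. The 256-unit size and the inputs placeholder are assumptions.
def _demo_gru():
    inputs = tf.placeholder(tf.float32, [None, None, 256])
    cell = tf.contrib.rnn.MultiRNNCell([gru(256, dropout=0.2) for _ in range(2)])
    outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    return outputs  # [batch_size, time, 256]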
def load_model(model, model_dir, session):
    '''
    model => model class
    model_dir => model dir string path
    session => tensorflow.Session
    '''
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    gi = tf.global_variables_initializer()
    if latest_ckpt:
        model.saver.restore(session, latest_ckpt)
    else:
        session.run(gi)
        g.infor('CREATE NEW MODEL...')
    global_step = model.global_step.eval(session=session)
    return model, global_step
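# Hedged usage sketch: restore the latest checkpoint if one exists, otherwise
# initialize fresh variables. `Transfer`, `PARAM`, and `MODEL_DIR` are the
# names train()/infer() below pass in; they are not defined in this helper.
def _demo_load_model():
    with tf.Session() as sess:
        model = Transfer(param=PARAM, mode='train')
        model, global_step = load_model(model, MODEL_DIR, sess)
        g.infor('restored at step %d' % global_step)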
def dselfatt(x, att_size, dropout_rate=0.0, ac=None, res=True, mode='train'):
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('D-SELFATT DROPOUT:%.2f' % dropout_rate)
    q = dense(att_size, ac=ac)(x)
    k = dense(att_size, ac=ac)(x)
    v = dense(att_size, ac=ac)(x)
    # scaled dot-product attention over the whole sequence
    s = tf.matmul(q, k, transpose_b=True)
    s = tf.multiply(s, 1.0 / tf.sqrt(tf.cast(att_size, tf.float32)))
    s = tf.nn.softmax(s, -1)
    result = dropout(rate=dropout_rate)(tf.matmul(s, v))
    if (res):
        # the residual connection requires att_size == input hidden size
        return result + x
    else:
        return result
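# Hedged usage sketch for dselfatt (single-head attention with an optional
# residual). With res=True, att_size must equal the input's last dimension;
# the 512 used here is an assumption.
def _demo_dselfatt():
    x = tf.placeholder(tf.float32, [None, None, 512])
    out = dselfatt(x, att_size=512, dropout_rate=0.1, res=True)
    return out  # same shape as x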
def infer():
    tf.reset_default_graph()
    with tf.Graph().as_default() as global_graph:
        model = Transfer(param=PARAM, mode='infer')
        sess_conf = tf.ConfigProto(intra_op_parallelism_threads=8,
                                   inter_op_parallelism_threads=8)
        sess_conf.gpu_options.allow_growth = True
        with tf.Session(graph=global_graph, config=sess_conf) as sess:
            model, global_step = tc.load_model(model, MODEL_DIR, sess)
            vocab = g.read_vocab(VOCAB)
            rvocab = g.reverse_vocab(vocab)
            g.infor(' |begin global step : %d| ' % global_step)
            g.infor(TAB + '|stage-%d|' % PARAM['stage'] + TAB)
            source_label = g.read_contents(TEST, split='=')
            source, label = g.unzip_tuple(source_label)
            index = 0
            # save graph pb file
            # tf.train.write_graph(sess.graph_def, MODEL_DIR, 'transfer-infer.pbtxt')
            texts = ''
            CACHE = 512
            infer_len = len(source_label)
            for i in range(TRAIN_STEP):
                sid, sil, teach, dod, dol, need_shuffle, index = g.get_seq2seq_batch(
                    source=source, label=label, batch_size=1, index=index, vocab=vocab)
                if (need_shuffle):
                    break
                sample_id = model.infer(sess, [sid, sil])
                # batch_size = 1
                sample_id = sample_id[0][0]
                answer = g.convert_ids_to_string(sample_id, rvocab)
                texts += answer
                # g.normal('\n')
                # g.normal(g.convert_ids_to_string(sid[0], rvocab))
                # g.normal(answer)
                # g.normal(g.convert_ids_to_string(dod[0], rvocab))
                # g.normal('\n')
                if (i > 0 and i % CACHE == 0):
                    # flush decoded text to disk every CACHE samples
                    with co.open('infer-t.txt', 'a', 'utf-8') as wf:
                        wf.write(texts)
                    texts = ''
                    g.infor(TAB + 'progress:%.4f' % (float(i + 1) / infer_len * 100.) + TAB)
            if (texts != ''):
                with co.open('infer-t.txt', 'a', 'utf-8') as wf:
                    wf.write(texts)
                texts = ''
            g.infor(TAB + 'task infer finished' + TAB)
def transformer_block(x, layers, num_heads, num_per_heads, ac=gelu, dropout_rate=0.0, mode='train'):
    '''
    Transformer block; x must have an explicit (static) seq_length and
    hidden_size/emb_size:
    x.shape = [batch_size, static_seq_length, hidden_size/emb_size];
    '''
    if (mode != 'train'):
        dropout_rate = 0.0
    g.infor('TRANSFORMER BLOCK DROPOUT:%.2f' % dropout_rate)
    all_layers_outputs = []
    xshape = x.shape
    layer_input = x
    for layer_idx in range(layers):
        with tf.variable_scope('layer_%d' % layer_idx):
            with tf.variable_scope('attention'):
                with tf.variable_scope('self'):
                    cxt = sself_att(layer_input, num_heads, num_per_heads,
                                    None, dropout_rate, mode)
                with tf.variable_scope('output'):
                    # linear projection back to the model width
                    cxt = dense(xshape[-1].value, ac, use_bias=False)(cxt)
                    cxt = dropout(dropout_rate)(cxt)
                    cxt = norm(cxt + layer_input)
            with tf.variable_scope('intermediate'):
                # position-wise feed-forward expansion (4x model width)
                itm = dense(xshape[-1].value * 4, ac)(cxt)
            with tf.variable_scope('output'):
                fout = dense(xshape[-1].value, None)(itm)
                fout = dropout(dropout_rate)(fout)
                fout = norm(fout + cxt)
            layer_input = fout
            all_layers_outputs.append(fout)
    return all_layers_outputs
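# Hedged usage sketch: a 4-layer encoder over a statically shaped input.
# transformer_block returns one tensor per layer; the last entry is the final
# encoding. The shapes and head split here are assumptions.
def _demo_transformer_block():
    x = tf.placeholder(tf.float32, [32, 128, 512])  # static seq_length required
    outputs = transformer_block(x, layers=4, num_heads=8, num_per_heads=64,
                                dropout_rate=0.1, mode='train')
    return outputs[-1]  # [32, 128, 512]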
def train():
    tf.reset_default_graph()
    with tf.Graph().as_default() as global_graph:
        model = Transfer(param=PARAM, mode='train')
        sess_conf = tf.ConfigProto(intra_op_parallelism_threads=8,
                                   inter_op_parallelism_threads=8)
        sess_conf.gpu_options.allow_growth = True
        with tf.Session(graph=global_graph, config=sess_conf) as sess:
            model, global_step = tc.load_model(model, MODEL_DIR, sess)
            sess.graph.finalize()
            vocab = g.read_vocab(VOCAB)
            rvocab = g.reverse_vocab(vocab)
            g.infor(TAB + '|begin global step : %d|' % global_step + TAB)
            g.infor(TAB + '|stage-%d|' % PARAM['stage'] + TAB)
            source_label = g.read_contents(
                STAGE1_DATA if PARAM['stage'] == 1 else STAGE2_DATA, split='=')
            source_label = g.shuffle(source_label)
            source, label = g.unzip_tuple(source_label)
            index = 0
            augment = check_value('./augment')
            # reinit the projection layer before train stage 2.
            # sess.run(model.reinit)
            for i in range(TRAIN_STEP):
                sid, sil, teach, dod, dol, need_shuffle, index = g.get_seq2seq_batch(
                    source=source, label=label, batch_size=BATCH_SIZE,
                    index=index, vocab=vocab, augment=augment)
                _, loss, global_step, lr, tn = model.train(
                    sess, [sid, sil, teach, dod, dol])
                g.normal('step:%d loss:%.4f lr:%.6f TN:%.6f '
                         % (global_step, loss, lr, tn), 'fg_green')
                if (need_shuffle):
                    # one pass over the data finished: reshuffle, then save
                    source, label = g.unzip_tuple(g.shuffle(source_label))
                    g.infor('\n' + TAB + '|shuffle data|' + TAB + '\n')
                    g.record('loss', loss, limit=-1, force_write=True)
                    model.saver.save(sess, MODEL_DIR + '/transfer.ckpt',
                                     global_step=global_step)
                    g.infor(' save model when step to :%d ' % global_step)
                    # re-read the augment value so it can be tuned while training
                    augment = check_value('./augment')
                else:
                    g.record('loss', loss, limit=-1)
if __name__ == '__main__':
    argv = sys.argv
    if (len(argv) == 2):
        mode = argv[1]
        g.infor('mode : %s' % mode)
        if (mode == 'infer'):
            g.infor('infer...')
            infer()
        else:
            g.infor('train...')
            train()
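# Hedged invocation sketch (the script file name is an assumption):
#   python transfer.py infer   # decode TEST into infer-t.txt
#   python transfer.py train   # any other single argument runs train()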