def network():
    """Declare the feed variables and assemble the encoder/decoder pretraining graph.

    Every feed variable is created with ``fluid.layers.data`` as ``int64`` and
    has ``None`` as its leading dimension.  The encoder is constructed first;
    its sequence output is then handed to the decoder together with
    ``enc_dec_mask``, and ``Decoder.pretrain`` supplies the loss and the
    goal-type accuracy.

    Relies on the module-level names ``fluid``, ``SEQ_MAX_LEN``, ``config``,
    ``Encoder`` and ``Decoder``.

    Returns:
        A 3-tuple ``(loss_name, goal_type_acc_name, input_name_list)`` where
        ``input_name_list`` holds the names of all feed variables in the order
        listed at the end of this function.
    """

    def _int64_feed(name, *dims):
        # A fresh shape list is built for every declaration.
        return fluid.layers.data(name=name, shape=list(dims), dtype='int64')

    # Encoder feeds.
    enc_tokens = _int64_feed("enc_token_ids", None, SEQ_MAX_LEN)
    enc_segments = _int64_feed("enc_segment_ids", None, SEQ_MAX_LEN)
    enc_positions = _int64_feed("enc_pos_ids", None, SEQ_MAX_LEN)
    enc_lengths = _int64_feed("enc_input_length", None, SEQ_MAX_LEN, 1)

    # Decoder feeds.
    dec_tokens = _int64_feed("token_ids", None, SEQ_MAX_LEN)
    dec_segments = _int64_feed("segment_ids", None, SEQ_MAX_LEN)
    dec_positions = _int64_feed("pos_ids", None, SEQ_MAX_LEN)
    dec_self_attn = _int64_feed("enc_slf_attn", None, SEQ_MAX_LEN, SEQ_MAX_LEN)

    # Task labels.
    lm_label_mat = _int64_feed("lm_label_mat", None, SEQ_MAX_LEN)
    lm_pos_mask = _int64_feed("lm_pos_mask", None, SEQ_MAX_LEN, SEQ_MAX_LEN)
    lm_pos_len = _int64_feed("lm_pos_len", None, 1)
    goal_pos = _int64_feed("goal_type_pos", None, 2)
    goal_label = _int64_feed("goal_type_label", None)

    # Mask passed to the decoder as ``enc_input_mask``.
    cross_mask = _int64_feed("enc_dec_mask", None, SEQ_MAX_LEN, SEQ_MAX_LEN)

    # Network: the encoder is built before the decoder, which consumes its output.
    encoder = Encoder(enc_tokens, enc_positions, enc_segments, enc_lengths,
                      config)
    decoder = Decoder(
        dec_tokens,
        dec_positions,
        dec_segments,
        dec_self_attn,
        config=config,
        enc_input=encoder.get_sequence_output(),
        enc_input_mask=cross_mask,
    )
    loss, goal_type_acc = decoder.pretrain(
        goal_pos, goal_label, lm_label_mat, lm_pos_mask, lm_pos_len)

    # The name order differs from the declaration order above: the
    # encoder-decoder mask sits between the decoder feeds and the task labels.
    ordered_feeds = (
        enc_tokens, enc_segments, enc_positions, enc_lengths,
        dec_tokens, dec_segments, dec_positions, dec_self_attn,
        cross_mask,
        lm_label_mat, lm_pos_mask, lm_pos_len,
        goal_pos, goal_label,
    )
    input_name_list = [feed.name for feed in ordered_feeds]

    return loss.name, goal_type_acc.name, input_name_list
goal_type_label = fluid.layers.data(name="goal_type_label", shape=[None], dtype='int64') # enc_dec_mask enc_dec_mask = fluid.layers.data(name='enc_dec_mask', shape=[None, SEQ_MAX_LEN, SEQ_MAX_LEN], dtype='int64') # network encode = Encoder(enc_token_ids, enc_pos_ids, enc_segment_ids, enc_input_length, config) enc_output = encode.get_sequence_output() decode = Decoder(dec_token_ids, dec_pos_ids, dec_segment_ids, dec_enc_slf_attn, config=config, enc_input=enc_output, enc_input_mask=enc_dec_mask) dec_output = decode.get_sequence_output() loss, goal_type_acc = decode.pretrain(goal_type_pos, goal_type_label, dec_lm_label_mat, dec_lm_pos_mask, dec_lm_pos_len) # loss adam = fluid.optimizer.AdamOptimizer() adam.minimize(loss) # define executor place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() exe = fluid.Executor(place) # start up parameter exe.run(startup_prog) params_list = train_prog.block(0).all_parameters() params_name_list = [p.name for p in params_list] write_iterable("enc_dec_params.param", params_name_list)