def load_model(self):
    # Reload the vocabulary and context limits saved alongside the corpus.
    data = common.middle_load(os.path.join(self.model_path, "corpus"))
    self.dict = data["word2idx"]
    self.turn_size = data["turn_size"]
    self.max_context_len = data["max_context_len"]

    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Restore the trained graph definition and weights from the latest checkpoint.
    saver = tf.train.import_meta_graph(
        os.path.join(self.model_path, "model.meta"))
    saver.restore(sess, tf.train.latest_checkpoint(self.model_path))
    graph = tf.get_default_graph()

    # Re-bind the named input placeholders and output tensors of the restored graph.
    self.src_max_len = graph.get_tensor_by_name(
        "init_variables/src_max_len:0")
    self.batch_size = graph.get_tensor_by_name(
        "init_variables/batch_size:0")
    self.tgt_max_len = graph.get_tensor_by_name(
        "init_variables/tgt_max_len:0")
    self.src = graph.get_tensor_by_name("init_variables/src_tensor:0")
    self.postion = graph.get_tensor_by_name("init_variables/src_postion:0")
    self.turns = graph.get_tensor_by_name("init_variables/turns_tensor:0")
    self.dropout_rate = graph.get_tensor_by_name(
        "init_variables/dropout_keep_prob:0")
    self.enc_output = graph.get_tensor_by_name("enc_output:0")
    self.tgt = graph.get_tensor_by_name("init_variables/tgt_tensor:0")
    self.pre_enc_output = graph.get_tensor_by_name(
        "init_variables/pre_enc_output:0")
    self.distributes = graph.get_tensor_by_name("pre_distributes:0")

    self.session = sess
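# Hypothetical usage sketch (not part of the original file): running the
# encoder through the tensors re-bound in load_model above. The function
# name, its arguments, and feeding keep_prob=1.0 at inference time are
# illustrative assumptions, not the repository's actual API.
def encode_sketch(model, src_batch, postion_batch, turns_batch,
                  src_max_len, batch_size):
    return model.session.run(
        model.enc_output,
        feed_dict={
            model.src: src_batch,
            model.postion: postion_batch,
            model.turns: turns_batch,
            model.src_max_len: src_max_len,
            model.batch_size: batch_size,
            # The placeholder is named "dropout_keep_prob", so 1.0 should
            # disable dropout at inference time (assumption).
            model.dropout_rate: 1.0,
        })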
parser.add_argument('--dist_rate', type=float, default=0.4)
parser.add_argument('--dist_encode_rate', type=float, default=0.5)
parser.add_argument('--not_use_pretrain', action='store_true')
parser.add_argument('--use_debug', action='store_true')

args = parser.parse_args()

# Seed every RNG in play so runs are reproducible.
random.seed(args.seed)
np.random.seed(args.seed)
tf.compat.v1.set_random_seed(args.seed)

args.turn_size = 6
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_device

# Derive vocabulary size and context length from the preprocessed corpus.
data = middle_load(args.data)
args.vocab_size = len(data["word2idx"])
args.max_context_len = data["max_len"]
args.use_pretrain = not args.not_use_pretrain

training_data = DataLoader(data["train"]["src_texts"],
                           data["train"]["src_turn"],
                           data["train"]["tgt_indexs"],
                           data["train"]["tgt_texts"],
                           data["train"]["eos_indexs"],
                           data["train"]["src_context"],
                           data["train"]["tgt_context"],
                           batch_size=args.batch_size)

validation_data = DataLoader(data["valid"]["src_texts"],
            self.tgt_texts[_start:_start + _bsz])
        tgt_indexs_tensor = index_pairs(self.tgt_indexs[_start:_start + _bsz],
                                        tgt_max_len)
        turns_tensor = self.src_turn[_start:_start + _bsz]
        eos_indexs = self.eos_indexs[_start:_start + _bsz]
        src_context = self.src_context[_start:_start + _bsz]

        return (src_tensor, src_postion, turns_tensor), (
            tgt_tensor, tgt_postion
        ), tgt_indexs_tensor, src_max_len, eos_indexs, tgt_max_len, src_context


if __name__ == "__main__":
    import common

    corpus = common.middle_load("data/corpus")
    dl = DataLoader(corpus["train"]["src_texts"],
                    corpus["train"]["src_turn"],
                    corpus["train"]["tgt_indexs"],
                    corpus["train"]["tgt_texts"],
                    corpus["train"]["eos_indexs"],
                    corpus["train"]["src_context"], 4)

    (src_tensor, src_postion, turns_tensor), (
        tgt_tensor, tgt_postion
    ), tgt_indexs_tensor, src_max_len, eos_indexs, tgt_max_len, src_context = next(
        dl)

    print(tgt_max_len)
    print(tgt_indexs_tensor.tolist())
    ei = eos_indexs[0]
    tgt_ei = tgt_indexs_tensor[0] == 0
                                        tgt_max_len)
        turns_tensor, _, _ = pad_to_longest(self.src_turn[_start:_start + _bsz])
        eos_indexs = self.eos_indexs[_start:_start + _bsz]
        src_context = self.src_context[_start:_start + _bsz]
        tgt_context = self.tgt_context[_start:_start + _bsz]

        return (src_tensor, src_postion, turns_tensor), (
            tgt_tensor, tgt_postion
        ), tgt_indexs_tensor, src_max_len, eos_indexs, tgt_max_len, src_context, tgt_context


if __name__ == "__main__":
    from common import middle_load, middle_save, set_logger

    data = middle_load("data/corpus")
    training_data = DataLoader(data["train"]["src_texts"],
                               data["train"]["src_turn"],
                               data["train"]["tgt_indexs"],
                               data["train"]["tgt_texts"],
                               data["train"]["eos_indexs"],
                               data["train"]["src_context"],
                               data["train"]["tgt_context"],
                               batch_size=8)

    (src_tensor, src_postion, turns_tensor), (
        tgt_tensor, tgt_postion
    ), tgt_indexs_tensor, src_max_len, eos_indexs, tgt_max_len, src_context, tgt_context = next(
        training_data)
        tvars, checkpoint_name)
    tf.train.init_from_checkpoint(checkpoint_name, assignment_map)
    tmp_g = tf.get_default_graph().as_graph_def()

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # Fold the checkpointed variables into constants, then strip nodes
        # that are not needed to compute the listed inference outputs.
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, output_tensors)
        tmp_g = optimize_for_inference(
            tmp_g, input_tensors, output_tensors, input_datatype, False)

    # Serialize the frozen, optimized graph to disk.
    with tf.gfile.GFile(f"{graph_output_dir}/graph", 'wb') as f:
        f.write(tmp_g.SerializeToString())


if __name__ == "__main__":
    from common import middle_load
    from model import Transformer

    class CorpusArgs(object):
        def __init__(self, _args):
            for k, v in _args.items():
                self.__setattr__(k, v)

    data = middle_load("weights/corpus")
    cargs = CorpusArgs(data["args"])

    graph_optimize(cargs, Transformer, "weights", "weights/model",
                   ["enc_output", "pre_distributes"],
                   [tf.int32.as_datatype_enum,
                    tf.int32.as_datatype_enum,
                    tf.int32.as_datatype_enum,
                    tf.int64.as_datatype_enum,
                    tf.int64.as_datatype_enum,
                    tf.int64.as_datatype_enum,
                    tf.float32.as_datatype_enum,
                    tf.int64.as_datatype_enum,
                    tf.int64.as_datatype_enum,
                    tf.float32.as_datatype_enum],
                   ["init_variables/src_max_len",
                    "init_variables/batch_size",
                    "init_variables/tgt_max_len",
                    "init_variables/src_tensor",
                    "init_variables/src_postion",
                    "init_variables/turns_tensor",
                    "init_variables/dropout_keep_prob",
                    "init_variables/tgt_tensor",
                    "init_variables/tgt_postion",
                    "init_variables/pre_enc_output"])
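# Hypothetical companion sketch (not in the original file): loading the frozen
# graph that graph_optimize writes above and re-binding the exported tensors.
# The path "weights/graph" mirrors the GFile target above; the function name
# and return values are assumptions for illustration only.
def load_frozen_graph(path="weights/graph"):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(path, "rb") as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        # Import with an empty name prefix so tensor names stay identical to
        # the ones listed in the graph_optimize call.
        tf.import_graph_def(graph_def, name="")
    enc_output = graph.get_tensor_by_name("enc_output:0")
    distributes = graph.get_tensor_by_name("pre_distributes:0")
    return graph, enc_output, distributes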