def _evaluate(eval_fn, input_fn, decode_fn, path, config, device_list):
    graph = tf.Graph()
    with graph.as_default():
        features = input_fn()
        refs = features["references"]
        placeholders = []

        for i in range(len(device_list)):
            placeholders.append({
                "source": tf.placeholder(tf.int32, [None, None],
                                         "source_%d" % i),
                "source_length": tf.placeholder(tf.int32, [None],
                                                "source_length_%d" % i)
            })

            for j in range(100):
                # dict.has_key is Python 2 only; use the "in" operator instead
                if "mt_%d" % j in features:
                    placeholders[-1]["mt_%d" % j] = tf.placeholder(
                        tf.int32, [None, None], "mt_%d_%d" % (j, i))
                    placeholders[-1]["mt_length_%d" % j] = tf.placeholder(
                        tf.int32, [None], "mt_length_%d_%d" % (j, i))

        predictions = parallel.data_parallelism(device_list, eval_fn,
                                                placeholders)
        predictions = [pred[0][:, 0, :] for pred in predictions]

        all_refs = [[] for _ in range(len(refs))]
        all_outputs = []

        sess_creator = tf.train.ChiefSessionCreator(checkpoint_dir=path,
                                                    config=config)

        with tf.train.MonitoredSession(session_creator=sess_creator) as sess:
            while not sess.should_stop():
                feats = sess.run(features)
                inp_feats = {key: feats[key] for key in placeholders[0].keys()}
                op, feed_dict = _shard_features(inp_feats, placeholders,
                                                predictions)
                # A list of numpy arrays with shape: [batch, len]
                outputs = sess.run(op, feed_dict=feed_dict)

                for shard in outputs:
                    all_outputs.extend(shard.tolist())

                # shape: ([batch, len], ..., [batch, len])
                references = [item.tolist() for item in feats["references"]]

                for i in range(len(refs)):
                    all_refs[i].extend(references[i])

        decoded_symbols = decode_fn(all_outputs)

        for i, l in enumerate(decoded_symbols):
            # Undo BPE segmentation before scoring
            decoded_symbols[i] = " ".join(l).replace("@@ ", "").split()

        decoded_refs = [decode_fn(refs) for refs in all_refs]
        decoded_refs = [list(x) for x in zip(*decoded_refs)]

        return bleu.bleu(decoded_symbols, decoded_refs)
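# `_shard_features` is called above but not defined in this file. The sketch
# below shows the assumed behaviour: split each batched feature along the
# batch axis into one shard per device placeholder dict, build the matching
# feed_dict, and return only the prediction ops whose shard is non-empty.
# Names and details here are assumptions, not the project's implementation.
import numpy as np

def _shard_features_sketch(features, placeholders, predictions):
    num_shards = len(placeholders)
    # Split every feature along the batch axis, one piece per device.
    sharded = {k: np.array_split(v, num_shards) for k, v in features.items()}
    feed_dict = {}
    ops = []
    for i, device_placeholders in enumerate(placeholders):
        # Devices can receive an empty shard when the last batch is small.
        first_key = list(device_placeholders.keys())[0]
        if len(sharded[first_key][i]) == 0:
            continue
        for name, placeholder in device_placeholders.items():
            feed_dict[placeholder] = sharded[name][i]
        ops.append(predictions[i])
    return ops, feed_dict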
def main(args): tf.logging.set_verbosity(tf.logging.INFO) # Load configs model_cls_list = [models.get_model(model) for model in args.models] params_list = [default_parameters() for _ in range(len(model_cls_list))] params_list = [ merge_parameters(params, model_cls.get_parameters()) for params, model_cls in zip(params_list, model_cls_list) ] params_list = [ import_params(args.checkpoints[i], args.models[i], params_list[i]) for i in range(len(args.checkpoints)) ] params_list = [ override_parameters(params_list[i], args) for i in range(len(model_cls_list)) ] # Build Graph with tf.Graph().as_default(): model_var_lists = [] # Load checkpoints for i, checkpoint in enumerate(args.checkpoints): tf.logging.info("Loading %s" % checkpoint) var_list = tf.train.list_variables(checkpoint) values = {} reader = tf.train.load_checkpoint(checkpoint) for (name, shape) in var_list: if not name.startswith(model_cls_list[i].get_name()): continue if name.find("losses_avg") >= 0: continue tensor = reader.get_tensor(name) values[name] = tensor model_var_lists.append(values) # Build models model_list = [] for i in range(len(args.checkpoints)): name = model_cls_list[i].get_name() model = model_cls_list[i](params_list[i], name + "_%d" % i) model_list.append(model) params = params_list[0] # Read input file sorted_keys, sorted_inputs = dataset.sort_input_file(args.input) # Build input queue features = dataset.get_inference_input(sorted_inputs, params) # Create placeholders placeholders = [] for i in range(len(params.device_list)): placeholders.append({ "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i), "source_length": tf.placeholder(tf.int32, [None], "source_length_%d" % i) }) # A list of outputs if params.generate_samples: inference_fn = sampling.create_sampling_graph else: inference_fn = inference.create_inference_graph predictions = parallel.data_parallelism( params.device_list, lambda f: inference_fn(model_list, f, params), placeholders) # Create assign ops assign_ops = [] feed_dict = {} all_var_list = tf.trainable_variables() for i in range(len(args.checkpoints)): un_init_var_list = [] name = model_cls_list[i].get_name() for v in all_var_list: if v.name.startswith(name + "_%d" % i): un_init_var_list.append(v) ops = set_variables(un_init_var_list, model_var_lists[i], name + "_%d" % i, feed_dict) assign_ops.extend(ops) assign_op = tf.group(*assign_ops) init_op = tf.tables_initializer() results = [] tf.get_default_graph().finalize() # Create session with tf.Session(config=session_config(params)) as sess: # Restore variables sess.run(assign_op, feed_dict=feed_dict) sess.run(init_op) while True: try: feats = sess.run(features) op, feed_dict = shard_features(feats, placeholders, predictions) results.append(sess.run(op, feed_dict=feed_dict)) message = "Finished batch %d" % len(results) tf.logging.log(tf.logging.INFO, message) except tf.errors.OutOfRangeError: break # Convert to plain text vocab = params.vocabulary["target"] outputs = [] scores = [] for result in results: for item in result[0]: outputs.append(item.tolist()) for item in result[1]: scores.append(item.tolist()) outputs = list(itertools.chain(*outputs)) scores = list(itertools.chain(*scores)) restored_inputs = [] restored_outputs = [] restored_scores = [] for index in range(len(sorted_inputs)): restored_inputs.append(sorted_inputs[sorted_keys[index]]) restored_outputs.append(outputs[sorted_keys[index]]) restored_scores.append(scores[sorted_keys[index]]) # Write to file with open(args.output, "w") as outfile: count = 0 for outputs, scores in 
zip(restored_outputs, restored_scores): for output, score in zip(outputs, scores): decoded = [] for idx in output: if idx == params.mapping["target"][params.eos]: break decoded.append(vocab[idx]) decoded = " ".join(decoded) if not args.verbose: outfile.write("%s\n" % decoded) #break else: pattern = "%d ||| %s ||| %s ||| %f\n" source = restored_inputs[count] values = (count, source, decoded, score) outfile.write(pattern % values) count += 1
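# `set_variables` is used by each of these scripts but defined elsewhere. A
# minimal sketch of the assumed behaviour: map every uninitialised variable in
# the per-model scope (e.g. "transformer_0/...") back to its checkpoint name
# (e.g. "transformer/..."), and build assign ops that are fed through
# placeholders so the checkpoint values never become graph constants. The
# name-mapping rule and the signature are assumptions for illustration only.
def set_variables_sketch(var_list, value_dict, prefix, feed_dict):
    ops = []
    base = prefix.rsplit("_", 1)[0]  # "transformer_0" -> "transformer"
    for var in var_list:
        name = var.name.split(":")[0]
        ckpt_name = name.replace(prefix, base, 1)
        value = value_dict[ckpt_name]
        placeholder = tf.placeholder(var.dtype.base_dtype, value.shape)
        feed_dict[placeholder] = value
        ops.append(tf.assign(var, placeholder))
    return ops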
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [transformer.Transformer for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue
                if name.find("losses_avg") >= 0:
                    continue
                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_rerank_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]

        # Read input file (written by the encoding script)
        with open(args.input, "r") as encoded_file:
            sorted_keys = cPickle.load(encoded_file)
            decoder_input_list = cPickle.load(encoded_file)
            encoder_output_list = cPickle.load(encoded_file)

        state_placeholders = []

        for i in range(len(params.device_list)):
            decode_state = {
                "encoder": tf.placeholder(tf.float32,
                                          [None, None, params.hidden_size],
                                          "encoder_%d" % i),
                # "encoder_weight": the encoder weights are not needed here
                "source": tf.placeholder(tf.int32, [None, None],
                                         "source_%d" % i),
                "source_length": tf.placeholder(tf.int32, [None],
                                                "source_length_%d" % i),
                # [bos_id, ...] => [..., 0]
                "target": tf.placeholder(tf.int32, [None, None],
                                         "target_%d" % i),
                # "target_length": tf.placeholder(tf.int32, [None],
                #                                 "target_length_%d" % i)
            }

            # These values are required for incremental decoding
            for j in range(params.num_decoder_layers):
                decode_state["decoder_layer_%d_key" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_key_%d" % (j, i))
                decode_state["decoder_layer_%d_value" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_value_%d" % (j, i))  # layer and GPU
                # We only need the return value of this:
                # decode_state["decoder_layer_%d_att_weight" % j] = \
                #     tf.placeholder(tf.float32, [None, None, None, None],
                #                    # N Head T S; during inference T is
                #                    # always 1, i.e. one step at a time
                #                    "decoder_layer_%d_att_weight" % j)

            state_placeholders.append(decode_state)

        def decoding_fn(s):
            _decoding_fn = model_fns[0][1]
            # Split s into state and feature, and convert it to the nested
            # structure expected by the Transformer model
            state = {
                "encoder": s["encoder"],
                "decoder": {
                    "layer_%d" % j: {
                        "key": s["decoder_layer_%d_key" % j],
                        "value": s["decoder_layer_%d_value" % j],
                    }
                    for j in range(params.num_decoder_layers)
                }
            }
            inputs = s["target"]
            feature = {
                "source": s["source"],
                "source_length": s["source_length"],
                # [bos_id, ...] => [..., 0]
                # "target": tf.pad(inputs[:, 1:], [[0, 0], [0, 1]])
                # "target": tf.pad(inputs, [[0, 0], [0, 1]]),
                # There is no leading bos_id, so a 0 would be appended
                # instead; this matches the bos padding in decode_graph
                "target": inputs,
                "target_length": tf.fill([tf.shape(inputs)[0]],
                                         tf.shape(inputs)[1])
            }
            ret = _decoding_fn(feature, state, params)
            return ret

        decoder_op = parallel.data_parallelism(params.device_list,
                                               lambda s: decoding_fn(s),
                                               state_placeholders)

        # Create assign ops
        assign_ops = []
        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)

        results = []
        sen_decode_time = []
        # Hypotheses per grid cell for every sentence, kept for later
        # analysis and statistics
        grid_hyps = []

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # Restore variables
            sess.run(assign_op)

            for i, (decode_input, encoder_output) in enumerate(
                    zip(decoder_input_list, encoder_output_list)):
                # Create the constrained-translation model:
                # build the ensembled TM
                thumt_tm = ThumtTranslationModel(sess, decoder_op,
                                                 encoder_output,
                                                 state_placeholders,
                                                 decode_input, params)
                # Build the GBS (grid beam search) decoder
                cons_decoder = create_constrained_decoder(thumt_tm)

                max_length = (decode_input["source_length"][0] +
                              params.decode_length)
                beam_size = params.beam_size
                # top_beams = params.top_beams
                top_beams = 1

                start_time = time.time()
                best_output, search_grid = decode(encoder_output, sess,
                                                  decoder_op,
                                                  state_placeholders, params,
                                                  cons_decoder, thumt_tm,
                                                  decode_input, top_beams,
                                                  max_hyp_len=max_length,
                                                  beam_size=beam_size,
                                                  return_alignments=True,
                                                  length_norm=False)
                sen_decode_time.append(time.time() - start_time)

                hyps_num = {k: len(search_grid[k])
                            for k in search_grid.keys()}
                grid_hyps.append(hyps_num)

                results.append(best_output)
                message = "Finished decoding sentence index: %d" % i
                tf.logging.log(tf.logging.INFO, message)

        # Convert to plain text
        vocab = params.vocabulary["target"]
        outputs = []
        scores = []
        mask_ratio = []
        best_alignment = []

        for result in results:
            sub_result = zip(*result[0])
            outputs.extend(sub_result[0])
            scores.extend(sub_result[1])
            best_alignment.extend(result[1])
            # for sub_result in result:
            #     # each decode may return several best-scoring outputs
            #     outputs.append(sub_result[0][0][1:])  # seqs
            #     scores.append(sub_result[0][1])       # score
            #     mask_ratio.append(0)
            #     best_alignment.extend(sub_result[1])

        # Strip the leading bos token
        new_outputs = []
        for s in outputs:
            new_outputs.append(s[1:])
        outputs = new_outputs

        for s, score in zip(outputs, scores):
            s1 = []
            for idx in s:
                if idx == params.mapping["target"][params.eos]:
                    break
                s1.append(vocab[idx])
            s1 = " ".join(s1)
            print("%f %s" % (score, s1))

        restored_inputs = []
        restored_outputs = []
        restored_scores = []
        restored_constraints = []
        restored_alignment = []
        restored_sen_decode_time = []
        restored_grid_hyps = []

        for index in range(len(sorted_keys)):
            restored_outputs.append(outputs[sorted_keys[index]])
            restored_scores.append(scores[sorted_keys[index]])
            # restored_constraints.append(
            #     sorted_constraints[sorted_keys[index]])
            restored_alignment.append(best_alignment[sorted_keys[index]])
            restored_sen_decode_time.append(
                sen_decode_time[sorted_keys[index]])
            restored_grid_hyps.append(grid_hyps[sorted_keys[index]])

        # Write to file
        with open(args.output, "w") as outfile:
            count = 0
            for output, score, de_time in zip(restored_outputs,
                                              restored_scores,
                                              restored_sen_decode_time):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])

                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                else:
                    pattern = "%d |%s |%f |%f \n"
                    # cons = restored_constraints[count]
                    # cons_token_num = 0
                    # for cons_item in cons:
                    #     cons_token_num += cons_item["tgt_len"]
                    values = (count, decoded, score, de_time)
                    outfile.write(pattern % values)

                count += 1

        with open(args.output + ".alignment", "w") as outfile:
            count = 0
            for alignment in restored_alignment:
                outfile.write("%d\n" % count)
                cPickle.dump(alignment, outfile)
                count += 1

        # Save the decode times and the hypotheses per grid cell for analysis
        with open(args.output + ".time_hyps", "w") as outfile:
            cPickle.dump(restored_sen_decode_time, outfile)
            cPickle.dump(restored_grid_hyps, outfile)

        with open(args.output + ".time", "w") as outfile:
            time_sen = np.asarray(restored_sen_decode_time)
            ave = np.average(time_sen)
            outfile.write("average time:%f\n" % ave)
            cPickle.dump(restored_sen_decode_time, outfile)
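# The ".alignment" file written above interleaves a plain-text index line with
# one cPickle record per sentence. A minimal read-back sketch under that
# assumption (Python 2, to match the cPickle usage in these scripts):
import cPickle

def read_alignments_sketch(path):
    alignments = []
    with open(path) as f:
        while True:
            index_line = f.readline()  # the "%d\n" counter written above
            if not index_line:
                break
            alignments.append(cPickle.load(f))
    return alignments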
def build_graph(params, args, model_list, model_cls_list, model_var_lists,
                problem=None):
    if problem == "parsing":
        fo = args.parsing_output
        fi = args.parsing_input
    elif problem == "amr":
        fo = args.amr_output
        fi = args.amr_input
    else:
        raise ValueError("problem must be either 'parsing' or 'amr'")

    # Read input file
    sorted_keys, sorted_inputs = dataset.sort_input_file(fi)
    # Build input queue (source data only)
    features = dataset.get_inference_input(sorted_inputs, params)

    # Create placeholders
    placeholders = []

    for i in range(len(params.device_list)):
        placeholders.append({
            "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i),
            "source_length": tf.placeholder(tf.int32, [None],
                                            "source_length_%d" % i)
        })

    # A list of outputs
    if params.generate_samples:
        inference_fn = sampling.create_sampling_graph
    else:
        inference_fn = inference.create_inference_graph

    predictions = parallel.data_parallelism(
        params.device_list,
        lambda f: inference_fn(model_list, f, params, problem=problem),
        placeholders)

    # Create assign ops
    assign_ops = []
    feed_dict = {}

    all_var_list = tf.trainable_variables()

    for i in range(len(args.checkpoints)):
        un_init_var_list = []
        name = model_cls_list[i].get_name()

        for v in all_var_list:
            if v.name.startswith(name + "_%d" % i):
                un_init_var_list.append(v)

        ops = set_variables(un_init_var_list, model_var_lists[i],
                            name + "_%d" % i, feed_dict)
        assign_ops.extend(ops)

    assign_op = tf.group(*assign_ops)
    init_op = tf.tables_initializer()
    results = []

    tf.get_default_graph().finalize()

    # Create session
    with tf.Session(config=session_config(params)) as sess:
        # Restore variables
        sess.run(assign_op, feed_dict=feed_dict)
        sess.run(init_op)

        while True:
            try:
                feats = sess.run(features)
                op, feed_dict = shard_features(feats, placeholders,
                                               predictions)
                results.append(sess.run(op, feed_dict=feed_dict))
                message = "Finished %s batch %d" % (problem, len(results))
                tf.logging.log(tf.logging.INFO, message)
            except tf.errors.OutOfRangeError:
                break

    # Convert to plain text
    vocab = params.vocabulary[problem + "_target"]
    outputs = []
    scores = []

    for result in results:
        for item in result[0]:
            outputs.append(item.tolist())
        for item in result[1]:
            scores.append(item.tolist())

    outputs = list(itertools.chain(*outputs))
    scores = list(itertools.chain(*scores))

    restored_inputs = []
    restored_outputs = []
    restored_scores = []

    for index in range(len(sorted_inputs)):
        restored_inputs.append(sorted_inputs[sorted_keys[index]])
        restored_outputs.append(outputs[sorted_keys[index]])
        restored_scores.append(scores[sorted_keys[index]])

    # Write to file
    with open(fo, "w") as outfile:
        count = 0
        for outputs, scores in zip(restored_outputs, restored_scores):
            for output, score in zip(outputs, scores):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])

                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                    # only keep the best hypothesis per sentence
                    break
                else:
                    pattern = "%d ||| %s ||| %s ||| %f\n"
                    source = restored_inputs[count]
                    values = (count, source, decoded, score)
                    outfile.write(pattern % values)

            count += 1
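# How the sorted_keys round trip above is assumed to work: sort_input_file
# orders sentences by length for efficient batching and returns, for each
# original position, the index of that sentence inside the sorted list, so the
# original file order can be restored after decoding. A toy illustration of
# that assumption (the real logic lives in the dataset module):
def sort_input_sketch(lines):
    order = sorted(range(len(lines)), key=lambda i: len(lines[i].split()))
    sorted_inputs = [lines[i] for i in order]
    keys = [0] * len(lines)
    for pos, original_index in enumerate(order):
        keys[original_index] = pos
    return keys, sorted_inputs

lines = ["a b c", "a", "a b"]
keys, sorted_inputs = sort_input_sketch(lines)
assert [sorted_inputs[keys[j]] for j in range(len(lines))] == lines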
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [transformer.Transformer for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue
                if name.find("losses_avg") >= 0:
                    continue
                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_rerank_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]

        # Read input file
        sorted_keys, sorted_inputs, sorted_constraints = \
            src_cons_dataset.sort_input_src_cons(args.input, args.constraints)
        # Build input queue
        features = src_cons_dataset.get_input_with_src_constraints(
            sorted_inputs, sorted_constraints, params)
        print(sorted_keys)

        # Create placeholders
        placeholders = []

        for i in range(len(params.device_list)):
            placeholders.append({
                "source": tf.placeholder(tf.int32, [None, None],
                                         "source_%d" % i),
                "source_length": tf.placeholder(tf.int32, [None],
                                                "source_length_%d" % i),
                "constraints_src_pos": tf.placeholder(
                    tf.int32, [None, None, None],
                    "constraints_src_pos_%d" % i),
                "constraints": tf.placeholder(tf.int32, [None, None, None],
                                              "constraints_%d" % i),
                "constraints_len": tf.placeholder(tf.int32, [None, None],
                                                  "constraints_len_%d" % i)
            })

        encoding_fn = model_fns[0][0]
        encoder_op = parallel.data_parallelism(
            params.device_list, lambda f: encoding_fn(f, params),
            placeholders)

        # Create assign ops
        assign_ops = []
        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        results = []

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # Restore variables
            sess.run(assign_op)
            sess.run(tf.tables_initializer())

            decoder_input_list = []
            encoder_output_list = []

            while True:
                try:
                    feats = sess.run(features)
                    # Do not overwrite encoder_op with the sharded ops
                    op, feed_dict = shard_features(feats, placeholders,
                                                   encoder_op)
                    encoder_state = sess.run(op, feed_dict=feed_dict)

                    for j in range(len(feats["source"])):
                        decoder_input_item = {
                            "source": [feats["source"][j]],
                            "source_length": [feats["source_length"][j]],
                            "constraints_src_pos":
                                feats["constraints_src_pos"][j],
                            "constraints": feats["constraints"][j],
                            "constraints_len": feats["constraints_len"][j],
                        }
                        decoder_input_list.append(decoder_input_item)

                    # Do not simply loop over the number of GPUs; loop over
                    # the actual outputs, because a GPU can be left idle,
                    # e.g. the last sentence(s) may not fill a whole GPU
                    for i in range(len(encoder_state[0])):
                        state_len = len(encoder_state[0][i])
                        for j in range(state_len):
                            encoder_output_item = {
                                "encoder": encoder_state[0][i][j:j + 1],
                                "encoder_weight":
                                    encoder_state[1][i][j:j + 1]
                            }
                            encoder_output_list.append(encoder_output_item)

                    message = "Finished encoding sentences: %d" % len(
                        decoder_input_list)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break

        # Dump the keys, decoder inputs and encoder outputs so the
        # constrained decoding script can read them back in the same order
        with open(args.output, "w") as outfile:
            cPickle.dump(sorted_keys, outfile)
            cPickle.dump(decoder_input_list, outfile)
            cPickle.dump(encoder_output_list, outfile)
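# Why the slices above use [j:j + 1] rather than [j]: slicing keeps the batch
# dimension, so every stored encoder output still has shape
# [1, src_len, hidden_size] and can later be fed to the decoder state
# placeholders without reshaping. A tiny numpy illustration:
import numpy as np

state = np.zeros([4, 7, 512])  # [batch, src_len, hidden_size]
print(state[1].shape)          # (7, 512)    -> batch dimension lost
print(state[1:2].shape)        # (1, 7, 512) -> batch dimension kept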
def main(args): tf.logging.set_verbosity(tf.logging.INFO) model_cls_list = [models.get_model(model) for model in args.models] params_list = [default_parameters() for _ in range(len(model_cls_list))] params_list = [ merge_parameters(params, model_cls.get_parameters()) for params, model_cls in zip(params_list, model_cls_list) ] params_list = [ import_params(args.checkpoints[i], args.models[i], params_list[i]) for i in range(len(args.checkpoints)) ] params_list = [ override_parameters(params_list[i], args) for i in range(len(model_cls_list)) ] with tf.Graph().as_default(): model_var_lists = [] for i, checkpoint in enumerate(args.checkpoints): tf.logging.info("Loading %s" % checkpoint) var_list = tf.train.list_variables(checkpoint) values = {} reader = tf.train.load_checkpoint(checkpoint) for (name, shape) in var_list: if not name.startswith(model_cls_list[i].get_name()): continue if name.find("losses_avg") >= 0: continue tensor = reader.get_tensor(name) values[name] = tensor model_var_lists.append(values) model_list = [] for i in range(len(args.checkpoints)): name = model_cls_list[i].get_name() model = model_cls_list[i](params_list[i], name + "_%d" % i) model_list.append(model) params = params_list[0] params.initializer_gain = 1.0 sorted_keys, sorted_inputs = dataset.read_eval_input_file(args.input) features = dataset.get_predict_input(sorted_inputs, params) placeholders = [] for i in range(len(params.device_list)): placeholders.append({ "text": tf.placeholder(tf.int32, [None, None], "text_%d" % i), "text_length": tf.placeholder(tf.int32, [None], "text_length_%d" % i), "aspect": tf.placeholder(tf.int32, [None, None], "aspect_%d" % i), "aspect_length": tf.placeholder(tf.int32, [None], "aspect_length_%d" % i), "polarity": tf.placeholder(tf.int32, [None, None], "polarity_%d" % i) }) predict_fn = inference.create_predict_graph predictions = parallel.data_parallelism( params.device_list, lambda f: predict_fn(model_list, f, params), placeholders) assign_ops = [] feed_dict = {} all_var_list = tf.trainable_variables() for i in range(len(args.checkpoints)): un_init_var_list = [] name = model_cls_list[i].get_name() for v in all_var_list: if v.name.startswith(name + "_%d" % i): un_init_var_list.append(v) ops = set_variables(un_init_var_list, model_var_lists[i], name + "_%d" % i, feed_dict) assign_ops.extend(ops) assign_op = tf.group(*assign_ops) init_op = tf.tables_initializer() results = [] with tf.Session(config=session_config(params)) as sess: sess.run(assign_op, feed_dict=feed_dict) sess.run(init_op) while True: try: feats = sess.run(features) op, feed_dict = shard_features(feats, placeholders, predictions) results.append(sess.run(op, feed_dict=feed_dict)) message = "Finished batch %d" % len(results) tf.logging.log(tf.logging.INFO, message) except tf.errors.OutOfRangeError: break input_features = [] scores1 = [] scores2 = [] output_alphas = [] for result in results: for item in result[0]: input_features.append(item.tolist()) for item in result[1]: scores1.append(item.tolist()) for item in result[2]: scores2.append(item.tolist()) for item in result[3]: output_alphas.append(item.tolist()) scores1 = list(itertools.chain(*scores1)) scores2 = list(itertools.chain(*scores2)) output_alphas = list(itertools.chain(*output_alphas)) restored_scores1 = [] restored_scores2 = [] restored_output_alphas = [] restored_inputs_text = [] restored_inputs_aspect = [] restored_inputs_score = [] for index in range(len(sorted_inputs[0])): restored_scores1.append(scores1[sorted_keys[index]][0]) 
restored_scores2.append(scores2[sorted_keys[index]]) restored_output_alphas.append(output_alphas[sorted_keys[index]]) restored_inputs_text.append(sorted_inputs[0][sorted_keys[index]]) restored_inputs_aspect.append(sorted_inputs[1][sorted_keys[index]]) restored_inputs_score.append(sorted_inputs[2][sorted_keys[index]]) class3_bad_TP = 0.0 class3_bad_FP = 0.0 class3_bad_FN = 0.0 class3_mid_TP = 0.0 class3_mid_FP = 0.0 class3_mid_FN = 0.0 class3_good_TP = 0.0 class3_good_FP = 0.0 class3_good_FN = 0.0 with open(args.output, "w") as outfile: for score1, score2, score3, alphas, text, aspect in zip( restored_scores1, restored_scores2, restored_inputs_score, restored_output_alphas, restored_inputs_text, restored_inputs_aspect): score1 = str(score1) outfile.write("###########################\n") pattern = "%s|||%f,%f,%f|||%s\n" values = (score1, score2[0], score2[1], score2[2], score3) outfile.write(pattern % values) outfile.write(aspect + "\n") for (word, alpha) in zip(text.split(), alphas): outfile.write(word + " " + str(alpha) + "\t") outfile.write("\n") if score1 == '0' and score3 == '0': class3_bad_TP += 1.0 if score1 == '1' and score3 == '1': class3_mid_TP += 1.0 if score1 == '2' and score3 == '2': class3_good_TP += 1.0 if score1 == '0' and score3 != '0': class3_bad_FP += 1.0 if score1 == '1' and score3 != '1': class3_mid_FP += 1.0 if score1 == '2' and score3 != '2': class3_good_FP += 1.0 if score1 != '0' and score3 == '0': class3_bad_FN += 1.0 if score1 != '1' and score3 == '1': class3_mid_FN += 1.0 if score1 != '2' and score3 == '2': class3_good_FN += 1.0 outfile.write("\n") outfile.write("Class 3:\n") outfile.write("Confusion Matrix:\n") outfile.write("\t" + "{name: >10s}".format(name="positive") + "\t" + "{name: >10s}".format(name="neural") + "\t" + "{name: >10s}".format(name="negative") + "\n") outfile.write("TP\t" + int2int(class3_bad_TP) + "\t" + int2int(class3_mid_TP) + "\t" + int2int(class3_good_TP) + "\n") outfile.write("FP\t" + int2int(class3_bad_FP) + "\t" + int2int(class3_mid_FP) + "\t" + int2int(class3_good_FP) + "\n") outfile.write("FN\t" + int2int(class3_bad_FN) + "\t" + int2int(class3_mid_FN) + "\t" + int2int(class3_good_FN) + "\n") outfile.write( "P\t" + float2int(class3_bad_TP / (class3_bad_TP + class3_bad_FP + 0.000001)) + "\t" + float2int(class3_mid_TP / (class3_mid_TP + class3_mid_FP + 0.000001)) + "\t" + float2int(class3_good_TP / (class3_good_TP + class3_good_FP + 0.000001)) + "\n") outfile.write( "R\t" + float2int(class3_bad_TP / (class3_bad_TP + class3_bad_FN + 0.000001)) + "\t" + float2int(class3_mid_TP / (class3_mid_TP + class3_mid_FN + 0.000001)) + "\t" + float2int(class3_good_TP / (class3_good_TP + class3_good_FN + 0.000001)) + "\n") outfile.write("F1\t" + float2int(class3_bad_TP * 2 / (class3_bad_TP * 2 + class3_bad_FP + class3_bad_FN + 0.000001)) + "\t" + float2int(class3_mid_TP * 2 / (class3_mid_TP * 2 + class3_mid_FP + class3_mid_FN + 0.000001)) + "\t" + float2int(class3_good_TP * 2 / (class3_good_TP * 2 + class3_good_FP + class3_good_FN + 0.000001)) + "\n") outfile.write("F1-Micro:\t" + float2int( (class3_bad_TP + class3_mid_TP + class3_good_TP) * 2 / ((class3_bad_TP + class3_mid_TP + class3_good_TP) * 2 + (class3_bad_FP + class3_mid_FP + class3_good_FP) + (class3_bad_FN + class3_mid_FN + class3_good_FN) + 0.000001)) + "\n") outfile.write("F1-Macro:\t" + float2int( (class3_bad_TP * 2 / (class3_bad_TP * 2 + class3_bad_FP + class3_bad_FN + 0.000001) + class3_mid_TP * 2 / (class3_mid_TP * 2 + class3_mid_FP + class3_mid_FN + 0.000001) + class3_good_TP * 2 / 
(class3_good_TP * 2 + class3_good_FP + class3_good_FN + 0.000001)) / 3.0) + "\n")
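# int2int and float2int are used when writing the confusion-matrix report
# above but are not defined in this file. Minimal sketches under the
# assumption that they only format counts and ratios as fixed-width strings:
def int2int_sketch(x):
    return "{: >10d}".format(int(x))

def float2int_sketch(x):
    return "{: >10.4f}".format(x)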
def main(args): tf.logging.set_verbosity(tf.logging.INFO) # Load configs model_cls_list = [models.get_model(model) for model in args.models] params_list = [default_parameters() for _ in range(len(model_cls_list))] params_list = [ merge_parameters(params, model_cls.get_parameters()) for params, model_cls in zip(params_list, model_cls_list) ] params_list = [ import_params(args.checkpoints[i], args.models[i], params_list[i]) for i in range(len(args.checkpoints)) ] params_list = [ override_parameters(params_list[i], args) for i in range(len(model_cls_list)) ] # Build Graph with tf.Graph().as_default(): model_var_lists = [] # Load checkpoints for i, checkpoint in enumerate(args.checkpoints): tf.logging.info("Loading %s" % checkpoint) var_list = tf.train.list_variables(checkpoint) values = {} reader = tf.train.load_checkpoint(checkpoint) for (name, shape) in var_list: if not name.startswith(model_cls_list[i].get_name()): continue if name.find("losses_avg") >= 0: continue tensor = reader.get_tensor(name) values[name] = tensor model_var_lists.append(values) # Build models model_list = [] for i in range(len(args.checkpoints)): name = model_cls_list[i].get_name() model = model_cls_list[i](params_list[i], name + "_%d" % i) model_list.append(model) params = params_list[0] # Read input file sorted_keys, sorted_inputs = dataset.sort_input_file(args.input) # Build input queue features = dataset.get_inference_input(sorted_inputs, params) # Create placeholders placeholders = [] for i in range(len(params.device_list)): placeholders.append({ "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i), "source_length": tf.placeholder(tf.int32, [None], "source_length_%d" % i) }) # A list of outputs if params.generate_samples: inference_fn = sampling.create_sampling_graph else: inference_fn = inference.create_inference_graph predictions = parallel.data_parallelism( params.device_list, lambda f: inference_fn(model_list, f, params), placeholders) # Create assign ops assign_ops = [] feed_dict = {} all_var_list = tf.trainable_variables() for i in range(len(args.checkpoints)): un_init_var_list = [] name = model_cls_list[i].get_name() for v in all_var_list: if v.name.startswith(name + "_%d" % i): un_init_var_list.append(v) ops = set_variables(un_init_var_list, model_var_lists[i], name + "_%d" % i, feed_dict) assign_ops.extend(ops) assign_op = tf.group(*assign_ops) init_op = tf.tables_initializer() results = [] tf.get_default_graph().finalize() tf.logging.info(args.models[0]) if args.models[0] == 'transformer_raw_t5': t5_list = [] for var in tf.trainable_variables(): if 'en_t5_bias_mat' in var.name or 'de_self_relative_attention_bias' in var.name: t5_list.append(var) tf.logging.info(var) for op in tf.get_default_graph().get_operations(): if 'encoder_t5_bias' in op.name or 'decoder_t5_bias' in op.name: if 'random' in op.name or 'read' in op.name or 'Assign' in op.name or 'placeholder' in op.name: continue t5_list.append(op.values()[0]) tf.logging.info(op.values()[0].name) elif args.models[0] == 'transformer_raw_soft_t5': soft_t5_bias_list = [] for op in tf.get_default_graph().get_operations(): if 'soft_t5_bias' in op.name or 'soft_t5_encoder' in op.name or 'soft_t5_decoder' in op.name: if 'random' in op.name or 'read' in op.name or 'Assign' in op.name or 'placeholder' in op.name or 'decoder' in op.name: continue soft_t5_bias_list.append(op.values()[0]) tf.logging.info(op.values()[0].name) # Create session with tf.Session(config=session_config(params)) as sess: # Restore variables sess.run(assign_op, 
feed_dict=feed_dict) sess.run(init_op) while True: try: feats = sess.run(features) op, feed_dict = shard_features(feats, placeholders, predictions) results.append(sess.run(op, feed_dict=feed_dict)) ''' if args.models[0] == 'transformer_raw_t5': var_en_bucket=tf.get_default_graph().get_tensor_by_name(t5_list[0].name) var_de_bucket=tf.get_default_graph().get_tensor_by_name(t5_list[1].name) var_en_bias=tf.get_default_graph().get_tensor_by_name(t5_list[2].name) en_bucket,de_bucket,en_t5_bias = sess.run([var_en_bucket, var_de_bucket, var_en_bias], feed_dict=feed_dict) ret_param = {'en_bucket':en_bucket,'de_bucket':en_bucket, 'en_t5_bias':en_t5_bias} pickle.dump(ret_param,open(args.checkpoints[0]+'/'+'t5_bias.pkl','wb')) tf.logging.info('store the t5 bias') elif args.models[0] == 'transformer_raw_soft_t5': var_en_alpha=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[0].name) var_en_beta=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[1].name) var_en_t5_bias=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[2].name) en_alpha,en_beta,en_t5_bias = sess.run([var_en_alpha,var_en_beta,var_en_t5_bias], feed_dict=feed_dict) ret_param = {'en_t5_bias':en_t5_bias,'en_alpha':en_alpha, 'en_beta':en_beta} pickle.dump(ret_param,open(args.checkpoints[0]+'/'+'soft_t5_bias.pkl','wb')) tf.logging.info('store the soft-t5 bias') ''' message = "Finished batch %d" % len(results) tf.logging.log(tf.logging.INFO, message) except tf.errors.OutOfRangeError: break # Convert to plain text vocab = params.vocabulary["target"] outputs = [] scores = [] for result in results: for shard in result: for item in shard[0]: outputs.append(item.tolist()) for item in shard[1]: scores.append(item.tolist()) restored_inputs = [] restored_outputs = [] restored_scores = [] for index in range(len(sorted_inputs)): restored_inputs.append(sorted_inputs[sorted_keys[index]]) restored_outputs.append(outputs[sorted_keys[index]]) restored_scores.append(scores[sorted_keys[index]]) # Write to file if sys.version_info.major == 2: outfile = open(args.output, "w") elif sys.version_info.major == 3: outfile = open(args.output, "w", encoding="utf-8") else: raise ValueError("Unkown python running environment!") count = 0 for outputs, scores in zip(restored_outputs, restored_scores): for output, score in zip(outputs, scores): decoded = [] for idx in output: if idx == params.mapping["target"][params.eos]: break decoded.append(vocab[idx]) decoded = " ".join(decoded) if not args.verbose: outfile.write("%s\n" % decoded) else: pattern = "%d ||| %s ||| %s ||| %f\n" source = restored_inputs[count] values = (count, source, decoded, score) outfile.write(pattern % values) count += 1 outfile.close()
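# The id-to-text loop above (truncate at EOS, map ids through the vocabulary,
# join with spaces) recurs in several of these scripts. The same logic as a
# small reference helper; nothing is assumed beyond what the loop already does:
def ids_to_text(ids, vocab, eos_id):
    tokens = []
    for idx in ids:
        if idx == eos_id:
            break
        tokens.append(vocab[idx])
    return " ".join(tokens)

# e.g. decoded = ids_to_text(output, vocab, params.mapping["target"][params.eos])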
params = params_list[0]

# Create placeholders
placeholders = []

for i in range(len(params.device_list)):
    placeholders.append({
        "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i),
        "source_length": tf.placeholder(tf.int32, [None],
                                        "source_length_%d" % i)
    })

# Create parallel predictions
inference_fn = inference.create_inference_graph
predictions = parallel.data_parallelism(
    params.device_list, lambda f: inference_fn(model_list, f, params),
    placeholders)

# Create assign ops
assign_ops = []
all_var_list = tf.trainable_variables()

for i in range(len(args.checkpoints)):
    un_init_var_list = []
    name = model_cls_list[i].get_name()

    for v in all_var_list:
        if v.name.startswith(name + "_%d" % i):
            un_init_var_list.append(v)
def main(args): eval_steps = args.eval_steps tf.logging.set_verbosity(tf.logging.DEBUG) # Load configs model_cls_list = [models.get_model(model) for model in args.models] params_list = [default_parameters() for _ in range(len(model_cls_list))] params_list = [ merge_parameters(params, model_cls.get_parameters()) for params, model_cls in zip(params_list, model_cls_list) ] params_list = [ import_params(args.checkpoints[i], args.models[i], params_list[i]) for i in range(len(args.checkpoints)) ] params_list = [ override_parameters(params_list[i], args) for i in range(len(model_cls_list)) ] # Build Graph with tf.Graph().as_default(): model_var_lists = [] # Load checkpoints for i, checkpoint in enumerate(args.checkpoints): tf.logging.info("Loading %s" % checkpoint) var_list = tf.train.list_variables(checkpoint) values = {} reader = tf.train.load_checkpoint(checkpoint) for (name, shape) in var_list: if not name.startswith(model_cls_list[i].get_name()): continue if name.find("losses_avg") >= 0: continue tensor = reader.get_tensor(name) values[name] = tensor model_var_lists.append(values) # Build models model_fns = [] for i in range(len(args.checkpoints)): name = model_cls_list[i].get_name() model = model_cls_list[i](params_list[i], name + "_%d" % i) model_fn = model.get_inference_func() model_fns.append(model_fn) params = params_list[0] # Read input file #features = dataset.get_inference_input(args.input, params) #features_eval = dataset.get_inference_input(args.eval, params) #features_test = dataset.get_inference_input(args.test, params) features_train = dataset.get_inference_input(args.input, params, False, True) features_eval = dataset.get_inference_input(args.eval, params, True, False) features_test = dataset.get_inference_input(args.test, params, True, False) # Create placeholders placeholders = [] for i in range(len(params.device_list)): placeholders.append({ "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i), "source_length": tf.placeholder(tf.int32, [None], "source_length_%d" % i), "target": tf.placeholder(tf.int32, [None, 2], "target_%d" % i) }) # A list of outputs predictions = parallel.data_parallelism( params.device_list, lambda f: inference.create_inference_graph(model_fns, f, params), placeholders) # Create assign ops assign_ops = [] all_var_list = tf.trainable_variables() for i in range(len(args.checkpoints)): un_init_var_list = [] name = model_cls_list[i].get_name() for v in all_var_list: if v.name.startswith(name + "_%d" % i): un_init_var_list.append(v) ops = set_variables(un_init_var_list, model_var_lists[i], name + "_%d" % i) assign_ops.extend(ops) assign_op = tf.group(*assign_ops) results = [] tf_x = tf.placeholder(tf.float32, [None, None, 512]) tf_y = tf.placeholder(tf.int32, [None, 2]) tf_x_len = tf.placeholder(tf.int32, [None]) src_mask = -1e9 * (1.0 - tf.sequence_mask( tf_x_len, maxlen=tf.shape(predictions[0])[1], dtype=tf.float32)) with tf.variable_scope("my_metric"): #q,k,v = tf.split(linear(tf_x, 3*512, True, True, scope="logit_transform"), [512, 512,512],axis=-1) q, k, v = tf.split(nn.linear(predictions[0], 3 * 512, True, True, scope="logit_transform"), [512, 512, 512], axis=-1) q = nn.linear( tf.nn.tanh(q), 1, True, True, scope="logit_transform2")[:, :, 0] + src_mask # label smoothing ce1 = nn.smoothed_softmax_cross_entropy_with_logits( logits=q, labels=tf_y[:, :1], #smoothing=params.label_smoothing, smoothing=False, normalize=True) w1 = tf.nn.softmax(q)[:, None, :] #k = 
nn.linear(tf.nn.tanh(tf.matmul(w1,v)+k),1,True,True,scope="logit_transform3")[:,:,0]+src_mask k = tf.matmul(k, tf.matmul(w1, v) * (512**-0.5), False, True)[:, :, 0] + src_mask # label smoothing ce2 = nn.smoothed_softmax_cross_entropy_with_logits( logits=k, labels=tf_y[:, 1:], #smoothing=params.label_smoothing, smoothing=False, normalize=True) w2 = tf.nn.softmax(k)[:, None, :] weights = tf.concat([w1, w2], axis=1) loss = tf.reduce_mean(ce1 + ce2) #tf_x = tf.placeholder(tf.float32, [None, 512]) #tf_y = tf.placeholder(tf.int32, [None]) #l1 = tf.layers.dense(tf.squeeze(predictions[0], axis=-2), 64, tf.nn.sigmoid) #output = tf.layers.dense(l1, int(args.softmax_size)) #loss = tf.losses.sparse_softmax_cross_entropy(labels=tf_y, logits=output) o1 = tf.argmax(w1, axis=-1) o2 = tf.argmax(w2, axis=-1) a1, a1_update = tf.metrics.accuracy(labels=tf.squeeze(tf_y[:, 0]), predictions=tf.argmax(w1, axis=-1), name='a1') a2, a2_update = tf.metrics.accuracy(labels=tf.squeeze(tf_y[:, 1]), predictions=tf.argmax(w2, axis=-1), name='a2') accuracy, accuracy_update = tf.metrics.accuracy( labels=tf.squeeze(tf_y), predictions=tf.argmax(weights, axis=-1), name='a_all') running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="my_metric") #running_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="my_metric") running_vars_initializer = tf.variables_initializer( var_list=running_vars) #variables_to_train = tf.trainable_variables() #print (len(variables_to_train), (variables_to_train[0]), variables_to_train[1]) #variables_to_train.remove(variables_to_train[0]) #variables_to_train.remove(variables_to_train[0]) #print (len(variables_to_train)) variables_to_train = [ v for v in tf.trainable_variables() if v.name.startswith("my_metric") ] optimizer = tf.train.AdamOptimizer(learning_rate=0.001) train_op = optimizer.minimize(loss, var_list=variables_to_train) #train_op = optimizer.minimize(loss, var_list=running_vars) # Create session with tf.Session(config=session_config(params)) as sess: init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init_op) # Restore variables sess.run(assign_op) sess.run(tf.tables_initializer()) current_step = 0 best_validate_acc = 0 last_test_acc = 0 train_x_set = [] train_y_set = [] valid_x_set = [] valid_y_set = [] test_x_set = [] test_y_set = [] train_x_len_set = [] valid_x_len_set = [] test_x_len_set = [] while current_step < eval_steps: print('=======current step ' + str(current_step)) batch_num = 0 while True: try: feats = sess.run(features_train) op, feed_dict = shard_features(feats, placeholders, predictions) #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2)) y = feed_dict.values()[2] x_len = feed_dict.values()[1] feed_dict.update({tf_y: y}) feed_dict.update({tf_x_len: x_len}) los, __, pred = sess.run([loss, train_op, weights], feed_dict=feed_dict) print("current_step", current_step, "batch_num", batch_num, "loss", los) batch_num += 1 if batch_num % 100 == 0: # eval b_total = 0 a_total = 0 a1_total = 0 a2_total = 0 validate_acc = 0 batch_num_eval = 0 while True: try: feats_eval = sess.run(features_eval) op, feed_dict_eval = shard_features( feats_eval, placeholders, predictions) #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2)) y = feed_dict_eval.values()[2] x_len = feed_dict_eval.values()[1] feed_dict_eval.update({tf_y: y}) feed_dict_eval.update({tf_x_len: x_len}) sess.run(running_vars_initializer) acc = 0 #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : 
y}) sess.run([ a1_update, a2_update, accuracy_update, weights ], feed_dict=feed_dict_eval) acc1, acc2, acc = sess.run( [a1, a2, accuracy]) batch_size = len(y) #print(acc) a1_total += round(batch_size * acc1) a2_total += round(batch_size * acc2) a_total += round(batch_size * acc) b_total += batch_size batch_num_eval += 1 if batch_num_eval == 20: break except tf.errors.OutOfRangeError: print("eval out of range") break if b_total: validate_acc = a_total / b_total print("eval acc : " + str(validate_acc) + "( " + str(a1_total / b_total) + ", " + str(a2_total / b_total) + " )") print("last test acc : " + str(last_test_acc)) if validate_acc > best_validate_acc: best_validate_acc = validate_acc # test b_total = 0 a1_total = 0 a2_total = 0 a_total = 0 batch_num_test = 0 with open(args.output, "w") as outfile: while True: try: feats_test = sess.run(features_test) op, feed_dict_test = shard_features( feats_test, placeholders, predictions) #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2)) y = feed_dict_test.values()[2] x_len = feed_dict_test.values()[1] feed_dict_test.update({tf_y: y}) feed_dict_test.update( {tf_x_len: x_len}) sess.run(running_vars_initializer) acc = 0 #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : y}) __, __, __, out1, out2 = sess.run( [ a1_update, a2_update, accuracy_update, o1, o2 ], feed_dict=feed_dict_test) acc1, acc2, acc = sess.run( [a1, a2, accuracy]) batch_size = len(y) a_total += round(batch_size * acc) a1_total += round(batch_size * acc1) a2_total += round(batch_size * acc2) b_total += batch_size batch_num_test += 1 for pred1, pred2 in zip(out1, out2): outfile.write("%s " % pred1[0]) outfile.write("%s\n" % pred2[0]) if batch_num_test == 20: break except tf.errors.OutOfRangeError: print("test out of range") break if b_total: last_test_acc = a_total / b_total print("new test acc : " + str(last_test_acc) + "( " + str(a1_total / b_total) + ", " + str(a2_total / b_total) + " )") if batch_num == 25000: break except tf.errors.OutOfRangeError: print("train out of range") break # eval # b_total = 0 # a_total = 0 # a1_total = 0 # a2_total = 0 # validate_acc = 0 # batch_num = 0 # while True: # try: # feats_eval = sess.run(features_eval) # op, feed_dict = shard_features(feats_eval, placeholders, predictions) # #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2)) # y = feed_dict.values()[2] # x_len = feed_dict.values()[1] # feed_dict.update({tf_y:y}) # feed_dict.update({tf_x_len:x_len}) # sess.run(running_vars_initializer) # acc = 0 #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : y}) # sess.run([a1_update, a2_update, accuracy_update, weights], feed_dict = feed_dict) # acc1,acc2,acc = sess.run([a1,a2,accuracy]) # batch_size = len(y) #print(acc) # a1_total += round(batch_size*acc1) # a2_total += round(batch_size*acc2) # a_total += round(batch_size*acc) # b_total += batch_size # batch_num += 1 # if batch_num == 10: # break # except tf.errors.OutOfRangeError: # print ("eval out of range") # break # validate_acc = a_total/b_total # print("eval acc : " + str(validate_acc) + "( "+str(a1_total/b_total)+ ", "+ str(a2_total/b_total) + " )") # print("last test acc : " + str(last_test_acc)) # if validate_acc > best_validate_acc: # best_validate_acc = validate_acc # test # b_total = 0 # a1_total = 0 # a2_total = 0 # a_total = 0 # batch_num = 0 # while True: # try: # feats_test = sess.run(features_test) # op, feed_dict = shard_features(feats_test, placeholders, # predictions) #x = 
(np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2)) # y = feed_dict.values()[2] # x_len = feed_dict.values()[1] # feed_dict.update({tf_y:y}) # feed_dict.update({tf_x_len:x_len}) # sess.run(running_vars_initializer) # acc = 0 #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : y}) # sess.run([a1_update,a2_update,accuracy_update, weights], feed_dict = feed_dict) # acc1,acc2,acc = sess.run([a1,a2,accuracy]) # batch_size = len(y) # a_total += round(batch_size*acc) # a1_total += round(batch_size*acc1) # a2_total += round(batch_size*acc2) # b_total += batch_size # batch_num += 1 # if batch_num==10: # break # except tf.errors.OutOfRangeError: # print ("test out of range") # break # last_test_acc = a_total/b_total # print("new test acc : " + str(last_test_acc)+ "( "+str(a1_total/b_total)+ ", "+ str(a2_total/b_total) + " )") current_step += 1 print("") print("Final test acc " + str(last_test_acc)) return
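# The training loop above pulls labels and lengths out of feed_dict by
# position (feed_dict.values()[2], feed_dict.values()[1]); dict value order is
# not guaranteed, and .values() is not indexable on Python 3. A safer pattern,
# sketched under the assumption that the batch fits on a single device so the
# full feats dict matches the fed shard, is to read the features by name:
feats = sess.run(features_train)
op, feed_dict = shard_features(feats, placeholders, predictions)
y = feats["target"]             # instead of feed_dict.values()[2]
x_len = feats["source_length"]  # instead of feed_dict.values()[1]
feed_dict[tf_y] = y
feed_dict[tf_x_len] = x_len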
def main(args): tf.logging.set_verbosity(tf.logging.INFO) # Load configs model_cls_list = [transformer.Transformer for model in args.models] params_list = [default_parameters() for _ in range(len(model_cls_list))] params_list = [ merge_parameters(params, model_cls.get_parameters()) for params, model_cls in zip(params_list, model_cls_list) ] params_list = [ import_params(args.checkpoints[i], args.models[i], params_list[i]) for i in range(len(args.checkpoints)) ] params_list = [ override_parameters(params_list[i], args) for i in range(len(model_cls_list)) ] # Build Graph with tf.Graph().as_default(): model_var_lists = [] # Load checkpoints for i, checkpoint in enumerate(args.checkpoints): tf.logging.info("Loading %s" % checkpoint) var_list = tf.train.list_variables(checkpoint) values = {} reader = tf.train.load_checkpoint(checkpoint) for (name, shape) in var_list: if not name.startswith(model_cls_list[i].get_name()): continue if name.find("losses_avg") >= 0: continue tensor = reader.get_tensor(name) values[name] = tensor model_var_lists.append(values) # Build models model_fns = [] for i in range(len(args.checkpoints)): name = model_cls_list[i].get_name() model = model_cls_list[i](params_list[i], name + "_%d" % i) model_fn = model.get_rerank_inference_func() model_fns.append(model_fn) params = params_list[0] # Read input file sorted_keys, sorted_inputs, sorted_constraints = \ src_cons_dataset.sort_input_src_cons(args.input, args.constraints) # Build input queue features = src_cons_dataset.get_input_with_src_constraints( sorted_inputs, sorted_constraints, params) print(sorted_keys) #Create placeholder placeholders = [] for i in range(len(params.device_list)): placeholders.append({ "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i), "source_length": tf.placeholder(tf.int32, [None], "source_length_%d" % i), "constraints_src_pos": tf.placeholder(tf.int32, [None, None, None], "constraints_src_pos_%d" % i), "constraints": tf.placeholder(tf.int32, [None, None, None], "constraints_%d" % i), "constraints_len": tf.placeholder(tf.int32, [None, None], "constraints_len_%d" % i) }) encoding_fn = model_fns[0][0] encoder_op = parallel.data_parallelism( params.device_list, lambda f: encoding_fn(f, params), placeholders) state_placeholders = [] for i in range(len(params.device_list)): decode_state = { "encoder": tf.placeholder(tf.float32, [None, None, params.hidden_size], "encoder_%d" % i), #"encoder_weight": we doesn't need encoder weight "source": tf.placeholder(tf.int32, [None, None], "source_%d" % i), "source_length": tf.placeholder(tf.int32, [None], "source_length_%d" % i), # [bos_id, ...] 
=> [..., 0] "target": tf.placeholder(tf.int32, [None, None], "target_%d" % i), #"target_length": tf.placeholder(tf.int32, [None, ], "target_length_%d" % i) } #需要这些值,以进行增量式解码 for j in range(params.num_decoder_layers): decode_state["decoder_layer_%d_key" % j] = tf.placeholder( tf.float32, [None, None, params.hidden_size], "decoder_layer_%d_key_%d" % (j, i)) decode_state["decoder_layer_%d_value" % j] = tf.placeholder( tf.float32, [None, None, params.hidden_size], "decoder_layer_%d_value_%d" % (j, i)) # layer and GPU # we only need the return value of this # decode_state["decoder_layer_%d_att_weight" % j] = tf.placeholder(tf.float32, [None, None, None, None], # # N Head T S inference的时候,T总是为1,表示1步 # "decoder_layer_%d_att_weight" % j), state_placeholders.append(decode_state) def decoding_fn(s): _decoding_fn = model_fns[0][1] #split s to state and feature, and 转换为嵌套的结构,以满足transformer模型 state = { "encoder": s["encoder"], "decoder": { "layer_%d" % j: { "key": s["decoder_layer_%d_key" % j], "value": s["decoder_layer_%d_value" % j], } for j in range(params.num_decoder_layers) } } inputs = s["target"] #inputs = tf.Print(inputs, [inputs], "before target", 100, 10000) feature = { "source": s["source"], "source_length": s["source_length"], # [bos_id, ...] => [..., 0] # "target": tf.pad(inputs[:,1:], [[0, 0], [0, 1]]) #"target": tf.pad(inputs, [[0, 0], [0, 1]]), # 前面没有bos_id,因此直接补上0,这是为了和decode_graph中的补bos相配合 "target": inputs, "target_length": tf.fill([tf.shape(inputs)[0]], tf.shape(inputs)[1]) } #feature["target"] = tf.Print(feature["target"], [feature["target"]], "target", 100,10000) ret = _decoding_fn(feature, state, params) return ret decoder_op = parallel.data_parallelism(params.device_list, lambda s: decoding_fn(s), state_placeholders) #batch = tf.shape(encoder_output)[0] # Create assign ops assign_ops = [] all_var_list = tf.trainable_variables() for i in range(len(args.checkpoints)): un_init_var_list = [] name = model_cls_list[i].get_name() for v in all_var_list: if v.name.startswith(name + "_%d" % i): un_init_var_list.append(v) ops = set_variables(un_init_var_list, model_var_lists[i], name + "_%d" % i) assign_ops.extend(ops) assign_op = tf.group(*assign_ops) results = [] # Create session with tf.Session(config=session_config(params)) as sess: # from tensorflow.python import debug as tf_debug # sess = tf_debug.LocalCLIDebugWrapperSession(sess,ui_type='curses')#readline # Restore variables sess.run(assign_op) sess.run(tf.tables_initializer()) # pad_id = params.mapping["target"][params.pad] # bos_id = params.mapping["target"][params.bos] # eos_id = params.mapping["target"][params.eos] while True: try: feats = sess.run(features) encoder_op, feed_dict = shard_features( feats, placeholders, encoder_op) #print("encoding %d" % i) encoder_state = sess.run(encoder_op, feed_dict=feed_dict) decoder_input_list = [] encoder_output_list = [] for j in range(len(feats["source"])): decoder_input_item = { "source": [feats["source"][j]], "source_length": [feats["source_length"][j]], "constraints_src_pos": feats["constraints_src_pos"][j], "constraints": feats["constraints"][j], "constraints_len": feats["constraints_len"][j], } decoder_input_list.append(decoder_input_item) # 不能简单的用GPU数量来循环,要用实际的输出来循环,因为有时候会空出GPU,比如最后一句或几句,无法凑够给1个GPU for i in range(len(encoder_state[0])): # gpu state_len = len(encoder_state[0][i]) # for j in range(state_len): encoder_output_item = { "encoder": encoder_state[0][i][j:j + 1], "encoder_weight": encoder_state[1][i][j:j + 1] } encoder_output_list.append(encoder_output_item) for input, 
encoder_output in zip(decoder_input_list, encoder_output_list): # print(input["source"]) # print(input["constraints"]) ################# # create constraint translation related model # build ensembled TM thumt_tm = ThumtTranslationModel( sess, decoder_op, encoder_output, state_placeholders, input, params) # Build GBS search cons_decoder = create_constrained_decoder(thumt_tm) ################## max_length = input["source_length"][ 0] + params.decode_length beam_size = params.beam_size # top_beams = params.top_beams top_beams = 1 best_output = decode(encoder_output, sess, decoder_op, state_placeholders, params, cons_decoder, thumt_tm, input, top_beams, max_hyp_len=max_length, beam_size=beam_size, return_alignments=True, length_norm=False) # constraints=input_constraints, # return_alignments=return_alignments, # length_norm=length_norm) results.append(best_output) message = "Finished sentences: %d" % len(results) tf.logging.log(tf.logging.INFO, message) except tf.errors.OutOfRangeError: break # Convert to plain text vocab = params.vocabulary["target"] outputs = [] scores = [] mask_ratio = [] best_alignment = [] # for result in results: # outputs.append(result) # scores.append(0) # mask_ratio.append(0) for result in results: # print(result[0]) # #outputs.append(result[0][0][1:]) sub_result = zip(*result[0]) outputs.extend(sub_result[0]) scores.extend(sub_result[1]) mask_ratio.extend([0] * len(sub_result[1])) #放入假的ratio best_alignment.extend(result[1]) # for sub_result in result: # 每次解码结果可能有多个bestscore # outputs.append(sub_result[0][0][1:]) # seqs # scores.append(sub_result[0][1]) # score # mask_ratio.append(0) # best_alignment.extend(sub_result[1]) new_outputs = [] for s in outputs: new_outputs.append(s[1:]) outputs = new_outputs for s, score in zip(outputs, scores): s1 = [] for idx in s: if idx == params.mapping["target"][params.eos]: break s1.append(vocab[idx]) s1 = " ".join(s1) #print("%s" % s1) print("%f %s" % (score, s1)) restored_inputs = [] restored_outputs = [] restored_scores = [] restored_ratio = [] restored_constraints = [] restored_alignment = [] for index in range(len(sorted_inputs)): restored_inputs.append(sorted_inputs[sorted_keys[index]]) restored_outputs.append(outputs[sorted_keys[index]]) restored_scores.append(scores[sorted_keys[index]]) restored_ratio.append(mask_ratio[sorted_keys[index]]) restored_constraints.append(sorted_constraints[sorted_keys[index]]) restored_alignment.append(best_alignment[sorted_keys[index]]) # Write to file with open(args.output, "w") as outfile: count = 0 for output, score, ratio in zip(restored_outputs, restored_scores, restored_ratio): decoded = [] for idx in output: if idx == params.mapping["target"][params.eos]: break decoded.append(vocab[idx]) decoded = " ".join(decoded) if not args.verbose: outfile.write("%s\n" % decoded) else: pattern = "%d ||| %s ||| %s ||| %f ||| %f ||| %d\n" source = restored_inputs[count] cons = restored_constraints[count] cons_token_num = 0 for cons_item in cons: cons_token_num += cons_item["tgt_len"] values = (count, source, decoded, score, ratios[0], cons_token_num) outfile.write(pattern % values) count += 1 with open(args.output + ".alignment", "w") as outfile: count = 0 for alignment in restored_alignment: outfile.write("%d\n" % count) cPickle.dump(alignment, outfile) count += 1