import copy
import os

import numpy as np
import tensorflow as tf

# Helper functions (get_model_params, merge_params, decoding_params,
# load_glove_embedding, get_ensemble_vocabulary, get_tagger_model,
# get_transition_params, session_config, set_variables, get_sorted_input_fn)
# are assumed to be defined elsewhere in this project.


def ensemble(args):
    if len(args.vocab_path) != len(args.checkpoints) + 1:
        raise ValueError("Unmatched vocabulary number and checkpoint number")

    # override parameters
    params = tf.contrib.training.HParams(
        data_path=args.data_path,
        model_name=args.model_name,
        vocab_path=args.vocab_path,
        model_params=args.model_params,
        device_list=args.device_list or [0],
        allow_growth=True
    )
    mparams = get_model_params(args.model_name)
    params = merge_params(params, mparams)
    params.parse(args.model_params)
    dparams = decoding_params()
    params = merge_params(params, dparams)
    params.parse(args.decoding_params)

    # optional pre-trained word embedding
    if args.emb_path:
        if args.emb_path.find("glove") > 0:
            emb = load_glove_embedding(args.emb_path, None)
        else:
            emb = np.loadtxt(args.emb_path).astype("float32")
    else:
        emb = None

    vocabularies = get_ensemble_vocabulary(params.vocab_path)
    model_var_lists = []
    model_params_list = []

    # one copy of the hyper-parameters per checkpoint
    for i in range(len(args.checkpoints)):
        cparams = copy.copy(params)
        cparams.add_hparam("embedding", emb)
        cparams.add_hparam("vocabulary", vocabularies[i])
        model_params_list.append(cparams)

    # load checkpoints
    for checkpoint in args.checkpoints:
        var_list = tf.train.list_variables(checkpoint)
        values = {}
        reader = tf.train.load_checkpoint(checkpoint)

        for (name, shape) in var_list:
            if not name.startswith("tagger"):
                continue
            if name.find("losses_avg") >= 0:
                continue
            tensor = reader.get_tensor(name)
            values[name] = tensor

        model_var_lists.append(values)

    # build graph: one sub-graph per checkpoint, fed by shared placeholders
    inputs = tf.placeholder(tf.int32, [None, None], "inputs")
    preds = tf.placeholder(tf.int32, [None, None], "preds")
    embedding = tf.placeholder(tf.float32, [None, None, None], "embedding")
    mask = tf.placeholder(tf.float32, [None, None], "mask")
    features = {"inputs": inputs, "preds": preds}

    if emb is not None:
        features["embedding"] = embedding
        features["mask"] = mask

    predictions = []

    for i in range(len(args.checkpoints)):
        with tf.variable_scope("tagger_%d" % i):
            model_fn = get_tagger_model(params.model_name,
                                        tf.contrib.learn.ModeKeys.INFER)
            outputs, probs = model_fn(features, model_params_list[i])
            predictions.append(probs)

    # map label indices back to label strings
    labels = []
    ivocab = {}

    for k, idx in vocabularies[0]["targets"].items():
        ivocab[idx] = k

    for idx in range(len(ivocab)):
        labels.append(ivocab[idx])

    tparams = get_transition_params(labels)

    # create session
    with tf.Session(config=session_config(params)) as sess:
        tf.global_variables_initializer().run()

        # restore variables: copy each checkpoint's weights into its scope
        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            uninit_var_list = []

            for v in all_var_list:
                if v.name.startswith("tagger_%d" % i):
                    uninit_var_list.append(v)

            set_variables(uninit_var_list, model_var_lists[i],
                          "tagger_%d" % i)

        # create input_fn
        all_sorted_inputs = []
        all_sorted_keys = []
        all_input_fns = []

        for i in range(len(args.checkpoints)):
            sorted_inputs, sorted_keys, num_batches, fn = get_sorted_input_fn(
                params.data_path,
                model_params_list[i].vocabulary["inputs"],
                params.decode_batch_size * len(params.device_list),
                model_params_list[i]
            )
            all_sorted_inputs.append(sorted_inputs)
            all_sorted_keys.append(sorted_keys)
            all_input_fns.append(fn)

        # run every model over the whole input
        decodes = []

        for i, input_fn in enumerate(all_input_fns):
            outputs = []

            for features in input_fn:
                feed_dict = {
                    inputs: features["inputs"],
                    preds: features["preds"]
                }

                if args.emb_path:
                    feed_dict[embedding] = features["embedding"]
                    feed_dict[mask] = features["mask"]

                output = sess.run(predictions[i], feed_dict=feed_dict)
                outputs.append(output)

            decodes.append(outputs)

        # ensemble: average the label distributions over all models
        decodes = list(zip(*decodes))
        probs = []

        for item in decodes:
            # [batch, max_len, num_label]
            outputs = sum(item) / float(len(item))
            probs.append(outputs)

        # collect plain-text outputs for every sentence
        decodes = []
        count = 0

        for item in probs:
            for dist in item:
                input_line = all_sorted_inputs[0][count]
                # the first field of each input line is not a token, skip it
                seq_len = len(input_line.strip().split()[1:])
                output_text = []

                if args.viterbi:
                    dist = dist[:seq_len, :]
                    outputs, _ = tf.contrib.crf.viterbi_decode(dist, tparams)
                else:
                    dist = dist[:seq_len, :]
                    outputs = np.argmax(dist, axis=1)

                for index in range(seq_len):
                    output_text.append(ivocab[outputs[index]])

                # decode to plain text
                output_text = " ".join(output_text)
                decodes.append(output_text)
                count += 1

        # all models read the same file, so any key list restores the order
        sorted_inputs = all_sorted_inputs[0]
        sorted_keys = all_sorted_keys[0]
        sorted_inputs.reverse()
        decodes.reverse()
        outputs = []

        for index in range(len(sorted_inputs)):
            outputs.append(decodes[sorted_keys[index]])

        if not args.output_name:
            base_filename = os.path.basename(params.data_path)
            model_name = params.model_name
            decode_filename = base_filename + "." + model_name + ".decodes"
        else:
            decode_filename = args.output_name

        outfile = tf.gfile.Open(decode_filename, "w")

        for output in outputs:
            outfile.write("%s\n" % output)

        outfile.close()
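
# NOTE: the helper below is an illustrative sketch added for exposition, not
# part of the original pipeline. It shows, in plain numpy, what the
# averaging loop in ensemble() computes: each model contributes a
# [batch, max_len, num_label] array of label distributions, and the ensemble
# takes their element-wise mean before decoding. All values are made up.
def _average_distributions_example():
    # two fake model outputs with shape [batch=1, max_len=2, num_label=2]
    model_a = np.array([[[0.7, 0.3], [0.2, 0.8]]], dtype="float32")
    model_b = np.array([[[0.5, 0.5], [0.4, 0.6]]], dtype="float32")
    item = [model_a, model_b]
    # same expression as the loop body in ensemble()
    averaged = sum(item) / float(len(item))
    return averaged  # [[[0.6, 0.4], [0.3, 0.7]]]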
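
# NOTE: another illustrative sketch. When args.viterbi is set, ensemble()
# decodes each sentence with tf.contrib.crf.viterbi_decode, which is a
# pure-numpy dynamic program (no session required). The emission scores and
# transition weights here are made up.
def _viterbi_decode_example():
    score = np.array([[2.0, 0.5],
                      [0.5, 2.0]])   # [seq_len=2, num_tags=2]
    trans = np.array([[0.5, -0.5],
                      [-0.5, 0.5]])  # [num_tags, num_tags]
    best_path, best_score = tf.contrib.crf.viterbi_decode(score, trans)
    return best_path, best_score  # [0, 1], 3.5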
def srl_model(features, labels, mode, params):
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        initializer = get_initializer(params)
        tf.get_variable_scope().set_initializer(initializer)

        model_fn = get_tagger_model(params.model_name, mode)
        features["targets"] = labels

        with tf.variable_scope("tagger"):
            loss = parallel_model(model_fn, features, params, mode)

        # track an exponential moving average of the training loss
        with tf.variable_scope("losses_avg"):
            loss_moving_avg = tf.get_variable("training_loss",
                                              initializer=100.0,
                                              trainable=False)
            lm = loss_moving_avg.assign(loss_moving_avg * 0.9 + loss * 0.1)
            tf.summary.scalar("loss_avg/total_loss", lm)

        # make sure the moving average is updated at every training step
        with tf.control_dependencies([lm]):
            loss = tf.identity(loss)

        global_step = tf.train.get_or_create_global_step()
        lr = get_learning_rate_decay(params.learning_rate, global_step,
                                     params)

        # create optimizer
        if params.optimizer == "Adam":
            opt = tf.train.AdamOptimizer(lr,
                                         beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == "Adadelta":
            opt = tf.train.AdadeltaOptimizer(lr,
                                             rho=params.adadelta_rho,
                                             epsilon=params.adadelta_epsilon)
        elif params.optimizer == "SGD":
            opt = tf.train.GradientDescentOptimizer(lr)
        elif params.optimizer == "Nadam":
            opt = tf.contrib.opt.NadamOptimizer(lr,
                                                beta1=params.adam_beta1,
                                                beta2=params.adam_beta2,
                                                epsilon=params.adam_epsilon)
        else:
            raise ValueError("Unknown optimizer %s" % params.optimizer)

        tf.summary.scalar("learning_rate", lr)

        log_hook = tf.train.LoggingTensorHook(
            {
                "step": global_step,
                "loss": loss,
                "inputs": tf.shape(features["inputs"]),
                "labels": tf.shape(labels)
            },
            every_n_iter=1
        )

        # log every trainable variable and the total parameter count
        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            tf.logging.info("%s\tshape %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = int(np.prod(np.array(v.shape.as_list())))
            total_size += v_size

        tf.logging.info("Total trainable variables size: %d", total_size)

        train_op = tf.contrib.layers.optimize_loss(
            name="training",
            loss=loss,
            global_step=global_step,
            learning_rate=lr,
            clip_gradients=params.clip_grad_norm or None,
            optimizer=opt,
            colocate_gradients_with_ops=True
        )

        training_chief_hooks = [log_hook]
        predictions = None
    elif mode == tf.contrib.learn.ModeKeys.EVAL:
        model_fn = get_tagger_model(params.model_name, mode)
        features["targets"] = labels

        with tf.variable_scope("tagger"):
            loss, logits = model_fn(features, params)

        predictions = {"predictions": logits}
        train_op = None
        training_chief_hooks = None
    elif mode == tf.contrib.learn.ModeKeys.INFER:
        model_fn = get_tagger_model(params.model_name, mode)
        features["targets"] = labels

        with tf.variable_scope("tagger"):
            outputs, probs = model_fn(features, params)

        predictions = {
            "inputs": features["inputs"],
            "outputs": outputs,
            "distribution": probs
        }
        loss = None
        train_op = None
        training_chief_hooks = None
    else:
        raise ValueError("Unknown mode %s" % mode)

    spec = tf.contrib.learn.ModelFnOps(
        mode=mode,
        loss=loss,
        train_op=train_op,
        training_chief_hooks=training_chief_hooks,
        predictions=predictions
    )

    return spec
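
# NOTE: an illustrative sketch of how a model_fn with this signature is
# typically wired into tf.contrib.learn (TensorFlow 1.x). The model_dir,
# step count, and train_input_fn are assumptions made for the example, not
# this project's actual training entry point.
def _estimator_usage_example(params, train_input_fn):
    estimator = tf.contrib.learn.Estimator(
        model_fn=srl_model,
        model_dir="/tmp/srl_model",  # hypothetical output directory
        params=params
    )
    # fit() repeatedly calls srl_model in TRAIN mode via train_input_fn
    estimator.fit(input_fn=train_input_fn, steps=100000)
    return estimator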