Example #1
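Ensemble decoding for the tagger: load several trained checkpoints, average their per-token label distributions, and write the decoded label sequences to a file.
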
import copy
import os

import numpy as np
import tensorflow as tf

# helpers such as get_model_params, merge_params, get_tagger_model and
# get_sorted_input_fn are assumed to come from the surrounding project


def ensemble(args):
    if len(args.vocab_path) != len(args.checkpoints) + 1:
        raise ValueError("Expected one more vocabulary than checkpoints")

    # override parameters
    params = tf.contrib.training.HParams(
        data_path=args.data_path,
        model_name=args.model_name,
        vocab_path=args.vocab_path,
        model_params=args.model_params,
        device_list=args.device_list or [0],
        allow_growth=True
    )

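    # merge model defaults and decoding defaults, then apply the
    # command-line overrides on top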
    mparams = get_model_params(args.model_name)
    params = merge_params(params, mparams)
    params.parse(args.model_params)
    dparams = decoding_params()
    params = merge_params(params, dparams)
    params.parse(args.decoding_params)

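    # optionally load pre-trained embeddings, either GloVe text format or a
    # plain whitespace-separated matrix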
    if args.emb_path:
        if "glove" in args.emb_path:
            emb = load_glove_embedding(args.emb_path, None)
        else:
            emb = np.loadtxt(args.emb_path).astype("float32")
    else:
        emb = None

    vocabularies = get_ensemble_vocabulary(params.vocab_path)

    model_var_lists = []
    model_params_list = []

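    # one HParams copy per checkpoint, each carrying its own vocabulary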
    for i in range(len(args.checkpoints)):
        cparams = copy.copy(params)
        cparams.add_hparam("embedding", emb)
        cparams.add_hparam("vocabulary", vocabularies[i])
        model_params_list.append(cparams)

    # load checkpoints
    for checkpoint in args.checkpoints:
        var_list = tf.train.list_variables(checkpoint)
        values = {}
        reader = tf.train.load_checkpoint(checkpoint)

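        # keep only tagger weights; the loss moving averages created during
        # training are not needed for inference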
        for (name, shape) in var_list:
            if not name.startswith("tagger"):
                continue

            if "losses_avg" in name:
                continue

            tensor = reader.get_tensor(name)
            values[name] = tensor

        model_var_lists.append(values)

    # build graph
    inputs = tf.placeholder(tf.int32, [None, None], "inputs")
    preds = tf.placeholder(tf.int32, [None, None], "preds")
    embedding = tf.placeholder(tf.float32, [None, None, None], "embedding")
    mask = tf.placeholder(tf.float32, [None, None], "mask")

    features = {"inputs": inputs, "preds": preds}

    if emb is not None:
        features["embedding"] = embedding
        features["mask"] = mask

    predictions = []

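    # build one sub-graph per checkpoint under scopes tagger_0, tagger_1, ...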
    for i in range(len(args.checkpoints)):
        with tf.variable_scope("tagger_%d" % i):
            model_fn = get_tagger_model(params.model_name,
                                        tf.contrib.learn.ModeKeys.INFER)
            outputs, probs = model_fn(features, model_params_list[i])
            predictions.append(probs)

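    # invert the target vocabulary so label ids map back to label strings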
    labels = []
    ivocab = {}

    for k, idx in vocabularies[0]["targets"].items():
        ivocab[idx] = k

    for idx in range(len(ivocab)):
        labels.append(ivocab[idx])

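    # transition scores used by CRF Viterbi decoding below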
    tparams = get_transition_params(labels)

    # create session
    with tf.Session(config=session_config(params)) as sess:
        tf.global_variables_initializer().run()

        # restore variables
        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            uninit_var_list = []

            for v in all_var_list:
                if v.name.startswith("tagger_%d" % i):
                    uninit_var_list.append(v)

            set_variables(uninit_var_list, model_var_lists[i], "tagger_%d" % i)

        # create input_fn
        all_sorted_inputs = []
        all_sorted_keys = []
        all_input_fns = []

        for i in range(len(args.checkpoints)):
            sorted_inputs, sorted_keys, num_batches, fn = get_sorted_input_fn(
                params.data_path,
                model_params_list[i].vocabulary["inputs"],
                params.decode_batch_size * len(params.device_list),
                model_params_list[i]
            )
            all_sorted_inputs.append(sorted_inputs)
            all_sorted_keys.append(sorted_keys)
            all_input_fns.append(fn)

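        # run every model over its sorted input batches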
        decodes = []

        for i, input_fn in enumerate(all_input_fns):
            outputs = []
            for features in input_fn:
                feed_dict = {
                    inputs: features["inputs"],
                    preds: features["preds"]
                }

                if args.emb_path:
                    feed_dict[embedding] = features["embedding"]
                    feed_dict[mask] = features["mask"]

                output = sess.run(predictions[i], feed_dict=feed_dict)

                outputs.append(output)

            decodes.append(outputs)

        # ensemble: average the label distributions across models
        grouped = list(zip(*decodes))
        probs = []

        for item in grouped:
            # [batch, max_len, num_label]
            probs.append(sum(item) / float(len(item)))

        # collect the decoded label strings in a fresh list; appending to the
        # transposed model outputs would corrupt the reordering further down
        decodes = []
        count = 0

        for item in probs:
            for dist in item:
                # renamed from `inputs` to avoid shadowing the placeholder
                input_line = all_sorted_inputs[0][count]
                seq_len = len(input_line.strip().split()[1:])
                output_text = []

                dist = dist[:seq_len, :]

                if args.viterbi:
                    outputs, _ = tf.contrib.crf.viterbi_decode(dist, tparams)
                else:
                    outputs = np.argmax(dist, axis=1)

                for index in range(seq_len):
                    output_text.append(ivocab[outputs[index]])

                # decode to plain text
                output_text = " ".join(output_text)
                decodes.append(output_text)
                count += 1

        # every model decodes the same file, so the first ordering is used
        sorted_inputs = all_sorted_inputs[0]
        sorted_keys = all_sorted_keys[0]
        sorted_inputs.reverse()
        decodes.reverse()

        outputs = []

        for index in range(len(sorted_inputs)):
            outputs.append(decodes[sorted_keys[index]])

        if not args.output_name:
            base_filename = os.path.basename(params.data_path)
            model_name = params.model_name
            decode_filename = base_filename + "." + model_name + ".decodes"
        else:
            decode_filename = args.output_name

        with tf.gfile.Open(decode_filename, "w") as outfile:
            for output in outputs:
                outfile.write("%s\n" % output)
Example #2
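A model_fn for the SRL tagger in tf.contrib.learn style: the TRAIN branch tracks a loss moving average, selects an optimizer, and logs variable sizes; the EVAL and INFER branches return the loss and the predictions respectively.
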
import numpy as np
import tensorflow as tf

# helpers such as get_initializer, get_tagger_model, parallel_model and
# get_learning_rate_decay are assumed to come from the surrounding project


def srl_model(features, labels, mode, params):
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        initializer = get_initializer(params)
        tf.get_variable_scope().set_initializer(initializer)
        model_fn = get_tagger_model(params.model_name, mode)

        features["targets"] = labels

        with tf.variable_scope("tagger"):
            loss = parallel_model(model_fn, features, params, mode)

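            # track an exponential moving average of the training loss
            # (decay 0.9) and expose it as a summary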
            with tf.variable_scope("losses_avg"):
                loss_moving_avg = tf.get_variable("training_loss",
                                                  initializer=100.0,
                                                  trainable=False)
                lm = loss_moving_avg.assign(loss_moving_avg * 0.9 + loss * 0.1)
                tf.summary.scalar("loss_avg/total_loss", lm)

                with tf.control_dependencies([lm]):
                    loss = tf.identity(loss)

        global_step = tf.train.get_or_create_global_step()
        lr = get_learning_rate_decay(params.learning_rate, global_step, params)

        # create optimizer
        if params.optimizer == "Adam":
            opt = tf.train.AdamOptimizer(lr, beta1=params.adam_beta1,
                                         beta2=params.adam_beta2,
                                         epsilon=params.adam_epsilon)
        elif params.optimizer == "Adadelta":
            opt = tf.train.AdadeltaOptimizer(lr, rho=params.adadelta_rho,
                                             epsilon=params.adadelta_epsilon)
        elif params.optimizer == "SGD":
            opt = tf.train.GradientDescentOptimizer(lr)
        elif params.optimizer == "Nadam":
            opt = tf.contrib.opt.NadamOptimizer(lr, beta1=params.adam_beta1,
                                                beta2=params.adam_beta2,
                                                epsilon=params.adam_epsilon)
        else:
            raise ValueError("Unknown optimizer %s" % params.optimizer)

        tf.summary.scalar("learning_rate", lr)

        log_hook = tf.train.LoggingTensorHook(
            {
                "step": global_step,
                "loss": loss,
                "inputs": tf.shape(features["inputs"]),
                "labels": tf.shape(labels)
            },
            every_n_iter=1,
        )

        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0

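        # log every trainable variable and accumulate the total parameter count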
        for v_name in sorted(list(all_weights)):
            v = all_weights[v_name]
            tf.logging.info("%s\tshape    %s", v.name[:-2].ljust(80),
                            str(v.shape).ljust(20))
            v_size = int(np.prod(np.array(v.shape.as_list())))
            total_size += v_size

        tf.logging.info("Total trainable variables size: %d", total_size)

        train_op = tf.contrib.layers.optimize_loss(
            name="training",
            loss=loss,
            global_step=global_step,
            learning_rate=lr,
            clip_gradients=params.clip_grad_norm or None,
            optimizer=opt,
            colocate_gradients_with_ops=True
        )

        training_chief_hooks = [log_hook]
        predictions = None
    elif mode == tf.contrib.learn.ModeKeys.EVAL:
        model_fn = get_tagger_model(params.model_name, mode)
        features["targets"] = labels
        with tf.variable_scope("tagger"):
            loss, logits = model_fn(features, params)
        predictions = {"predictions": logits}
        train_op = None
        training_chief_hooks = None
    elif mode == tf.contrib.learn.ModeKeys.INFER:
        model_fn = get_tagger_model(params.model_name, mode)
        features["targets"] = labels
        with tf.variable_scope("tagger"):
            outputs, probs = model_fn(features, params)
        predictions = {
            "inputs": features["inputs"],
            "outputs": outputs,
            "distribution": probs
        }
        loss = None
        train_op = None
        training_chief_hooks = None
    else:
        raise ValueError("Unknown mode %s" % mode)

    spec = tf.contrib.learn.ModelFnOps(
        mode=mode, loss=loss, train_op=train_op,
        training_chief_hooks=training_chief_hooks,
        predictions=predictions
    )

    return spec