Ejemplo n.º 1
0
    def _fn(features, labels, params, mode, config):
        """Return the ``tf.estimator.EstimatorSpec`` matching ``mode``.

        Every call operates on a private deep copy of the enclosing ``model``.

        Args:
          features: Input features; sharded with ``dispatcher`` in training and
            passed to the model unchanged otherwise.
          labels: Labels, handled like ``features``.
          params: Parameters forwarded to the model.
          mode: A ``tf.estimator.ModeKeys`` value.
          config: Run configuration or ``None``. When set, training also
            registers a ``hooks.LogWordsPerSecondHook``.

        Returns:
          A ``tf.estimator.EstimatorSpec``.

        Raises:
          ValueError: if ``mode`` is not TRAIN, EVAL or PREDICT.
        """
        net = copy.deepcopy(model)
        mode_keys = tf.estimator.ModeKeys

        if mode == mode_keys.TRAIN:
            # Compute the loss on every shard, then merge the per-shard losses.
            sharded_losses = dispatcher(_loss_op, net,
                                        dispatcher.shard(features),
                                        dispatcher.shard(labels), params, mode)
            loss = _extract_loss(sharded_losses)

            # optimize_loss returns either the train op alone or a
            # (train_op, extra_variables) tuple.
            optimize_result = net.optimize_loss(loss, params=params, hvd=hvd)
            if isinstance(optimize_result, tuple):
                train_op, extra_variables = optimize_result
            else:
                train_op, extra_variables = optimize_result, []

            training_hooks = []
            if extra_variables:
                initializer_hook = hooks.VariablesInitializerHook(
                    extra_variables)
                training_hooks.append(initializer_hook)

            if config is not None:
                net.examples_inputter.visualize(config.model_dir)
                source_length = net.features_inputter.get_length(features)
                target_length = None
                if not model.unsupervised:
                    target_length = net.labels_inputter.get_length(labels)

                # Only sides that expose a length contribute a word counter.
                num_words = {}
                sides = (("source", source_length), ("target", target_length))
                for side, length in sides:
                    if length is not None:
                        num_words[side] = tf.reduce_sum(length)

                words_per_second_hook = hooks.LogWordsPerSecondHook(
                    num_words,
                    every_n_steps=config.save_summary_steps,
                    output_dir=config.model_dir)
                training_hooks.append(words_per_second_hook)

            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op,
                                              training_hooks=training_hooks)

        if mode == mode_keys.EVAL:
            logits, predictions = net(features, labels, params, mode)
            raw_loss = net.compute_loss(logits,
                                        labels,
                                        training=False,
                                        params=params)
            loss = _extract_loss(raw_loss)
            eval_metric_ops = net.compute_metrics(predictions, labels)

            evaluation_hooks = []
            if not (predictions is None or eval_prediction_hooks_fn is None):
                evaluation_hooks.extend(eval_prediction_hooks_fn(predictions))

            return tf.estimator.EstimatorSpec(
                mode,
                loss=loss,
                eval_metric_ops=eval_metric_ops,
                evaluation_hooks=evaluation_hooks)

        if mode == mode_keys.PREDICT:
            _, predictions = net(features, labels, params, mode)

            # Forward example index for reordering predictions.
            if "index" in features:
                predictions["index"] = features["index"]

            serving_key = (tf.saved_model.signature_constants.
                           DEFAULT_SERVING_SIGNATURE_DEF_KEY)
            export_outputs = {
                serving_key: tf.estimator.export.PredictOutput(predictions)
            }

            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        raise ValueError("Invalid mode")
Ejemplo n.º 2
0
    def _fn(features, labels, params, mode, config):
        """model_fn implementation.

        Builds the ``tf.estimator.EstimatorSpec`` for ``mode`` from deep copies
        of the enclosing ``trans_model`` and ``ae_model``:

        * TRAIN, EVAL, and PREDICT while ``infering`` is false only use the
          translation model.
        * PREDICT while ``infering`` is true decodes with both models through
          ``decoding_infer.dynamic_decode`` and initializes their variables
          from ``params["ae_init_checkpoint"]`` and
          ``params["trans_init_checkpoint"]``.

        Args:
          features: Input features. The joint PREDICT branch reads the keys
            ``"ids"``, ``"length"``, ``"ids_ae"``, ``"length_ae"`` and
            ``"ids_ori"``; ``"index"`` is forwarded to the predictions when
            present.
          labels: Labels, forwarded to the translation model.
          params: Dictionary of parameters. The joint PREDICT branch reads
            ``"beam_width"`` (default 1), ``"num_hypotheses"`` (default 1),
            ``"ae_init_checkpoint"`` and ``"trans_init_checkpoint"``.
          mode: A ``tf.estimator.ModeKeys`` value.
          config: Run configuration or ``None``. When set, training also
            registers a ``hooks.LogWordsPerSecondHook``.

        Returns:
          A ``tf.estimator.EstimatorSpec``.

        Raises:
          ValueError: if ``mode`` is invalid, or if ``num_hypotheses`` is
            greater than the beam width in the joint PREDICT branch.
        """
        local_model_trans = copy.deepcopy(trans_model)
        local_model_ae = copy.deepcopy(ae_model)

        if mode == tf.estimator.ModeKeys.TRAIN:
            features_shards = dispatcher.shard(features)
            labels_shards = dispatcher.shard(labels)
            losses_shards = dispatcher(_loss_op, local_model_trans,
                                       features_shards, labels_shards, params,
                                       mode)
            loss = _extract_loss(losses_shards)
            train_op = local_model_trans.optimize_loss(loss,
                                                       params=params,
                                                       hvd=hvd)
            # optimize_loss returns either the train op alone or a
            # (train_op, extra_variables) tuple.
            extra_variables = []
            if isinstance(train_op, tuple):
                train_op, extra_variables = train_op

            training_hooks = []
            if extra_variables:
                training_hooks.append(
                    hooks.VariablesInitializerHook(extra_variables))
            if config is not None:
                local_model_trans.examples_inputter.visualize(config.model_dir)
                features_length = local_model_trans.features_inputter.get_length(
                    features)
                labels_length = (
                    local_model_trans.labels_inputter.get_length(labels)
                    if not trans_model.unsupervised else None)
                num_words = {}
                if features_length is not None:
                    num_words["source"] = tf.reduce_sum(features_length)
                if labels_length is not None:
                    num_words["target"] = tf.reduce_sum(labels_length)
                training_hooks.append(
                    hooks.LogWordsPerSecondHook(
                        num_words,
                        every_n_steps=config.save_summary_steps,
                        output_dir=config.model_dir))
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op,
                                              training_hooks=training_hooks)

        elif mode == tf.estimator.ModeKeys.EVAL:
            logits, predictions = local_model_trans(features, labels, params,
                                                    mode)
            loss = local_model_trans.compute_loss(logits,
                                                  labels,
                                                  training=False,
                                                  params=params)
            loss = _extract_loss(loss)
            eval_metric_ops = local_model_trans.compute_metrics(
                predictions, labels)
            evaluation_hooks = []
            if predictions is not None and eval_prediction_hooks_fn is not None:
                evaluation_hooks.extend(eval_prediction_hooks_fn(predictions))
            return tf.estimator.EstimatorSpec(
                mode,
                loss=loss,
                eval_metric_ops=eval_metric_ops,
                evaluation_hooks=evaluation_hooks)

        elif mode == tf.estimator.ModeKeys.PREDICT and not infering:
            # Plain prediction with the translation model only.
            _, predictions = local_model_trans(features, labels, params, mode)

            # Forward example index for reordering predictions.
            if "index" in features:
                predictions["index"] = features["index"]

            export_outputs = {}
            export_outputs[tf.saved_model.signature_constants.
                           DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
                               tf.estimator.export.PredictOutput(predictions))

            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        elif mode == tf.estimator.ModeKeys.PREDICT:
            # Joint decoding with the translation and auto-encoder models.
            print("==========================================")
            print(json.dumps(params, indent=2))

            beam_size = params.get("beam_width", 1)

            def get_info(local_model, features):
                """Encode ``features`` and prepare one decoding step function.

                Args:
                  local_model: The model whose encoder and decoder are used.
                  features: The features consumed by the model's
                    ``features_inputter``.

                Returns:
                  A tuple ``(symbols_to_logits_fn, initial_state, batch_size)``
                  where ``symbols_to_logits_fn(ids, step, state)`` returns
                  ``(logits, state, attention)`` and ``initial_state`` is the
                  decoder state built from the encoder state tiled
                  ``beam_size`` times.
                """
                with tf.variable_scope(
                        local_model.name,
                        initializer=local_model._initializer(params)):
                    if not compat.reuse():
                        # NOTE(review): this message is printed when
                        # compat.reuse() is falsy, right before the model is
                        # built; confirm the wording is intended.
                        print("REUSING")
                        local_model._build()
                    inputs = local_model.features_inputter.make_inputs(
                        features)
                    length = local_model.features_inputter.get_length(features)

                    with tf.variable_scope("encoder"):
                        (encoder_outputs, encoder_state,
                         encoder_sequence_length) = local_model.encoder.encode(
                             inputs, sequence_length=length, mode=mode)
                    with tf.variable_scope("decoder"):
                        # First dimension of the first encoder output tensor.
                        batch_size = tf.shape(
                            tf.contrib.framework.nest.flatten(encoder_outputs)
                            [0])[0]
                        # Repeat the encoder results once per beam.
                        initial_state = tf.contrib.seq2seq.tile_batch(
                            encoder_state, multiplier=beam_size)
                        memory = tf.contrib.seq2seq.tile_batch(
                            encoder_outputs, multiplier=beam_size)
                        memory_sequence_length = tf.contrib.seq2seq.tile_batch(
                            encoder_sequence_length, multiplier=beam_size)

                        # Fall back to a fresh projection over the target
                        # vocabulary when the model does not provide one.
                        output_layer = local_model.output_layer
                        if output_layer is None:
                            output_layer = tf.layers.Dense(
                                local_model.labels_inputter.vocabulary_size,
                                use_bias=True,
                                dtype=local_model.labels_inputter.dtype)
                            output_layer.build(
                                [None, local_model.decoder.output_size])
                        embedding_fn = lambda ids: tf.nn.embedding_lookup(
                            local_model.labels_inputter.embedding, ids)
                        step_fn, initial_state = local_model.decoder.step_fn(
                            tf.estimator.ModeKeys.PREDICT,
                            batch_size * beam_size,
                            initial_state=initial_state,
                            memory=memory,
                            memory_sequence_length=memory_sequence_length,
                            dtype=local_model.labels_inputter.dtype)

                        def symbols_to_logits_fn(ids, step, state):
                            """Run one decoder step and project to logits."""
                            inputs = embedding_fn(ids)
                            returned_values = step_fn(
                                step, inputs, state,
                                tf.estimator.ModeKeys.PREDICT)
                            outputs, state, attention = returned_values
                            logits = output_layer(outputs)
                            return logits, state, attention

                return symbols_to_logits_fn, initial_state, batch_size

            trans_features = {
                "ids": features["ids"],
                "length": features["length"]
            }
            ae_features = {
                "ids": features["ids_ae"],
                "length": features["length_ae"]
            }
            (trans_symbols_to_logits_fn, trans_initial_state,
             batch_size) = get_info(local_model_trans, trans_features)
            ae_symbols_to_logits_fn, ae_initial_state, _ = get_info(
                local_model_ae, ae_features)

            start_tokens = tf.fill([batch_size],
                                   constants.START_OF_SENTENCE_ID)
            decoding_strategy = decoding_infer.BeamSearch(beam_size,
                                                          length_penalty=0,
                                                          coverage_penalty=0)
            sampler = decoding.BestSampler()

            # Dense [batch_size * beam_size, vocabulary_size] tensor holding
            # `low_rate` at every (row, id) position where `id` is an entry of
            # the tiled "ids_ori" greater than 2, and 0 elsewhere.
            # Presumably ids <= 2 are special tokens; verify against the
            # vocabulary.
            low_rate = 0.05
            ids_ori = tf.contrib.seq2seq.tile_batch(features["ids_ori"],
                                                    beam_size)
            pass_spe_token = tf.where(ids_ori > 2)
            appear_pos = tf.stack([
                pass_spe_token[:, 0],
                tf.cast(tf.gather_nd(ids_ori, pass_spe_token), tf.int64)
            ],
                                  axis=1)
            low_prob = tf.sparse_to_dense(appear_pos, [
                batch_size * beam_size,
                local_model_trans.labels_inputter.vocabulary_size
            ], low_rate, 0, False)
            print(
                "==============================================================="
            )
            print(low_prob)
            # NOTE(review): the tensor built above is only printed; it is
            # discarded here and `low_prob=None` is what reaches
            # dynamic_decode.
            low_prob = None

            sampled_ids, sampled_length = decoding_infer.dynamic_decode(
                trans_symbols_to_logits_fn,
                ae_symbols_to_logits_fn,
                local_model_trans.name,
                local_model_ae.name,
                start_tokens,
                end_id=constants.END_OF_SENTENCE_ID,
                initial_state_trans=trans_initial_state,
                initial_state_ae=ae_initial_state,
                decoding_strategy=decoding_strategy,
                sampler=sampler,
                maximum_iterations=150,
                minimum_iterations=0,
                attention_history=False,
                attention_size=None,
                low_prob=low_prob)
            # Add one to the returned lengths, capped at the size of the
            # third dimension of `sampled_ids`.
            sampled_length = tf.minimum(sampled_length + 1,
                                        tf.shape(sampled_ids)[2])
            target_vocab_rev = (
                local_model_trans.labels_inputter.vocabulary_lookup_reverse())
            target_tokens = target_vocab_rev.lookup(
                tf.cast(sampled_ids, tf.int64))

            predictions = {
                "tokens": target_tokens,
                "length": sampled_length,
            }
            num_hypotheses = params.get("num_hypotheses", 1)
            if num_hypotheses > 0:
                if num_hypotheses > beam_size:
                    raise ValueError(
                        "n_best cannot be greater than beam_width")
                # Keep only the first `num_hypotheses` entries along the
                # second dimension of every prediction.
                for key, value in six.iteritems(predictions):
                    predictions[key] = value[:, :num_hypotheses]

            # NOTE(review): the auto-encoder model is already required by the
            # get_info call above, so this guard cannot skip anything that
            # would otherwise have worked.
            if ae_model is not None:
                ae_init_checkpoint = params["ae_init_checkpoint"]
                ae_variables_map = get_pretrained_variables_map(
                    ae_init_checkpoint)
                tf.contrib.framework.init_from_checkpoint(
                    ae_init_checkpoint, ae_variables_map)

            trans_init_checkpoint = params["trans_init_checkpoint"]
            trans_variables_map = get_pretrained_variables_map(
                trans_init_checkpoint)
            tf.contrib.framework.init_from_checkpoint(trans_init_checkpoint,
                                                      trans_variables_map)

            # Forward example index for reordering predictions.
            if "index" in features:
                predictions["index"] = features["index"]

            export_outputs = {}
            export_outputs[tf.saved_model.signature_constants.
                           DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
                               tf.estimator.export.PredictOutput(predictions))

            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        else:
            raise ValueError("Invalid mode")
Ejemplo n.º 3
0
        def _model_fn(features, labels, params, mode, config):
            """Create the ``tf.estimator.EstimatorSpec`` for ``mode``.

            Args:
              features: Input features; sharded with ``dispatcher`` in
                training and passed to ``self._build`` otherwise.
              labels: Labels, handled like ``features``.
              params: Parameters forwarded to the model and the optimizer.
              mode: A ``tf.estimator.ModeKeys`` value.
              config: Run configuration or ``None``. When set, training also
                registers a ``hooks.CountersHook``.

            Returns:
              A ``tf.estimator.EstimatorSpec``.

            Raises:
              RuntimeError: if ``mode`` is not TRAIN, EVAL or PREDICT.
            """
            modes = tf.estimator.ModeKeys

            if mode == modes.TRAIN:
                word_counters = self._register_word_counters(features, labels)
                training_hooks = []
                if config is not None:
                    counters_hook = hooks.CountersHook(
                        every_n_steps=config.save_summary_steps,
                        output_dir=config.model_dir,
                        counters=word_counters)
                    training_hooks.append(counters_hook)

                sharded_features = dispatcher.shard(features)
                sharded_labels = dispatcher.shard(labels)

                # The per-shard losses are built inside the model scope.
                with tf.variable_scope(self.name,
                                       initializer=self._initializer(params)):
                    sharded_losses = dispatcher(_loss_op, sharded_features,
                                                sharded_labels, params, mode,
                                                config)

                loss = _extract_loss(sharded_losses)
                use_mixed_precision = (self.dtype == tf.float16)
                train_op, extra_variables = optimize_loss(
                    loss, params, mixed_precision=use_mixed_precision)
                if extra_variables:
                    initializer_hook = hooks.VariablesInitializerHook(
                        extra_variables)
                    training_hooks.append(initializer_hook)

                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=training_hooks)

            if mode == modes.EVAL:
                with tf.variable_scope(self.name):
                    logits, predictions = self._build(features,
                                                      labels,
                                                      params,
                                                      mode,
                                                      config=config)
                    raw_loss = self._compute_loss(features, labels, logits,
                                                  params, mode)

                loss = _extract_loss(raw_loss)
                eval_metric_ops = self._compute_metrics(
                    features, labels, predictions)  # pylint: disable=assignment-from-none

                evaluation_hooks = []
                if not (predictions is None
                        or eval_prediction_hooks_fn is None):
                    evaluation_hooks.extend(
                        eval_prediction_hooks_fn(predictions))

                return tf.estimator.EstimatorSpec(
                    mode,
                    loss=loss,
                    eval_metric_ops=eval_metric_ops,
                    evaluation_hooks=evaluation_hooks)

            if mode == modes.PREDICT:
                with tf.variable_scope(self.name):
                    _, predictions = self._build(features,
                                                 labels,
                                                 params,
                                                 mode,
                                                 config=config)

                # Forward example index for reordering predictions.
                if "index" in features:
                    predictions["index"] = features["index"]

                serving_key = (tf.saved_model.signature_constants.
                               DEFAULT_SERVING_SIGNATURE_DEF_KEY)
                export_outputs = {
                    serving_key:
                        tf.estimator.export.PredictOutput(predictions)
                }

                return tf.estimator.EstimatorSpec(
                    mode,
                    predictions=predictions,
                    export_outputs=export_outputs)

            raise RuntimeError("Invalid mode")
Ejemplo n.º 4
0
    def _model_fn(features, labels, params, mode, config):
      """model_fn implementation.

      Args:
        features: Input features; sharded with ``dispatcher`` in training and
          passed to ``self._build`` otherwise.
        labels: Labels, handled like ``features``.
        params: Dictionary of parameters forwarded to the model and the
          optimizer. The ``"freeze"`` entry is read but currently has no
          effect because selective optimization is disabled below.
        mode: A ``tf.estimator.ModeKeys`` value.
        config: Run configuration or ``None``. When set, training also
          registers a ``hooks.CountersHook``.

      Returns:
        A ``tf.estimator.EstimatorSpec``.

      Raises:
        RuntimeError: if ``mode`` is not TRAIN, EVAL or PREDICT.
      """
      if mode == tf.estimator.ModeKeys.TRAIN:
        counters = self._register_word_counters(features, labels)
        training_hooks = []
        if config is not None:
          training_hooks.append(hooks.CountersHook(
              every_n_steps=config.save_summary_steps,
              output_dir=config.model_dir,
              counters=counters))

        features_shards = dispatcher.shard(features)
        labels_shards = dispatcher.shard(labels)

        with tf.variable_scope(self.name, initializer=self._initializer(params)):
          losses_shards = dispatcher(
              _loss_op, features_shards, labels_shards, params, mode, config)

        loss = _extract_loss(losses_shards)

        # TODO: freeze update: get the config, get var_list, pass var_list.
        # The variable list differs between LSTM and Transformer, so before
        # selecting variables, check that they exist in the checkpoint and
        # make the selection architecture dependent.
        # TODO: simpler alternative: always pass var_list, using None when
        # "freeze" is not set.
        freeze_params = params.get("freeze")
        # Selective optimization is intentionally disabled; the intended
        # condition is kept in the trailing comment.
        if False:  # freeze_params is not None:
          # The logger formats its message with %-style arguments, so the
          # message needs a placeholder for freeze_params.
          tf.logging.info(
              "Optimizing selected network components: %s", freeze_params)

          var_list = self._get_variables(freeze_params)

          train_op, extra_variables = optimize_loss(
              loss,
              params,
              mixed_precision=(self.dtype == tf.float16),
              var_list=var_list)
        else:
          train_op, extra_variables = optimize_loss(
              loss, params, mixed_precision=(self.dtype == tf.float16))

        if extra_variables:
          training_hooks.append(hooks.VariablesInitializerHook(extra_variables))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            training_hooks=training_hooks)
      elif mode == tf.estimator.ModeKeys.EVAL:
        with tf.variable_scope(self.name):
          logits, predictions = self._build(features, labels, params, mode, config=config)
          loss = self._compute_loss(features, labels, logits, params, mode)

        loss = _extract_loss(loss)
        eval_metric_ops = self._compute_metrics(features, labels, predictions)  # pylint: disable=assignment-from-none
        evaluation_hooks = []
        if predictions is not None and eval_prediction_hooks_fn is not None:
          evaluation_hooks.extend(eval_prediction_hooks_fn(predictions))
        return tf.estimator.EstimatorSpec(
            mode,
            loss=loss,
            eval_metric_ops=eval_metric_ops,
            evaluation_hooks=evaluation_hooks)
      elif mode == tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope(self.name):
          _, predictions = self._build(features, labels, params, mode, config=config)

        # Forward example index for reordering predictions.
        if "index" in features:
          predictions["index"] = features["index"]

        export_outputs = {}
        export_outputs[tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = (
            tf.estimator.export.PredictOutput(predictions))

        return tf.estimator.EstimatorSpec(
            mode,
            predictions=predictions,
            export_outputs=export_outputs)
      else:
        raise RuntimeError("Invalid mode")