def _make_output(self, inputs, params): task = params.get('task', 'classification') task_type = params.get('task_type', 'multiclass') if task == 'classification': logits = tf.contrib.layers.fully_connected(inputs, self._num_classes, activation_fn=None, scope='logits') if task_type == 'multiclass': predictions = tf.cast(tf.argmax(logits, -1), tf.int32) output_score = tf.nn.softmax(logits, -1) elif task_type == 'multilabel': threshold = params.get('threshold', 0.5) output_score = tf.sigmoid(logits) predictions = tf.cast(tf.greater(output_score, threshold), tf.int32) elif task_type == 'topk': output_score = logits #tf.nn.softmax(logits, -1) predictions = tf.cast(tf.greater(logits, 0), tf.int32) #tf.cast(tf.argmax(logits, -1), tf.int32) # # predictions = tf.one_hot(predictions, depth=self._num_classes, axis=-1, dtype=tf.int32) else: raise ConfigureError("Task type %s is not support for task %s. " "Only multiclass and multilabel is support for task %s" % (task_type, task, task)) elif task == 'rank': logits = tf.contrib.layers.fully_connected(inputs, 1, activation_fn=None, scope='logits') predictions = logits output_score = logits else: raise ConfigureError( "Task %s is not support. Only task and classification tasks are supported" % task) output_dict = {'logits': logits, 'predictions': {'predictions': predictions, 'output_score': output_score}} output_score = tf.estimator.export.PredictOutput(output_score) output_predictions = tf.estimator.export.PredictOutput(predictions) export_outputs = {"output_score": output_score} output_dict['export_outputs'] = export_outputs return output_dict
def load_from_files(self, directory): if not os.path.exists(directory): logger.warning("Vocabulary directory %s does not exist.", directory) return False namespaces_file = os.path.join(directory, NAMESPACE_PADDING_FILE) if not os.path.exists(namespaces_file): logger.warning("Vocabulary namespaces file %s does not exist", namespaces_file) return False vocab_filenames = [filename for filename in os.listdir(directory) if filename.startswith(VOCAB_FILE[:6]) and filename.endswith(VOCAB_FILE[-4:])] if len(vocab_filenames) == 0: logger.warning("Vocabulary file %s does not exist") self._non_padded_namespaces = load_from_txt(namespaces_file) for vocab_filename in vocab_filenames: namespace = vocab_filename[6:-4] vocab_namespace_file = os.path.join(directory, vocab_filename) self._namespace_to_path[namespace] = vocab_namespace_file vocab_namespace = load_from_txt(vocab_namespace_file) self._index_to_token[namespace] = dict((index, token) for index, token in enumerate(vocab_namespace)) self._token_to_index[namespace] = dict((token, index) for index, token in enumerate(vocab_namespace)) if self.valid(): return True else: raise ConfigureError("Vocabulary valid error")
def _process(self, example): #example['label'] = example['label'][0] fields: Dict[str, Field] = {} if 'premise' in example: tokenized_premise = self._tokenizer.tokenize(example['premise']) fields["premise"] = TextField(tokenized_premise, self._token_indexers, max_length=self._max_length) if 'hypothesis' in example: tokenized_hypothesis = self._tokenizer.tokenize( example['hypothesis']) fields["hypothesis"] = TextField(tokenized_hypothesis, self._token_indexers, max_length=self._max_length) if 'label' in example: if isinstance(example['label'], list): if self._num_label is None: raise ConfigureError( "the number of labels is not provided for multi-label classification." ) fields['label'] = MultiLabelField(example['label'], num_label=self._num_label) else: fields['label'] = LabelField(example['label']) return Instance(fields)
def pop_choice(self, path, choice, default=None): value = self.pop(path, default) if value not in choice: raise ConfigureError( "value %s get by key %s is not in acceptable choices %s" % (value, path, str(choice))) return value
def init_from_params(cls, params, vocab): config_file = params.pop('config_file', None) if config_file is None: raise ConfigureError( "Please provide ELMo config file for ELMo embedding.") # weight_file = params.pop('weight_file', None) # if weight_file is None: # logger.warning("The ELMo embedding is initialize randomly.") encoder_name = params.pop("encoder_name", "elmo") vocab_namespace = params.pop('namespace', 'elmo_characters') dropout_rate = params.pop_float('dropout_rate', 0.0) ckpt_to_initialize_from = params.pop('ckpt_to_initialize_from', None) weight_file = params.pop('weight_file', None) if ckpt_to_initialize_from is None and weight_file is None: logger.warning("The ELMo embedding is initialize randomly.") # tmp_dir = params.pop('tmp_dir', None) # if tmp_dir is None: # if weight_file: # tmp_dir = os.path.dirname(weight_file) # else: # tmp_dir = "./" params.assert_empty(cls.__name__) return cls(config_file=config_file, ckpt_to_initialize_from=ckpt_to_initialize_from, dropout_rate=dropout_rate, encoder_name=encoder_name, vocab_namespace=vocab_namespace, weight_file=weight_file)
def init_from_params(cls, params, vocab): config_file = params.pop('config_file', None) if config_file is None: raise ConfigureError("Please provide bert config file for bert embedding.") old_vocab_file = params.pop('vocab_file', None) if old_vocab_file is None: logger.warning("The vocab file is not provided. We consider the embedding vocab is the same as the data " "vocab acquiescently.") ckpt_to_initialize_from = params.pop('ckpt_to_initialize_from', None) if ckpt_to_initialize_from is None: logger.warning("The bert embedding is initialize randomly.") num_oov_buckets = params.pop_int("num_oov_buckets", 0) use_one_hot_embeddings = params.pop_bool("use_one_hot_embeddings", False) encoder_name = params.pop("encoder_name", "bert") vocab_namespace = params.pop("namespace", 'tokens') mask_namespace = params.pop("mask_namespace", None) new_vocab_file = vocab.get_vocab_path(vocab_namespace) new_vocab_size = vocab.get_vocab_size(vocab_namespace) projection_dim = params.pop_int("projection_dim", None) dropout_rate = params.pop_float("dropout_rate", 0.0) remove_bos_eos = params.pop_bool("remove_bos_eos", True) params.assert_empty(cls.__name__) return cls(config_file=config_file, ckpt_to_initialize_from=ckpt_to_initialize_from, new_vocab_file=new_vocab_file, new_vocab_size=new_vocab_size, num_oov_buckets= num_oov_buckets, old_vocab_file=old_vocab_file, vocab_namespace=vocab_namespace, remove_bos_eos = remove_bos_eos, mask_namespace=mask_namespace, projection_dim=projection_dim, dropout_rate=dropout_rate, use_one_hot_embeddings=use_one_hot_embeddings, encoder_name=encoder_name)
def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]): if token.text is None: raise ConfigureError( 'CharactersIndexer needs a tokenizer that retains text') for character in self._character_tokenizer.tokenize(token.text): # If `text_id` is set on the character token (e.g., if we're using byte encoding), we # will not be using the vocab for this character. if getattr(character, 'text_id', None) is None: counter[self._namespace][character.text] += 1
def _read(self, mode: str): filename = self.get_filename_by_mode(mode) if filename: file_path = os.path.join(self._data_path, filename) if file_path.lower().endswith("jsonl"): if self._field_mapping is None: raise ConfigureError( "field mapping is not provided for jsonl file.") with open(file_path, 'r') as json_file: logger.info("Reading instances from jsonl dataset at: %s", file_path) for line in json_file: fields = json.loads(line) example = {} for (field_tar, field_src) in self._field_mapping.items(): example[field_tar] = fields[field_src] yield self._process(example) # example = {} # example['premise'] = fields['answer'] # example['hypothesis'] = fields['question'] # example['label'] = fields['label'] # yield self._process(example) if file_path.lower().endswith("tsv"): if self._field_mapping is None: raise ConfigureError( "field mapping is not provided for tsv file.") with open(file_path, 'r') as csv_file: logger.info("Reading instances from tsv dataset at: %s", file_path) for line in csv_file: fields = line.strip().split("\t") example = {} for (field_tar, field_src) in self._field_mapping.items(): example[field_tar] = fields[int(field_src)] yield self._process(example) else: return None
def takes_arg(obj, arg: str) -> bool: """ Checks whether the provided obj takes a certain arg. If it's a class, we're really checking whether its constructor does. If it's a function or method, we're checking the object itself. Otherwise, we raise an error. """ if inspect.isclass(obj): signature = inspect.signature(obj.__init__) elif inspect.ismethod(obj) or inspect.isfunction(obj): signature = inspect.signature(obj) else: raise ConfigureError(f"object {obj} is not callable") return arg in signature.parameters
def _make_loss(self, logits, labels, params): task = params.get('task', 'classification') task_type = params.get('task_type', 'multiclass') if task == 'classification': if task_type == 'multiclass': #loss = GHM_Loss().ghm_class_loss(logits=logits, targets=labels) loss = focal_loss(logits=logits, labels=labels) # loss = tf.reduce_mean( # tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits)) elif task_type == 'multilabel': loss = tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)) elif task_type == 'topk': loss = multilabel_categorical_crossentropy(labels=labels, logits=logits) else: raise ConfigureError("Task type %s is not support for task %s. " "Only multiclass and multilabel is support for task %s" % (task_type, task, task)) elif task == 'rank': loss = rank_hinge_loss(labels=labels, logits=logits, params=params) else: raise ConfigureError( "Task %s is not support. Only task and classification tasks are supported" % task) return loss
def _read_pretrained_embeddings_text(pretrained_file, embedding_dim, vocab, vocab_namespace): vocab_tokens = vocab.get_vocab_tokens(vocab_namespace) vocab_size = vocab.get_vocab_size(vocab_namespace) embeddings = {} logger.info("Reading pretrained embeddings from: %s" % pretrained_file) with open(pretrained_file, 'r', encoding='utf-8') as embeddings_file: for line in tqdm.tqdm(embeddings_file): token = line.split(" ", 1)[0] if token in vocab_tokens: fields = line.rstrip().split(' ') if len(fields) - 1 != embedding_dim: logger.warning( "Found line with wrong number of dimensions (expected: %d; actual: %d): %s", embedding_dim, len(fields) - 1, line) continue vector = np.asarray(fields[1:], dtype='float32') embeddings[token] = vector if not embeddings: ConfigureError( "The embedding_dim or vocabulary does not fit the pretrained embedding." ) all_embeddings = np.asarray(list(embeddings.values())) embeddings_mean = float(np.mean(all_embeddings)) embeddings_std = float(np.std(all_embeddings)) embedding_matrix = np.random.normal(embeddings_mean, embeddings_std, (vocab_size, embedding_dim)) embedding_matrix = embedding_matrix.astype(np.float32) num_tokens_found = 0 index_to_tokens = vocab.get_vocab_index_to_token(vocab_namespace) for i in range(vocab_size): token = index_to_tokens[i] if token in embeddings: embedding_matrix[i] = embeddings[token] num_tokens_found += 1 else: logger.debug( "Token %s was not found in the embedding file. Initialising randomly.", token) logger.info("Pretrained embeddings were found for %d out of %d tokens", num_tokens_found, vocab_size) return embedding_matrix
def rank_hinge_loss(labels, logits, params): num_retrieval = params.get('num_retrieval', None) if num_retrieval is None: raise ConfigureError( "The parameter num_retrieval is not assigned or the dataset is not support rank loss." ) margin = params.get('rank_loss_margin', 1.0) labels = tf.argmax(labels, axis=-1) labels = tf.reshape(labels, (-1, num_retrieval)) logits = tf.reshape(logits, (-1, num_retrieval)) label_mask = tf.cast(tf.sign(labels), tf.float32) label_count = tf.reduce_sum(label_mask, axis=-1) y_pos = tf.reduce_sum(label_mask * logits, axis=-1) / label_count y_neg = tf.reduce_sum( (1. - label_mask) * logits, axis=-1) / (num_retrieval - label_count) loss = tf.maximum(0., margin - y_pos + y_neg) loss = tf.reduce_mean(loss) return loss
def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary): # TODO(brendanr): Retain the token to index mappings in the vocabulary and remove this # pylint pragma. See: # https://github.com/allenai/allennlp/blob/master/allennlp/data/token_indexers/wordpiece_indexer.py#L113 # pylint: disable=unused-argument texts = [token.text for token in tokens] texts = [ELMoCharacterMapper.bos_token ] + texts + [ELMoCharacterMapper.eos_token] if any(text is None for text in texts): raise ConfigureError( 'ELMoTokenCharactersIndexer needs a tokenizer ' 'that retains text') return { self._namespace: [ np.array(ELMoCharacterMapper.convert_word_to_char_ids(text), dtype=np.int64) for text in texts ] }
def init_from_params(cls, params, vocab): token_embedder_params = params.pop('encoders', None) if token_embedder_params is not None: token_embedders = [ Encoder.init_from_params(subparams, vocab=vocab) for name, subparams in token_embedder_params.items() ] # if isinstance(token_embedder_params, Dict): # # else: # token_embedders = [ # Encoder.init_from_params(subparams, vocab=vocab) # for subparams in token_embedder_params # ] else: raise ConfigureError("The parameters of embeddings is not provided.") params.assert_empty(cls.__name__) return cls(token_embedders)
def tokens_to_indices(self, tokens: List[Token], vocabulary: Vocabulary): indices = [] for token in itertools.chain(self._start_tokens, tokens, self._end_tokens): token_indices = np.zeros(self._max_word_length, dtype=np.int64) if token.text is None: raise ConfigureError( 'TokenCharactersIndexer needs a tokenizer that retains text' ) for character_idx, character in enumerate( self._character_tokenizer.tokenize(token.text)): if character_idx >= self._max_word_length: break else: if getattr(character, 'text_id', None) is not None: # `text_id` being set on the token means that we aren't using the vocab, we just # use this id instead. index = character.text_id else: index = vocabulary.get_token_index( character.text, self._namespace) token_indices[character_idx] = index indices.append(token_indices) return {self._namespace: indices}
def assert_empty(self, class_name): if self._params: raise ConfigureError( "Extra parameters are provided %s for class %s" % (str(self._params), class_name))
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get( 'hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError( "The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get( 'premise/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) hyp_mask = tf.expand_dims(hyp_mask, -1) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get( 'hypothesis/elmo_characters', None) h_s, c1 = nn.lstm(premise_tokens, self._hidden_dim, seq_len=prem_seq_lengths, name='premise') h_t, c2 = nn.lstm(hypothesis_tokens, self._hidden_dim, seq_len=hyp_seq_lengths, name='hypothesis') lstm_m = MatchLSTMCell(self._hidden_dim, h_s, prem_mask) k_m, _ = tf.nn.dynamic_rnn(lstm_m, h_t, hyp_seq_lengths, dtype=tf.float32) k_valid = select(k_m, hyp_seq_lengths) output_dict = self._make_output(k_valid, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) if features.get( 'premise/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) premise_outs, c1 = nn.bi_lstm(premise_tokens, self._hidden_dim, seq_len=prem_seq_lengths, name='premise') premise_bi = tf.concat(premise_outs, axis=2) premise_bi = premise_bi * prem_mask eps = 1e-11 ### Mean pooling premise_sum = tf.reduce_sum(premise_bi, 1) premise_ave = tf.div( premise_sum, tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1) + eps) # MLP layer h_mlp = tf.contrib.layers.fully_connected(premise_ave, self._hidden_dim, scope='fc1') # Dropout applied to classifier h_drop = tf.layers.dropout(h_mlp, self._dropout_rate, training=is_training) # Get prediction output_dict = self._make_output(h_drop, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) metrics['map'] = tf.metrics.average_precision_at_k( labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'], k=2) metrics['precision_1'] = tf.metrics.precision_at_k( labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'], k=1, class_id=1) #tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get( 'hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError( "The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get( 'premise/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 # prem_mask = tf.expand_dims(prem_mask, -1) # hyp_mask = tf.expand_dims(hyp_mask, -1) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get( 'hypothesis/elmo_characters', None) with tf.variable_scope("Attend"): F_a_bar = self._feedForwardBlock(premise_tokens, self._hidden_dim, 'F', is_training=is_training) F_b_bar = self._feedForwardBlock(hypothesis_tokens, self._hidden_dim, 'F', isReuse=True, is_training=is_training) # e_i,j = F'(a_hat, b_hat) = F(a_hat).T * F(b_hat) (1) #alignment_attention = Attention(self.hidden_size, self.hidden_size) #alpha = alignment_attention(F_b_bar, F_a_bar, keys_mask=self.query_mask) #beta = alignment_attention(F_a_bar, F_b_bar, keys_mask=self.doc_mask) alpha, beta = nn.bi_uni_attention(F_a_bar, F_b_bar, query_len=prem_seq_lengths, key_len=hyp_seq_lengths) with tf.variable_scope("Compare"): a_beta = tf.concat([premise_tokens, alpha], axis=2) b_alpha = tf.concat([hypothesis_tokens, beta], axis=2) # v_1,i = G([a_bar_i, beta_i]) # v_2,j = G([b_bar_j, alpha_j]) (3) v_1 = self._feedForwardBlock(a_beta, self._hidden_dim, 'G', is_training=is_training) v_2 = self._feedForwardBlock(b_alpha, self._hidden_dim, 'G', isReuse=True, is_training=is_training) with tf.variable_scope("Aggregate"): # v1 = \sum_{i=1}^l_a v_{1,i} # v2 = \sum_{j=1}^l_b v_{2,j} (4) v1_sum = tf.reduce_sum(v_1, axis=1) v2_sum = tf.reduce_sum(v_2, axis=1) # y_hat = H([v1, v2]) (5) v = tf.concat([v1_sum, v2_sum], axis=1) ff_outputs = self._feedForwardBlock(v, self._hidden_dim, 'H', is_training=is_training) output_dict = self._make_output(ff_outputs, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." 
% mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) #metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [tf.shape(hypothesis_tokens), tf.shape(premise_tokens), # tf.shape(alpha), tf.shape(beta)] return output_dict
def forward(self, features, labels, mode, params): eps = 1e-12 features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get( 'hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError( "The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get( 'hypothesis/elmo_characters', None) s = self._max_length d0 = premise_tokens.get_shape()[2] # zero padding to inputs for wide convolution def pad_for_wide_conv(x): return tf.pad( x, np.array([[0, 0], [0, 0], [self._kernel_size - 1, self._kernel_size - 1], [0, 0]]), "CONSTANT", name="pad_wide_conv") def cos_sim(v1, v2): norm1 = tf.sqrt(tf.reduce_sum(tf.square(v1), axis=1)) norm2 = tf.sqrt(tf.reduce_sum(tf.square(v2), axis=1)) dot_products = tf.reduce_sum(v1 * v2, axis=1, name="cos_sim") return dot_products / (norm1 * norm2 + eps) def make_attention_mat(x1, x2): # x1, x2 = [batch, height, width, 1] = [batch, d, s, 1] # x2 => [batch, height, 1, width] # [batch, width, wdith] = [batch, s, s] euclidean = tf.sqrt( tf.reduce_sum(tf.square(x1 - tf.matrix_transpose(x2)), axis=1) + eps) return 1.0 / (1.0 + euclidean) def convolution(name_scope, x, d, reuse): with tf.name_scope(name_scope + "-conv"): with tf.variable_scope("conv") as scope: conv = tf.contrib.layers.conv2d( inputs=x, num_outputs=self._hidden_dim, kernel_size=(d, self._kernel_size), stride=1, padding="VALID", activation_fn=tf.nn.tanh, weights_initializer=tf.contrib.layers. 
xavier_initializer_conv2d(), #weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg), biases_initializer=tf.constant_initializer(1e-04), reuse=reuse, trainable=True, scope=scope) # Weight: [filter_height, filter_width, in_channels, out_channels] # output: [batch, 1, input_width+filter_Width-1, out_channels] == [batch, 1, s+w-1, di] # [batch, di, s+w-1, 1] conv_trans = tf.transpose(conv, [0, 3, 2, 1], name="conv_trans") return conv_trans def w_pool(variable_scope, x, attention): # x: [batch, di, s+w-1, 1] # attention: [batch, s+w-1] with tf.variable_scope(variable_scope + "-w_pool"): if self._model_type == "ABCNN2" or self._model_type == "ABCNN3": pools = [] # [batch, s+w-1] => [batch, 1, s+w-1, 1] attention = tf.transpose( tf.expand_dims(tf.expand_dims(attention, -1), -1), [0, 2, 1, 3]) for i in range(s): # [batch, di, w, 1], [batch, 1, w, 1] => [batch, di, 1, 1] pools.append( tf.reduce_sum( x[:, :, i:i + self._kernel_size, :] * attention[:, :, i:i + self._kernel_size, :], axis=2, keep_dims=True)) # [batch, di, s, 1] w_ap = tf.concat(pools, axis=2, name="w_ap") else: w_ap = tf.layers.average_pooling2d( inputs=x, # (pool_height, pool_width) pool_size=(1, self._kernel_size), strides=1, padding="VALID", name="w_ap") # [batch, di, s, 1] return w_ap def all_pool(variable_scope, x): with tf.variable_scope(variable_scope + "-all_pool"): if variable_scope.startswith("input"): pool_width = s d = d0 else: pool_width = s + self._kernel_size - 1 d = self._hidden_dim all_ap = tf.layers.average_pooling2d( inputs=x, # (pool_height, pool_width) pool_size=(1, pool_width), strides=1, padding="VALID", name="all_ap") # [batch, di, 1, 1] # [batch, di] all_ap_reshaped = tf.reshape(all_ap, [-1, d]) # all_ap_reshaped = tf.squeeze(all_ap, [2, 3]) return all_ap_reshaped def CNN_layer(variable_scope, x1, x2, d): # x1, x2 = [batch, d, s, 1] with tf.variable_scope(variable_scope): if self._model_type == "ABCNN1" or self._model_type == "ABCNN3": with tf.name_scope("att_mat"): aW = tf.get_variable( name="aW", shape=(s, d), initializer=tf.contrib.layers. 
xavier_initializer(), #regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg) ) # [batch, s, s] att_mat = make_attention_mat(x1, x2) # [batch, s, s] * [s,d] => [batch, s, d] # matrix transpose => [batch, d, s] # expand dims => [batch, d, s, 1] x1_a = tf.expand_dims( tf.matrix_transpose( tf.einsum("ijk,kl->ijl", att_mat, aW)), -1) x2_a = tf.expand_dims( tf.matrix_transpose( tf.einsum("ijk,kl->ijl", tf.matrix_transpose(att_mat), aW)), -1) # [batch, d, s, 2] x1 = tf.concat([x1, x1_a], axis=3) x2 = tf.concat([x2, x2_a], axis=3) left_conv = convolution(name_scope="left", x=pad_for_wide_conv(x1), d=d, reuse=False) right_conv = convolution(name_scope="right", x=pad_for_wide_conv(x2), d=d, reuse=True) left_attention, right_attention = None, None if self._model_type == "ABCNN2" or self._model_type == "ABCNN3": # [batch, s+w-1, s+w-1] att_mat = make_attention_mat(left_conv, right_conv) # [batch, s+w-1], [batch, s+w-1] left_attention, right_attention = tf.reduce_sum( att_mat, axis=2), tf.reduce_sum(att_mat, axis=1) left_wp = w_pool(variable_scope="left", x=left_conv, attention=left_attention) left_ap = all_pool(variable_scope="left", x=left_conv) right_wp = w_pool(variable_scope="right", x=right_conv, attention=right_attention) right_ap = all_pool(variable_scope="right", x=right_conv) return left_wp, left_ap, right_wp, right_ap x1_expanded = tf.expand_dims( tf.transpose(premise_tokens, [0, 2, 1]), -1) x2_expanded = tf.expand_dims( tf.transpose(hypothesis_tokens, [0, 2, 1]), -1) LO_0 = all_pool(variable_scope="input-left", x=x1_expanded) RO_0 = all_pool(variable_scope="input-right", x=x2_expanded) LI_1, LO_1, RI_1, RO_1 = CNN_layer(variable_scope="CNN-1", x1=x1_expanded, x2=x2_expanded, d=d0) sims = [cos_sim(LO_0, RO_0), cos_sim(LO_1, RO_1)] #if self._num_layers > 1: for i in range(1, self._num_layers): _, LO_2, _, RO_2 = CNN_layer(variable_scope="CNN-2", x1=LI_1, x2=RI_1, d=self._hidden_dim) # self.test = LO_2 # self.test2 = RO_2 sims.append(cos_sim(LO_2, RO_2)) with tf.variable_scope("output-layer"): output_features = tf.concat([tf.stack(sims, axis=1)], axis=1, name="output_features") output_dict = self._make_output(output_features, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) metrics['auc'] = tf.metrics.auc( labels=labels, predictions=output_dict['predictions']) output_dict['metrics'] = metrics return output_dict
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) #########Word Embedding#################### premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get( 'hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError( "The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get( 'premise/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) hyp_mask = tf.expand_dims(hyp_mask, -1) premise_ins = [] hypothesis_ins = [] premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get( 'hypothesis/elmo_characters', None) premise_ins.append(premise_tokens) hypothesis_ins.append(hypothesis_tokens) premise_chars = features_embedding.get('premise/chars', None) hypothesis_chars = features_embedding.get('hypothesis/chars', None) if premise_chars is not None and hypothesis_chars is not None: with tf.variable_scope("conv") as scope: conv_pre = nn.multi_conv1d_max( premise_chars, self._char_filter_size, self._char_filter_channel_dims, "VALID", is_training, self._dropout_rate, scope='conv') scope.reuse_variables() conv_hyp = nn.multi_conv1d_max( hypothesis_chars, self._char_filter_size, self._char_filter_channel_dims, "VALID", is_training, self._dropout_rate, scope='conv') # conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size]) # conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size]) premise_ins.append(conv_pre) hypothesis_ins.append(conv_hyp) premise_pos = features_embedding.get('premise/pos_tags', None) hypothesis_pos = features_embedding.get('hypothesis/pos_tags', None) if premise_pos is not None and hypothesis_pos is not None: premise_ins.append(premise_pos) hypothesis_ins.append(hypothesis_pos) premise_exact_match = features.get('premise/exact_match_labels', None) hypothesis_exact_match = features.get( 'hypothesis/exact_match_labels', None) if premise_exact_match is not None and hypothesis_exact_match is not None: premise_ins.append( tf.expand_dims(tf.cast(premise_exact_match, tf.float32), -1)) hypothesis_ins.append( tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32), -1)) premise_in = tf.concat(premise_ins, axis=2) hypothesis_in = tf.concat(hypothesis_ins, 
axis=2) premise_in = nn.highway_network(premise_in, 2, output_size=self._hidden_dim, dropout_rate=self._dropout_rate, is_trainging=is_training, scope="premise_highway") hypothesis_in = nn.highway_network(hypothesis_in, 2, output_size=self._hidden_dim, dropout_rate=self._dropout_rate, is_trainging=is_training, scope="hypothesis_highway") ########Attention Stack-GRU################ def gru_network(input, input_len, name="gru_network"): with tf.variable_scope(name): gru_input = input for i in range(self._num_rnn_layer): with tf.variable_scope("layer_%s" % i): seq, c1 = nn.gru(gru_input, self._hidden_dim, seq_len=input_len, initializer=self._initializer) gru_input = tf.concat([gru_input, seq], axis=2) return gru_input premise_gru = gru_network(premise_in, prem_seq_lengths, name='premise_gru_network') hypothesis_gru = gru_network(hypothesis_in, hyp_seq_lengths, name='hypothesis_gru_network') premise_gru = premise_gru * prem_mask hypothesis_gru = hypothesis_gru * hyp_mask ######### premise_att = nn.attention_pool(premise_gru, self._hidden_dim, seq_len=prem_seq_lengths, initializer=self._initializer, name='premise_attention_pool') hypothesis_att = nn.attention_pool( hypothesis_gru, self._hidden_dim, seq_len=hyp_seq_lengths, initializer=self._initializer, name='hypothesis_attention_pool') ############Dynamic Re-read Mechanism################ def dynamic_reread(h_seq_a, h_a, h_b, h_a_len, name="dymanic_reread"): with tf.variable_scope(name): h_a_pre = h_a # h_a_pre = nn.highway_layer(h_a, self._hidden_dim, initializer=self._initializer, # scope="h_a_pre_highway") # h_seq_a = nn.highway_layer(h_seq_a, self._hidden_dim, initializer=self._initializer, # scope="h_seq_a_highway") # h_b = nn.highway_layer(h_b, self._hidden_dim, initializer=self._initializer, # scope="h_b_highway") ##### w_d = tf.get_variable( "w_d_weights", (h_seq_a.shape[-1].value, h_a_pre.shape[-1].value), initializer=self._initializer) u_d = tf.get_variable( "u_d_weights", (h_a_pre.shape[-1].value, h_a_pre.shape[-1].value), initializer=self._initializer) m_d = tf.get_variable( "m_d_weights", (h_b.shape[-1].value, h_a_pre.shape[-1].value), initializer=self._initializer) omega_d = tf.get_variable("omega_d_weights", (h_a_pre.shape[-1].value, 1), initializer=self._initializer) ########## m_d_h_b = tf.tensordot(h_b, m_d, axes=[-1, 0]) h_seq_a_w_d = tf.tensordot(h_seq_a, w_d, axes=[-1, 0]) if h_a_len is not None: mask = tf.expand_dims(tf.sequence_mask( h_a_len, tf.shape(h_seq_a)[1], dtype=tf.float32), axis=2) else: mask = None gru_cell = tf.nn.rnn_cell.GRUCell( h_a_pre.shape[-1].value, kernel_initializer=self._initializer) for i in range(self._reread_length): u_d_h_a_pre = tf.tensordot(h_a_pre, u_d, axes=[-1, 0]) m_a = tf.nn.tanh( h_seq_a_w_d + tf.expand_dims(m_d_h_b + u_d_h_a_pre, 1)) m_a = tf.tensordot(m_a, omega_d, axes=[-1, 0]) if mask is not None: m_a = m_a + (1. 
- mask) * tf.float32.min alpha = tf.nn.softmax(self._beta * m_a, axis=1) alpha = tf.reduce_sum(alpha * h_seq_a, axis=1) gru_output, gru_state = gru_cell(alpha, h_a_pre) h_a_pre = gru_state return gru_output premise_v = dynamic_reread(premise_gru, premise_att, hypothesis_att, prem_seq_lengths, name='premise_dynamic_reread') hypothesis_v = dynamic_reread(hypothesis_gru, hypothesis_att, premise_att, hyp_seq_lengths, name='hypothesis_dynamic_reread') ########label prediction############## h = tf.concat([ premise_att, hypothesis_att, hypothesis_att * premise_att, hypothesis_att - premise_att ], axis=-1) v = tf.concat([ premise_v, hypothesis_v, hypothesis_v * premise_v, hypothesis_v - premise_v ], axis=-1) # h MLP layer h_mlp = tf.layers.dense(h, self._hidden_dim, activation=tf.nn.relu, kernel_initializer=self._initializer, name='h_fc1') # Dropout applied to classifier h_drop = tf.layers.dropout(h_mlp, self._dropout_rate, training=is_training) # Get prediction h_logits = tf.layers.dense(h_drop, self._num_classes, activation=None, kernel_initializer=self._initializer, name='h_logits') p_h = tf.nn.softmax(h_logits) # # MLP layer v_mlp = tf.layers.dense(v, self._hidden_dim, activation=tf.nn.relu, kernel_initializer=self._initializer, name='v_fc1') # Dropout applied to classifier v_drop = tf.layers.dropout(v_mlp, self._dropout_rate, training=is_training) # Get prediction v_logits = tf.layers.dense(v_drop, self._num_classes, activation=None, kernel_initializer=self._initializer, name='v_logits') p_v = tf.nn.softmax(v_logits) #### alpha_h = tf.layers.dense(h, 1, activation=tf.nn.sigmoid, kernel_initializer=self._initializer, bias_initializer=tf.zeros_initializer()) alpha_v = tf.layers.dense(v, 1, activation=tf.nn.sigmoid, kernel_initializer=self._initializer, bias_initializer=tf.zeros_initializer()) # # h MLP layer fuse_mlp = tf.layers.dense(alpha_h * h + alpha_v * v, self._hidden_dim, activation=tf.nn.relu, kernel_initializer=self._initializer, name='fuse_fc1') # Dropout applied to classifier fuse_drop = tf.layers.dropout(fuse_mlp, self._dropout_rate, training=is_training) #Get prediction output_dict = self._make_output(fuse_drop, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] h_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2( labels=labels_embedding, logits=h_logits)) v_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2( labels=labels_embedding, logits=v_logits)) fuse_loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = v_loss + h_loss + fuse_loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward(features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError("The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) hyp_mask = tf.expand_dims(hyp_mask, -1) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get('premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None) # 2.Input Encoder # 2.1 Highway Encoder query_emb = premise_tokens doc_emb = hypothesis_tokens query_len = prem_seq_lengths doc_len = hyp_seq_lengths query_mask = prem_mask doc_mask = hyp_mask project_dim = premise_tokens.shape[-1].value query_length = tf.shape(premise_tokens)[1] doc_length = tf.shape(hypothesis_tokens)[1] query_output = nn.highway_network(query_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training, scope="query_highway") doc_output = nn.highway_network(doc_emb, 1, dropout_rate=self._dropout_rate, is_trainging=is_training, scope="doc_highway") # # 2.2 Co-Attention M = tf.Variable(tf.random_normal([project_dim, project_dim], stddev=0.1)) tmp = tf.einsum("ijk,kl->ijl", query_output, M) S = tf.matmul(tmp, doc_output, transpose_b=True) # [batch, q, d] S_mask = tf.matmul(query_mask, doc_mask, transpose_b=True) S_mean = S * S_mask # S_align_max = S + (1. 
- S_mask) * tf.float32.min # 2.2.1 Extractive Pooling # Max Pooling query_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=2, keepdims=True), axis=1) query_maxpooling = tf.reduce_sum(query_score * query_output, axis=1) # [batch, r] doc_score = tf.nn.softmax(tf.reduce_max(S_align_max, axis=1, keepdims=True), axis=2) doc_maxpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1) # [batch, r] # Mean Pooling query_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=2, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(doc_len, tf.float32)+self._eps, -1), -1)), axis=1) query_meanpooling = tf.reduce_sum(query_score * query_output, axis=1) # [batch, r] doc_score = tf.nn.softmax(tf.reduce_sum(S_mean, axis=1, keepdims=True)/(tf.expand_dims(tf.expand_dims(tf.cast(query_len, tf.float32)+self._eps, -1), -1)), axis=2) doc_meanpooling = tf.reduce_sum(tf.transpose(doc_score, [0, 2, 1]) * doc_output, axis=1) # [batch, r] # 2.2.2 Alignment Pooling query_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=2), doc_output) # [batch, q, r] doc_alignment = tf.matmul(tf.nn.softmax(S_align_max, axis=1), query_output, transpose_a=True) # [batch, d, r] # 2.2.3 Intra Attention query_selfattn = nn.self_attention(query_output, query_len) doc_selfattn = nn.self_attention(doc_output, doc_len) # 2.3 Multi-Cast Attention query_maxpooling = tf.tile(tf.expand_dims(query_maxpooling, axis=1), [1, query_length, 1]) query_meanpooling = tf.tile(tf.expand_dims(query_meanpooling, axis=1), [1, query_length, 1]) doc_maxpooling = tf.tile(tf.expand_dims(doc_maxpooling, axis=1), [1, doc_length, 1]) doc_meanpooling = tf.tile(tf.expand_dims(doc_meanpooling, axis=1), [1, doc_length, 1]) query_max_fc, query_max_fm, query_max_fs = self.cast_attention(query_maxpooling, query_emb, self.nn_fc, name="query_max_pooling") query_mean_fc, query_mean_fm, query_mean_fs = self.cast_attention(query_meanpooling, query_emb, self.nn_fc, name="query_mean_pooling") query_align_fcm, query_align_fm, query_align_fs = self.cast_attention(query_alignment, query_emb, self.nn_fc, name="query_align_pooling") query_selfattn_fc, query_selfattn_fm, query_selfattn_fs = self.cast_attention(query_selfattn, query_emb, self.nn_fc, name="query_self_pooling") doc_max_fc, doc_max_fm, doc_max_fs = self.cast_attention(doc_maxpooling, doc_emb, self.nn_fc, name="doc_max_pooling") doc_mean_fc, doc_mean_fm, doc_mean_fs = self.cast_attention(doc_meanpooling, doc_emb, self.nn_fc, name="doc_mean_pooling") doc_align_fcm, doc_align_fm, doc_align_fs = self.cast_attention(doc_alignment, doc_emb, self.nn_fc, name="doc_align_pooling") doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs = self.cast_attention(doc_selfattn, doc_emb, self.nn_fc, name="doc_self_pooling") query_cast = tf.concat( [query_max_fc, query_max_fm, query_max_fs, query_mean_fc, query_mean_fm, query_mean_fs, query_align_fcm, query_align_fm, query_align_fs, query_selfattn_fc, query_selfattn_fm, query_selfattn_fs, query_output], axis=2) doc_cast = tf.concat( [doc_max_fc, doc_max_fm, doc_max_fs, doc_mean_fc, doc_mean_fm, doc_mean_fs, doc_align_fcm, doc_align_fm, doc_align_fs, doc_selfattn_fc, doc_selfattn_fm, doc_selfattn_fs, doc_output], axis=2) # query_cast = tf.concat( # [ # query_output], # axis=2) # doc_cast = tf.concat( # [doc_output], axis=2) query_cast = tf.layers.dropout(query_cast, self._dropout_rate, training=is_training) doc_cast = tf.layers.dropout(doc_cast, self._dropout_rate, training=is_training) query_hidden, _ = nn.bi_lstm(query_cast, self._hidden_dim, name="query_lstm") 
doc_hidden, _ = nn.bi_lstm(doc_cast, self._hidden_dim, name="doc_lstm") query_hidden = tf.concat(query_hidden, axis=2) doc_hidden = tf.concat(doc_hidden, axis=2) query_hidden = tf.layers.dropout(query_hidden, self._dropout_rate, training=is_training) doc_hidden = tf.layers.dropout(doc_hidden, self._dropout_rate, training=is_training) #query_hidden_max = query_hidden + (1. - query_mask) * tf.float32.min #doc_hidden_max = doc_hidden + (1. - doc_mask) * tf.float32.min query_hidden_mean = query_hidden * query_mask doc_hidden_mean = doc_hidden * doc_mask query_sum = tf.reduce_sum(query_hidden_mean, axis=1) query_mean = tf.div(query_sum, tf.expand_dims(tf.cast(query_len, tf.float32), -1) + self._eps) query_max = tf.reduce_max(query_hidden_mean, axis=1) query_final = tf.concat([query_mean, query_max], axis=1) doc_sum = tf.reduce_sum(doc_hidden_mean, axis=1) doc_mean = tf.div(doc_sum, tf.expand_dims(tf.cast(doc_len, tf.float32), -1) + self._eps) doc_max = tf.reduce_max(doc_hidden_mean, axis=1) doc_final = tf.concat([doc_mean, doc_max], axis=1) final = tf.concat([query_final, doc_final, query_final * doc_final, query_final - doc_final], axis=1) #yout = nn.highway_network(final, 2, dropout_rate=self._drop_rate, is_trainging=is_training) # MLP layer yout = tf.contrib.layers.fully_connected(final, self._hidden_dim, scope='fc1') # Dropout applied to classifier output_dict = self._make_output(yout, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError("The input features should contain label with vocabulary namespace " "labels int %s dataset."%mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions']) output_dict['metrics'] = metrics # output_dict['debugs'] = [] # debug_ops = [query_mean_fs]#[query_maxpooling, query_max_fc] [query_max_fm, query_max_fs],[query_mean_fc, query_mean_fm] , , # for op in debug_ops: # output_dict['debugs'].append(tf.shape(op)) # output_dict['debugs'].append(query_length) return output_dict
def forward(self, features, labels, mode, params): outputs = dict() is_training = (mode == tf.estimator.ModeKeys.TRAIN) for (feature_key, feature) in features.items(): if '/' not in feature_key: continue feature_namespace = feature_key.split("/")[1].strip() if feature_namespace == self._vocab_namespace: with tf.variable_scope("embedding/" + self._vocab_namespace, reuse=tf.AUTO_REUSE): if self._weight is None: if not self._trainable: logger.warning( "No pretrained embedding is assigned. The embedding should be trainable." ) logger.debug("loading random embedding.") if self._padding_zero: word_embeddings = tf.get_variable( "embedding_weight", shape=(self._num_embeddings - 1, self._embedding_dim), initializer=initializers.xavier_initializer(), trainable=self._trainable) pad_embeddings = tf.constant(np.zeros( [1, self._embedding_dim]), dtype=tf.float32) self._embeddings = tf.concat( [pad_embeddings, word_embeddings], axis=0) else: self._embeddings = tf.get_variable( "embedding_weight", shape=(self._num_embeddings, self._embedding_dim), initializer=initializers.xavier_initializer(), trainable=self._trainable) else: if self._weight.shape != (self._num_embeddings, self._embedding_dim): raise ConfigureError( "The parameter of embedding with shape (%s, %s), " "but the pretrained embedding with shape %s." % (self._num_embeddings, self._embedding_dim, self._weight.shape)) logger.debug( "loading pretrained embedding with trainable %s." % self._trainable) if self._padding_zero: word_embeddings = tf.get_variable( "embedding_weight", initializer=self._weight[1:, :], trainable=self._trainable) pad_embeddings = tf.constant(np.zeros( [1, self._embedding_dim]), dtype=tf.float32) self._embeddings = tf.concat( [pad_embeddings, word_embeddings], axis=0) else: self._embeddings = tf.get_variable( "embedding_weight", initializer=self._weight, trainable=self._trainable) # tf.Variable(self._weight, trainable=self._trainable, name='embedding_weight') emb = tf.nn.embedding_lookup(self._embeddings, feature) dropout_rate = params.get('dropout_rate') if dropout_rate is None: dropout_rate = self._dropout_rate emb_drop = tf.layers.dropout(emb, dropout_rate, training=is_training) if self._projection_dim: emb_drop = tf.layers.dense( emb_drop, self._projection_dim, use_bias=False, kernel_initializer=initializers.xavier_initializer( )) outputs[feature_key] = emb_drop return outputs
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError( "The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") premise_tokens = features_embedding.get('premise/tokens', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) hidden_size = premise_tokens.shape[-1].value with tf.variable_scope("pooler"): # We "pool" the model by simply taking the hidden state corresponding # to the first token. We assume that this has been pre-trained premise_first_token_tensor = tf.squeeze(premise_tokens[:, 0:1, :], axis=1) hypothesis_first_token_tensor = tf.squeeze( hypothesis_tokens[:, 0:1, :], axis=1) dense_input = tf.concat([ premise_first_token_tensor, hypothesis_first_token_tensor, premise_first_token_tensor - hypothesis_first_token_tensor, premise_first_token_tensor * hypothesis_first_token_tensor ], axis=-1) output_layer = tf.layers.dense( dense_input, hidden_size, activation=tf.tanh, kernel_initializer=create_initializer( self._initializer_range)) if is_training: # I.e., 0.1 dropout output_layer = tf.nn.dropout(output_layer, keep_prob=0.9) output_dict = self._make_output(output_layer, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) # metrics['auc'] = tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # v_1_ave, v_2_ave, h_mlp, logits] return output_dict
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward( features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError( "The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) if features.get( 'premise/elmo_characters', None) is not None or isinstance( self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 #prem_mask = tf.expand_dims(prem_mask, -1) prem_mask = tf.cast(prem_mask, tf.bool) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get( 'premise/elmo_characters', None) with tf.variable_scope('san_fb1'): x_fw1 = query_encode_san(premise_tokens, prem_mask, 'forward') # bs, ql, vec x_bw1 = query_encode_san(premise_tokens, prem_mask, 'backward') # bs, ql, vec x_fusion = fusion_gate(premise_tokens, prem_mask, x_fw1, x_bw1) # bs, ql, vec with tf.variable_scope('san_md'): x_code = query_encode_md(x_fusion, prem_mask) # bs, vec pre_logits = tf.nn.relu( linear(x_code, self._hidden_dim, True, scope='pre_logits_linear', is_train=True)) # bs, vec logits = linear(pre_logits, self._num_classes, False, scope='get_output', is_train=True) # bs, cn output_dict = self._make_output(logits, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError( "The input features should contain label with vocabulary namespace " "labels int %s dataset." % mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy( labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision( labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall( labels=labels, predictions=output_dict['predictions']) #tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
def get_vocab_index_to_token(self, namespace='tokens'):
    if namespace not in self._index_to_token:
        raise ConfigureError("namespace %s not in vocabulary." % namespace)
    return self._index_to_token[namespace]
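# Example usage (the namespace and label strings are illustrative only):
#   index_to_token = vocab.get_vocab_index_to_token(namespace='labels')
#   index_to_token  ->  {0: 'entailment', 1: 'neutral', 2: 'contradiction'}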
def __init__(self, data_reader=None, train_input_fn=None, valid_input_fn=None, test_input_fn=None,
             serving_feature_spec=None, model=None, hparams=HParams(), run_config: RunConfig = RunConfig()):
    if data_reader is not None and train_input_fn is None:
        self._train_input_fn, self._valid_input_fn, self._test_input_fn = self.make_input_fns(data_reader)
        self._serving_feature_spec = data_reader.get_raw_serving_input_receiver_features(DataSplit.EVAL)
    else:
        self._train_input_fn = train_input_fn
        self._valid_input_fn = valid_input_fn
        self._test_input_fn = test_input_fn
        self._serving_feature_spec = serving_feature_spec
    if self._train_input_fn is None:
        raise ConfigureError("The train dataset is not provided.")
    if data_reader:
        hparams.add_hparam("num_retrieval", data_reader.get_num_retrieval())
    if model is None:
        raise ConfigureError("Please provide model for training.")
    self._model_fn = model.make_estimator_model_fn()

    if hparams.per_process_gpu_memory_fraction is not None and 0 < hparams.per_process_gpu_memory_fraction <= 1:
        session_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
        session_config.gpu_options.per_process_gpu_memory_fraction = hparams.per_process_gpu_memory_fraction
        run_config = run_config.replace(session_config=session_config)

    self._estimator = tf.estimator.Estimator(model_fn=self._model_fn, config=run_config, params=hparams,
                                             warm_start_from=model.get_warm_start_setting())

    train_hooks = []
    if tf_version[1] >= 10 and tf_version[1] <= 13:
        early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(
            self._estimator, metric_name='loss',
            max_steps_without_decrease=hparams.early_stopping_max_steps_without_decrease,
            min_steps=hparams.early_stopping_min_steps)
        train_hooks.append(early_stopping)

    exporters = None
    if self._serving_feature_spec:
        serving_input_receiver_fn = (
            tf.estimator.export.build_raw_serving_input_receiver_fn(self._serving_feature_spec))
        exporters = []
        if tf_version[1] >= 9:
            best_exporter = tf.estimator.BestExporter(
                name="best_exporter",
                serving_input_receiver_fn=serving_input_receiver_fn,
                exports_to_keep=5)
            exporters.append(best_exporter)
        latest_export = tf.estimator.LatestExporter(
            name='latest_exporter',
            serving_input_receiver_fn=serving_input_receiver_fn,
            exports_to_keep=5)
        exporters.append(latest_export)

    self._train_spec = tf.estimator.TrainSpec(input_fn=self._train_input_fn,
                                              max_steps=hparams.train_steps, hooks=train_hooks)
    if self._valid_input_fn:
        self._valid_spec = tf.estimator.EvalSpec(input_fn=self._valid_input_fn, steps=hparams.eval_steps,
                                                 exporters=exporters, throttle_secs=hparams.throttle_secs)
        # self._estimator.evaluate(self._valid_input_fn, steps=hparams.eval_steps, name=DataSplit.TEST)
        tf.estimator.train_and_evaluate(self._estimator, self._train_spec, self._valid_spec)
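# Hedged usage sketch for this trainer. The class name `Trainer` and the reader/model
# constructors are placeholders; only the keyword arguments mirror the signature above.
#
#   hparams = HParams(train_steps=20000, eval_steps=100, throttle_secs=60,
#                     early_stopping_max_steps_without_decrease=1000,
#                     early_stopping_min_steps=500,
#                     per_process_gpu_memory_fraction=0.9)
#   run_config = RunConfig(model_dir='/tmp/nli_model', save_checkpoints_steps=500)
#   Trainer(data_reader=my_data_reader, model=my_model,
#           hparams=hparams, run_config=run_config)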
def forward(self, features, labels, mode, params): global_step = tf.train.get_or_create_global_step() dropout_keep_rate = tf.train.exponential_decay(self._keep_prob, global_step, self._dropout_decay_step, self._dropout_decay_rate, staircase=False, name='dropout_keep_rate') tf.summary.scalar('dropout_keep_rate', dropout_keep_rate) params.add_hparam('dropout_rate', 1 - dropout_keep_rate) features_embedding = self._embedding_mapping.forward(features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError("The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) hyp_mask = tf.expand_dims(hyp_mask, -1) premise_ins = [] hypothesis_ins = [] premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get('premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None) premise_ins.append(premise_tokens) hypothesis_ins.append(hypothesis_tokens) premise_chars = features_embedding.get('premise/chars', None) hypothesis_chars = features_embedding.get('hypothesis/chars', None) if premise_chars is not None and hypothesis_chars is not None: with tf.variable_scope("conv") as scope: conv_pre = nn.multi_conv1d_max(premise_chars, self._char_filter_size, self._char_filter_channel_dims, "VALID", is_training, dropout_keep_rate, scope='conv') scope.reuse_variables() conv_hyp = nn.multi_conv1d_max(hypothesis_chars, self._char_filter_size, self._char_filter_channel_dims, "VALID", is_training, dropout_keep_rate, scope='conv') #conv_pre = tf.reshape(conv_pre, [-1, self.sequence_length, config.char_out_size]) #conv_hyp = tf.reshape(conv_hyp, [-1, self.sequence_length, config.char_out_size]) premise_ins.append(conv_pre) hypothesis_ins.append(conv_hyp) premise_pos = features_embedding.get('premise/pos_tags', None) hypothesis_pos = features_embedding.get('hypothesis/pos_tags', None) if premise_pos is not None and hypothesis_pos is not None: premise_ins.append(premise_pos) hypothesis_ins.append(hypothesis_pos) premise_exact_match = features.get('premise/exact_match_labels', None) hypothesis_exact_match = features.get('hypothesis/exact_match_labels', None) if premise_exact_match is not None and 
hypothesis_exact_match is not None: premise_ins.append(tf.expand_dims(tf.cast(premise_exact_match, tf.float32), -1)) hypothesis_ins.append(tf.expand_dims(tf.cast(hypothesis_exact_match, tf.float32), -1)) premise_in = tf.concat(premise_ins, axis=2) hypothesis_in = tf.concat(hypothesis_ins, axis=2) with tf.variable_scope("highway") as scope: premise_in = nn.highway_network(premise_in, self._highway_num_layers) scope.reuse_variables() hypothesis_in = nn.highway_network(hypothesis_in, self._highway_num_layers) with tf.variable_scope("prepro") as scope: pre = premise_in hyp = hypothesis_in for i in range(self._num_self_att_enc_layers): with tf.variable_scope("attention_encoder_%s" % i, reuse=False): pre_att = nn.self_attention(pre, prem_seq_lengths, func='tri_linear', scope="premise_self_attention") p = nn.fuse_gate(pre, pre_att, scope="premise_fuse_gate") hyp_att = nn.self_attention(hyp, hyp_seq_lengths, func='tri_linear', scope="hypothesis_self_attention") h = nn.fuse_gate(hyp, hyp_att, scope="hypothesis_fuse_gate") pre = p hyp = h nn.variable_summaries(p, "p_self_enc_summary_layer_{}".format(i)) nn.variable_summaries(h, "h_self_enc_summary_layer_{}".format(i)) with tf.variable_scope("main") as scope: pre = p hyp = h with tf.variable_scope("interaction"): pre_length = tf.shape(pre)[1] hyp_length = tf.shape(hyp)[1] pre_new = tf.tile(tf.expand_dims(pre, 2), [1, 1, hyp_length, 1]) hyp_new = tf.tile(tf.expand_dims(hyp, 1), [1, pre_length, 1, 1]) bi_att_mx = pre_new * hyp_new # mask = tf.expand_dims(tf.sequence_mask(query_len, tf.shape(query)[1], dtype=tf.float32), # axis=2) * \ # tf.expand_dims(tf.sequence_mask(key_len, tf.shape(key)[1], dtype=tf.float32), axis=1) bi_att_mx = tf.layers.dropout(bi_att_mx, 1-dropout_keep_rate, training=is_training) with tf.variable_scope("dense_net"): dim = bi_att_mx.get_shape().as_list()[-1] act = tf.nn.relu if self._first_scale_down_layer_relu else None fm = tf.contrib.layers.convolution2d(bi_att_mx, int(dim * self._dense_net_first_scale_down_ratio), self._first_scale_down_kernel, padding="SAME", activation_fn=act) fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers, self._dense_net_kernel_size, scope="first_dense_net_block") fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate, scope='second_transition_layer') fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers, self._dense_net_kernel_size, scope="second_dense_net_block") fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate, scope='third_transition_layer') fm = nn.dense_net_block(fm, self._dense_net_growth_rate, self._num_dense_net_layers, self._dense_net_kernel_size, scope="third_dense_net_block") fm = nn.dense_net_transition_layer(fm, self._dense_net_transition_rate, scope='fourth_transition_layer') shape_list = list(fm.get_shape()) #print(shape_list) premise_final = tf.reshape(fm, [-1, shape_list[1] * shape_list[2] * shape_list[3]]) output_dict = self._make_output(premise_final, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError("The input features should contain label with vocabulary namespace " "labels int %s dataset."%mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) #######l2 loss################# if self._l2_loss: if self._sigmoid_growing_l2loss: weights_added = 
tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if tensor.name.endswith("weights:0") or tensor.name.endswith('kernel:0') or tensor.name.endswith('filter:0')]) full_l2_step = tf.constant(self._weight_l2loss_step_full_reg, dtype=tf.int32, shape=[], name='full_l2reg_step') full_l2_ratio = tf.constant(self._l2_regularization_ratio, dtype=tf.float32, shape=[], name='l2_regularization_ratio') gs_flt = tf.cast(global_step, tf.float32) half_l2_step_flt = tf.cast(full_l2_step / 2, tf.float32) # (self.global_step - full_l2_step / 2) # tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32) # l2loss_ratio = tf.sigmoid( tf.cast((self.global_step - full_l2_step / 2) * 8, tf.float32) / tf.cast(full_l2_step / 2 ,tf.float32)) * full_l2_ratio l2loss_ratio = tf.sigmoid(((gs_flt - half_l2_step_flt) * 8) / half_l2_step_flt) * full_l2_ratio tf.summary.scalar('l2loss_ratio', l2loss_ratio) l2loss = weights_added * l2loss_ratio else: l2loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in tf.trainable_variables() if tensor.name.endswith("weights:0") or tensor.name.endswith( 'kernel:0')]) * tf.constant(self._l2_regularization_ratio, dtype='float', shape=[], name='l2_regularization_ratio') tf.summary.scalar('l2loss', l2loss) ######diff loss############################### diffs = [] for i in range(self._num_self_att_enc_layers): for tensor in tf.trainable_variables(): #print(tensor.name) if tensor.name == "diin/prepro/attention_encoder_{}/premise_self_attention/similar_mat/similar_func/arg/kernel:0".format( i): l_lg = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_self_attention/similar_mat/similar_func/arg/kernel:0".format( i): r_lg = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_1/kernel:0".format(i): l_fg_lhs_1 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_1/kernel:0".format( i): r_fg_lhs_1 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_1/kernel:0".format(i): l_fg_rhs_1 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_1/kernel:0".format( i): r_fg_rhs_1 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_2/kernel:0".format(i): l_fg_lhs_2 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_2/kernel:0".format( i): r_fg_lhs_2 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_2/kernel:0".format(i): l_fg_rhs_2 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_2/kernel:0".format( i): r_fg_rhs_2 = tensor if tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/lhs_3/kernel:0".format( i): l_fg_lhs_3 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/lhs_3/kernel:0".format( i): r_fg_lhs_3 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/premise_fuse_gate/rhs_3/kernel:0".format( i): l_fg_rhs_3 = tensor elif tensor.name == "diin/prepro/attention_encoder_{}/hypothesis_fuse_gate/rhs_3/kernel:0".format( i): r_fg_rhs_3 = tensor diffs += [l_lg - r_lg, l_fg_lhs_1 - r_fg_lhs_1, l_fg_rhs_1 - r_fg_rhs_1, l_fg_lhs_2 - r_fg_lhs_2, l_fg_rhs_2 - r_fg_rhs_2] diffs += [l_fg_lhs_3 - r_fg_lhs_3, l_fg_rhs_3 - r_fg_rhs_3] diff_loss = tf.add_n([tf.nn.l2_loss(tensor) for tensor in diffs]) * tf.constant( self._diff_penalty_loss_ratio, dtype='float', shape=[], name='diff_penalty_loss_ratio') 
tf.summary.scalar('diff_loss', diff_loss) ############################### output_dict['loss'] = loss + l2loss + diff_loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions']) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
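# The sigmoid-growing L2 schedule above ramps the penalty from roughly zero to the full
# ratio around the midpoint of the full-regularization step. A standalone NumPy sketch of
# the same formula (the example numbers are illustrative, not taken from this configuration):
import numpy as np

def l2loss_ratio(global_step, full_l2_step, full_l2_ratio):
    half = full_l2_step / 2.0
    return 1.0 / (1.0 + np.exp(-((global_step - half) * 8.0) / half)) * full_l2_ratio

# e.g. full_l2_step=100000, full_l2_ratio=9e-5:
# step 0 -> ~3e-8, step 50000 -> ~4.5e-5, step 100000 -> ~9e-5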
def __init__(self, data_reader=None, eval_input_fn=None, num_classes=None, vocab=None, export_dir=None, output_file=None, hparams=HParams()): if data_reader is not None and eval_input_fn is None: self._eval_input_fn = data_reader.make_estimator_input_fn( DataSplit.EVAL, force_repeat=False) vocab = data_reader.get_vocab() else: self._eval_input_fn = eval_input_fn if num_classes is None: num_classes = vocab.get_vocab_size(namespace='labels') task = hparams.get('task', 'classification') task_type = hparams.get('task_type', 'multiclass') labels = list(range(num_classes)) dataset = self._eval_input_fn() iterator = dataset.make_initializable_iterator() dataset.make_initializable_iterator() next_element = iterator.get_next() self.saved_model_loader = loader_impl.SavedModelLoader(export_dir) mode = DataSplit.PREDICT signature_def = get_signature_def_for_mode(self.saved_model_loader, mode) input_map = generate_input_map(signature_def, next_element) output_tensor_names = [ value.name for value in signature_def.outputs.values() ] try: tags = model_fn.EXPORT_TAG_MAP[mode] except AttributeError as e: tags = ['serve'] saver, output_tensors = self.saved_model_loader.load_graph( tf.get_default_graph(), tags, input_map=input_map, return_elements=output_tensor_names) output_map = dict(zip(output_tensor_names, output_tensors)) outputs = { key: output_map[value.name] for (key, value) in signature_def.outputs.items() } # predict_fn = tf.contrib.predictor.from_saved_model(export_dir) #####xlsx wirte###### tsv_file = open(output_file, 'w') # wb = Workbook(write_only=True) # ws = wb.create_sheet('examples') # ws.append(['question', 'answer', 'true_label', 'predict', 'score']) y_true = [] y_pred = [] total_num = 0 # accuracy = 0 # confusion_matrix = [[0 for j in range(num_classes)] for i in range(num_classes)] if hparams.per_process_gpu_memory_fraction is not None and 0 < hparams.per_process_gpu_memory_fraction <= 1: session_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True) session_config.gpu_options.per_process_gpu_memory_fraction = hparams.per_process_gpu_memory_fraction else: session_config = tf.ConfigProto() with tf.Session(config=session_config) as sess: self.saved_model_loader.restore_variables(sess, saver) self.saved_model_loader.run_init_ops(sess, tags) sess.run(iterator.initializer) while True: try: outputs['inputs'] = next_element output_vals = sess.run(outputs) data_batch = output_vals['inputs'] if 'premise/tokens' in data_batch.keys( ) and 'hypothesis/tokens' in data_batch.keys(): premise_tokens_val, hypothesis_tokens_val, true_label_val = \ data_batch['premise/tokens'], data_batch['hypothesis/tokens'], data_batch['label/labels'] else: true_label_val = data_batch['label/labels'] premise_tokens_val = [ [] for i in range(len(true_label_val)) ] hypothesis_tokens_val = [ [] for i in range(len(true_label_val)) ] # probs = output_vals['output_score'] probs = output_vals['output'] num_batch = probs.shape[0] total_num += num_batch print("processing %s/%s" % (num_batch, total_num)) ####################### # print(probs) if task_type == 'multiclass': predictions_val = np.argmax(probs, axis=1) elif task_type == 'multilabel': threshold = hparams.get('threshold', 0.5) predictions_val = (probs > threshold).astype( dtype=np.int32) elif task_type == 'topk': predictions_val = (probs > 0).astype(dtype=np.int32) else: raise ConfigureError( "Task type %s is not support for task %s. 
" "Only multiclass and multilabel is support for task %s" % (task_type, task, task)) # predictions = (probs > 0.5).astype(np.int32) # print(predictions) y_true.append(true_label_val) y_pred.append(predictions_val) # print(predictions) # for i in range(probs.shape[0]): # predictions = (probs > 0.5).astype(np.int32) # predict = predictions[i] # label = true_label_val[i] # if predict == label: # accuracy += 1 # confusion_matrix[label][predict] += 1 ################ for i in range(num_batch): premise_str = vocab.convert_indexes_to_tokens( premise_tokens_val[i], 'tokens') premise_str = " ".join(premise_str) hypothesis_str = vocab.convert_indexes_to_tokens( hypothesis_tokens_val[i], 'tokens') hypothesis_str = " ".join(hypothesis_str) if task_type == 'multilabel' or task_type == 'topk': predictions = [[] for i in range(num_batch)] for (row, col) in np.argwhere(predictions_val == 1): predictions[row].append(col) true_labels = [[] for i in range(num_batch)] for row, col in np.argwhere(true_label_val == 1): true_labels[row].append(col) else: predictions = predictions_val true_labels = true_label_val true_label = true_labels[i] predict = predictions[i] prob = probs[i] if task_type == 'multiclass': tsv_str = "\t".join([ premise_str, hypothesis_str, vocab.get_index_token(true_label, namespace='labels'), vocab.get_index_token(predict, namespace='labels'), str(prob) ]) elif task_type == 'multilabel' or task_type == 'topk': tsv_str = "\t".join([ premise_str, hypothesis_str, " ".join([ vocab.get_index_token(l, namespace='labels') for l in true_label ]), " ".join([ vocab.get_index_token(p, namespace='labels') for p in predict ]), str(prob) ]) else: raise ConfigureError( "Task type %s is not support for task %s. " "Only multiclass and multilabel is support for task %s" % (task_type, task, task)) # tsv_str = "\t".join([premise_str, hypothesis_str, str(true_label), str(predict), str(prob), # json.dumps(output_vals['query_embedding'][i].tolist()), json.dumps(output_vals['title_embedding'][i].tolist()), # json.dumps(output_vals['query_lstm_1'][i].tolist()), json.dumps(output_vals['title_lstm_1'][i].tolist()), # json.dumps(output_vals['query_attention'][i].tolist()), json.dumps(output_vals['title_attention'][i].tolist()), # json.dumps(output_vals['query_lstm_2'][i].tolist()), json.dumps(output_vals['title_lstm_2'][i].tolist()), # json.dumps(output_vals['fc1'][i].tolist()), json.dumps(output_vals['fc2'][i].tolist()) # ]) tsv_file.write(tsv_str + "\n") # print("process %s/%s correct/total instances with accuracy %s." 
% (accuracy, total_num, accuracy/float(total_num))) except tf.errors.OutOfRangeError as e: logger.info("processed all the evalutation data") break # logger.warning(e) y_true = np.concatenate(y_true, axis=0) y_pred = np.concatenate(y_pred, axis=0) avg_param = 'micro' if num_classes == 2: avg_param = 'binary' accuracy = metrics.accuracy_score(y_true, y_pred) # accuracy/total_num precise, recall, f1score, support = metrics.precision_recall_fscore_support( y_true, y_pred, labels=labels, average=avg_param) if task_type == 'multiclass': confusion_matrix = metrics.confusion_matrix(y_true, y_pred, labels=labels) print("metrics:") confmx_str = "label \ predict " for i in range(num_classes): confmx_str += "| %s | " % vocab.get_index_token( i, namespace='labels') confmx_str += "\n" for i in range(num_classes): confmx_str += "| %s | " % vocab.get_index_token( i, namespace='labels') for j in range(num_classes): confmx_str += "| %s | " % confusion_matrix[i][j] confmx_str += "\n" print(confmx_str) elif task_type == 'multilabel' or task_type == 'topk': confusion_matrix = metrics.multilabel_confusion_matrix( y_true, y_pred) print("metrics:") for k in range(num_classes): print("confusion matrix for label %s" % vocab.get_index_token(k, namespace='labels')) confmx_str = "label \ predict " for i in range(2): confmx_str += "| %s | " % i confmx_str += "\n" for i in range(2): confmx_str += "| %s | " % i for j in range(2): confmx_str += "| %s | " % confusion_matrix[k][i][j] confmx_str += "\n" print(confmx_str) else: raise ConfigureError( "Task type %s is not support for task %s. " "Only multiclass and multilabel is support for task %s" % (task_type, task, task)) # confusion_matrix[1][1]/(confusion_matrix[0][1]+confusion_matrix[1][1]) # recall = confusion_matrix[1][1]/(confusion_matrix[1][0]+confusion_matrix[1][1]) # f1score = (precise+recall)/2 print("micro total accuracy precise recall f1-score") print( "accuracy: %.2f, precise: %.2f, recall: %.2f, f1-score: %.2f" % (accuracy, precise, recall, f1score)) precisions, recalls, fbeta_scores, supports = metrics.precision_recall_fscore_support( y_true, y_pred, labels=labels) print("accuracy precise recall f1-score for each class") print( '======================================================================================' ) for lab_idx, (precision, recall, fbeta_score, support) in enumerate( zip(precisions, recalls, fbeta_scores, supports)): print( "label:%s\tprecision:%.2f\trecall:%.2f\tf1-score:%.2f\tsupport:%.2f" % (vocab.get_index_token(lab_idx, namespace='labels'), precision, recall, fbeta_score, support)) # legend = ["label \ predict "] # for i in range(num_classes): # legend.append(str(i)) # ws.append(legend) # for i in range(num_classes): # row = [str(i)] # for j in range(num_classes): # row.append(str(confusion_matrix[i][j])) # ws.append(row) # ws.append([]) # ws.append([]) # ws.append(['accuracy', 'precise', 'recall', 'f1-score']) # ws.append([str(accuracy), str(precise), str(recall), str(f1score)]) # if output_file: # if not output_file.endswith(".xlsx"): # output_file += '.xlsx' # wb.save(output_file) tsv_file.close()
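# Standalone sketch of the metric computation this evaluator performs once all batches
# are collected: 'binary' averaging for two-class problems, 'micro' otherwise
# (the toy labels below are for illustration only).
import numpy as np
from sklearn import metrics

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])
num_classes = 2
avg_param = 'binary' if num_classes == 2 else 'micro'
accuracy = metrics.accuracy_score(y_true, y_pred)
precision, recall, f1, _ = metrics.precision_recall_fscore_support(
    y_true, y_pred, labels=list(range(num_classes)), average=avg_param)
print("accuracy: %.2f, precision: %.2f, recall: %.2f, f1-score: %.2f"
      % (accuracy, precision, recall, f1))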
def forward(self, features, labels, mode, params): features_embedding = self._embedding_mapping.forward(features, labels, mode, params) with tf.variable_scope(self._model_name): is_training = (mode == tf.estimator.ModeKeys.TRAIN) premise_tokens_ids = features.get('premise/tokens', None) if premise_tokens_ids is None: premise_tokens_ids = features.get('premise/elmo_characters', None) hypothesis_tokens_ids = features.get('hypothesis/tokens', None) if hypothesis_tokens_ids is None: hypothesis_tokens_ids = features.get('hypothesis/elmo_characters', None) if premise_tokens_ids is None: raise ConfigureError("The input features should contain premise with vocabulary namespace tokens " "or elmo_characters.") if hypothesis_tokens_ids is None: raise ConfigureError("The input features should contain hypothesis with vocabulary namespace tokens " "or elmo_characters.") prem_seq_lengths, prem_mask = nn.length(premise_tokens_ids) hyp_seq_lengths, hyp_mask = nn.length(hypothesis_tokens_ids) if features.get('premise/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): prem_mask = nn.remove_bos_eos(prem_mask, prem_seq_lengths) prem_seq_lengths -= 2 if features.get('hypothesis/elmo_characters', None) is not None or isinstance(self._embedding_mapping.get_encoder('tokens'), Bert): hyp_mask = nn.remove_bos_eos(hyp_mask, hyp_seq_lengths) hyp_seq_lengths -= 2 prem_mask = tf.expand_dims(prem_mask, -1) hyp_mask = tf.expand_dims(hyp_mask, -1) premise_tokens = features_embedding.get('premise/tokens', None) if premise_tokens is None: premise_tokens = features_embedding.get('premise/elmo_characters', None) hypothesis_tokens = features_embedding.get('hypothesis/tokens', None) if hypothesis_tokens is None: hypothesis_tokens = features_embedding.get('hypothesis/elmo_characters', None) lm_xor = keras.layers.Lambda(self._xor_match)([premise_tokens_ids, hypothesis_tokens_ids]) lm_conv = keras.layers.Conv1D( self._lm_filters, premise_tokens_ids.shape[1].value, padding='valid', activation=self._activation_func )(lm_xor) lm_conv = keras.layers.Dropout(self._dropout_rate)( lm_conv, training=is_training) lm_feat = keras.layers.Reshape((lm_conv.shape[2].value, ))(lm_conv) for hidden_size in self._lm_hidden_sizes: lm_feat = keras.layers.Dense( hidden_size, activation=self._activation_func )(lm_feat) lm_drop = keras.layers.Dropout(self._dropout_rate)( lm_feat, training=is_training) lm_score = keras.layers.Dense(1)(lm_drop) dm_q_conv = keras.layers.Conv1D( self._dm_filters, self._dm_kernel_size, padding='same', activation=self._activation_func )(premise_tokens) dm_q_conv = keras.layers.Dropout(self._dropout_rate)( dm_q_conv, training=is_training) dm_q_mp = keras.layers.MaxPooling1D( pool_size=premise_tokens_ids.shape[1].value)(dm_q_conv) dm_q_rep = keras.layers.Reshape((dm_q_mp.shape[2].value, ))(dm_q_mp) dm_q_rep = keras.layers.Dense(self._dm_q_hidden_size)( dm_q_rep) dm_q_rep = keras.layers.Lambda(lambda x: tf.expand_dims(x, 1))( dm_q_rep) dm_d_conv1 = keras.layers.Conv1D( self._dm_filters, self._dm_kernel_size, padding='same', activation=self._activation_func )(hypothesis_tokens) dm_d_conv1 = keras.layers.Dropout(self._dropout_rate)( dm_d_conv1, training=is_training) dm_d_mp = keras.layers.MaxPooling1D( pool_size=self._dm_d_mpool)(dm_d_conv1) dm_d_conv2 = keras.layers.Conv1D( self._dm_filters, 1, padding='same', activation=self._activation_func )(dm_d_mp) dm_d_conv2 = keras.layers.Dropout(self._dropout_rate)( dm_d_conv2, training=is_training) h_dot = dm_q_rep * dm_d_conv2 
#keras.layers.Lambda(self._hadamard_dot)([dm_q_rep, dm_d_conv2]) dm_feat = keras.layers.Reshape((h_dot.shape[1].value*h_dot.shape[2].value, ))(h_dot) for hidden_size in self._dm_hidden_sizes: dm_feat = keras.layers.Dense(hidden_size)(dm_feat) dm_feat_drop = keras.layers.Dropout(self._dropout_rate)( dm_feat, training=is_training) dm_score = keras.layers.Dense(1)(dm_feat_drop) add = keras.layers.Add()([lm_score, dm_score]) # Get prediction output_dict = self._make_output(add, params) if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL: if 'label/labels' not in features: raise ConfigureError("The input features should contain label with vocabulary namespace " "labels int %s dataset."%mode) labels_embedding = features_embedding['label/labels'] labels = features['label/labels'] loss = self._make_loss(labels=labels_embedding, logits=output_dict['logits'], params=params) output_dict['loss'] = loss metrics = dict() metrics['accuracy'] = tf.metrics.accuracy(labels=labels, predictions=output_dict['predictions']) metrics['precision'] = tf.metrics.precision(labels=labels, predictions=output_dict['predictions']) metrics['recall'] = tf.metrics.recall(labels=labels, predictions=output_dict['predictions']) # metrics['map'] = tf.metrics.average_precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'], # k=2) # metrics['precision_1'] = tf.metrics.precision_at_k(labels=tf.cast(labels, tf.int64), predictions=output_dict['logits'], # k=1, class_id=1) #tf.metrics.auc(labels=labels, predictions=predictions) output_dict['metrics'] = metrics # output_dict['debugs'] = [hypothesis_tokens, premise_tokens, hypothesis_bi, premise_bi, # premise_ave, hypothesis_ave, diff, mul, h, h_mlp, logits] return output_dict
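# Hedged sketch of the local interaction used above: self._xor_match presumably builds
# an exact-match indicator matrix between the two id sequences (the helper itself is not
# shown in this file). A NumPy version of that idea:
import numpy as np

def exact_match_matrix(premise_ids, hypothesis_ids):
    """[batch, p_len], [batch, h_len] -> [batch, p_len, h_len] with 1.0 where ids match."""
    return (premise_ids[:, :, None] == hypothesis_ids[:, None, :]).astype(np.float32)

_p = np.array([[3, 5, 7, 0]])
_h = np.array([[5, 7, 7]])
print(exact_match_matrix(_p, _h)[0])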