Beispiel #1
0
    def update_config_with_metadata(output_feature, feature_metadata, *args,
                                    **kwargs):
        """Complete an output feature's config using its metadata.

        Copies the vocabulary size and maximum sequence length stored under
        the feature's ``level`` into ``output_feature``, pads list/tuple
        class weights with two leading zeros, and applies ``softmax`` to
        every row of the class similarities when a positive temperature is
        configured. ``output_feature`` is modified in place and nothing is
        returned.

        Args:
            output_feature: Mutable config mapping. Must contain ``"level"``
                and a ``LOSS`` mapping with ``"class_weights"`` and
                ``"class_similarities_temperature"``.
            feature_metadata: Mapping holding ``"<level>_vocab_size"`` and
                ``"<level>_max_sequence_length"``.
            *args: Ignored.
            **kwargs: Ignored.

        Raises:
            ValueError: If the padded class weights do not contain exactly
                ``vocab_size`` entries, or if the temperature is positive
                but ``output_feature`` has no ``"class_similarities"`` key.
        """
        level = output_feature["level"]
        output_feature["vocab_size"] = feature_metadata[
            "{}_vocab_size".format(level)]
        output_feature["max_sequence_length"] = feature_metadata[
            "{}_max_sequence_length".format(level)]

        loss = output_feature[LOSS]
        if isinstance(loss["class_weights"], (list, tuple)):
            # [0, 0] for UNK and PAD. list() is required because a tuple
            # cannot be concatenated to a list directly.
            loss["class_weights"] = [0, 0] + list(loss["class_weights"])
            if len(loss["class_weights"]) != output_feature["vocab_size"]:
                raise ValueError(
                    "The length of class_weights ({}) is not compatible with "
                    "the number of classes ({})".format(
                        len(loss["class_weights"]),
                        output_feature["vocab_size"]))

        temperature = loss["class_similarities_temperature"]
        if temperature > 0:
            if "class_similarities" not in output_feature:
                raise ValueError("class_similarities_temperature > 0, "
                                 "but no class similarities are provided "
                                 "for feature {}".format(
                                     output_feature[COLUMN]))
            distances = output_feature["class_similarities"]
            # Rows are overwritten in place; the [i, :] indexing assumes a
            # 2-D array-like (e.g. a numpy array).
            for i in range(len(distances)):
                distances[i, :] = softmax(distances[i, :],
                                          temperature=temperature)
            loss["class_similarities"] = distances
Beispiel #2
0
    def update_config_with_metadata(
            output_feature,
            feature_metadata,
            *args,
            **kwargs
    ):
        """Complete an output feature's config using its metadata.

        Copies the number of classes and maximum sequence length stored
        under the feature's ``level`` into ``output_feature``, pads
        list/tuple class weights with two leading zeros, applies ``softmax``
        to every row of the class similarities when a positive temperature
        is configured, and attaches per-class counts when the loss type is
        ``'sampled_softmax_cross_entropy'``. ``output_feature`` is modified
        in place and nothing is returned.

        Args:
            output_feature: Mutable config mapping. Must contain ``'level'``
                and a ``LOSS`` mapping with ``'class_weights'``,
                ``'class_similarities_temperature'`` and ``TYPE``.
            feature_metadata: Mapping holding ``'<level>_vocab_size'`` and
                ``'<level>_max_sequence_length'``; ``'<level>_str2freq'``
                and ``'<level>_idx2str'`` are read only for the sampled
                softmax loss.
            *args: Ignored.
            **kwargs: Ignored.

        Raises:
            ValueError: If the padded class weights do not contain exactly
                ``num_classes`` entries, or if the temperature is positive
                but ``output_feature`` has no ``'class_similarities'`` key.
        """
        level = output_feature['level']
        output_feature['num_classes'] = feature_metadata[
            '{}_vocab_size'.format(level)
        ]
        output_feature['max_sequence_length'] = feature_metadata[
            '{}_max_sequence_length'.format(level)
        ]

        loss = output_feature[LOSS]
        if isinstance(loss['class_weights'], (list, tuple)):
            # [0, 0] for UNK and PAD. list() is required because a tuple
            # cannot be concatenated to a list directly.
            loss['class_weights'] = [0, 0] + list(loss['class_weights'])
            if len(loss['class_weights']) != output_feature['num_classes']:
                raise ValueError(
                    'The length of class_weights ({}) is not compatible with '
                    'the number of classes ({})'.format(
                        len(loss['class_weights']),
                        output_feature['num_classes']
                    )
                )

        temperature = loss['class_similarities_temperature']
        if temperature > 0:
            if 'class_similarities' not in output_feature:
                raise ValueError(
                    'class_similarities_temperature > 0, '
                    'but no class similarities are provided '
                    'for feature {}'.format(output_feature[NAME])
                )
            distances = output_feature['class_similarities']
            # Rows are overwritten in place; the [i, :] indexing assumes a
            # 2-D array-like (e.g. a numpy array).
            for i in range(len(distances)):
                distances[i, :] = softmax(
                    distances[i, :],
                    temperature=temperature
                )
            loss['class_similarities'] = distances

        if loss[TYPE] == 'sampled_softmax_cross_entropy':
            str2freq = feature_metadata['{}_str2freq'.format(level)]
            idx2str = feature_metadata['{}_idx2str'.format(level)]
            # One count per class, ordered as in idx2str.
            loss['class_counts'] = [str2freq[cls] for cls in idx2str]
    def update_model_definition_with_metadata(
            output_feature,
            feature_metadata,
            *args,
            **kwargs
    ):
        """Complete an output feature's definition using its metadata.

        Copies the number of classes and maximum sequence length from
        ``feature_metadata`` into ``output_feature``, pads list/tuple class
        weights with two leading zeros, applies ``softmax`` to every row of
        the class distances when a positive temperature is configured, and
        attaches per-class counts when the loss type is
        ``'sampled_softmax_cross_entropy'``. ``output_feature`` is modified
        in place and nothing is returned.

        Args:
            output_feature: Mutable definition mapping. Must contain a
                ``LOSS`` mapping with ``'class_weights'``,
                ``'class_distance_temperature'`` and ``'type'``.
            feature_metadata: Mapping holding ``'vocab_size'`` and
                ``'max_sequence_length'``; ``'distances'`` is read when the
                temperature is positive, ``'str2freq'`` and ``'idx2str'``
                for the sampled softmax loss.
            *args: Ignored.
            **kwargs: Ignored.

        Raises:
            ValueError: If the padded class weights do not contain exactly
                ``num_classes`` entries, or if the temperature is positive
                but ``feature_metadata`` has no ``'distances'`` key.
        """
        output_feature['num_classes'] = feature_metadata['vocab_size']
        output_feature['max_sequence_length'] = (
            feature_metadata['max_sequence_length']
        )

        loss = output_feature[LOSS]
        if isinstance(loss['class_weights'], (list, tuple)):
            # [0, 0] for UNK and PAD. list() is required because a tuple
            # cannot be concatenated to a list directly.
            loss['class_weights'] = [0, 0] + list(loss['class_weights'])
            if len(loss['class_weights']) != output_feature['num_classes']:
                raise ValueError(
                    'The length of class_weights ({}) is not compatible with '
                    'the number of classes ({})'.format(
                        len(loss['class_weights']),
                        output_feature['num_classes']
                    )
                )

        temperature = loss['class_distance_temperature']
        if temperature > 0:
            if 'distances' not in feature_metadata:
                raise ValueError(
                    'No class distance metadata available '
                    'for feature {}'.format(output_feature['name'])
                )
            distances = feature_metadata['distances']
            # Rows are overwritten in place; the [i, :] indexing assumes a
            # 2-D array-like (e.g. a numpy array).
            for i in range(len(distances)):
                distances[i, :] = softmax(
                    distances[i, :],
                    temperature=temperature
                )
            loss['distances'] = distances

        if loss['type'] == 'sampled_softmax_cross_entropy':
            str2freq = feature_metadata['str2freq']
            # A real list with one count per class, ordered as in idx2str
            # (not a list wrapping a single generator object).
            loss['class_counts'] = [
                str2freq[cls] for cls in feature_metadata['idx2str']
            ]
Beispiel #4
0
    def update_config_with_metadata(output_feature, feature_metadata, *args,
                                    **kwargs):
        """Complete and validate an output feature's config using metadata.

        Sets ``num_classes`` from the metadata, caps ``top_k`` at the number
        of classes, validates the class weights (converting a dict of
        weights into a list ordered like ``idx2str``), validates and
        row-wise softmaxes the class similarities when a positive
        temperature is configured, and attaches per-class counts when the
        loss type is ``'sampled_softmax_cross_entropy'``. ``output_feature``
        is modified in place and nothing is returned.

        Args:
            output_feature: Mutable config mapping. Must contain ``'top_k'``
                and a ``LOSS`` mapping with ``'class_weights'``,
                ``'class_similarities_temperature'`` and ``TYPE``.
            feature_metadata: Mapping holding ``'vocab_size'``; ``'str2idx'``
                and ``'idx2str'`` are read for dict class weights,
                ``'str2freq'`` and ``'idx2str'`` for the sampled softmax
                loss.
            *args: Ignored.
            **kwargs: Ignored.

        Raises:
            ValueError: If list/tuple class weights do not have
                ``num_classes`` entries, if dict class weights do not have
                exactly the keys of ``str2idx``, if the temperature is
                positive but no ``'class_similarities'`` are present in the
                loss config, or if the similarities are not a square matrix
                of size ``num_classes``.
        """
        output_feature['num_classes'] = feature_metadata['vocab_size']
        output_feature['top_k'] = min(output_feature['num_classes'],
                                      output_feature['top_k'])
        num_classes = output_feature['num_classes']

        loss = output_feature[LOSS]
        class_weights = loss['class_weights']

        if isinstance(class_weights, (list, tuple)):
            if len(class_weights) != num_classes:
                raise ValueError(
                    'The length of class_weights ({}) is not compatible with '
                    'the number of classes ({}) for feature {}. '
                    'Check the metadata JSON file to see the classes '
                    'and their order and consider there needs to be a weight '
                    'for the <UNK> class too.'.format(
                        len(class_weights), num_classes,
                        output_feature[COLUMN]))

        if isinstance(class_weights, dict):
            if feature_metadata['str2idx'].keys() != class_weights.keys():
                raise ValueError(
                    'The class_weights keys ({}) are not compatible with '
                    'the classes ({}) of feature {}. '
                    'Check the metadata JSON file to see the classes '
                    'and consider there needs to be a weight '
                    'for the <UNK> class too.'.format(
                        class_weights.keys(),
                        feature_metadata['str2idx'].keys(),
                        output_feature[COLUMN]))
            # Flatten the mapping into a list ordered like idx2str.
            loss['class_weights'] = [
                class_weights[s] for s in feature_metadata['idx2str']
            ]

        temperature = loss['class_similarities_temperature']
        if temperature > 0:
            if 'class_similarities' not in loss:
                raise ValueError('class_similarities_temperature > 0, '
                                 'but no class_similarities are provided '
                                 'for feature {}'.format(
                                     output_feature[COLUMN]))
            similarities = loss['class_similarities']

            # Every row must be as long as the first one; an empty matrix
            # leaves the row length at 0.
            all_rows_length = 0
            for row_index, row in enumerate(similarities):
                if row_index == 0:
                    all_rows_length = len(row)
                elif len(row) != all_rows_length:
                    raise ValueError(
                        'The length of row {} of the class_similarities '
                        'of {} is {}, different from the length of '
                        'the first row {}. All rows must have '
                        'the same length.'.format(
                            row_index, output_feature[COLUMN],
                            len(row), all_rows_length))

            if all_rows_length != len(similarities):
                raise ValueError('The class_similarities matrix of {} has '
                                 '{} rows and {} columns, '
                                 'their number must be identical.'.format(
                                     output_feature[COLUMN],
                                     len(similarities), all_rows_length))

            if all_rows_length != num_classes:
                raise ValueError(
                    'The size of the class_similarities matrix of {} is '
                    '{}, different from the number of classes ({}). '
                    'Check the metadata JSON file to see the classes '
                    'and their order and '
                    'consider <UNK> class too.'.format(
                        output_feature[COLUMN], all_rows_length,
                        num_classes))

            similarities = np.array(similarities, dtype=np.float32)
            for i in range(len(similarities)):
                similarities[i, :] = softmax(similarities[i, :],
                                             temperature=temperature)

            loss['class_similarities'] = similarities

        if loss[TYPE] == 'sampled_softmax_cross_entropy':
            str2freq = feature_metadata['str2freq']
            # One count per class, ordered as in idx2str.
            loss['class_counts'] = [
                str2freq[cls] for cls in feature_metadata['idx2str']
            ]
Beispiel #5
0
    def update_config_with_metadata(output_feature, feature_metadata, *args,
                                    **kwargs):
        """Complete and validate an output feature's config using metadata.

        Copies ``vocab_size`` and ``max_sequence_length`` from the metadata
        into ``output_feature``, checks the length of list/tuple class
        weights, and, when the similarity temperature is positive, checks
        that the class similarities form a square matrix of size
        ``vocab_size`` before replacing them with a float32 array whose rows
        have been passed through ``softmax``. ``output_feature`` is modified
        in place and nothing is returned.

        Args:
            output_feature: Mutable config mapping with a ``LOSS`` mapping
                containing ``"class_weights"`` and
                ``"class_similarities_temperature"``.
            feature_metadata: Mapping holding ``"vocab_size"`` and
                ``"max_sequence_length"``.
            *args: Ignored.
            **kwargs: Ignored.

        Raises:
            ValueError: On a class-weight length mismatch, on a positive
                temperature without ``"class_similarities"`` in the loss
                config, or on a ragged, non-square or wrongly sized
                similarity matrix.
        """
        vocab_size = feature_metadata["vocab_size"]
        output_feature["vocab_size"] = vocab_size
        output_feature["max_sequence_length"] = feature_metadata[
            "max_sequence_length"]

        loss_config = output_feature[LOSS]
        weights = loss_config["class_weights"]
        if isinstance(weights, (list, tuple)) and len(weights) != vocab_size:
            raise ValueError(
                "The length of class_weights ({}) is not compatible with "
                "the number of classes ({}) for feature {}. "
                "Check the metadata JSON file to see the classes "
                "and their order and consider there needs to be a weight "
                "for the <UNK> and <PAD> class too.".format(
                    len(weights),
                    vocab_size,
                    output_feature[COLUMN],
                ))

        temperature = loss_config["class_similarities_temperature"]
        if not temperature > 0:
            # Similarities are only processed for a positive temperature.
            return

        if "class_similarities" not in loss_config:
            raise ValueError("class_similarities_temperature > 0, "
                             "but no class_similarities are provided "
                             "for feature {}".format(
                                 output_feature[COLUMN]))

        matrix = loss_config["class_similarities"]

        # The first row fixes the expected width; later rows must match it.
        width = 0
        for position, row in enumerate(matrix):
            if position == 0:
                width = len(row)
                continue
            row_width = len(row)
            if row_width != width:
                raise ValueError(
                    "The length of row {} of the class_similarities "
                    "of {} is {}, different from the length of "
                    "the first row {}. All rows must have "
                    "the same length.".format(
                        position, output_feature[COLUMN],
                        row_width, width))

        height = len(matrix)
        if width != height:
            raise ValueError("The class_similarities matrix of {} has "
                             "{} rows and {} columns, "
                             "their number must be identical.".format(
                                 output_feature[COLUMN],
                                 height, width))

        if width != vocab_size:
            raise ValueError(
                "The size of the class_similarities matrix of {} is "
                "{}, different from the number of classes ({}). "
                "Check the metadata JSON file to see the classes "
                "and their order and "
                "consider <UNK> and <PAD> class too.".format(
                    output_feature[COLUMN], width,
                    vocab_size))

        normalized = np.array(matrix, dtype=np.float32)
        for position in range(len(normalized)):
            normalized[position, :] = softmax(normalized[position, :],
                                              temperature=temperature)
        loss_config["class_similarities"] = normalized
Beispiel #6
0
 def _predictions_training(self, inputs, training=None):  # not executed
     """Build the prediction outputs from logits.

     ``inputs`` are treated as logits: ``softmax`` yields the probabilities
     and ``tf.argmax`` the predictions. ``training`` is accepted but unused.

     Returns:
         A dict mapping ``PREDICTIONS`` to the argmax result and
         ``PROBABILITIES`` to the softmax result.
     """
     logits = inputs
     probabilities = softmax(logits)
     # NOTE(review): tf.argmax is called without an explicit axis - confirm
     # the default reduces over the intended dimension.
     predictions = tf.argmax(logits)
     outputs = {}
     outputs[PREDICTIONS] = predictions
     outputs[PROBABILITIES] = probabilities
     return outputs