def update_config_with_metadata(output_feature, feature_metadata, *args, **kwargs):
    """Copy level-specific preprocessing metadata into an output feature config.

    Sets ``vocab_size`` and ``max_sequence_length`` from the metadata keys for
    the feature's ``level`` (e.g. ``word_vocab_size``), pads list-form class
    weights with zeros for the <UNK> and <PAD> symbols, validates their length,
    and applies a temperature softmax to each row of user-provided class
    similarities.

    :param output_feature: output feature config dict (mutated in place).
    :param feature_metadata: metadata dict produced during preprocessing.
    :raises ValueError: if the padded class_weights length does not match the
        vocabulary size, or if class_similarities_temperature > 0 but no
        class similarities were provided.
    """
    level = output_feature["level"]
    output_feature["vocab_size"] = feature_metadata[
        "{}_vocab_size".format(level)]
    output_feature["max_sequence_length"] = feature_metadata[
        "{}_max_sequence_length".format(level)]

    if isinstance(output_feature[LOSS]["class_weights"], (list, tuple)):
        # Prepend [0, 0] so the <UNK> and <PAD> symbols carry no weight.
        output_feature[LOSS]["class_weights"] = (
            [0, 0] + output_feature[LOSS]["class_weights"])
        if (len(output_feature[LOSS]["class_weights"]) !=
                output_feature["vocab_size"]):
            raise ValueError(
                "The length of class_weights ({}) is not compatible with "
                "the number of classes ({})".format(
                    len(output_feature[LOSS]["class_weights"]),
                    output_feature["vocab_size"]))

    if output_feature[LOSS]["class_similarities_temperature"] > 0:
        # NOTE(review): similarities are read from the feature level but
        # written back under LOSS — presumably intentional; confirm.
        if "class_similarities" in output_feature:
            distances = output_feature["class_similarities"]
            temperature = output_feature[LOSS][
                "class_similarities_temperature"]
            for i in range(len(distances)):
                distances[i, :] = softmax(distances[i, :],
                                          temperature=temperature)
            output_feature[LOSS]["class_similarities"] = distances
        else:
            # Fixed: original message lacked a space after "0," and
            # rendered as "> 0,but no class similarities…".
            raise ValueError("class_similarities_temperature > 0, "
                             "but no class similarities are provided "
                             "for feature {}".format(
                                 output_feature[COLUMN]))
def update_config_with_metadata(
        output_feature,
        feature_metadata,
        *args,
        **kwargs
):
    """Copy level-specific preprocessing metadata into an output feature config.

    Sets ``num_classes`` and ``max_sequence_length`` from the metadata keys
    for the feature's ``level``, pads list-form class weights with zeros for
    the <UNK> and <PAD> symbols, validates their length, applies a temperature
    softmax to user-provided class similarities, and precomputes per-class
    counts when the loss is sampled softmax cross-entropy.

    :param output_feature: output feature config dict (mutated in place).
    :param feature_metadata: metadata dict produced during preprocessing.
    :raises ValueError: if the padded class_weights length does not match
        num_classes, or if class_similarities_temperature > 0 but no class
        similarities were provided.
    """
    level = output_feature['level']
    output_feature['num_classes'] = feature_metadata[
        '{}_vocab_size'.format(level)]
    output_feature['max_sequence_length'] = feature_metadata[
        '{}_max_sequence_length'.format(level)]

    if isinstance(output_feature[LOSS]['class_weights'], (list, tuple)):
        # Prepend [0, 0] so the <UNK> and <PAD> symbols carry no weight.
        output_feature[LOSS]['class_weights'] = (
            [0, 0] + output_feature[LOSS]['class_weights'])
        if (len(output_feature[LOSS]['class_weights']) !=
                output_feature['num_classes']):
            raise ValueError(
                'The length of class_weights ({}) is not compatible with '
                'the number of classes ({})'.format(
                    len(output_feature[LOSS]['class_weights']),
                    output_feature['num_classes']))

    if output_feature[LOSS]['class_similarities_temperature'] > 0:
        # NOTE(review): similarities are read from the feature level but
        # written back under LOSS — presumably intentional; confirm.
        if 'class_similarities' in output_feature:
            distances = output_feature['class_similarities']
            temperature = output_feature[LOSS][
                'class_similarities_temperature']
            for i in range(len(distances)):
                distances[i, :] = softmax(distances[i, :],
                                          temperature=temperature)
            output_feature[LOSS]['class_similarities'] = distances
        else:
            # Fixed: original message lacked a space after "0," and
            # rendered as "> 0,but no class similarities…".
            raise ValueError(
                'class_similarities_temperature > 0, '
                'but no class similarities are provided '
                'for feature {}'.format(output_feature[NAME])
            )

    if output_feature[LOSS][TYPE] == 'sampled_softmax_cross_entropy':
        level_str2freq = '{}_str2freq'.format(output_feature['level'])
        level_idx2str = '{}_idx2str'.format(output_feature['level'])
        # Class frequencies in idx2str order, consumed by the sampler.
        output_feature[LOSS]['class_counts'] = [
            feature_metadata[level_str2freq][cls]
            for cls in feature_metadata[level_idx2str]
        ]
def update_model_definition_with_metadata(
        output_feature,
        feature_metadata,
        *args,
        **kwargs
):
    """Copy preprocessing metadata into an output feature definition.

    Sets ``num_classes`` and ``max_sequence_length`` from the metadata, pads
    list-form class weights with zeros for the <UNK> and <PAD> symbols,
    validates their length, applies a temperature softmax to each row of the
    class-distance matrix, and precomputes per-class counts when the loss is
    sampled softmax cross-entropy.

    :param output_feature: output feature definition dict (mutated in place).
    :param feature_metadata: metadata dict produced during preprocessing.
    :raises ValueError: if the padded class_weights length does not match
        num_classes, or if class_distance_temperature > 0 but the metadata
        has no distances.
    """
    output_feature['num_classes'] = feature_metadata['vocab_size']
    output_feature['max_sequence_length'] = (
        feature_metadata['max_sequence_length']
    )

    if isinstance(output_feature[LOSS]['class_weights'], (list, tuple)):
        # Prepend [0, 0] so the <UNK> and <PAD> symbols carry no weight.
        output_feature[LOSS]['class_weights'] = (
            [0, 0] + output_feature[LOSS]['class_weights']
        )
        if (len(output_feature[LOSS]['class_weights']) !=
                output_feature['num_classes']):
            raise ValueError(
                'The length of class_weights ({}) is not compatible with '
                'the number of classes ({})'.format(
                    len(output_feature[LOSS]['class_weights']),
                    output_feature['num_classes']))

    if output_feature[LOSS]['class_distance_temperature'] > 0:
        if 'distances' in feature_metadata:
            distances = feature_metadata['distances']
            temperature = output_feature[LOSS]['class_distance_temperature']
            for i in range(len(distances)):
                distances[i, :] = softmax(distances[i, :],
                                          temperature=temperature)
            output_feature[LOSS]['distances'] = distances
        else:
            raise ValueError(
                'No class distance metadata available '
                'for feature {}'.format(output_feature['name']))

    if output_feature[LOSS]['type'] == 'sampled_softmax_cross_entropy':
        # BUG FIX: the original wrapped a generator expression in a list
        # literal — `[(... for ...)]` — producing a one-element list that
        # contains a generator object instead of the list of class counts.
        output_feature[LOSS]['class_counts'] = [
            feature_metadata['str2freq'][cls]
            for cls in feature_metadata['idx2str']
        ]
def update_config_with_metadata(output_feature, feature_metadata, *args, **kwargs):
    """Copy preprocessing metadata into a category output feature config.

    Sets ``num_classes`` from the vocabulary and clamps ``top_k`` to it,
    validates list-form class weights (or converts dict-form weights, keyed
    by class string, into a list following ``idx2str`` order), validates the
    class-similarities matrix and turns each of its rows into a softmax
    distribution at the configured temperature, and precomputes per-class
    counts when the loss is sampled softmax cross-entropy.

    :param output_feature: output feature config dict (mutated in place).
    :param feature_metadata: metadata dict produced during preprocessing.
    :raises ValueError: on any class_weights / class_similarities mismatch
        with the number of classes.
    """
    output_feature['num_classes'] = feature_metadata['vocab_size']
    # top_k cannot exceed the number of classes.
    output_feature['top_k'] = min(output_feature['num_classes'],
                                  output_feature['top_k'])

    if isinstance(output_feature[LOSS]['class_weights'], (list, tuple)):
        if (len(output_feature[LOSS]['class_weights']) !=
                output_feature['num_classes']):
            raise ValueError(
                'The length of class_weights ({}) is not compatible with '
                'the number of classes ({}) for feature {}. '
                'Check the metadata JSON file to see the classes '
                'and their order and consider there needs to be a weight '
                'for the <UNK> class too.'.format(
                    len(output_feature[LOSS]['class_weights']),
                    output_feature['num_classes'],
                    output_feature[COLUMN]))

    if isinstance(output_feature[LOSS]['class_weights'], dict):
        if (feature_metadata['str2idx'].keys() !=
                output_feature[LOSS]['class_weights'].keys()):
            raise ValueError(
                'The class_weights keys ({}) are not compatible with '
                'the classes ({}) of feature {}. '
                'Check the metadata JSON file to see the classes '
                'and consider there needs to be a weight '
                'for the <UNK> class too.'.format(
                    output_feature[LOSS]['class_weights'].keys(),
                    feature_metadata['str2idx'].keys(),
                    output_feature[COLUMN]))
        else:
            # Reorder the dict weights into a list following idx2str order.
            class_weights = output_feature[LOSS]['class_weights']
            idx2str = feature_metadata['idx2str']
            class_weights_list = [class_weights[s] for s in idx2str]
            output_feature[LOSS]['class_weights'] = class_weights_list

    if output_feature[LOSS]['class_similarities_temperature'] > 0:
        if 'class_similarities' in output_feature[LOSS]:
            similarities = output_feature[LOSS]['class_similarities']
            temperature = output_feature[LOSS][
                'class_similarities_temperature']

            # The matrix must be rectangular: every row as long as the first.
            first_row_length = 0
            for curr_row, row in enumerate(similarities):
                if curr_row == 0:
                    first_row_length = len(row)
                elif len(row) != first_row_length:
                    raise ValueError(
                        'The length of row {} of the class_similarities '
                        'of {} is {}, different from the length of '
                        'the first row {}. All rows must have '
                        'the same length.'.format(
                            curr_row, output_feature[COLUMN],
                            len(row), first_row_length))

            # It must also be square ...
            all_rows_length = first_row_length
            if all_rows_length != len(similarities):
                raise ValueError('The class_similarities matrix of {} has '
                                 '{} rows and {} columns, '
                                 'their number must be identical.'.format(
                                     output_feature[COLUMN],
                                     len(similarities),
                                     all_rows_length))

            # ... and its size must match the number of classes.
            # Fixed: original message had the typo "number of classe".
            if all_rows_length != output_feature['num_classes']:
                raise ValueError(
                    'The size of the class_similarities matrix of {} is '
                    '{}, different from the number of classes ({}). '
                    'Check the metadata JSON file to see the classes '
                    'and their order and '
                    'consider <UNK> class too.'.format(
                        output_feature[COLUMN], all_rows_length,
                        output_feature['num_classes']))

            similarities = np.array(similarities, dtype=np.float32)
            for i in range(len(similarities)):
                similarities[i, :] = softmax(similarities[i, :],
                                             temperature=temperature)
            output_feature[LOSS]['class_similarities'] = similarities
        else:
            raise ValueError('class_similarities_temperature > 0, '
                             'but no class_similarities are provided '
                             'for feature {}'.format(
                                 output_feature[COLUMN]))

    if output_feature[LOSS][TYPE] == 'sampled_softmax_cross_entropy':
        # Class frequencies in idx2str order, consumed by the sampler.
        output_feature[LOSS]['class_counts'] = [
            feature_metadata['str2freq'][cls]
            for cls in feature_metadata['idx2str']
        ]
def update_config_with_metadata(output_feature, feature_metadata, *args, **kwargs):
    """Transfer preprocessing metadata onto an output feature config.

    Copies ``vocab_size`` and ``max_sequence_length`` from the metadata,
    validates list-form class weights against the vocabulary size, and, when
    a similarities temperature is configured, validates the user-provided
    class-similarities matrix (rectangular, square, sized to the vocabulary)
    and converts each of its rows into a softmax distribution.

    :param output_feature: output feature config dict (mutated in place).
    :param feature_metadata: metadata dict produced during preprocessing.
    :raises ValueError: on any class_weights / class_similarities mismatch.
    """
    output_feature["vocab_size"] = feature_metadata["vocab_size"]
    output_feature["max_sequence_length"] = feature_metadata[
        "max_sequence_length"]

    weights = output_feature[LOSS]["class_weights"]
    if isinstance(weights, (list, tuple)):
        if len(weights) != output_feature["vocab_size"]:
            raise ValueError(
                "The length of class_weights ({}) is not compatible with "
                "the number of classes ({}) for feature {}. "
                "Check the metadata JSON file to see the classes "
                "and their order and consider there needs to be a weight "
                "for the <UNK> and <PAD> class too.".format(
                    len(weights),
                    output_feature["vocab_size"],
                    output_feature[COLUMN],
                ))

    if output_feature[LOSS]["class_similarities_temperature"] > 0:
        if "class_similarities" not in output_feature[LOSS]:
            raise ValueError("class_similarities_temperature > 0, "
                             "but no class_similarities are provided "
                             "for feature {}".format(
                                 output_feature[COLUMN]))

        similarities = output_feature[LOSS]["class_similarities"]
        temperature = output_feature[LOSS][
            "class_similarities_temperature"]

        # Rectangularity check: every row must match the first row's length.
        first_row_length = 0
        for row_idx, row in enumerate(similarities):
            if row_idx == 0:
                first_row_length = len(row)
            elif len(row) != first_row_length:
                raise ValueError(
                    "The length of row {} of the class_similarities "
                    "of {} is {}, different from the length of "
                    "the first row {}. All rows must have "
                    "the same length.".format(
                        row_idx, output_feature[COLUMN],
                        len(row), first_row_length))

        # The matrix must be square ...
        all_rows_length = first_row_length
        if all_rows_length != len(similarities):
            raise ValueError("The class_similarities matrix of {} has "
                             "{} rows and {} columns, "
                             "their number must be identical.".format(
                                 output_feature[COLUMN],
                                 len(similarities),
                                 all_rows_length))

        # ... and its size must match the vocabulary.
        if all_rows_length != output_feature["vocab_size"]:
            raise ValueError(
                "The size of the class_similarities matrix of {} is "
                "{}, different from the number of classes ({}). "
                "Check the metadata JSON file to see the classes "
                "and their order and "
                "consider <UNK> and <PAD> class too.".format(
                    output_feature[COLUMN], all_rows_length,
                    output_feature["vocab_size"]))

        similarities = np.array(similarities, dtype=np.float32)
        for row_idx in range(len(similarities)):
            similarities[row_idx, :] = softmax(similarities[row_idx, :],
                                               temperature=temperature)
        output_feature[LOSS]["class_similarities"] = similarities
def _predictions_training(self, inputs, training=None):  # not executed
    """Map logits to a predictions/probabilities dict (currently unused)."""
    # `inputs` are the logits.
    # NOTE(review): tf.argmax is called without an explicit axis — confirm
    # the intended reduction axis if this path is ever enabled.
    return {
        PREDICTIONS: tf.argmax(inputs),
        PROBABILITIES: softmax(inputs),
    }