Python read_label_file Examples, changed_scripts.dataset_utils.read_label_file Python Examples

Example #1

0

Show file

        possible_channels = int(
            graph.get_tensor_by_name("import/" + layer + ':0').get_shape()[-1])

        if channel == 0:
            print(
                "You are going to show all %s channels. This may take a while"
                % (possible_channels))
            for channel_i in range(possible_channels):
                used_method(T(used_layer)[:, :, :, channel_i],
                            filename=filename)
        elif channel > 0:
            print(used_layer, channel)
            used_method(T(used_layer)[:, :, :, channel - 1], filename=filename)
        else:
            print(
                "You are going to show %s of %s possible channels. This may take a while"
                % (possible_channels))
            no_channels = math.fabs(channel)
            for i in range(no_channels):
                channel_i = randint(0, possible_channels)
                used_method(T(used_layer)[:, :, :, channel_i],
                            filename=filename)

    elif opts.adjust_most_accurate == "adjust":
        layer = 'my_fc_1/BiasAdd'

        most_acc = dataset_utils.read_label_file("mydata/labels",
                                                 "most_accurate_images.txt")

        adjust_most_accurate(most_acc)

Example #2

0

Show file

File: layerwise_generalization.py Project: ynalcakan/FlowerIdentification

import my_resnet_preprocessing
import changed_scripts.dataset_utils

from tensorflow.contrib.framework.python.ops import variables

##########################################################################
##########################################################################

# Directory of the joined dataset (relative to the working directory).
flowers_data_dir = 'mydata/joinedDataset'

# Size of each label level -- presumably the number of distinct classes per
# taxonomy rank / organ type; verify against the dataset before relying on it.
num_families = 124
num_genus = 516
num_species = 1000
num_organs = 7

# Label dictionary shipped with the PlantClef 2015 training data.
labels_dict = dataset_utils.read_label_file(
    "mydata/PlantClefTraining2015", "labels.txt")

# Lookup tables read from mydata/labels; one "class id -> name" file and one
# "name -> one-hot index" file per taxonomy level (judging by the file names).
class_id_to_family = my_functions.my_read_label_file(
    "mydata/labels", "class_id_to_family.txt")
family_to_one_hot = my_functions.my_read_label_file(
    "mydata/labels", "family_one_hot.txt")
class_id_to_genus = my_functions.my_read_label_file(
    "mydata/labels", "class_id_to_genus.txt")
genus_to_one_hot = my_functions.my_read_label_file(
    "mydata/labels", "genus_one_hot.txt")
class_id_to_species = my_functions.my_read_label_file(
    "mydata/labels", "class_id_to_species.txt")
species_to_one_hot = my_functions.my_read_label_file(
    "mydata/labels", "species_one_hot.txt")


##########################################################################
##########################################################################

def load_batch_intermediate(dataset, batch_size=28, height=224, width=224, is_training=False): 
    """Loads a single batch of data.
    
    Args:

Example #3

0

Show file

File: dataVisualisation.py Project: ynalcakan/FlowerIdentification

def get_split(split_name,
              dataset_dir,
              file_pattern=None,
              reader=None,
              label_type="multiple"):
    """Gets a dataset tuple with instructions for reading flowers.

    Args:
      split_name: A train/validation split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the
        split name can be inserted.
      reader: The TensorFlow reader type.
      label_type: 'multiple' to decode four labels per image (species, genus,
        family, organ) or 'one' to decode a single label.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/validation split, or
        if `label_type` is neither 'multiple' nor 'one'.
    """
    if split_name not in SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    # (decoded item name, TFExample feature key) for every label to decode.
    if label_type == "multiple":
        label_items = [
            ('label_species', 'image/class/label_species'),
            ('label_genus', 'image/class/label_genus'),
            ('label_family', 'image/class/label_family'),
            ('label_organ', 'image/class/label_organ'),
        ]
    elif label_type == "one":
        label_items = [('label', 'image/class/label')]
    else:
        # Without this guard the feature dictionaries below would never be
        # bound and the function would fail with an UnboundLocalError.
        raise ValueError(
            "label_type %r was not recognized; expected 'multiple' or 'one'."
            % (label_type,))

    if not file_pattern:
        file_pattern = _FILE_PATTERN
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    # The image features are shared by both label layouts.
    keys_to_features = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='jpg'),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image(),
    }

    # Every label is a scalar int64 feature that defaults to 0.
    for item_name, feature_key in label_items:
        keys_to_features[feature_key] = tf.FixedLenFeature(
            [], tf.int64, default_value=tf.zeros([], dtype=tf.int64))
        items_to_handlers[item_name] = slim.tfexample_decoder.Tensor(
            feature_key)

    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features,
                                                      items_to_handlers)

    # A label file is optional; without one the dataset carries no names.
    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(data_sources=file_pattern,
                                reader=reader,
                                decoder=decoder,
                                num_samples=SPLITS_TO_SIZES[split_name],
                                items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
                                num_classes=_NUM_CLASSES,
                                labels_to_names=labels_to_names)

Example #4

0

Show file

File: train_my_network.py Project: ynalcakan/FlowerIdentification

def final_evaluation_generic(label_dir, dataset_dir, checkpoint_paths, preprocessing_methods, filename="predictions.txt"):
    """
    Evaluates one or more CNN checkpoints on the test set and appends the
    predictions in the form <ImageId;ClassId;Probability> to a txt-file.

    Every checkpoint is paired with its own preprocessing method. The
    per-class outputs of all models are averaged before they are written.

    Args:
        label_dir: Directory where labels dictionary can be found (mapping from one-hot encodings to class_id)
        dataset_dir: Directory where test dataset can be found
        checkpoint_paths: checkpoints of the used models
        preprocessing_methods: corresponding preprocessing methods for the models
        filename: filename of txt-file to save predictions (opened in append mode)

    Returns:
        None. Saves the predictions into "filename".
    """
    # Number of test images that are evaluated per model (hard-coded).
    number_images = 8000

    output_list = []
    labels_list = []

    for checkpoint, preprocessing_method in zip(checkpoint_paths, preprocessing_methods):
        # Each model gets a fresh graph so restored variables do not clash.
        with tf.Graph().as_default():
            dataset = dataVisualisation.get_split('test_set', dataset_dir, label_type="one")
            data_provider = slim.dataset_data_provider.DatasetDataProvider(dataset,
                                                                           shuffle=False,
                                                                           common_queue_capacity=8000,
                                                                           common_queue_min=0)

            image_raw, label = data_provider.get(['image', 'label'])

            image = preprocessing_method(image_raw, 224, 224)

            images, labels = tf.train.batch([image, label],
                                            batch_size=1,
                                            num_threads=1,
                                            capacity=2 * 1)

            logits1 = resNetClassifier.my_cnn(images, is_training=False, dropout_rate=1)

            total_output = np.empty([number_images * 1, dataset.num_classes])
            total_labels = np.empty([number_images * 1], dtype=np.int32)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                saver = tf.train.Saver()
                saver.restore(sess, checkpoint)

                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    for offset in range(number_images):
                        logit1, media_id = sess.run([logits1, labels])

                        # batch_size is 1, so the first entry is the only one.
                        media_id = media_id[0]

                        # Passing logits through softmax function to receive "probabilities"
                        probabilities = my_functions.numpy_softmax(logit1)

                        total_output[offset:offset + 1] = probabilities
                        total_labels[offset:offset + 1] = media_id
                finally:
                    # Always shut the input threads down, even when a step fails.
                    coord.request_stop()
                    coord.join(threads)

            output_list.append(total_output)
            labels_list.append(total_labels)

    # Read the index -> class id mapping once instead of once per written line.
    class_ids = dataset_utils.read_label_file(label_dir)

    with tf.gfile.Open(filename, 'a') as f:
        for i in range(number_images):
            # The image ids are taken from the first model's run.
            image_id = labels_list[0][i]

            for p in range(1000):
                predictions = [model_output[i][p] for model_output in output_list]

                # Average over all models.
                probability = np.sum(predictions) / len(predictions)

                class_id = class_ids[p]
                f.write('%s;%s;%f\n' % (image_id, class_id, probability))  # <ImageId;ClassId;Probability>

Example #5

0

Show file

File: train_my_network.py Project: ynalcakan/FlowerIdentification

def final_evaluation(label_dir, dataset_dir, filename="predictions.txt", visualize_kernel=False):
    """
    Evaluates three CNN checkpoints on the test set and appends the
    predictions in the form <ImageId;ClassId;Probability> to a txt-file.

    For every image each model scores six preprocessed views; the six logit
    vectors are averaged and passed through softmax. Across the models the
    maximum value per class is written.

    Args:
        label_dir: Directory where labels dictionary can be found (mapping from one-hot encodings to class_id)
        dataset_dir: Directory where test dataset can be found
        filename: filename of txt-file to save predictions (opened in append mode)
        visualize_kernel: Do you want to visualize the first layer of convolutions?

    Returns:
        None. Saves the predictions into "filename".
    """
    # Number of test images that are evaluated per model (hard-coded).
    number_images = 8000

    # Choose the three networks to evaluate on
    checkpoint_paths = ["mydata/resnet_finetuned_plantclef2015_5/model.ckpt-150000",
                        "mydata/resnet_finetuned_plantclef2015_6/model.ckpt-150000",
                        "mydata/resnet_finetuned_plantclef2015_7/model.ckpt-102500"]

    output_list = []
    labels_list = []

    for index, checkpoint_path in enumerate(checkpoint_paths):
        # Each model gets a fresh graph so restored variables do not clash.
        with tf.Graph().as_default():
            dataset = dataVisualisation.get_split('test_set', dataset_dir, label_type="one")

            data_provider = slim.dataset_data_provider.DatasetDataProvider(dataset,
                                                                           shuffle=False,
                                                                           common_queue_capacity=8000,
                                                                           common_queue_min=0)

            image_raw, label = data_provider.get(['image', 'label'])

            # Preprocessing return original image, center_crop and 4 corner crops with adjusted color values
            views = list(my_resnet_preprocessing.preprocess_for_final_run2(image_raw, 224, 224))

            batched = tf.train.batch(views + [label],
                                     batch_size=1,
                                     num_threads=1,
                                     capacity=2 * 1)
            batched_views, labels = batched[:-1], batched[-1]

            # One forward pass per view, built in the same order as the views.
            view_logits = [resNetClassifier.my_cnn(view, is_training=False, dropout_rate=1)
                           for view in batched_views]

            total_output = np.empty([number_images * 1, dataset.num_classes])
            total_labels = np.empty([number_images * 1], dtype=np.int32)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                saver = tf.train.Saver()
                saver.restore(sess, checkpoint_path)

                if visualize_kernel:
                    # NOTE(review): `merged` and `visualize_writer` are not
                    # defined in this function -- presumably module-level
                    # names; confirm before enabling this branch.
                    tf.get_variable_scope().reuse_variables()

                    weights = tf.get_variable("resnet_v2_50/conv1/weights")
                    print(weights.get_shape()[0].value, weights.get_shape()[1].value, weights.get_shape()[2].value, weights.get_shape()[3].value)

                    weights = tf.slice(weights, [0, 0, 0, 1], [weights.get_shape()[0].value, weights.get_shape()[1].value, weights.get_shape()[2].value, 2])

                    grid = kernel_visualization.put_kernels_on_grid(weights)

                    sum1 = tf.summary.image('conv1/kernels', grid, max_outputs=1)
                    _, summary1, img = sess.run([merged, sum1, tf.squeeze(grid)])
                    visualize_writer.add_summary(summary1, 2)
                    fig = plt.figure()
                    plt.imshow(img)
                    plt.savefig("images/kernelsOne_%s.png" % (index))
                    plt.close(fig)

                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    for offset in range(number_images):
                        fetched = sess.run(view_logits + [labels])

                        # batch_size is 1, so the first entry is the only one.
                        media_id = fetched[-1][0]

                        # Use Average for voting of logits
                        summed = fetched[0][0]
                        for one_view in fetched[1:-1]:
                            summed = summed + one_view[0]
                        logits = [x / len(view_logits) for x in summed]

                        # Passing logits through softmax function to receive "probabilities"
                        logits = my_functions.numpy_softmax(logits)

                        total_output[offset:offset + 1] = logits
                        total_labels[offset:offset + 1] = media_id
                finally:
                    # Always shut the input threads down, even when a step fails.
                    coord.request_stop()
                    coord.join(threads)

            output_list.append(total_output)
            labels_list.append(total_labels)

    prediction_filename = filename

    # Read the index -> class id mapping once and keep the output file open,
    # instead of re-reading / re-opening for each of the written lines.
    class_ids = dataset_utils.read_label_file(label_dir)

    with tf.gfile.Open(prediction_filename, 'a') as f:
        for i in range(number_images):
            # The image ids are taken from the first model's run.
            image_id = labels_list[0][i]

            for p in range(1000):
                # Across the models the highest value per class wins.
                probability = np.amax([model_output[i][p] for model_output in output_list])

                class_id = class_ids[p]
                f.write('%s;%s;%f\n' % (image_id, class_id, probability))  # <ImageId;ClassId;Probability>

Example #6

0

Show file

def find_most_accurate(
        label_directory="mydata/PlantClefTraining2015",
        dataset_dir="mydata/train/",
        model_directory="mydata/resnet_finetuned_plantclef2015_2/model.ckpt-150000",
        dict_directory="mydata/labels",
        dict_name="most_accurate_images.txt"):
    """
    Finds, per class, the correctly classified image in `dataset_dir` with
    the highest output value.

    Every ".jpg" file in `dataset_dir` is expected to have an ".xml" file of
    the same base name next to it, from which the true class is read.

    Args:
        label_directory: Where to find the dictionary mapping from class_id to one-hot-labels
        dataset_dir: where to find the images
        model_directory: where to find your models checkpoints
        dict_directory: where to save the images with the highest activation
        dict_name: name of the txt-file of the dictionary containing images with the highest activation
    Returns:
        best_act: dictionary mapping a label index to
            [highest output value, image filename]

    """
    best_act = {}

    # Invert the label file so that a class id maps to its label index.
    label_dict = {
        int(v): k
        for k, v in dataset_utils.read_label_file(label_directory).items()
    }

    with tf.Graph().as_default():

        X = tf.placeholder(tf.float32)

        image_pre = my_resnet_preprocessing.preprocess_image(
            X, 224, 224, False)
        image_pre = tf.reshape(image_pre, shape=[-1, 224, 224, 3])
        image_pre = tf.to_float(image_pre)

        logits = resNetClassifier.my_cnn(image_pre,
                                         is_training=False,
                                         dropout_rate=1.0,
                                         layer=None)

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            saver = tf.train.Saver()
            saver.restore(sess, model_directory)

            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                # Honour the caller's dataset_dir (it used to be overwritten
                # with a hard-coded directory at this point).
                for filename in os.listdir(dataset_dir):
                    if not filename.endswith(".jpg"):
                        continue

                    filepath = os.path.join(dataset_dir, filename)
                    im = my_functions.get_img(filepath).astype(np.float32)

                    layer_act = sess.run([logits], feed_dict={X: im})
                    layer_act = my_functions.numpy_softmax(layer_act)

                    # The annotation lives next to the image: <name>.xml
                    xml_path = os.path.splitext(filepath)[0] + ".xml"
                    label, _, _, _, _, _ = plantclef_download_and_convert_data.get_class_name_from_xml(
                        xml_path)
                    label = label_dict[label]

                    # Only correctly classified images are candidates.
                    if np.argmax(layer_act) == label:
                        top_value = np.amax(layer_act)
                        best_act.setdefault(label, [-1, "test.jpg"])

                        if best_act[label][0] < top_value:
                            best_act[label] = [top_value, filename]
            finally:
                # Always shut the started threads down, even on failure.
                coord.request_stop()
                coord.join(threads)

    dataset_utils.write_label_file(best_act,
                                   dict_directory,
                                   filename=dict_name)
    return best_act

Example #7

0

Show file

File: plantclef_download_and_convert_data.py Project: ynalcakan/FlowerIdentification

def _plantclef_dicts(dataset_dir):
    """Builds the label lookup dictionaries and writes them to disk.

    Every ".xml" file in `<dataset_dir>/train` is parsed with
    `get_class_name_from_xml`; the resulting tables are written below
    `dataset_dir` as "labels/<name>.txt".

    Args:
        dataset_dir: Dataset root; the annotations are read from its 'train'
                    subdirectory.
    Returns:
        None

    Saves:
        class_id_to_family: class_id -> family name
        class_id_to_species: class_id -> species name
        class_id_to_genus: class_id -> genus name

        family_to_hot: family name -> list of label indices (one entry per
                    parsed xml file)
        species_to_hot: species name -> list of label indices
        genus_to_hot: genus name -> list of label indices

        family_one_hot: family name -> running index
        species_one_hot: species name -> running index
        genus_one_hot: genus name -> running index

    """
    flower_root = os.path.join(dataset_dir, 'train')

    class_id_to_family = {}
    class_id_to_species = {}
    class_id_to_genus = {}

    family_to_hot = {}
    species_to_hot = {}
    genus_to_hot = {}

    # Invert the label file so that a class id maps to its label index.
    labels = dataset_utils.read_label_file("mydata/PlantClefTraining2015/")
    reverse_labels = {int(class_id): idx for idx, class_id in labels.items()}

    for entry in os.listdir(flower_root):
        path = os.path.join(flower_root, entry)
        if not path.endswith(".xml"):
            continue

        class_id, family, species, genus, _, _ = get_class_name_from_xml(path)

        class_id_to_family[class_id] = family
        class_id_to_species[class_id] = species
        class_id_to_genus[class_id] = genus

        class_id_vector = reverse_labels[class_id]

        # Group the label index under each of its taxonomy names.
        family_to_hot.setdefault(family, []).append(class_id_vector)
        genus_to_hot.setdefault(genus, []).append(class_id_vector)
        species_to_hot.setdefault(species, []).append(class_id_vector)

    # Number the names in the order in which they were first seen.
    family_one_hot = {name: idx for idx, name in enumerate(family_to_hot)}
    species_one_hot = {name: idx for idx, name in enumerate(species_to_hot)}
    genus_one_hot = {name: idx for idx, name in enumerate(genus_to_hot)}

    outputs = [
        (class_id_to_family, "labels/class_id_to_family.txt"),
        (class_id_to_species, "labels/class_id_to_species.txt"),
        (class_id_to_genus, "labels/class_id_to_genus.txt"),
        (family_to_hot, "labels/family_to_hot.txt"),
        (species_to_hot, "labels/species_to_hot.txt"),
        (genus_to_hot, "labels/genus_to_hot.txt"),
        (family_one_hot, "labels/family_one_hot.txt"),
        (species_one_hot, "labels/species_one_hot.txt"),
        (genus_one_hot, "labels/genus_one_hot.txt"),
    ]
    for table, target in outputs:
        dataset_utils.write_label_file(table, dataset_dir, filename=target)