Beispiel #1
0
def get_random_cached_bottlenecks(image_lists,
                                  how_many,
                                  category,
                                  bottleneck_dir,
                                  image_dir,
                                  bottle_func,
                                  architecture='inception_v3'):
    """Retrieve bottleneck values, label indices and paths for a set of images.

    When ``how_many`` is non-negative, that many (label, image index) pairs
    are drawn independently at random, so the same image may appear more than
    once. When it is negative, every image listed under ``category`` for
    every label is used, in iteration order.

    Args:
        image_lists: Mapping indexed as ``image_lists[label_name][category]``
            to obtain the images of one label.
        how_many: Number of random samples to draw, or a negative number to
            retrieve everything.
        category: Key selecting the image subset of each label.
        bottleneck_dir: Forwarded to ``get_or_create_bottleneck``.
        image_dir: Forwarded to ``get_image_path`` and
            ``get_or_create_bottleneck``.
        bottle_func: Forwarded to ``get_or_create_bottleneck``.
        architecture: Forwarded to ``get_or_create_bottleneck``.

    Returns:
        A tuple ``(bottlenecks, ground_truths, filenames)`` of numpy arrays.
        ``ground_truths`` holds integer label indices (not one-hot vectors).

    Raises:
        ValueError: From ``random.randrange`` if ``how_many`` is positive and
            ``image_lists`` is empty.
    """
    # Snapshot the labels once instead of rebuilding the list per sample.
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    bottlenecks = []
    ground_truths = []
    filenames = []

    def _append_sample(label_index, image_index):
        """Fetch one image's path and bottleneck and record them."""
        label_name = label_names[label_index]
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        # `architecture` must be passed by keyword: the eighth positional
        # parameter of get_or_create_bottleneck is `distorted`, so a
        # positional architecture string would silently switch distortion on
        # and leave the architecture at its default.
        bottleneck = get_or_create_bottleneck(image_lists, label_name,
                                              image_index, image_dir,
                                              category, bottleneck_dir,
                                              bottle_func,
                                              architecture=architecture)
        bottlenecks.append(bottleneck)
        ground_truths.append(label_index)
        filenames.append(image_name)

    if how_many >= 0:
        # Retrieve a random sample of bottlenecks.
        for _ in range(how_many):
            label_index = random.randrange(class_count)
            # NOTE(review): the index is drawn from a fixed range rather than
            # from the label's actual image count; presumably get_image_path
            # maps it onto a valid entry - confirm.
            image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
            _append_sample(label_index, image_index)
    else:
        # Retrieve all bottlenecks.
        for label_index, label_name in enumerate(label_names):
            for image_index, _ in enumerate(
                    image_lists[label_name][category]):
                _append_sample(label_index, image_index)

    return np.array(bottlenecks), np.array(ground_truths), np.array(filenames)
Beispiel #2
0
def get_or_create_bottleneck(image_lists,
                             label_name,
                             image_index,
                             image_dir,
                             category,
                             bottleneck_dir,
                             bottle_func,
                             distorted=False,
                             architecture='inception_v3'):
    """Load an image's cached bottleneck, computing and caching it on a miss.

    The cache file name is the path returned by ``get_image_path`` for
    ``bottleneck_dir`` with ``'_<architecture>.npy'`` appended. If that file
    cannot be loaded, the image is loaded, optionally distorted, passed
    through ``bottle_func``, saved to the cache file and read back.

    Args:
        image_lists: Forwarded to ``get_image_path``.
        label_name: Forwarded to ``get_image_path``.
        image_index: Forwarded to ``get_image_path``.
        image_dir: Root passed to ``get_image_path`` to locate the image.
        category: Forwarded to ``get_image_path``.
        bottleneck_dir: Root passed to ``get_image_path`` to locate the
            cache file.
        bottle_func: Callable applied to the loaded image; its result is
            what gets saved with ``np.save``.
        distorted: If true, a trailing ``_<digits>.jpg`` suffix is stripped
            from the image path to find the source image, and the image is
            run through ``distort_image`` before ``bottle_func``.
        architecture: String embedded in the cache file name.

    Returns:
        The array loaded from the cache file.

    Note:
        The cache file's directory is not created here; ``np.save`` fails if
        it does not exist.
    """
    target_size = (IM_WIDTH, IM_HEIGHT)
    image_file = get_image_path(image_lists, label_name, image_index,
                                image_dir, category)
    bottle_file = get_image_path(
        image_lists, label_name, image_index, bottleneck_dir, category) +\
        '_' + architecture + '.npy'

    try:
        return np.load(bottle_file)
    except (IOError, OSError, ValueError, EOFError) as e:
        # Only a missing or unreadable cache file counts as a miss; other
        # errors are programming mistakes and should surface.
        print('Bottleneck not found, creating bottleneck...\n{}'.format(e))

    if distorted:
        # Anchor the pattern at the end and escape the dot so that only the
        # appended '_<n>.jpg' suffix is removed; otherwise a source image
        # named e.g. 'img_1.jpg' would lose its own '_1.jpg' as well.
        source_file = re.sub(r'_\d+\.jpg$', '', image_file)
        img = image.load_img(source_file, target_size=target_size)
        img = distort_image(img, ROTATION_RANGE,
                            WIDTH_SHIFT_RANGE,
                            BRIGHTNESS_RANGE, SHEAR_RANGE,
                            CHANNEL_SHIFT_RANGE,
                            HORIZONTAL_FLIP)
    else:
        img = image.load_img(image_file, target_size=target_size)

    np.save(bottle_file, bottle_func(img))
    # Read back from disk so a miss returns exactly what a later hit returns.
    return np.load(bottle_file)
def feed_data(image_lists,
              category,
              image_dir,
              generator=False,
              how_many=None):
    """Build model inputs and one-hot labels from images on disk.

    Args:
        image_lists: Mapping indexed as ``image_lists[label_name][category]``
            to obtain the images of one label.
        category: Key selecting the image subset of each label.
        image_dir: Root directory forwarded to ``get_image_path``.
        generator: If false, load every image of ``category`` eagerly. If
            true, return an endless batch iterator instead.
        how_many: Batch size of the iterator; required when ``generator`` is
            true and ignored otherwise.

    Returns:
        With ``generator`` false, a tuple ``(inputs, truths)`` of numpy
        arrays covering every image of ``category``. With ``generator``
        true, an iterator whose ``next()`` returns such a tuple for
        ``how_many`` randomly drawn images (drawn with replacement).

    Raises:
        ValueError: If ``generator`` is true but ``how_many`` is ``None``.
    """
    if generator and how_many is None:
        raise ValueError('how_many must be given when generator=True')

    label_names = list(image_lists.keys())
    class_count = len(label_names)
    target_size = (IM_WIDTH, IM_HEIGHT)

    def load_example(label_index, image_index):
        """Return (preprocessed input, one-hot truth) for a single image."""
        image_path = get_image_path(image_lists, label_names[label_index],
                                    image_index, image_dir, category)
        img = image.load_img(image_path, target_size=target_size)
        one_hot = np.zeros(class_count)
        one_hot[label_index] = 1
        return preprocess_img(img, expand_dim=False), one_hot

    if generator:
        class TrainBatchGen:
            """Endless iterator producing one random batch per ``next()``."""

            def __init__(self, image_lists, category, image_dir, how_many):
                self.image_lists = image_lists
                self.category = category
                self.image_dir = image_dir
                self.how_many = how_many

            def __iter__(self):
                return self

            def __next__(self):
                # Start from empty lists on every call. Appending to lists
                # shared across calls would make each batch contain all
                # earlier batches and grow without bound.
                batch_inputs, batch_truths = [], []
                for _ in range(self.how_many):
                    label_index = random.randrange(class_count)
                    image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS +
                                                   1)
                    inp, truth = load_example(label_index, image_index)
                    batch_inputs.append(inp)
                    batch_truths.append(truth)
                return (np.array(batch_inputs), np.array(batch_truths))

        return TrainBatchGen(image_lists, category, image_dir, how_many)

    inputs, truths = [], []
    for label_index, label_name in enumerate(label_names):
        for image_index, _ in enumerate(image_lists[label_name][category]):
            inp, truth = load_example(label_index, image_index)
            inputs.append(inp)
            truths.append(truth)
    return (np.array(inputs), np.array(truths))
 def __next__(self):
     """Draw ``how_many`` random images and return the input/label arrays.

     Each draw picks a random label index and a random image index, loads
     the image at the resulting path, preprocesses it and appends it to
     ``inputs``; a one-hot vector of length ``class_count`` is appended to
     ``truths``.

     NOTE(review): ``inputs``, ``truths``, ``how_many``, ``class_count``,
     ``image_lists``, ``image_dir``, ``category`` and ``target_size`` are
     not defined in this method and ``self`` is never read. If ``inputs``
     and ``truths`` outlive a single call, every returned batch will also
     contain the samples of earlier calls - confirm this is intended.
     """
     for _ in range(how_many):
         chosen_label = random.randrange(class_count)
         all_labels = list(image_lists.keys())
         chosen_name = all_labels[chosen_label]
         chosen_image = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
         image_path = get_image_path(image_lists, chosen_name, chosen_image,
                                     image_dir, category)
         loaded = image.load_img(image_path, target_size=target_size)
         inputs.append(preprocess_img(loaded, expand_dim=False))
         one_hot = np.zeros(class_count)
         one_hot[chosen_label] = 1
         truths.append(one_hot)
     batch = (np.array(inputs), np.array(truths))
     return batch
Beispiel #5
0
def cache_distort_bottlenecks(image_lists,
                              bottle_func,
                              architecture='inception_v3'):
    """Write distorted copies of a random sample of training images to disk.

    For every label and each of the categories 'training', 'testing' and
    'validation', the listed images are visited. An image is processed only
    when ``random.randrange(1000)`` is 998 or 999 (the original marks this
    sampling as debug-only). A processed image is loaded with
    ``target_size=(IM_WIDTH, IM_HEIGHT)`` and counted. For 'training' images
    only, ``ARGS.times_per_image`` distorted variants are produced with
    ``distort_image`` and saved as ``<image_name>_<i>.jpg`` in the directory
    ``ARGS.image_dir + '_distorted'``; the loaded original is saved there
    too, under its own name.

    NOTE(review): ``bottle_func`` and ``architecture`` are never used and
    the returned dict is always empty, so no bottlenecks are computed here
    despite the function name - confirm whether this is unfinished.

    Args:
        image_lists: Mapping indexed as ``image_lists[label_name][category]``
            to obtain the image names of one label.
        bottle_func: Unused.
        architecture: Unused.

    Returns:
        An empty dict.
    """

    class _Tally:
        """Count created/skipped items and report every 100th of each."""

        def __init__(self):
            self.n_created = 0
            self.n_skipped = 0

        def __call__(self, count_type='create'):
            if count_type != 'skipped':
                self.n_created += 1
                if not self.n_created % 100:
                    print('{} bottlenecks created.'.format(self.n_created))
                return
            self.n_skipped += 1
            if not self.n_skipped % 100:
                print('{} existing bottlenecks skipped.'.format(
                    self.n_skipped))

        def print_total(self):
            print('{} bottlenecks created in total.'.format(self.n_created))
            print('{} existing bottlenecks skipped in total.'.format(
                self.n_skipped))

    size = (IM_WIDTH, IM_HEIGHT)
    tally = _Tally()

    for label_name in image_lists.keys():
        print('Current label: {}'.format(label_name))
        per_label = image_lists[label_name]
        for category in ('training', 'testing', 'validation'):
            names = per_label[category]
            print('Current category: {}'.format(category))
            for image_index, image_name in enumerate(names):
                # Debug-only sampling kept from the original: roughly 0.2%
                # of the images get past this point.
                if random.randrange(1000) < 998:
                    continue
                source_path = get_image_path(image_lists, label_name,
                                             image_index, ARGS.image_dir,
                                             category)
                img = image.load_img(source_path, target_size=size)
                tally()
                # Only training images receive distorted variants.
                if category != 'training':
                    continue
                for i in range(ARGS.times_per_image):
                    variant_name = image_name + '_{}'.format(i) + '.jpg'
                    variant = distort_image(img, ROTATION_RANGE,
                                            WIDTH_SHIFT_RANGE,
                                            BRIGHTNESS_RANGE,
                                            SHEAR_RANGE,
                                            CHANNEL_SHIFT_RANGE,
                                            HORIZONTAL_FLIP)
                    if not i:
                        # First variant: make sure the output directory
                        # exists and keep a copy of the undistorted image.
                        out_dir = ARGS.image_dir + '_distorted'
                        ensure_dir_exists(out_dir)
                        img.save(os.path.join(out_dir, image_name))
                    variant.save(os.path.join(out_dir, variant_name))
                    tally()

    tally.print_total()
    return {}