Code example #1
def random_augment(img, mask, aug_dict):
    image_datagen = ImageDataGenerator(**aug_dict)
    seed = np.random.randint(1e9)

    out_img = image_datagen.random_transform(img, seed=seed)
    out_mask = image_datagen.random_transform(mask, seed=seed)

    return out_img, out_mask
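
A minimal usage sketch for the helper above (the array shapes and the `aug_dict` values are illustrative assumptions, not from the original project): because both `random_transform` calls receive the same `seed`, Keras draws identical transform parameters for the image and its mask.

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Hypothetical inputs: random_transform expects 3-D arrays (H, W, C)
img = np.random.rand(256, 256, 3)
mask = np.random.rand(256, 256, 1)
aug_img, aug_mask = random_augment(img, mask,
                                   {'rotation_range': 15, 'horizontal_flip': True})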
Code example #2
File: train.py Project: burokoron/StaDeep
@dataclass
class TrainSequence(Sequence):
    directory: str  # folder where the images are stored
    df: pd.DataFrame  # DataFrame describing the data
    image_size: tuple  # input image size
    classes: int  # number of classes
    batch_size: int  # batch size
    aug_params: dict  # augmentation parameters for ImageDataGenerator

    def __post_init__(self):
        self.df_index = list(self.df.index)
        self.train_datagen = ImageDataGenerator(**self.aug_params)

    def __len__(self):
        return math.ceil(len(self.df_index) / self.batch_size)

    def __getitem__(self, idx):
        batch_x = self.df_index[idx * self.batch_size:(idx + 1) *
                                self.batch_size]

        x = []
        y = []
        for i in batch_x:
            rand = np.random.randint(0, int(1e9))
            # input image
            img = cv2.imread(f'{self.directory}/{self.df.at[i, "filename"]}')
            img = cv2.resize(img,
                             self.image_size,
                             interpolation=cv2.INTER_LANCZOS4)
            img = np.array(img, dtype=np.float32)
            img = self.train_datagen.random_transform(img, seed=rand)
            img *= 1. / 255
            x.append(img)

            # segmentation mask
            img = cv2.imread(f'{self.directory}/{self.df.at[i, "label"]}',
                             cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img,
                             self.image_size,
                             interpolation=cv2.INTER_LANCZOS4)
            img = np.array(img, dtype=np.float32)
            img = np.reshape(img, (self.image_size[0], self.image_size[1], 1))
            img = self.train_datagen.random_transform(img, seed=rand)
            img = np.reshape(img, (self.image_size[0], self.image_size[1]))
            seg = []
            for label in range(self.classes):
                seg.append(img == label)
            seg = np.array(seg, np.float32)
            seg = seg.transpose(1, 2, 0)
            y.append(seg)

        x = np.array(x)
        y = np.array(y)

        return x, y
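
A hedged usage sketch for the Sequence above (`train_df` and `model` are assumed names; the DataFrame needs the `filename` and `label` columns read in `__getitem__`):

seq = TrainSequence(directory='data/train',
                    df=train_df,  # hypothetical DataFrame
                    image_size=(256, 256),
                    classes=10,
                    batch_size=8,
                    aug_params={'rotation_range': 10, 'horizontal_flip': True})
model.fit(seq, epochs=10)  # 'model' is an assumed compiled Keras model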
Code example #3
    def preprocess_img(self, img_path):
        """处理每张图片,大小, 数据增强
        :param img_path:
        :return:
        """
        # 1、读取图片对应内容,做形状,内容处理, (h, w)
        img = Image.open(img_path)
        # [180, 200, 3]
        scale = self.img_size[0] / max(img.size[:2])
        img = img.resize((int(img.size[0] * scale), int(img.size[1] * scale)))
        img = img.convert('RGB')
        img = np.array(img)

        # 2. Data augmentation: only applied to the training set
        if self.use_aug:

            # 2a. Random erasing
            img = self.eraser(img)

            # 2b. Random shifts and flips
            datagen = ImageDataGenerator(
                width_shift_range=0.05,
                height_shift_range=0.05,
                horizontal_flip=True,
                vertical_flip=True,
            )
            img = datagen.random_transform(img)

        # 3. Adjust the shape to [300, 300, 3]
        # Resize toward [300, 300]: avoid cropping or warping so the augmented
        # result is preserved, then pad up to 300x300
        img = self.center_img(img, self.img_size[0])
        return img
Code example #4
def augment_input(img, mask, aug_dict, batch_size=32, random_crop_size=(256, 256), only_crop=False):
    LX, LY, LZ = img.shape
    image_datagen = ImageDataGenerator(**aug_dict)

    out_imgs = np.zeros((batch_size, random_crop_size[0], random_crop_size[1], 1))
    out_masks = np.zeros((batch_size, random_crop_size[0], random_crop_size[1], 1))
    for b in range(batch_size):
        seed = np.random.randint(1e9)
        crop_img, crop_mask = random_crop(img, mask, random_crop_size)
        if not only_crop:
            out_imgs[b, ...] = image_datagen.random_transform(crop_img, seed=seed)
            out_masks[b, ...] = image_datagen.random_transform(crop_mask, seed=seed)
        else:
            out_imgs[b, ...] = crop_img
            out_masks[b, ...] = crop_mask

    return out_imgs, out_masks
Code example #5
class SegmentationSequence(Sequence):
    def __init__(self, images, masks, batch_size, jitter=False):
        self.masks = masks
        self.images = images
        self.batch_size = batch_size
        self.shuffled_indices = np.random.permutation(self.images.shape[0])
        self.jitter = jitter
        if self.jitter:
            self.jitter_datagen = ImageDataGenerator(rotation_range=5,
                                                     width_shift_range=0.05,
                                                     height_shift_range=0.05,
                                                     fill_mode="nearest")

    def __len__(self):
        return self.images.shape[0] // self.batch_size

    def __getitem__(self, idx):

        # The shuffled indices in this batch
        batch_inds = self.shuffled_indices[idx * self.batch_size:(idx + 1) *
                                           self.batch_size]

        if self.jitter:

            batch_images_list = []
            batch_masks_list = []

            for i in batch_inds:
                # Stack mask and image together to ensure that they are transformed
                # in exactly the same way
                stacked = np.dstack([
                    self.images[i, :, :, :].astype(np.uint8),
                    self.masks[i, :, :, :]
                ])
                transformed = self.jitter_datagen.random_transform(stacked)

                batch_images_list.append(transformed[:, :, 0].astype(float))
                batch_masks_list.append(transformed[:, :, 1])

            batch_images = np.dstack(batch_images_list)
            batch_images = np.transpose(batch_images[:, :, :, np.newaxis],
                                        [2, 0, 1, 3])
            batch_masks = np.dstack(batch_masks_list)
            batch_masks = np.transpose(batch_masks[:, :, :, np.newaxis],
                                       [2, 0, 1, 3])

        else:

            # Slice images and labels for this batch
            batch_images = self.images[batch_inds, :, :, :]
            batch_masks = self.masks[batch_inds, :, :, :]

        return (batch_images, batch_masks)

    def on_epoch_end(self):
        # Shuffle the dataset indices again
        self.shuffled_indices = np.random.permutation(self.images.shape[0])
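
The stack-then-transform trick in `__getitem__` above can be checked in isolation (a minimal sketch with made-up shapes and parameters): stacking the image and mask as channels guarantees that a single `random_transform` call moves both by exactly the same geometry.

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rotation_range=5, width_shift_range=0.05,
                             height_shift_range=0.05, fill_mode="nearest")
image = np.random.rand(64, 64, 1)
mask = (np.random.rand(64, 64, 1) > 0.5).astype(np.float32)

stacked = np.dstack([image, mask])            # (64, 64, 2)
transformed = datagen.random_transform(stacked)
aug_image, aug_mask = transformed[..., :1], transformed[..., 1:]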
Code example #6
def generator():
    idg = ImageDataGenerator(horizontal_flip=True,
                             rotation_range=20,
                             zoom_range=0.2)
    while True:
        for i in range(0, len(X), batch_size):
            X_batch = X[i:i + batch_size].copy()
            y_batch = [x[i:i + batch_size] for x in y]
            if aug:
                for j in range(len(X_batch)):
                    X_batch[j] = idg.random_transform(X_batch[j])
            yield X_batch, y_batch
Code example #7
def data_generator(X: Union[np.ndarray, list],
                   Y: Union[np.ndarray, list],
                   batch_size: int,
                   target_shape: tuple = (224, 224, 3),
                   if_shuffle: bool = False,
                   augment: bool = False,
                   rotation_range: int = 30,
                   horizontal_flip: bool = True):
    """
    Function to generate mini batches of data with possibility of performing data augmentation
    Args:
        X: Input images to draw training batches from
        Y: Labels/targets corresponding to the images in X
        batch_size: Size of the batch used
        target_shape (tuple): Shape of an image to be transformed to
        if_shuffle (bool): Whether to randomly shuffle dataset
        augment (bool): Whether to augment the dataset
        rotation_range (int): Rotation range for an image if it is augmented
        horizontal_flip (bool): Whether to flip image horizontally

    Yields:
        Mini-batch of data
    """
    start = 0
    end = start + batch_size
    num_samples = X.shape[0]
    if if_shuffle:
        X, Y = shuffle(X, Y)
    if augment:
        data_augmenter = ImageDataGenerator(rotation_range=rotation_range,
                                            horizontal_flip=horizontal_flip)
    while True:
        X_batch = X[start:end]
        Y_batch = Y[start:end]
        X_batch_resized = resize_images(X_batch, target_shape)
        if augment:
            # Draw fresh random parameters for each image; a fixed seed here
            # would apply the identical transform to every image.
            X_batch_resized = np.array([
                data_augmenter.random_transform(image)
                for image in X_batch_resized
            ])
        X_preprocessed = preprocess_input(X_batch_resized)
        start += batch_size
        end += batch_size
        if start >= num_samples:
            start = 0
            end = batch_size
            if if_shuffle:
                X, Y = shuffle(X, Y)
        yield (X_preprocessed, Y_batch)
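
A short sketch of driving this generator (`X_train`, `Y_train`, and `model` are assumed names); `steps_per_epoch` is required because the generator loops forever:

gen = data_generator(X_train, Y_train, batch_size=32,
                     target_shape=(224, 224, 3),
                     if_shuffle=True, augment=True)
model.fit(gen, steps_per_epoch=len(X_train) // 32, epochs=5)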
Code example #8
    def add_augmented_images(*ids):
        from tensorflow.keras.preprocessing.image import ImageDataGenerator
        # Takes a list of ids containing the old id and the new id
        # Applies augmentation to the old image and saves it as a new one
        image_paths = [
            str(Path(Config.DATA_DIR, 'images', f'{id}.jpg')) for id in ids
        ]
        image = tf.image.decode_jpeg(tf.io.read_file(image_paths[0]))
        image = tf.image.resize(image, Config.IMG_SHAPE)
        dg = ImageDataGenerator(
            rotation_range=30,
            zoom_range=0.5,
            shear_range=0.3,
            horizontal_flip=True,
            width_shift_range=0.3,
            height_shift_range=0.3,
        )

        tf.keras.preprocessing.image.save_img(
            image_paths[1], dg.random_transform(image.numpy()))
Code example #9
class SliceSelectionSequence(Sequence):
    def __init__(self,
                 labels,
                 image_dir,
                 batch_size,
                 batches_per_epoch,
                 jitter=False,
                 sigmoid_scale=None):
        self.labels = labels
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.batches_per_epoch = batches_per_epoch
        self.jitter = jitter
        self.sigmoid_scale = sigmoid_scale
        self.shuffled_indices = np.random.permutation(len(labels))
        if self.jitter:
            self.jitter_datagen = ImageDataGenerator(rotation_range=5,
                                                     width_shift_range=0.05,
                                                     height_shift_range=0.05,
                                                     fill_mode="constant",
                                                     cval=0)

    def __len__(self):
        return self.batches_per_epoch

    def __getitem__(self, idx):

        # The shuffled indices in this batch
        batch_inds = self.shuffled_indices[idx * self.batch_size:(idx + 1) *
                                           self.batch_size]

        # Labels for this batch
        batch_labels = self.labels[batch_inds]

        # Soft-threshold the distances using a sigmoid
        if self.sigmoid_scale is not None:
            batch_labels = expit(batch_labels / self.sigmoid_scale)

        # The images for this batch
        images_list = []
        for i in batch_inds:

            # Load in image
            filename = os.path.join(self.image_dir, str(i).zfill(6) + '.png')
            im = resize(imread(filename), (256, 256),
                        mode='constant',
                        preserve_range=True,
                        anti_aliasing=True)[:, :, np.newaxis]

            # Apply random jitter (rotation, shift, zoom)
            if self.jitter:
                im = self.jitter_datagen.random_transform(im)

            images_list.append(im)

        batch_images = np.dstack(images_list).astype(float)
        batch_images = np.transpose(batch_images[:, :, :, np.newaxis],
                                    [2, 0, 1, 3])

        return (batch_images, batch_labels)

    def on_epoch_end(self):
        # Shuffle the dataset indices again
        required = self.batches_per_epoch * self.batch_size
        use_replacement = required > len(self.labels)
        self.shuffled_indices = np.random.choice(len(self.labels),
                                                 required,
                                                 replace=use_replacement)
Code example #10
model2.compile(optimizer='adam',
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

model2.fit(X, y, epochs=6)

"""## Validation et prédictions"""

test_loss, test_accuracy = model1.evaluate(X_test, y_test, steps=math.ceil(len(X_test)/50))
print('Accuracy on test dataset:', test_accuracy)

test_loss, test_accuracy = model2.evaluate(X_test, y_test, steps=math.ceil(len(X_test)/50))
print('Accuracy on test dataset:', test_accuracy)

# Change the index to predict other entries of the X_test and y_test arrays

a = random.randint(0,1500)


img = X_test[a]
plt.imshow(img)
img = np.array([img],dtype="float16")
print(CATEGORIES[y_test[a][0]])

predictions = model1.predict(img)
print(CATEGORIES[np.argmax(predictions[0])])

img_trans = datagen.random_transform(X[0])
plt.imshow(img_trans)
Code example #11
class BalanceCovidDataset(keras.utils.Sequence):
    'Generates data for Keras'

    def __init__(self,
                 data_dir,
                 csv_file,
                 is_training=True,
                 batch_size=8,
                 input_shape=(224, 224),
                 n_classes=3,
                 num_channels=3,
                 mapping={
                     'normal': 0,
                     'pneumonia': 1,
                     'COVID-19': 2
                 },
                 shuffle=True,
                 augmentation=True,
                 covid_percent=0.3,
                 class_weights=[1., 1., 6.]):
        'Initialization'
        self.datadir = data_dir
        self.dataset = _process_csv_file(csv_file)
        self.is_training = is_training
        self.batch_size = batch_size
        self.N = len(self.dataset)
        self.input_shape = input_shape
        self.n_classes = n_classes
        self.num_channels = num_channels
        self.mapping = mapping
        self.shuffle = shuffle
        self.covid_percent = covid_percent
        self.class_weights = class_weights
        self.n = 0

        if augmentation:
            self.augmentation = ImageDataGenerator(
                featurewise_center=False,
                featurewise_std_normalization=False,
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True,
                brightness_range=(0.9, 1.1),
                zoom_range=(0.85, 1.15),
                fill_mode='constant',
                cval=0.,
            )

        datasets = {'normal': [], 'pneumonia': [], 'COVID-19': []}
        for l in self.dataset:
            datasets[l.split()[2]].append(l)
        self.datasets = [
            datasets['normal'] + datasets['pneumonia'],
            datasets['COVID-19'],
        ]

        self.on_epoch_end()

    def __next__(self):
        # Get one batch of data
        batch_x, batch_y = self.__getitem__(self.n)
        # Batch index
        self.n += 1

        # If we have processed the entire dataset, reshuffle and restart
        if self.n >= self.__len__():
            self.on_epoch_end()
            self.n = 0

        return batch_x, batch_y

    def __len__(self):
        return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle == True:
            for v in self.datasets:
                np.random.shuffle(v)

    def __getitem__(self, idx):
        batch_x, batch_y = np.zeros(
            (self.batch_size, *self.input_shape,
             self.num_channels)), np.zeros(self.batch_size)
        if (idx % 200 == 0 and idx != 0):
            batch_files = np.random.choice(self.datasets[1],
                                           size=self.batch_size,
                                           replace=False)
        else:
            batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                           self.batch_size]

            # upsample covid cases
            covid_size = max(int(len(batch_files) * self.covid_percent), 1)
            covid_inds = np.random.choice(np.arange(len(batch_files)),
                                          size=covid_size,
                                          replace=False)
            covid_files = np.random.choice(self.datasets[1],
                                           size=covid_size,
                                           replace=False)
            for i in range(covid_size):
                batch_files[covid_inds[i]] = covid_files[i]

        for i in range(len(batch_files)):
            sample = batch_files[i].split()

            if self.is_training:
                folder = 'train'
            else:
                folder = 'test'

            x = cv2.imread(os.path.join(self.datadir, folder, sample[1]))
            h, w, c = x.shape
            x = x[int(h / 6):, :]
            x = cv2.resize(x, self.input_shape)

            if self.is_training and hasattr(self, 'augmentation'):
                x = self.augmentation.random_transform(x)

            x = x.astype('float32') / 255.0
            y = self.mapping[sample[2]]

            batch_x[i] = x
            batch_y[i] = y

        # NOTE: these per-sample weights are computed but never returned
        class_weights = self.class_weights
        weights = np.take(class_weights, batch_y.astype('int64'))

        return batch_x, keras.utils.to_categorical(batch_y,
                                                   num_classes=self.n_classes)
Code example #12
from tensorflow.keras.preprocessing.image import ImageDataGenerator
img_gen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.1,
                             zoom_range=0.1,
                             horizontal_flip=True,
                             fill_mode='nearest')


# Show a sample image for the image data generator
sample_im = imread(sample_image)
plt.imshow(sample_im)

# Show a randomly augmented version of the sample image
plt.imshow(img_gen.random_transform(sample_im))

# The two lines below report the class labeling of the train and test sets
img_gen.flow_from_directory(train_set)
img_gen.flow_from_directory(test_set)


# =============================================================================
# Creating the data augmentation instance for the train and test set
# =============================================================================
batch_size = 16
train_image_gen = img_gen.flow_from_directory(train_set,
                                              target_size=image_shape[:2],
                                              color_mode='rgb',
                                              batch_size=batch_size,
                                              class_mode='binary')
Code example #13
# Plot a random painting and its randomly augmented version
fig, axes = plt.subplots(1, 2, figsize=(20, 10))

random_artist = random.choice(artists_top_name)
random_image = random.choice(
    os.listdir(os.path.join(images_dir, random_artist)))
random_image_file = os.path.join(images_dir, random_artist, random_image)

# Original image
image = plt.imread(random_image_file)
axes[0].imshow(image)
axes[0].set_title("An original Image of " + random_artist.replace('_', ' '))
axes[0].axis('off')

# Transformed image
aug_image = train_datagen.random_transform(image)
axes[1].imshow(aug_image)
axes[1].set_title("A transformed Image of " + random_artist.replace('_', ' '))
axes[1].axis('off')

plt.show()

# %% [markdown]
# ## Build Model
# CNN model
# Build model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3),
                           input_shape=train_input_shape,
                           activation=tf.keras.activations.relu,
                           padding='same'),
Code example #14
File: data_generator.py Project: ayocucu/ayocucu
class DataGenerator(keras.utils.Sequence):
    """
    Generates frame batches for CNN
    """
    def __init__(self,
                 list_IDs,
                 targets,
                 train_test,
                 batch_size=128,
                 dim=(96, 96),
                 n_channels=1,
                 shuffle=True):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.targets = targets
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.train_test = train_test
        self.on_epoch_end()
        if train_test == 'train':
            self.image_gen = ImageDataGenerator(rotation_range=15,
                                                width_shift_range=0.1,
                                                height_shift_range=0.1,
                                                shear_range=0.01,
                                                zoom_range=[0.9, 1.25],
                                                horizontal_flip=True,
                                                vertical_flip=False,
                                                fill_mode='reflect',
                                                data_format='channels_last',
                                                brightness_range=[0.5, 1.5])

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples'  # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size, 2), dtype=np.float32)

        # Generate data
        for i, ID in enumerate(list_IDs_temp):
            # Store sample
            preprocessed_image = process_image(ID, self.dim)
            augmented_image = preprocessed_image
            if self.train_test == 'train':
                # NOTE: a constant seed re-applies the same transform to every
                # image; pass a varying seed for genuinely random augmentation
                augmented_image = self.image_gen.random_transform(
                    preprocessed_image, seed=RANDOM_STATE)
            augmented_image = (augmented_image / 255.).astype(np.float32)
            X[i, ] = augmented_image

            # Store target
            y[i] = self.targets[ID]

        # X = utils.preprocess_input(X, version=1)

        return X, y
Code example #15
def loadData():
    datagen = ImageDataGenerator(rotation_range=80,
                                 width_shift_range=0.3,
                                 height_shift_range=0.3,
                                 shear_range=0.3,
                                 zoom_range=0.3,
                                 horizontal_flip=True,
                                 vertical_flip=True,
                                 fill_mode='nearest')
    data_img = []
    data_label = []
    for file in os.listdir(fileRoot + path_healthy):
        img = cv2.imread(fileRoot + path_healthy + file)
        res = preprocess(img)
        data_img.append(res)
        data_label.append(2)
        data_img.append(cv2.flip(res, 1))
        data_label.append(2)
        data_img.append(cv2.flip(res, 0))
        data_label.append(2)
        data_img.append(cv2.flip(res, -1))
        data_label.append(2)
        data_img.append(datagen.random_transform(res))
        data_label.append(2)
        data_img.append(datagen.random_transform(res))
        data_label.append(2)
        data_img.append(datagen.random_transform(res))
        data_label.append(2)

    for file in os.listdir(fileRoot + path_leaf):
        img = cv2.imread(fileRoot + path_leaf + file)
        if img is None:
            print(file)
            continue
        res = preprocess(img)
        data_img.append(res)
        data_label.append(0)
        data_img.append(cv2.flip(res, 1))
        data_label.append(0)
        data_img.append(cv2.flip(res, 0))
        data_label.append(0)
        data_img.append(cv2.flip(res, -1))
        data_label.append(0)
        data_img.append(datagen.random_transform(res))
        data_label.append(0)
        data_img.append(datagen.random_transform(res))
        data_label.append(0)
        data_img.append(datagen.random_transform(res))
        data_label.append(0)

    for file in os.listdir(fileRoot + path_rust):
        img = cv2.imread(fileRoot + path_rust + file)
        res = preprocess(img)
        data_img.append(res)
        data_label.append(1)
        data_img.append(cv2.flip(res, 1))
        data_label.append(1)
        data_img.append(cv2.flip(res, 0))
        data_label.append(1)
        data_img.append(cv2.flip(res, -1))
        data_label.append(1)
        data_img.append(datagen.random_transform(res))
        data_label.append(1)
        data_img.append(datagen.random_transform(res))
        data_label.append(1)
        data_img.append(datagen.random_transform(res))
        data_label.append(1)

    for i in range(len(data_img)):
        data_img[i] = data_img[i] / 255
    data_img = np.array(data_img)
    return data_img, data_label
Code example #16
class ImageGeneratorParallel(keras.utils.Sequence):
    """Generates data for Keras"""
    def __init__(self,
                 features,
                 targets,
                 n_classes=2,
                 batch_size=32,
                 shuffle=True,
                 repeats=1,
                 parallel=True):
        self.n_classes = n_classes
        self.n_vals = len(targets)
        # Since we are using data augmentation, we repeat each image several
        # times; the same original image may be rotated or flipped differently
        # each time, so it is not the "same" image.
        self.list_IDs = np.repeat(
            np.arange(self.n_vals), repeats
        )  # NOTE: to do this properly, these should be only the valid images
        self.batch_size = batch_size
        self.features = features
        self.shuffle = shuffle
        self.targets = targets
        self.targets_mc = keras.utils.to_categorical(
            targets, num_classes=self.n_classes)
        self.indexes = np.arange(len(self.list_IDs))
        self.pool = None
        self.parallel = parallel

        self.augmenter = ImageDataGenerator(
            # featurewise_center=True,
            # featurewise_std_normalization=True,
            rescale=1 / 255,
            rotation_range=40,
            zoom_range=0.2,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest')

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        if self.parallel:
            if self.pool is None:
                self.pool = ThreadPool(4)
        indexes = self.indexes[index * self.batch_size:(index + 1) *
                               self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Generate data
        if self.parallel:
            X, y = self.__data_generation_threads(list_IDs_temp)
        else:
            X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        'Generates data containing batch_size samples'  # X : (n_samples, *dim, n_channels)
        # Initialization
        X = self.features[list_IDs_temp]
        y = self.targets_mc[list_IDs_temp]
        X = np.array(
            next(self.augmenter.flow(X,
                                     batch_size=self.batch_size,
                                     shuffle=self.shuffle)))

        return X, y

    def __data_generation_threads(self, list_IDs_temp):
        'Generates data containing batch_size samples'  # X : (n_samples, *dim, n_channels)
        # Initialization
        X = self.features[list_IDs_temp]
        y = self.targets_mc[list_IDs_temp]
        X = np.array(
            self.pool.map(
                lambda xi: self.augmenter.random_transform(
                    self.augmenter.standardize(xi)), X))

        return X, y
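
The thread-pool mapping in `__data_generation_threads` can be distilled into a standalone sketch (the pool size and augmentation parameters are illustrative assumptions):

from multiprocessing.pool import ThreadPool

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rotation_range=40, horizontal_flip=True)
batch = np.random.rand(32, 64, 64, 3)  # hypothetical batch (N, H, W, C)

# Apply random_transform to each image on 4 worker threads
with ThreadPool(4) as pool:
    augmented = np.array(pool.map(gen.random_transform, list(batch)))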
Code example #17
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    #rescale = 1 / 255
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest')

plt.imshow(un)
plt.show()

plt.imshow(image_gen.random_transform(un))
plt.show()

print(image_gen.flow_from_directory(train_path))
print(image_gen.flow_from_directory(test_path))

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten

model = Sequential()

model.add(
    Conv2D(filters=32,
           kernel_size=(3, 3),
           input_shape=image_shape,
           activation='relu'))
Code example #18
plt.imshow(img)
img = np.array([img],dtype="float16")
print(CATEGORIES[y_test[a][0]])

predictions = model1.predict(img)
print(CATEGORIES[np.argmax(predictions[0])])

photo = Image.open("drive/My Drive/photo/14.jpg")
photo = photo.resize((50, 50))

photo = np.array(photo)
photo = np.concatenate(([photo, 255 * np.ones((50, 50, 1), dtype=np.uint8)]), axis=-1)

print(photo.shape)
plt.imshow(photo)

img = np.array([photo],dtype="float16")


result = []

predictions = model1.predict(img)
print(predictions[0])
result.append(np.argmax(predictions[0]))

DIVISIONS = ["bend", "bend sinister", "chevron", "fess", "gyronny", "pale", "quarter", "saltire", "aucun"]

print(result[0])

img_trans = datagen.random_transform(X_test[0])
plt.imshow(img_trans)
Code example #19

help(ImageDataGenerator)

image_gen = ImageDataGenerator(rotation_range=20, # rotate the image up to 20 degrees
                               width_shift_range=0.10, # shift the pic width by a max of 10%
                               height_shift_range=0.10, # shift the pic height by a max of 10%
                               rescale=1/255, # rescale the image by normalizing it
                               shear_range=0.1, # shear means cutting away part of the image (max 10%)
                               zoom_range=0.1, # zoom in by 10% max
                               horizontal_flip=True, # allow horizontal flipping
                               fill_mode='nearest' # fill in missing pixels with the nearest filled value
                              )

plt.imshow(para_img)
plt.imshow(image_gen.random_transform(para_img))

# Generating many manipulated images from a directory
image_gen.flow_from_directory(train_path)
image_gen.flow_from_directory(test_path)


#https://stats.stackexchange.com/questions/148139/rules-for-selecting-convolutional-neural-network-hyperparameters
model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=(3,3),input_shape=image_shape, activation='relu',))
model.add(MaxPooling2D(pool_size=(2, 2)))
Code example #20
File: datasets.py Project: henrygouk/mars-finetuning
class ImageDataSequence(Sequence):
    def __init__(self,
                 directories,
                 batch_size,
                 target_size,
                 frac=1.0,
                 **kwargs):
        if not isinstance(directories, list):
            directories = [directories]

        self.batch_size = batch_size
        self.target_size = target_size
        self.classes = sorted(os.listdir(directories[0]))
        self.num_classes = len(self.classes)
        self.filenames = []
        self.labels = []
        self.transformer = ImageDataGenerator(**kwargs)

        for d in range(0, len(directories)):
            for idx, cls in zip(range(len(self.classes)), self.classes):
                filenames = os.listdir(os.path.join(directories[d], cls))

                for f in filenames:
                    self.filenames.append(os.path.join(directories[d], cls, f))
                    self.labels.append(idx)

        self.on_epoch_end()

        num_instances = int(frac * len(self.filenames))
        self.filenames = self.filenames[0:num_instances]
        self.labels = self.labels[0:num_instances]

    def __getitem__(self, batch_idx):
        X_batch = np.zeros((self.batch_size, ) + self.target_size)
        y_batch = np.zeros((self.batch_size, len(self.classes)))
        filenames = []
        labels = []
        aux = []
        current_index = batch_idx * self.batch_size

        for i in range(self.batch_size):
            filenames.append(self.filenames[current_index])
            labels.append(self.labels[current_index])

            if hasattr(self, "auxiliary"):
                aux.append(self.auxiliary[current_index])

            current_index += 1

        color_mode = "rgb" if self.target_size[2] == 3 else "grayscale"
        target_size = (self.target_size[0], self.target_size[1])
        X_batch = np.array([
            self.transformer.random_transform(
                img_to_array(
                    load_img(fn,
                             color_mode=color_mode,
                             target_size=target_size))) for fn in filenames
        ])
        y_batch = to_categorical(labels, num_classes=len(self.classes))

        if hasattr(self, "auxiliary"):
            return X_batch, [y_batch, aux]
        else:
            return X_batch, y_batch

    def __len__(self):
        return int(len(self.filenames) / self.batch_size)

    def on_epoch_end(self):
        X = self.filenames
        y = self.labels

        if hasattr(self, "auxiliary"):
            aux = self.auxiliary
            insts = list(zip(X, y, aux))
            random.shuffle(insts)
            X, y, aux = zip(*insts)
            self.auxiliary = aux
        else:
            insts = list(zip(X, y))
            random.shuffle(insts)
            X, y = zip(*insts)

        self.filenames = X
        self.labels = y
Code example #21
class OntheflyAugmentedImages(BaseDataset):
    """Use a tensorflow.keras ImageDataGenerator to augment images on the fly in a
    determenistic way."""
    def __init__(self,
                 dataset,
                 augmentation_params,
                 N=None,
                 random_state=0,
                 cache_size=None):
        # Initialize some member variables
        self.dataset = dataset
        self.generator = ImageDataGenerator(**augmentation_params)
        self.N = N or (len(self.dataset.train_data) * 10)
        self.random_state = random_state
        assert len(self.dataset.shape) == 3

        # Figure out the base images for each of the augmented ones
        self.idxs = np.random.choice(len(self.dataset.train_data), self.N)

        # Fit the generator
        self.generator.fit(self.dataset.train_data[:][0])

        # Standardize the test data
        self._x_test = np.copy(self.dataset.test_data[:][0])
        self._x_test = self.generator.standardize(self._x_test)
        self._y_test = self.dataset.test_data[:][1]

        # Create an LRU cache to speed things up a bit for the transforms
        cache_size = cache_size or len(self.dataset.train_data)
        self.cache = OrderedDict([(-i, i) for i in range(cache_size)])
        self.cache_data = np.empty(shape=(cache_size, ) + self.dataset.shape,
                                   dtype=np.float32)

    def _transform(self, idx, x):
        # if it is not cached add it
        if idx not in self.cache:
            # Remove the first in and add the new idx (i is the offset in
            # cache_data)
            _, i = self.cache.popitem(last=False)
            self.cache[idx] = i

            # Do the transformation and add it to the data
            np.random.seed(idx + self.random_state)
            x = self.generator.random_transform(x)
            x = self.generator.standardize(x)
            self.cache_data[i] = x

        # and if it is update it as the most recently used
        else:
            self.cache[idx] = self.cache.pop(idx)

        return self.cache_data[self.cache[idx]]

    def _train_data(self, idxs=slice(None)):
        # Make sure we accept everything that numpy accepts as indices
        idxs = np.arange(self.N)[idxs]

        # Get the original images and then transform them
        x, y = self.dataset.train_data[self.idxs[idxs]]
        x_hat = np.copy(x)
        random_state = np.random.get_state()
        for i, idx in enumerate(idxs):
            x_hat[i] = self._transform(idx, x_hat[i])
        np.random.set_state(random_state)

        return x_hat, y

    def _test_data(self, idxs=slice(None)):
        return self._x_test[idxs], self._y_test[idxs]

    def _train_size(self):
        return self.N

    @property
    def shape(self):
        return self.dataset.shape

    @property
    def output_size(self):
        return self.dataset.output_size
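
The per-index seeding used in `_transform` above can be shown on its own (a minimal sketch; the generator parameters are illustrative): seeding NumPy with the sample index before `random_transform` makes each sample's augmentation reproducible across epochs.

import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

gen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1)
x = np.random.rand(32, 32, 3)

def deterministic_transform(idx, x, random_state=0):
    np.random.seed(idx + random_state)  # same idx -> same parameters
    return gen.random_transform(x)

assert np.allclose(deterministic_transform(7, x),
                   deterministic_transform(7, x))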
Code example #22
File: data.py Project: maxwelltsai/Covid19-HPML
class BalanceDataGenerator(tf.keras.utils.Sequence):
    'Generates data for tf.keras'

    def __init__(self,
                 dataset,
                 images=None,
                 labels=None,
                 le=None,
                 is_training=True,
                 batch_size=8,
                 input_shape=(512, 512),
                 n_classes=3,
                 num_channels=3,
                 mapping={
                     'normal': 0,
                     'pneumonia': 1,
                     'COVID-19': 2
                 },
                 shuffle=True,
                 augmentation=True,
                 datadir='data',
                 args=None):

        if args.datapipeline == 'chexpert':
            'CHexPert Initialization'
            images, labels, le = get_data_references()
            self.train_images = images[:int(len(images) * args.val_split)]
            self.train_labels = labels[:int(len(labels) * args.val_split)]
            self.n_classes = len(le.classes_)

        'Initialization'
        self.datadir = datadir
        self.dataset = dataset
        self.is_training = is_training
        self.batch_size = args.bs
        self.N = len(self.dataset)
        self.input_shape = input_shape
        if args.datapipeline == 'covidx':
            self.n_classes = n_classes
        self.num_channels = num_channels
        self.mapping = mapping
        self.shuffle = True
        self.args = args
        if self.args.datapipeline == 'chexpert':
            'CHexPert Initialization'
            self.images = images
            self.labels = labels
            self.n_classes = len(le.classes_)

        if augmentation:
            self.augmentation = ImageDataGenerator(
                featurewise_center=False,
                featurewise_std_normalization=False,
                rotation_range=10,
                width_shift_range=0.1,
                height_shift_range=0.1,
                horizontal_flip=True,
                brightness_range=(0.9, 1.1),
                fill_mode='constant',
                cval=0.,
            )

        datasets = {'normal': [], 'pneumonia': [], 'COVID-19': []}
        for l in dataset:
            datasets[l.split()[-1]].append(l)
        self.datasets = [
            datasets['normal'] + datasets['pneumonia'],
            datasets['COVID-19'],
        ]
        print(
            f"Train: NO-COVID={len(self.datasets[0])}, COVID={len(self.datasets[1])}"
        )
        self.on_epoch_end()

    def __len__(self):
        if self.args.datapipeline == 'covidx':
            return int(np.ceil(len(self.datasets[0]) / float(self.batch_size)))
        elif self.args.datapipeline == 'chexpert':
            return int(np.ceil(len(self.images) / float(self.batch_size)))

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        if self.shuffle == True:
            if self.args.datapipeline == 'covidx':
                for v in self.datasets:
                    np.random.shuffle(v)
            elif self.args.datapipeline == 'chexpert':
                self.images = shuffle(self.images, random_state=0)
                self.labels = shuffle(self.labels, random_state=0)

    def __getitem__(self, idx):
        batch_x, batch_y = np.zeros(
            (self.batch_size, *self.input_shape,
             self.num_channels)), np.zeros(self.batch_size)

        # COVIDX Pipeline
        if self.args.datapipeline == 'covidx':
            batch_files = self.datasets[0][idx * self.batch_size:(idx + 1) *
                                           self.batch_size]
            batch_files[np.random.randint(self.batch_size)] = np.random.choice(
                self.datasets[1])

            for i in range(self.batch_size):

                sample = batch_files[i].split()
                if self.is_training:
                    folder = 'train'
                else:
                    folder = 'test'

                x = cv2.imread(os.path.join(self.datadir, folder, sample[1]))
                x = cv2.resize(x, self.input_shape)

                if self.is_training and hasattr(self, 'augmentation'):
                    x = self.augmentation.random_transform(x)

                x = x.astype('float32') / 255.0
                y = self.mapping[sample[2]]

                batch_x[i] = x
                batch_y[i] = y

        # ChexPert Pipeline
        elif self.args.datapipeline == 'chexpert':
            idx = min(idx,
                      BalanceDataGenerator.__len__(self) - self.batch_size)
            batch_files_images = self.images[idx * self.batch_size:(idx + 1) *
                                             self.batch_size]
            batch_files_labels = self.labels[idx * self.batch_size:(idx + 1) *
                                             self.batch_size]

            for i in range(self.batch_size):
                try:
                    x = cv2.imread(batch_files_images[i])
                    x = cv2.resize(x, self.input_shape)

                    if self.is_training and hasattr(self, 'augmentation'):
                        x = self.augmentation.random_transform(x)
                    x = 2 * (x.astype('float32') / 255.0) - 1
                    y = batch_files_labels[i]

                    batch_x[i] = x
                    batch_y[i] = y
                except Exception as e:
                    print(e)

        return batch_x, tf.keras.utils.to_categorical(
            batch_y, num_classes=self.n_classes)
Code example #23
class SiameseBatchGenerator(BaseBatchGenerator):
    """ For loading utterances from input dataset with categories
    and making batches of pairs of images, during training
    Positive pair (1) - utterances of one class
    Negative pair (0) - utterances from different classes

    # Arguments
    """
    def __init__(self,
                 X,
                 y,
                 batch_size=32,
                 flow_from_dir=False,
                 augment=False,
                 **kwargs):

        self.x = X
        self.y = y
        self.batch_size = batch_size
        self.flow_from_dir = flow_from_dir
        self.augment = augment

        if flow_from_dir:
            # we already have all statistics
            self.__dict__.update(kwargs)
        else:
            self.__count_stats()

        # augmentation
        if self.augment:
            self.__get_distortion_generator()

    @classmethod
    def from_directory(cls, dirname, batch_size=32, augment=False):
        '''Constructor only for images
        '''
        assert os.path.isdir(
            dirname), "There is no such directory `%s`" % dirname

        X, y = [], []
        class_folders = glob(os.path.join(dirname, "*", ""))

        n_classes = len(class_folders)
        samples_per_class = np.zeros(n_classes, dtype=np.int32)
        class_idx = [None] * n_classes

        for i, folder in enumerate(class_folders):
            # class_folders entries already include dirname, so join on folder
            img_fnames = glob(os.path.join(folder, '*.jpg'))
            # add all image files with other extensions
            for ext in ["*.png", "*.jpeg"]:
                img_fnames.extend(glob(os.path.join(folder, ext)))
            # add filenames and corresponding labels to array
            X.extend(img_fnames)
            y.extend([i] * len(img_fnames))
            samples_per_class[i] = len(img_fnames)
            # split sorted indices on classes
            if i == 0:
                class_idx[i] = np.arange(len(img_fnames))
            else:
                low = sum(samples_per_class[:i])
                high = low + samples_per_class[i]
                class_idx[i] = np.arange(low, high, dtype=np.int32)
        # transform to arrays for convenience
        X = np.array(X)
        y = np.array(y, dtype=np.int8)
        # call __init__
        return cls(
            X,
            y,
            batch_size,
            flow_from_dir=True,
            augment=augment,
            # kwargs
            n_classes=n_classes,
            samples_per_class=samples_per_class,
            class_idx=class_idx)

    def __count_stats(self):
        self.samples_per_class = np.unique(self.y, return_counts=True)[1]
        self.n_classes = len(self.samples_per_class)
        # sort indices by their value, i.e. sort labels
        sorted_idx = np.argsort(self.y)
        # split sorted indices on classes
        self.class_idx = np.split(sorted_idx,
                                  np.cumsum(self.samples_per_class)[:-1])

    def __get_distortion_generator(self):
        self.distortion_generator = ImageDataGenerator(rotation_range=75,
                                                       shear_range=0.3,
                                                       zoom_range=0.3,
                                                       width_shift_range=0.2,
                                                       height_shift_range=0.2,
                                                       channel_shift_range=0.2,
                                                       vertical_flip=True,
                                                       horizontal_flip=True)

    def random_distortion(self, img):
        return self.distortion_generator.random_transform(img)

    def __get_pair(self, c, pos):
        '''c - class number
           pos - positive or negative
        '''
        # randomly select two samples for each class to create pair
        idx = np.random.permutation(self.samples_per_class[c])[:2]

        if not pos or len(idx) == 1:
            # for negatives choose the opposite class
            c_ = np.random.choice([x for x in range(self.n_classes) if x != c])
            # choose the sample from the opposite class
            i_ = np.random.randint(self.samples_per_class[c_])
            l_sample = self.x[self.class_idx[c][idx[0]]]
            r_sample = self.x[self.class_idx[c_][i_]]
            return l_sample, r_sample

        return self.x[self.class_idx[c][idx]]

    def __create_pairs(self, batch_size, pos=True):
        # if batch_size is odd, the negatives get one extra pair
        n = (batch_size // 2) if pos else (batch_size // 2 + batch_size % 2)
        # array for storing pairs
        pairs = np.zeros((2, n, *self.x.shape[1:]), dtype=self.x.dtype)
        # randomly choose n class labels
        classes = np.random.randint(self.n_classes, size=n)

        i = 0
        while i < n:
            pairs[0][i], pairs[1][i] = self.__get_pair(classes[i], pos)
            i += 1
        return pairs

    def __get_files_from_names(self, arr):
        result = [None] * arr.size
        # read all files
        for i, x in enumerate(np.nditer(arr)):
            result[i] = imread(str(x)) / 255.
            if self.augment:
                result[i] = self.random_distortion(result[i])

        result = np.array(result)
        result = result.reshape((*arr.shape, *result[0].shape))
        return result

    def next_batch(self, batch_size=None, shuffle=True, seed=None):
        if seed is not None:
            np.random.seed(seed)
        # if batch size was not specified use the default one
        batch_size = self.batch_size if batch_size is None else batch_size
        # arrays for pairs and labels respectively
        batch_xs = np.zeros((2, batch_size, *self.x.shape[1:]),
                            dtype=self.x.dtype)
        batch_ys = np.ones((batch_size, ), dtype=np.int8)
        # positive pairs
        batch_xs[:, :batch_size // 2] = self.__create_pairs(batch_size)
        # negative pairs
        batch_xs[:, batch_size // 2:] = self.__create_pairs(batch_size,
                                                            pos=False)
        batch_ys[batch_size // 2:] = 0
        # permutation
        if shuffle:
            batch_xs, batch_ys = shuffle_arrays(batch_xs,
                                                batch_ys,
                                                axes=[1, 0])
        # if flow_from_dir is True, batch_xs contains filenames,
        # so we need to read the files
        if self.flow_from_dir:
            batch_xs = self.__get_files_from_names(batch_xs)
        return batch_xs, batch_ys
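
A hedged usage sketch for the generator above (the `faces/` directory name is hypothetical; it should contain one sub-directory of images per class):

gen = SiameseBatchGenerator.from_directory('faces/', batch_size=32, augment=True)
batch_xs, batch_ys = gen.next_batch()
# batch_xs has shape (2, batch_size, H, W, C): batch_xs[0] and batch_xs[1]
# feed the two branches; batch_ys is 1 for positive pairs, 0 for negative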
Code example #24
# In[50]:

filenames = pd.read_csv("../../data/interimdata.csv")
imgs = np.array([
    plt.imread(f["train_example"], format="jpeg")
    for _, f in filenames.iterrows()
])

# In[51]:

plt.imshow(imgs[0])

# In[52]:

plt.imshow(augmenter.random_transform(imgs[0]).astype("uint8"))

# In[59]:

augmented = []
for i in range(269):
    augmented.append(
        augmenter.random_transform(imgs[random.randint(0,
                                                       len(imgs) -
                                                       1)]).astype("uint8"))

# In[62]:

links = {"path": []}

for i, img in enumerate(augmented):