def main(argv):
    # load input
    img_df = pd.read_pickle(
        os.path.join(FLAGS.input_directory, FLAGS.file_list))
    train_df, validation_df = train_test_split(img_df,
                                               test_size=0.2,
                                               random_state=19,
                                               shuffle=True,
                                               stratify=img_df["label"])
    logging.info(f"Training {img_df.count()} faces")
    # Prepare data generators
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        zoom_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        horizontal_flip=True,
        preprocessing_function=preprocess_input,
        fill_mode="nearest",
    )

    validation_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=FLAGS.input_directory,
        x_col="filename",
        y_col="label",
        class_mode="categorical",
        batch_size=FLAGS.batch_size,
        target_size=MASK_INPUT_IMAGE_SHAPE[:-1],
    )

    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=validation_df,
        directory=FLAGS.input_directory,
        x_col="filename",
        y_col="label",
        class_mode="categorical",
        batch_size=FLAGS.batch_size,
        target_size=MASK_INPUT_IMAGE_SHAPE[:-1],
    )

    # create a model
    maskNet = create_mask_detector_mobilenet(MASK_INPUT_IMAGE_SHAPE)

    # checkpoints
    checkpoint_path = "checkpoints/weights.{epoch:02d}-{val_loss:.4f}.hdf5"
    checkpoint_cb = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_best_only=False,
        monitor="val_loss",
        mode="min",
        save_weights_only=True,
        verbose=1,
        save_freq="epoch",
    )
    early_stopping_cb = keras.callbacks.EarlyStopping(
        patience=5, restore_best_weights=True)

    # tensorboard callback
    root_logdir = os.path.join(os.curdir, "mylogs")
    run_logdir = get_run_logdir(root_logdir)
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

    STEPS_PER_EPOCH = len(train_df) // FLAGS.batch_size
    STEPS_PER_EPOCH_VAL = len(validation_df) // FLAGS.batch_size

    # compile
    maskNet.compile(loss="binary_crossentropy",
                    optimizer="adam",
                    metrics=["accuracy"])

    # train!
    result = maskNet.fit(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=validation_generator,
        validation_steps=STEPS_PER_EPOCH_VAL,
        epochs=FLAGS.epoch,
        callbacks=[early_stopping_cb, checkpoint_cb, tensorboard_cb],
    )
    maskNet.save(FLAGS.output_file)
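
# A minimal sketch (an assumption, not part of the original snippet) of the
# absl-py entry point this code implies; the flag names mirror the FLAGS.*
# attributes used in main() above, and the defaults are placeholders.
from absl import app, flags

FLAGS = flags.FLAGS
flags.DEFINE_string("input_directory", None, "Directory with the images and the pickled file list.")
flags.DEFINE_string("file_list", "file_list.pkl", "Pickled DataFrame with 'filename' and 'label' columns.")
flags.DEFINE_integer("batch_size", 32, "Training batch size.")
flags.DEFINE_integer("epoch", 20, "Number of training epochs.")
flags.DEFINE_string("output_file", "mask_detector.h5", "Path for the saved model.")

if __name__ == "__main__":
    app.run(main)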
Example 2
model.add(Dense(2, activation = 'softmax'))

model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
model.summary()

df["category"] = df["category"].replace({0:'cat', 1:'dog'})
train_df, validate_df = train_test_split(df, test_size = 0.2, random_state = 42)
train_df = train_df.reset_index(drop = True)
validate_df = validate_df.reset_index(drop = True)

total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size = 15

train_datagen = ImageDataGenerator(rotation_range = 15, rescale = 1./255, shear_range = 0.1, zoom_range = 0.2, horizontal_flip = True, width_shift_range = 0.1, height_shift_range = 0.1)
train_generator = train_datagen.flow_from_dataframe(train_df, dataset_path, x_col = 'filename', y_col = 'category', target_size = Image_Size, class_mode = 'categorical', batch_size = batch_size)

validation_datagen = ImageDataGenerator(rescale = 1./255)
validation_generator = validation_datagen.flow_from_dataframe(validate_df, dataset_path, x_col = 'filename', y_col = 'category', target_size = Image_Size, class_mode = 'categorical', batch_size = batch_size)

checkpoint = ModelCheckpoint(filepath = "./1/weights.h5", 
                             monitor = "val_loss", 
                             verbose = 1, 
                             save_best_only = True,
                             mode = "min")

callbacks_list = [checkpoint, reduce_lr, earlystop]
epochs = 150
history = model.fit(train_generator, 
                    epochs = epochs, 
                    validation_data = validation_generator, 
                    validation_steps = total_validate // batch_size, 
                    steps_per_epoch = total_train // batch_size, 
                    callbacks = callbacks_list)
Example 3
    def buildDataGenerator(self, mainClassInstructionsDF, target_img_shape=(256, 256, 3), batch_size=64,
                           pathColName='ImgPath_Absolute'):

        # Receives a df containing all mainClassInstructionsDF records
        # Location col name: ImgPath_Absolute

        testPath = mainClassInstructionsDF[pathColName].values[0]
        if not os.path.exists(testPath):
            print("Could not locate sample image from mainClassInstructionsDF")
            print("Img path: %s" % (testPath))

        start_time = time.time()

        dataset_train, dataset_val, dataset_test = splitDataSet_train_val_test(dataFrame=mainClassInstructionsDF,
                                                                               val_percent=20, test_percent=10)
        target_size = target_img_shape[0:2]

        # self.y_test = dataset_test.filter(items=self.classDictionary.values()).values
        self.X_test = dataset_test.filter(items=[pathColName]).values

        print("Build data generator")
        # https://vijayabhaskar96.medium.com/multi-label-image-classification-tutorial-with-keras-imagedatagenerator-cd541f8eaf24

        datagen = ImageDataGenerator(rescale=1. / 255.)
        test_datagen = ImageDataGenerator(rescale=1. / 255.)
        train_generator = datagen.flow_from_dataframe(
            dataframe=dataset_train,
            directory=None,
            x_col=pathColName,
            class_mode="raw",
            y_col=list(self.classDictionary.values()),
            batch_size=batch_size,
            seed=42,
            shuffle=True,
            target_size=target_size)
        valid_generator = test_datagen.flow_from_dataframe(
            dataframe=dataset_val,
            directory=None,  # app.config['BASE_FOLDER'],
            x_col=pathColName,
            class_mode="raw",
            y_col=list(self.classDictionary.values()),
            batch_size=batch_size,
            seed=42,
            shuffle=True,
            target_size=target_size)
        test_generator = test_datagen.flow_from_dataframe(
            dataframe=dataset_test,
            directory=None,  # app.config['BASE_FOLDER'],
            x_col=pathColName,
            class_mode="raw",
            y_col=list(self.classDictionary.values()),
            batch_size=1,
            seed=42,
            shuffle=False,
            target_size=target_size)

        self.test_generator = test_generator
        self.valid_generator = valid_generator
        self.train_generator = train_generator

        self.y_test = self.test_generator.labels

        def generator_wrapper(generator):
            for batch_x, batch_y in generator:
                yield (batch_x, [batch_y[:, i] for i in range(5)])

        self.STEP_SIZE_TRAIN = self.train_generator.n // self.train_generator.batch_size
        self.STEP_SIZE_VALID = self.valid_generator.n // self.valid_generator.batch_size
        self.STEP_SIZE_TEST = self.test_generator.n  # test batch_size is 1, so steps == n

        self.DataGenerator = 1

        print("test_generator: %s records. Shape: %s" % (self.test_generator.n, str(self.y_test.shape)))
        print("valid_generator: %s records" % (self.valid_generator.n))
        print("train_generator: %s records" % (self.train_generator.n))
# get the images

with open(classification_file) as f:
  dic = json.load(f)

dataframe = pd.DataFrame(dic.items())
dataframe.rename(columns = {0:'filename', 1:'class'}, inplace = True)
dataframe["class"] = dataframe["class"].astype(str)

dataframe = dataframe.sample(frac=1)

train_gen = train_data_gen.flow_from_dataframe(dataframe,
                                               train_dir,
                                               batch_size=bs,
                                               target_size=(img_h, img_w),
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               subset='training')

valid_gen = valid_data_gen.flow_from_dataframe(dataframe,
                                               train_dir,
                                               batch_size=bs,
                                               target_size=(img_h, img_w),
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               subset='validation')


test_gen = test_data_gen.flow_from_directory(dataset_dir,
                                             classes=['test'],
                                             batch_size=bs,
                                             class_mode=None,
                                             shuffle=False)
def load(name, dataset_dir, BATCH_SIZE, tfrecord):
    if name == "idrid":
        logging.info(f"Preparing dataset {name}...")
        # use tfrecord file
        if tfrecord:
            train_dataset = load_dataset(
                os.path.join(dataset_dir, 'train_image.tfrecords'))
            valid_dataset = load_dataset(
                os.path.join(dataset_dir, 'valid_image.tfrecords'))
            test_dataset = load_dataset(
                os.path.join(dataset_dir, 'test_image.tfrecords'))

            train_dataset = train_dataset.map(lambda x, y:
                                              (tf_preprocess(x), y),
                                              num_parallel_calls=AUTOTUNE)
            '''# Visualized image from training data set
            image, label = next(iter(train_dataset))
            plt.imshow(tf.cast(image, tf.int64))
            plt.axis('off')
            plt.show()'''

            train_dataset = aug_and_prepare(train_dataset,
                                            BATCH_SIZE,
                                            shuffle=True,
                                            augment=True)

            valid_dataset = valid_dataset.map(lambda x, y:
                                              (tf_preprocess(x), y),
                                              num_parallel_calls=AUTOTUNE)
            valid_dataset = aug_and_prepare(valid_dataset, BATCH_SIZE)

            test_dataset = test_dataset.map(lambda x, y: (tf_preprocess(x), y),
                                            num_parallel_calls=AUTOTUNE)
            test_dataset = aug_and_prepare(test_dataset, BATCH_SIZE)

            return train_dataset, valid_dataset, test_dataset

        else:
            # use csv dataset
            train_df = pd.read_csv(os.path.join(dataset_dir, 'train_data.csv'))
            train_df['Image name'] = train_df['Image name'] + ".jpg"
            test_df = pd.read_csv(os.path.join(dataset_dir, 'test_data.csv'))
            test_df['Image name'] = test_df['Image name'] + ".jpg"

            train_datagen = ImageDataGenerator(
                rotation_range=360,
                horizontal_flip=True,
                vertical_flip=True,
                validation_split=0.15,
                preprocessing_function=preprocess,
                rescale=1 / 255.)

            train_generator = train_datagen.flow_from_dataframe(
                train_df,
                x_col='Image name',
                y_col='Retinopathy grade',
                directory=os.path.join(dataset_dir, 'train'),
                target_size=(256, 256),
                batch_size=BATCH_SIZE,
                class_mode='raw',
                subset='training')

            val_generator = train_datagen.flow_from_dataframe(
                train_df,
                x_col='Image name',
                y_col='Retinopathy grade',
                directory=os.path.join(dataset_dir, 'train'),
                target_size=(256, 256),
                batch_size=BATCH_SIZE,
                class_mode='raw',
                shuffle=False,
                subset='validation')

            test_generator = ImageDataGenerator(
                preprocessing_function=preprocess,
                rescale=1 / 255.).flow_from_dataframe(
                    test_df,
                    x_col='Image name',
                    y_col='Retinopathy grade',
                    directory=os.path.join(dataset_dir, 'test'),
                    target_size=(256, 256),
                    batch_size=BATCH_SIZE,
                    class_mode='raw',
                    shuffle=False)

            return train_generator, val_generator, test_generator
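
# A usage sketch for load(); the dataset directory is a placeholder path.
train_ds, valid_ds, test_ds = load("idrid", "/path/to/IDRID", 32, tfrecord=True)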
Example 6
sns.countplot(train["label"])
plt.show()

print(train.label.value_counts())

img_size = 128

data = ImageDataGenerator(validation_split=0.2)                            

train_data = data.flow_from_dataframe(
    dataframe=train,
    directory='train_images',
    x_col='image_id',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=32,
    subset='training',
    shuffle = True,
    class_mode='categorical'
)

valid_data = data.flow_from_dataframe(
    dataframe=train,
    directory='train_images',
    x_col='image_id',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=32,
    subset='validation',
    class_mode='categorical'
)
Example 7
# Data generators
train_df = pandas.read_csv(train_path)
validate_df = pandas.read_csv(validate_path)

train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   horizontal_flip=True,
                                   vertical_flip=True)

val_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,
                                                    directory=image_dir,
                                                    x_col="filename",
                                                    y_col='label',
                                                    target_size=(image_height,
                                                                 image_width),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode="raw",
                                                    color_mode="rgb")

val_generator = val_datagen.flow_from_dataframe(dataframe=validate_df,
                                                directory=image_dir,
                                                x_col="filename",
                                                y_col='label',
                                                target_size=(image_height,
                                                             image_width),
                                                batch_size=batch_size,
                                                shuffle=True,
                                                class_mode="raw",
                                                color_mode="rgb")
    def generator_splitter_multi(model_name, train, test, imagepath):
        """
        function uses the `ImageDataGenerator` class
        # load our dataset as an iterator (not keeping it all in memory at once).
        :param model_name: model name
        :param train: data set
        :param test: data set
        :param imagepath: path to the image folder
        :return: data split for train val and test
        """
        # Train Set
        # tf.config.list_physical_devices()
        if model_name == 'vgg19':
            preprocessing = preprocess_input_VGG19
        elif model_name == 'MobileNetV2':
            preprocessing = preprocess_input_MNV2
        elif model_name == 'vgg16':
            preprocessing = preprocess_input_VGG16
        elif model_name == 'ResNet50':
            preprocessing = Preprocess_RESNET50
        else:
            preprocessing = None
        train['label'] = train['label'].astype(str)
        # preprocessing_function and featurewise_std_normalization are
        # ImageDataGenerator constructor arguments, not flow_from_dataframe
        # arguments; they are set here instead (featurewise_std_normalization
        # additionally needs datagen.fit() on sample data to take effect).
        img_gen = ImageDataGenerator(validation_split=0.2,
                                     preprocessing_function=preprocessing,
                                     featurewise_std_normalization=True)

        train_data = img_gen.flow_from_dataframe(train,
                                                 directory=imagepath,
                                                 x_col='files',
                                                 y_col='label',
                                                 class_mode='categorical',
                                                 batch_size=64,
                                                 target_size=(224, 224),
                                                 subset='training')

        # Validation Set
        valid_data = img_gen.flow_from_dataframe(train,
                                                 directory=imagepath,
                                                 x_col='files',
                                                 y_col='label',
                                                 class_mode='categorical',
                                                 batch_size=64,
                                                 target_size=(224, 224),
                                                 subset='validation')

        # Test Set
        img_gen_test = ImageDataGenerator(preprocessing_function=preprocessing,
                                          featurewise_std_normalization=True)
        test_data = img_gen_test.flow_from_dataframe(test,
                                                     directory=imagepath,
                                                     x_col='files',
                                                     y_col='label',
                                                     class_mode=None,
                                                     target_size=(224, 224),
                                                     batch_size=64,
                                                     shuffle=False)
        return train_data, valid_data, test_data
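
        # Usage sketch (the DataFrames and image folder path are
        # placeholders, not from the original snippet):
        #
        # train_data, valid_data, test_data = generator_splitter_multi(
        #     'MobileNetV2', train_df, test_df, '/path/to/images')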
Example 9
#     subset=None,
#     interpolation="nearest",
#     validate_filenames=True,
#     **kwargs
# )

train_df = pd.read_csv("E:\\ML Training Data\\" + "train.csv",
                       index_col=None,
                       header=0)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=None,  # Set to None because the absolute path is provided
    x_col='Full Path',
    y_col='Common name',
    shuffle=True,
    seed=42,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    save_to_dir=None  # "E:\\tmp"
)

#
# images, labels = next(train_generator)
# print(images.dtype, images.shape)
# print(labels.dtype, labels.shape)

# ds = tf.data.Dataset.from_generator(
#     train_generator,
#     output_types=(tf.float32, tf.float32),
#     output_shapes=([16, 224, 224, 3], [16, 44])
Example 10
def create_dataset(config, val_split=0.2):
    log.info("Loading dataset...")

    num_skipped = 0
    for folder_name in ("Cat", "Dog"):
        folder_path = os.path.join("PetImages", folder_name)
        for fname in os.listdir(folder_path):
            fpath = os.path.join(folder_path, fname)
            try:
                fobj = open(fpath, "rb")
                is_jfif = tf.compat.as_bytes("JFIF") in fobj.peek(10)
            finally:
                fobj.close()

            if not is_jfif:
                num_skipped += 1
                # Delete corrupted image
                os.remove(fpath)

    print("Deleted %d images" % num_skipped)
    image_size = (180, 180)
    batch_size = 32

    if config.crossvalidation:
        log.warning(
            "Crossvalidation is used, but not implemented in this way, turn around now!"
        )
    kf = KFold(n_splits=5)

    filenames = []
    labels = []

    for file in os.listdir('PetImages/Cat'):
        filenames.append(os.path.join('Cat', file))
        labels.append('Cat')

    for file in os.listdir('PetImages/Dog'):
        filenames.append(os.path.join('Dog', file))
        labels.append('Dog')

    d = {'filename': filenames, 'label': labels}
    alldata = pd.DataFrame(d)
    alldata = alldata.sample(frac=1).reset_index(drop=True)
    Y = alldata[['label']]

    #optional augmentation applied through idg:
    if config.augmentation:
        idg = ImageDataGenerator(width_shift_range=0.0,
                                 height_shift_range=0.0,
                                 zoom_range=0.0,
                                 rotation_range=36,
                                 fill_mode='nearest',
                                 horizontal_flip=True,
                                 rescale=None)
    else:
        idg = ImageDataGenerator(width_shift_range=0.0,
                                 height_shift_range=0.0,
                                 zoom_range=0.0,
                                 fill_mode='nearest',
                                 horizontal_flip=True,
                                 rescale=None)

    val_idg = keras.preprocessing.image.ImageDataGenerator()

    for train_index, val_index in kf.split(np.zeros(len(Y)), Y):
        training_data = alldata.iloc[train_index]
        validation_data = alldata.iloc[val_index]

        train_ds = idg.flow_from_dataframe(training_data,
                                           target_size=(180, 180),
                                           directory='PetImages',
                                           x_col="filename",
                                           y_col="label",
                                           class_mode="categorical",
                                           shuffle=True)
        val_ds = val_idg.flow_from_dataframe(validation_data,
                                             target_size=(180, 180),
                                             directory='PetImages',
                                             x_col="filename",
                                             y_col="label",
                                             class_mode="categorical",
                                             shuffle=True)
        break  # only the first fold is used; full cross-validation is not implemented (see warning above)

    return (train_ds, val_ds)
    def load_dataset_generators(self):

        # Create training generator and augment training data
        if self.augment_data is True:
            train_datagen = ImageDataGenerator(rescale=1. / 255,
                                               rotation_range=90,
                                               width_shift_range=0.4,
                                               height_shift_range=0.4,
                                               shear_range=0.4,
                                               zoom_range=0.4,
                                               horizontal_flip=True,
                                               fill_mode='nearest')
        else:
            train_datagen = ImageDataGenerator(rescale=1. / 255)

        # Don't augment data in the validation generator
        validation_datagen = ImageDataGenerator(rescale=1. / 255)

        if self.dataset_mode == 'directory':
            # Training generator
            self.train_generator = train_datagen.flow_from_directory(
                self.train_path,
                target_size=(self.height, self.width),
                batch_size=self.batch_size,
                class_mode=self.class_mode,
                shuffle=True)
            # Validation generator
            self.validation_generator = validation_datagen.flow_from_directory(
                self.valid_path,
                target_size=(self.height, self.width),
                batch_size=self.batch_size,
                class_mode=self.class_mode,
                shuffle=True)
            # Set number of classes
            num_classes_train = self.train_generator.num_classes
            num_classes_valid = self.validation_generator.num_classes

        elif self.dataset_mode == 'dataframe':
            # Training generator
            self.train_generator = train_datagen.flow_from_dataframe(
                dataframe=pd.read_csv(self.train_path).astype('str'),
                directory=self.dataset_root,
                x_col=self.xcol_name,
                y_col=self.ycol_name,
                target_size=(self.height, self.width),
                batch_size=self.batch_size,
                class_mode=self.class_mode,
                shuffle=self.shuffle_generator)
            # Validation generator
            self.validation_generator = validation_datagen.flow_from_dataframe(
                dataframe=pd.read_csv(self.valid_path).astype('str'),
                directory=self.dataset_root,
                x_col=self.xcol_name,
                y_col=self.ycol_name,
                target_size=(self.height, self.width),
                batch_size=self.batch_size,
                class_mode=self.class_mode,
                shuffle=self.shuffle_generator)
            # Set number of classes
            num_classes_train = len(self.train_generator.class_indices)
            num_classes_valid = len(self.validation_generator.class_indices)

        # Set number of samples
        self.num_train_samples = self.train_generator.samples
        self.num_valid_samples = self.validation_generator.samples
        # Check if number of training classes == number of validation classes
        assert num_classes_train == num_classes_valid, "number of classes in training and validation sets do not match"

        # Set class-level number of classes
        self.num_classes = num_classes_train

        if self.class_weights == 'balanced':
            # newer scikit-learn requires `classes` and `y` as keyword arguments
            self.class_weights = class_weight.compute_class_weight(
                'balanced',
                classes=np.unique(self.train_generator.classes),
                y=self.train_generator.classes)
        else:
            self.class_weights = None
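
        # Keras' fit(class_weight=...) expects a dict mapping class index to
        # weight, while compute_class_weight returns an array; a small
        # follow-on conversion (an addition, not in the original snippet):
        if self.class_weights is not None:
            self.class_weights = dict(enumerate(self.class_weights))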
Example 12
#Download dataset form https://drive.google.com/file/d/1jwa16s2nZIQywKMdRkpRvdDifxGDxC3I/view?usp=sharing
dataframe = pd.read_csv('fried_noodles_dataset.csv', delimiter=',', header=0)
dataframe["norm_meat"] = dataframe["meat"] / 300
dataframe["norm_veggie"] = dataframe["veggie"] / 300
dataframe["norm_noodle"] = dataframe["noodle"] / 300

#https://keras.io/api/preprocessing/image/
#https://machinelearningmastery.com/how-to-configure-image-data-augmentation-when-training-deep-learning-neural-networks/
datagen = ImageDataGenerator(rescale=1. / 3)

train_generator = datagen.flow_from_dataframe(
    dataframe=dataframe.loc[0:1599],
    directory='images',
    x_col='filename',
    y_col=['norm_meat', 'norm_veggie', 'norm_noodle'],
    shuffle=True,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='other')

validation_generator = datagen.flow_from_dataframe(
    dataframe=dataframe.loc[1600:1699],
    directory='images',
    x_col='filename',
    y_col=['norm_meat', 'norm_veggie', 'norm_noodle'],
    shuffle=False,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='other')
    # Compile each model
    model[j].compile(optimizer=Adam(lr=LR), loss='binary_crossentropy', metrics=['acc'])

# All images will be rescaled by 1./255
train_validate_datagen = ImageDataGenerator(rescale=1/255, validation_split=SPLIT)  # set validation split
test_datagen = ImageDataGenerator(rescale=1/255)
data_chunks = ensemble_data(cnn_networks, IMAGES_PATH)
for j in range(cnn_networks):
    print('Net : {}'.format(j+1))
    df_train = data_chunks[j].iloc[:-60]
    df_test = data_chunks[j].iloc[-60:]
    train_generator = train_validate_datagen.flow_from_dataframe(
        dataframe=df_train,
        directory=IMAGES_PATH,
        target_size=(255, 255),
        x_col='Images',
        y_col='Labels',
        batch_size=32,
        class_mode='binary',
        subset='training')

    validation_generator = train_validate_datagen.flow_from_dataframe(
        dataframe=df_train,
        directory=IMAGES_PATH,
        target_size=(255, 255),
        x_col='Images',
        y_col='Labels',
        batch_size=32,
        class_mode='binary',
        subset='validation')
Example 14
def main(args):

    print('[INFO] Starting clustering...')

    # Setup weights and biases
    setup_wandb(args)

    # Setup the pre-trained encoder from autoencoder.py.
    encoder = load_model(args.model)
    model = PretrainedDeepClusteringModel(backbone=encoder,
                                          n_clusters=args.n_clusters)
    optimizer = Adam(learning_rate=args.learning_rate,
                     beta_1=args.beta1,
                     beta_2=args.beta2)
    model.compile(optimizer=optimizer, loss='kld')

    encoder_weights, encoder_biases = encoder.layers[1].get_weights()
    model_weights, model_biases = model.backbone.layers[1].get_weights()
    print("[INFO] Checking weights and biases equality before running...")
    print("[INFO] Weights ", np.sum(encoder_weights - model_weights))
    print("[INFO] Biases ", np.sum(encoder_biases - model_biases))

    # Load the images into memory.  Right now
    # I am not supporting loading from disk.
    train, dev, test = load_dataframes(args.base_dir, args.min_samples)

    # Use an image data generator to save memory.
    augs = dict(preprocessing_function=normalize)

    gen = ImageDataGenerator(**augs)
    train_flow = gen.flow_from_dataframe(
        dataframe=train,
        directory=os.path.join(args.base_dir, 'train'),
        batch_size=args.batch_size,
        target_size=(args.pixels, args.pixels),
        shuffle=True,
        x_col='file',
        class_mode=None)

    # Setup a generator for dev
    dev_flow = gen.flow_from_dataframe(dataframe=dev,
                                       directory=os.path.join(
                                           args.base_dir, 'dev'),
                                       batch_size=args.batch_size,
                                       target_size=(args.pixels, args.pixels),
                                       shuffle=False,
                                       x_col='file',
                                       class_mode=None)

    test_flow = gen.flow_from_dataframe(dataframe=test,
                                        directory=os.path.join(
                                            args.base_dir, 'test'),
                                        batch_size=args.batch_size,
                                        target_size=(args.pixels, args.pixels),
                                        shuffle=False,
                                        x_col='file',
                                        class_mode=None)

    print('[INFO] Starting initialization of clusters')
    model.initialize_clusters_generator(
        train_flow,
        epochs=1,
        steps_per_epoch=int(np.ceil(len(train) / args.batch_size)))

    print('[INFO] Fitting autoencoder...')
    for layer in encoder.layers:
        layer.trainable = True

    # -----------------
    #    Train here
    # -----------------
    loss = np.inf
    for ite in range(int(args.total_batches)):

        batch = next(train_flow)
        while len(batch) != args.batch_size:
            batch = next(train_flow)

        q = model.predict(batch, verbose=0)
        p = clustering_target_distribution(q)

        for _ in range(args.repeat_batch):

            encoder_weights, encoder_biases = encoder.layers[1].get_weights()
            model_weights, model_biases = model.backbone.layers[1].get_weights()
            print("[INFO] Checking weights and biases equality...")
            print("[INFO] Weights ", np.sum(encoder_weights - model_weights))
            print("[INFO] Biases ", np.sum(encoder_biases - model_biases))

            sub_batches = int(np.ceil(args.batch_size / 32))
            for i in range(sub_batches):
                loss = model.train_on_batch(x=batch[i * 32:(i + 1) * 32],
                                            y=p[i * 32:(i + 1) * 32])
                wandb.log({'kld_loss': loss})

    # Fit the sucker
    batches = int(np.ceil(len(train) / args.batch_size))
    dev_batches = int(np.ceil(len(dev) / args.batch_size))

    # This scaler is used to normalize before
    # doing clustering.  The online run is done
    # on the training data to collect statistics.
    print('[INFO] Fitting the scaler.')
    scaler = StandardScaler()
    for batch in range(batches):
        x_batch = next(train_flow)
        scaler.partial_fit(encoder.predict(x_batch))

    label_encoder = LabelEncoder()
    train['encoded_label'] = label_encoder.fit_transform(train['label'])
    dev['encoded_label'] = label_encoder.transform(dev['label'])
    test['encoded_label'] = label_encoder.transform(test['label'])

    kmeans = MiniBatchKMeans(n_clusters=train['label'].nunique())
    batches = int(np.ceil(len(train) / args.batch_size))
    for i in range(batches):
        kmeans.partial_fit(encoder.predict(next(train_flow)))

    dev_clusters = []
    test_clusters = []
    batches = int(np.ceil(len(dev) / args.batch_size))
    for i in range(batches):
        dev_clusters.extend(kmeans.predict(encoder.predict(next(dev_flow))))

    batches = int(np.ceil(len(test) / args.batch_size))
    for i in range(batches):
        test_clusters.extend(kmeans.predict(encoder.predict(next(test_flow))))

    dev_clusters = np.array(dev_clusters)
    test_clusters = np.array(test_clusters)

    accuracy = hungarian_accuracy(dev['encoded_label'], dev_clusters)
    balanced_accuracy = hungarian_balanced_accuracy(dev['encoded_label'],
                                                    dev_clusters)
    wandb.log({
        "dev_accuracy": accuracy,
        "dev_balanced_accuracy": balanced_accuracy
    })

    accuracy = hungarian_accuracy(test['encoded_label'], test_clusters)
    balanced_accuracy = hungarian_balanced_accuracy(test['encoded_label'],
                                                    test_clusters)
    wandb.log({
        "test_accuracy": accuracy,
        "test_balanced_accuracy": balanced_accuracy
    })

    x_batch = next(dev_flow)

    encoder.save("encoder.dec.{}.hdf5".format(wandb.run.id))

    print('[INFO] Finished!')
Example 15
def train_model(cfg, data, callbacks, verbose=1):
    '''
    Train and evaluate a model on the given data.
    :param cfg: Project config (from config.yml)
    :param data: dict of partitioned dataset
    :param callbacks: list of callbacks for Keras model
    :param verbose: Verbosity mode to pass to model.fit_generator()
    :return: Trained model and associated performance metrics on the test set
    '''

    # If set in config file, oversample the minority class
    if cfg['TRAIN']['IMB_STRATEGY'] == 'random_oversample':
        data['TRAIN'] = random_minority_oversample(data['TRAIN'])

    # Create ImageDataGenerators
    train_img_gen = ImageDataGenerator(rotation_range=10, preprocessing_function=remove_text,
                                       samplewise_std_normalization=True, samplewise_center=True)
    val_img_gen = ImageDataGenerator(preprocessing_function=remove_text,
                                       samplewise_std_normalization=True, samplewise_center=True)
    test_img_gen = ImageDataGenerator(preprocessing_function=remove_text,
                                       samplewise_std_normalization=True, samplewise_center=True)

    # Create DataFrameIterators
    img_shape = tuple(cfg['DATA']['IMG_DIM'])
    y_col = 'label_str'
    class_mode = 'categorical'
    train_generator = train_img_gen.flow_from_dataframe(dataframe=data['TRAIN'], directory=cfg['PATHS']['RAW_DATA'],
        x_col="filename", y_col=y_col, target_size=img_shape, batch_size=cfg['TRAIN']['BATCH_SIZE'],
        class_mode=class_mode, validate_filenames=False)
    val_generator = val_img_gen.flow_from_dataframe(dataframe=data['VAL'], directory=cfg['PATHS']['RAW_DATA'],
        x_col="filename", y_col=y_col, target_size=img_shape, batch_size=cfg['TRAIN']['BATCH_SIZE'],
        class_mode=class_mode, validate_filenames=False)
    test_generator = test_img_gen.flow_from_dataframe(dataframe=data['TEST'], directory=cfg['PATHS']['RAW_DATA'],
        x_col="filename", y_col=y_col, target_size=img_shape, batch_size=cfg['TRAIN']['BATCH_SIZE'],
        class_mode=class_mode, validate_filenames=False, shuffle=False)

    # Save model's ordering of class indices
    dill.dump(test_generator.class_indices, open(cfg['PATHS']['OUTPUT_CLASS_INDICES'], 'wb'))

    # Apply class imbalance strategy. We have many more X-rays negative for COVID-19 than positive.
    histogram = np.bincount(np.array(train_generator.labels).astype(int))  # Get class distribution
    class_weight = None
    if cfg['TRAIN']['IMB_STRATEGY'] == 'class_weight':
        class_multiplier = cfg['TRAIN']['CLASS_MULTIPLIER']
        class_multiplier = [class_multiplier[cfg['DATA']['CLASSES'].index(c)] for c in test_generator.class_indices]
        class_weight = get_class_weights(histogram, class_multiplier)

    # Define metrics.
    covid_class_idx = test_generator.class_indices['COVID-19']   # Get index of COVID-19 class
    thresholds = 1.0 / len(cfg['DATA']['CLASSES'])      # Binary classification threshold for a class
    # the redundant 'accuracy' string metric is dropped: CategoricalAccuracy
    # below already reports under the name 'accuracy', and duplicate metric
    # names collide
    metrics = [CategoricalAccuracy(name='accuracy'),
               Precision(name='precision', thresholds=thresholds, class_id=covid_class_idx),
               Recall(name='recall', thresholds=thresholds, class_id=covid_class_idx),
               AUC(name='auc'),
               F1Score(name='f1score', thresholds=thresholds, class_id=covid_class_idx)]

    # Define the model.
    print('Training distribution: ', ['Class ' + list(test_generator.class_indices.keys())[i] + ': ' + str(histogram[i]) + '. '
           for i in range(len(histogram))])
    input_shape = cfg['DATA']['IMG_DIM'] + [3]
    num_gpus = cfg['TRAIN']['NUM_GPUS']
    if cfg['TRAIN']['MODEL_DEF'] == 'dcnn_resnet':
        model_def = dcnn_resnet
    elif cfg['TRAIN']['MODEL_DEF'] == 'resnet50v2':
        model_def = resnet50v2
    else:
        model_def = resnet101v2
    if cfg['TRAIN']['CLASS_MODE'] == 'binary':
        histogram = np.bincount(data['TRAIN']['label'].astype(int))
        output_bias = np.log([histogram[i] / (np.sum(histogram) - histogram[i]) for i in range(histogram.shape[0])])
        model = model_def(cfg['NN']['DCNN_BINARY'], input_shape, metrics, 2, output_bias=output_bias, gpus=num_gpus)
    else:
        n_classes = len(cfg['DATA']['CLASSES'])
        histogram = np.bincount(data['TRAIN']['label'].astype(int))
        output_bias = np.log([histogram[i] / (np.sum(histogram) - histogram[i]) for i in range(histogram.shape[0])])
        model = model_def(cfg['NN']['DCNN_MULTICLASS'], input_shape, metrics, n_classes, output_bias=output_bias,
                          gpus=num_gpus)

    # Train the model.
    steps_per_epoch = ceil(train_generator.n / train_generator.batch_size)
    val_steps = ceil(val_generator.n / val_generator.batch_size)
    history = model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=cfg['TRAIN']['EPOCHS'],
                                  validation_data=val_generator, validation_steps=val_steps, callbacks=callbacks,
                                  verbose=verbose, class_weight=class_weight)

    # Run the model on the test set and print the resulting performance metrics.
    test_results = model.evaluate_generator(test_generator, verbose=1)
    test_metrics = {}
    test_summary_str = [['**Metric**', '**Value**']]
    for metric, value in zip(model.metrics_names, test_results):
        test_metrics[metric] = value
        print(metric, ' = ', value)
        test_summary_str.append([metric, str(value)])
    return model, test_metrics, test_generator
Example 16
model = keras.models.load_model('../working/model_step3.hdf5')
model.load_weights('best_model.hdf5')

scores = model.evaluate_generator(test_generator, verbose=1)
print("Accuracy: %.2f%%" % (scores[1] * 100))

# # Prediction on the test data

from sklearn.metrics import accuracy_score

test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory=DATA_PATH + 'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
)

test_sub_generator.reset()
predictions = model.predict_generator(test_sub_generator,
                                      steps=len(test_sub_generator),
                                      verbose=1)
predictions = np.argmax(predictions, axis=-1)  #multiple categories
label_map = (train_generator.class_indices)
label_map = dict((v, k) for k, v in label_map.items())  #flip k,v
predictions = [label_map[k] for k in predictions]
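
# A follow-on sketch (an assumption, not in the original): assembling a
# submission frame from the generator's filenames and the decoded labels;
# the 'Category' column name is a placeholder.
submission = pd.DataFrame({'Id': test_sub_generator.filenames,
                           'Category': predictions})
submission.to_csv('submission.csv', index=False)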
Example 17
image_size = 380

# Get Labels
label_cols = df_train.columns.tolist()
label_cols.remove("StudyInstanceUID")
label_cols.remove("PatientID")

# Get Test Dataset Generator
test_datagen = ImageDataGenerator()

test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=comp_dir + "test",  # Change this
    x_col="StudyInstanceUID",
    batch_size=1,
    seed=42,
    shuffle=False,
    color_mode="rgb",
    class_mode=None,
    target_size=(image_size, image_size),
    interpolation="bilinear")

STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Load model from H5 Model
model = load_model("../input/ranzcr-clip-big-models/big_model.h5")

# Predict
pred = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=1)

# Create Submission df
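
# The snippet ends here; a hedged sketch of the submission frame the final
# comment implies, using label_cols and df_test defined above (the column
# layout is an assumption):
df_sub = pd.DataFrame(pred, columns=label_cols)
df_sub.insert(0, "StudyInstanceUID", df_test["StudyInstanceUID"].values)
df_sub.to_csv("submission.csv", index=False)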
Example 18
    except RuntimeError as e:
        print(e)
        

training_set = pd.read_csv('/data/backup/pervinco_2020/datasets/custom_miml.csv')
training_set["labels"] = training_set["labels"].apply(lambda x: x.split(","))

print(training_set.head())

img_dir = "/data/backup/pervinco_2020/datasets/multi_label_cls/images"

data_generator = ImageDataGenerator(preprocessing_function=preprocess_input)
train_generator = data_generator.flow_from_dataframe(dataframe = training_set,
                                                    directory=img_dir,
                                                    x_col="Filenames",
                                                    y_col="labels",
                                                    class_mode="categorical",
                                                    classes=['1850', '3211', '3715', '5203', '5601', '8584'],
                                                    target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                                    batch_size=32)

cb_early_stopper = EarlyStopping(monitor='loss', patience=EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath=saved_path + dataset_name + '/' + time + '/' + weight_file_name,
                                  monitor='accuracy', save_best_only=True, mode='auto')

model = Sequential()
model.add(InceptionResNetV2(include_top=False, pooling='avg', weights='imagenet'))
model.add(Dense(6, activation='sigmoid'))
model.layers[0].trainable = True
model.summary()

optimizer = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
Example 19
                                     'checkpoint-{epoch}.h5')))

# Set up ImageDataGenerators to do data augmentation for the training images.
train_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1. / 255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)
# Note: this per-channel mean is only subtracted when featurewise_center=True
# is set on the ImageDataGenerator; assigning .mean alone has no effect.
train_datagen.mean = [123.68, 116.779, 103.939]

train_generator = train_datagen.flow_from_dataframe(train_df,
                                                    DATA_PATH,
                                                    x_col='filename',
                                                    y_col='category',
                                                    target_size=IMAGE_SIZE,
                                                    class_mode='binary',
                                                    batch_size=BATCH_SIZE)

if hvd.rank() == 0:
    mlctx.logger.info('classes:', train_generator.class_indices)

validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_datagen.mean = [123.68, 116.779, 103.939]
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    DATA_PATH,
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='binary',
    batch_size=BATCH_SIZE)
epochs = 500

# Image Generators

train_datagen = ImageDataGenerator(rescale=1. / 255.,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   horizontal_flip=True)

train_generator = train_datagen.flow_from_dataframe(dataframe=model_train_data,
                                                    x_col="filepath",
                                                    y_col="class",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode="categorical",
                                                    target_size=IMAGE_SIZE,
                                                    color_mode='grayscale',
                                                    validate_filenames=False)

valid_datagen = ImageDataGenerator(rescale=1. / 255.)

valid_generator = valid_datagen.flow_from_dataframe(dataframe=model_val_data,
                                                    x_col="filepath",
                                                    y_col="class",
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    class_mode="categorical",
                                                    target_size=IMAGE_SIZE,
                                                    color_mode='grayscale',
                                                    validate_filenames=False)
Example 21
def generadores(etapa, architecture, datos, pipeline, label_active, iteracion,
                models_info):

    _, preprocess_input = get_model(architecture, iteracion, models_info,
                                    pipeline)

    datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                 rotation_range=40,
                                 width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 shear_range=0.01,
                                 zoom_range=[0.9, 1.25],
                                 horizontal_flip=True,
                                 vertical_flip=False,
                                 fill_mode='reflect',
                                 data_format='channels_last')

    if etapa == 'train':
        train_generator = datagen.flow_from_dataframe(
            dataframe=datos['df_train'],
            x_col=pipeline["x_col_name"],
            y_col=pipeline["y_col_name"],
            target_size=(pipeline['img_height'], pipeline['img_width']),
            class_mode='categorical',
            batch_size=pipeline["batch_size"],
            seed=42,
            shuffle=True)

    if etapa == 'train_EL':
        train_generator = datagen.flow_from_dataframe(
            dataframe=datos['df_train_EL'],
            x_col=pipeline["x_col_name"],
            y_col=pipeline["y_col_name"],
            target_size=(pipeline['img_height'], pipeline['img_width']),
            class_mode='categorical',
            batch_size=pipeline["batch_size"],
            seed=42,
            shuffle=True)

    test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

    test_generator = test_datagen.flow_from_dataframe(
        dataframe=datos['df_test'],
        x_col=pipeline["x_col_name"],
        y_col=pipeline["y_col_name"],
        batch_size=pipeline["batch_size"],
        seed=42,
        shuffle=False,
        class_mode="categorical",
        target_size=(pipeline['img_height'], pipeline['img_width']))

    STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

    if label_active:
        batchset_datagen = ImageDataGenerator(
            preprocessing_function=preprocess_input)

        batchset_generator = batchset_datagen.flow_from_dataframe(
            dataframe=datos['df_batchset'],
            x_col=pipeline["x_col_name"],
            y_col=pipeline["y_col_name"],
            batch_size=pipeline["batch_size"],
            seed=42,
            shuffle=False,
            class_mode="categorical",
            target_size=(pipeline['img_height'], pipeline['img_width']))

        STEP_SIZE_BATCH = batchset_generator.n // batchset_generator.batch_size

        return train_generator, batchset_generator, STEP_SIZE_BATCH

    return train_generator, test_generator, STEP_SIZE_TEST
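
# Usage sketch (the datos/pipeline dicts mirror the keys used above; the
# argument values are placeholders, not from the original):
#
# train_gen, test_gen, step_size_test = generadores(
#     etapa='train', architecture='MobileNetV2', datos=datos,
#     pipeline=pipeline, label_active=False, iteracion=0, models_info=None)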
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.1,
    horizontal_flip=True,
    rotation_range=10,
    brightness_range=(0.1, 0.5),
    zoom_range=0.2,
)

train_ds = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/content/drive/MyDrive/Dataset/D-attentive-AI-satellite-image-classification/merged_data/train/",
    x_col = "Filename",
    y_col = "Labels",
    subset = "training",
    batch_size = train_btz,
    shuffle = True,
    class_mode = "categorical",
    target_size = img_sz
)

val_ds = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="/content/drive/MyDrive/Dataset/D-attentive-AI-satellite-image-classification/merged_data/train/",
    x_col = "Filename",
    y_col = "Labels",
    subset = "validation",
    batch_size = val_btz,
    shuffle = False,
    class_mode = "categorical",
Example 23
class_weights_dict = {
    i: class_weights[i]
    for i, label in enumerate(classes_to_predict)
}

training_batch_size = 32
validation_batch_size = 32
target_size = (216, 216)

train_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=training_df,
    x_col='song_sample',
    y_col='bird',
    directory='.',
    target_size=target_size,
    batch_size=training_batch_size,
    shuffle=True,
    class_mode='categorical')

validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_df,
    x_col='song_sample',
    y_col='bird',
    directory='.',
    target_size=target_size,
    shuffle=False,
    batch_size=validation_batch_size,
    class_mode='categorical')
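
# class_weights_dict above is built but never used in this snippet; a hedged
# sketch of how it would be passed to training (`model` is an assumption):
#
# model.fit(train_generator,
#           validation_data=validation_generator,
#           class_weight=class_weights_dict,
#           epochs=10)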
Example 24
df_labels['label'] = df_labels['label'].astype(str)
train_df = df_labels
print(train_df.head())

#train_df = train_df.iloc[:160000]
#Splitting data into train, val_set and test_set
train_set, valid_set = train_test_split(train_df, test_size=0.2)
train_set, test_set = train_test_split(train_set, test_size=0.2)

Datagen = ImageDataGenerator(rescale=1. / 255,
                             horizontal_flip=True,
                             vertical_flip=True)
train_gen = Datagen.flow_from_dataframe(train_set,
                                        directory=None,
                                        x_col='Path',
                                        y_col='label',
                                        target_size=(96, 96),
                                        batch_size=128,
                                        class_mode='binary',
                                        shuffle=True)
valDatgen = ImageDataGenerator(rescale=1. / 255)
val_gen = valDatgen.flow_from_dataframe(valid_set,
                                        x_col='Path',
                                        y_col='label',
                                        target_size=(96, 96),
                                        batch_size=128,
                                        class_mode='binary',
                                        shuffle=False)
testDatgen = ImageDataGenerator(rescale=1. / 255)
test_gen = testDatgen.flow_from_dataframe(test_set,
                                          x_col='Path',
                                          y_col='label',
                                          target_size=(96, 96),
                                          batch_size=128,
                                          class_mode='binary',
                                          shuffle=False)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

target_size = (360, 480)
batch_size = 16

from tensorflow.keras.applications import inception_resnet_v2

datagen = ImageDataGenerator()

train_datagen_x = datagen.flow_from_dataframe(dataframe=train_gen_df,
                                              x_col="mean_file_name",
                                              y_col=label_columns,
                                              class_mode="other",
                                              target_size=target_size,
                                              batch_size=batch_size,
                                              shuffle=True)
val_datagen_x = datagen.flow_from_dataframe(dataframe=val_gen_df,
                                            x_col="mean_file_name",
                                            y_col=label_columns,
                                            class_mode="other",
                                            target_size=target_size,
                                            batch_size=batch_size,
                                            shuffle=True)


def back_mean_gen(df, flip=True):
    while True:
        sample = df.sample(n=8)
            # "data/mergecropped1700image/image",
            "data/mergecropped256image/image",
            x_col='image',
            y_col='traveler',
            target_size=target_size,
            class_mode="raw",  # for regression
            batch_size=batch_size,
            seed=seed_value)

        valid_datagen = ImageDataGenerator(rescale=1. / 255)
        valid_datagenerator = valid_datagen.flow_from_dataframe(
            test,
            # "data/testimage/image",
            # "data/mergeimage/image",
            # "data/mergecropped1700image/image",
            "data/mergecropped256image/image",
            x_col='image',
            y_col='traveler',
            target_size=target_size,
            class_mode="raw",  # for regression
            batch_size=batch_size,
            seed=seed_value)

        inference_datagen = ImageDataGenerator(
            rotation_range=15,
            shear_range=0.2,
            horizontal_flip=True,
            vertical_flip=True,
            width_shift_range=0.1,
            height_shift_range=0.1,
            fill_mode='nearest',
            zca_whitening=True  # ZCA whitening
        )
Example 27
BATCH_SIZE = 64

# Get test subset
print('Loading image data...')
label_df = pd.read_csv(args["csv"])
test_df = label_df[label_df['subset'] == 'test']

# Preprocess test data
print('Preprocess test data...')
test_datagen = ImageDataGenerator(rescale=1. / 255,
                                  preprocessing_function=apply_grey)

test_generator = test_datagen.flow_from_dataframe(test_df,
                                                  directory=args["dataset"],
                                                  x_col='file_path',
                                                  y_col='label',
                                                  class_mode='categorical',
                                                  target_size=TARGET_SIZE,
                                                  shuffle=False,
                                                  batch_size=BATCH_SIZE)

# Evaluate model
print('Load model...')
model = load_model(MODELS_PATH + MODEL_NAME + '.h5')

print('Calculate test accuracy...')
start_time = time.time()
test_loss, test_acc = model.evaluate(test_generator,
                                     steps=test_generator.n // BATCH_SIZE,
                                     verbose=2)

print("Test accuracy:", test_acc)
Example 28
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

DATA_DIR = 'D:/kaggle/chineseMNIST/'

IMAGE_SIZE = (64, 64)
train_df = pd.read_csv(DATA_DIR + 'chinese_mnist.csv')

# Prepping the Data
train_df['file'] = train_df.apply(lambda x: f'input_{x[0]}_{x[1]}_{x[2]}.jpg',
                                  axis=1)

train_df, test_df = train_test_split(train_df,
                                     test_size=0.2,
                                     stratify=train_df['character'].values)

train_df, val_df = train_test_split(train_df,
                                    test_size=0.1,
                                    stratify=train_df['character'].values)

# Note: color_mode is a flow_from_dataframe argument, not an
# ImageDataGenerator constructor argument, so it is moved below.
train_generator = ImageDataGenerator(rescale=1. / 255,
                                     rotation_range=20)
test_generator = ImageDataGenerator(rescale=1. / 255)

# fix this up next time im using this
# (the missing dataframe/directory arguments are filled in here; DATA_DIR is
# assumed, not confirmed, to also contain the image files)
train_data = train_generator.flow_from_dataframe(train_df, directory=DATA_DIR,
                                                 x_col='file', y_col='value',
                                                 target_size=IMAGE_SIZE,
                                                 color_mode='grayscale')
val_data = test_generator.flow_from_dataframe(val_df, directory=DATA_DIR,
                                              x_col='file', y_col='value',
                                              target_size=IMAGE_SIZE,
                                              color_mode='grayscale')
test_data = test_generator.flow_from_dataframe(test_df, directory=DATA_DIR,
                                               x_col='file', y_col='value',
                                               target_size=IMAGE_SIZE,
                                               color_mode='grayscale')
                                    width_shift_range=0.1,
                                    height_shift_range=0.1,
                                    brightness_range=(0.75, 1.25),
                                    horizontal_flip=True,
                                    vertical_flip=True,
                                    preprocessing_function=contrast_stretch,
                                    validation_split=val_split)

# https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator
# will not shuffle before split! Need to shuffle first
spiral_train_generator = spiral_datagen.flow_from_dataframe(
    df_train_images,
    x_col='path',
    y_col='label',
    subset="training",
    target_size=(img_height, img_width),
    color_mode="grayscale",
    batch_size=batch_size,
    class_mode="binary",
    shuffle=True,
    seed=42)
spiral_val_generator = spiral_datagen.flow_from_dataframe(
    df_train_images,
    x_col='path',
    y_col='label',
    subset="validation",
    target_size=(img_height, img_width),
    color_mode="grayscale",
    batch_size=batch_size,
    class_mode="binary",
    shuffle=True,
    seed=42)
Example 30
def main():
    directory = 'img' # folder where the images are stored
    df_train = pd.read_csv('train.csv') # DataFrame describing the training data
    df_validation = pd.read_csv('val.csv') # DataFrame describing the validation data
    df_test = pd.read_csv('test.csv') # DataFrame describing the test data
    label_list = ['AMD', 'DR_DM', 'Gla', 'MH', 'Normal', 'RD', 'RP', 'RVO'] # label names
    image_size = (224, 224) # input image size
    classes = len(label_list) # number of classes
    batch_size = 32 # batch size
    epochs = 300 # number of epochs
    loss = 'categorical_crossentropy' # loss function
    optimizer = Adam(lr=0.00001, amsgrad=True) # optimizer
    metrics = 'accuracy' # evaluation metric
    # ImageDataGenerator augmentation parameters
    aug_params = {'rotation_range': 5,
                  'width_shift_range': 0.05,
                  'height_shift_range': 0.05,
                  'shear_range': 0.1,
                  'zoom_range': 0.05,
                  'horizontal_flip': True,
                  'vertical_flip': True}


    # Save the model only when val_loss reaches a new minimum
    mc_cb = ModelCheckpoint('model_weights.h5',
                            monitor='val_loss', verbose=1,
                            save_best_only=True, mode='min')
    # When training plateaus, multiply the learning rate by 0.2
    rl_cb = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=3,
                              verbose=1, mode='auto',
                              min_delta=0.0001, cooldown=0, min_lr=0)
    # Stop training early once it stops improving
    es_cb = EarlyStopping(monitor='loss', min_delta=0,
                          patience=5, verbose=1, mode='auto')


    # Weight the loss according to the per-class sample counts
    weight_balanced = {}
    for i, label in enumerate(label_list):
        weight_balanced[i] = (df_train['label'] == label).sum()
    max_count = max(weight_balanced.values())
    for label in weight_balanced:
        weight_balanced[label] = max_count / weight_balanced[label]
    print(weight_balanced)


    # Create the generators
    ## training data generator
    datagen = ImageDataGenerator(rescale=1./255, **aug_params)
    train_generator = datagen.flow_from_dataframe(
        dataframe=df_train, directory=directory,
        x_col='filename', y_col='label',
        target_size=image_size, class_mode='categorical',
        classes=label_list,
        batch_size=batch_size)
    step_size_train = train_generator.n // train_generator.batch_size
    ## validation data generator
    datagen = ImageDataGenerator(rescale=1./255)
    validation_generator = datagen.flow_from_dataframe(
        dataframe=df_validation, directory=directory,
        x_col='filename', y_col='label',
        target_size=image_size, class_mode='categorical',
        classes=label_list,
        batch_size=batch_size)
    step_size_validation = validation_generator.n // validation_generator.batch_size


    # Build the network
    base_model = InceptionV3(include_top=False, weights='imagenet', pooling='avg',
                       input_shape=(image_size[0], image_size[1], 3))
    x = Dense(256, kernel_initializer='he_normal')(base_model.output)
    x = Dense(classes, kernel_initializer='he_normal')(x)
    outputs = Activation('softmax')(x)
    model = Model(inputs=base_model.inputs, outputs=outputs)

    model.summary()
    model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])


    # Training
    history = model.fit_generator(
        train_generator, steps_per_epoch=step_size_train,
        epochs=epochs, verbose=1, callbacks=[mc_cb, rl_cb, es_cb],
        validation_data=validation_generator,
        validation_steps=step_size_validation,
        class_weight=weight_balanced,
        workers=3)

    # Save the learning curves
    plot_history(history)


    # Evaluate on the test data
    ## load the trained weights
    model.load_weights('model_weights.h5')

    ## inference
    X = df_test['filename'].values
    y_true = list(map(lambda x: label_list.index(x), df_test['label'].values))
    y_pred = []
    for file in tqdm(X, desc='pred'):
        # Resize and rescale the image to match the training-time preprocessing
        img = Image.open(f'{directory}/{file}')
        img = img.resize(image_size)
        img = np.array(img, dtype=np.float32)
        img *= 1./255
        img = np.expand_dims(img, axis=0)

        y_pred.append(np.argmax(model.predict(img)[0]))

    ## evaluation
    print(classification_report(y_true, y_pred, target_names=label_list))
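
    # A small addition (not in the original): the confusion matrix for the
    # same predictions, alongside the classification report.
    from sklearn.metrics import confusion_matrix
    print(confusion_matrix(y_true, y_pred))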