def main(argv):
    """Train the mask detector on the pickled face list and save the model.

    The file list is split 80/20 (stratified on ``label``) into training and
    validation frames, wrapped in Keras image generators, and used to fit the
    network built by ``create_mask_detector_mobilenet``. Weights are
    checkpointed every epoch and the final model is written to
    ``FLAGS.output_file``.

    Args:
        argv: Positional command-line arguments; not used, all options are
            read from ``FLAGS``.
    """
    # load input
    # NOTE: read_pickle executes arbitrary code from the file; only use it on
    # file lists produced by this project.
    img_df = pd.read_pickle(
        os.path.join(FLAGS.input_directory, FLAGS.file_list))

    train_df, validation_df = train_test_split(
        img_df,
        test_size=0.2,
        random_state=19,
        shuffle=True,
        stratify=img_df["label"])
    # Report the actual sample counts (DataFrame.count() would log a
    # per-column Series for the whole, unsplit frame).
    logging.info(
        f"Training on {len(train_df)} faces, "
        f"validating on {len(validation_df)} faces")

    # Prepare data generators
    train_datagen = ImageDataGenerator(
        rotation_range=20,
        zoom_range=0.2,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        horizontal_flip=True,
        preprocessing_function=preprocess_input,
        fill_mode="nearest",
    )
    # Validation images are only preprocessed, never augmented.
    validation_datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input)

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=FLAGS.input_directory,
        x_col="filename",
        y_col="label",
        class_mode="categorical",
        batch_size=FLAGS.batch_size,
        target_size=MASK_INPUT_IMAGE_SHAPE[:-1],
    )
    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=validation_df,
        directory=FLAGS.input_directory,
        x_col="filename",  # same as the default; spelled out to match training
        y_col="label",
        class_mode="categorical",
        batch_size=FLAGS.batch_size,
        target_size=MASK_INPUT_IMAGE_SHAPE[:-1],
    )

    # create a model
    maskNet = create_mask_detector_mobilenet(MASK_INPUT_IMAGE_SHAPE)

    # checkpoints
    checkpoint_path = "checkpoints/weights.{epoch:02d}-{val_loss:.4f}.hdf5"
    checkpoint_cb = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_best_only=False,
        monitor="val_loss",
        mode="min",
        save_weights_only=True,
        verbose=1,
        save_freq="epoch",
    )
    early_stopping_cb = keras.callbacks.EarlyStopping(
        patience=5, restore_best_weights=True)

    # tensorboard callback
    root_logdir = os.path.join(os.curdir, "mylogs")
    run_logdir = get_run_logdir(root_logdir)
    tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

    # Floor division drops the trailing partial batch; clamp to 1 so a
    # dataset smaller than one batch still yields a non-empty epoch.
    STEPS_PER_EPOCH = max(1, len(train_df) // FLAGS.batch_size)
    STEPS_PER_EPOCH_VAL = max(1, len(validation_df) // FLAGS.batch_size)

    # compile
    # NOTE(review): labels are one-hot ("categorical") but the loss is
    # binary_crossentropy -- confirm this matches the model's output layer.
    maskNet.compile(loss="binary_crossentropy",
                    optimizer="adam",
                    metrics=["accuracy"])

    # train!
    result = maskNet.fit(
        train_generator,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_data=validation_generator,
        validation_steps=STEPS_PER_EPOCH_VAL,
        epochs=FLAGS.epoch,
        callbacks=[early_stopping_cb, checkpoint_cb, tensorboard_cb],
    )

    maskNet.save(FLAGS.output_file)
model.add(Dense(2, activation = 'softmax')) model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy']) model.summary() df["category"] = df["category"].replace({0:'cat', 1:'dog'}) train_df, validate_df = train_test_split(df, test_size = 0.2, random_state = 42) train_df = train_df.reset_index(drop = True) validate_df = validate_df.reset_index(drop = True) total_train = train_df.shape[0] total_validate = validate_df.shape[0] batch_size = 15 train_datagen = ImageDataGenerator(rotation_range = 15, rescale = 1./255, shear_range = 0.1, zoom_range = 0.2, horizontal_flip = True, width_shift_range = 0.1, height_shift_range = 0.1) train_generator = train_datagen.flow_from_dataframe(train_df, dataset_path, x_col = 'filename', y_col = 'category', target_size = Image_Size, class_mode = 'categorical', batch_size = batch_size) validation_datagen = ImageDataGenerator(rescale = 1./255) validation_generator = validation_datagen.flow_from_dataframe(validate_df, dataset_path, x_col = 'filename', y_col = 'category', target_size = Image_Size, class_mode = 'categorical', batch_size = batch_size) checkpoint = ModelCheckpoint(filepath = "./1/weights.h5", monitor = "val_loss", verbose = 1, save_best_only = True, mode = "min") callbacks_list = [checkpoint, reduce_lr, earlystop] epochs = 150 history = model.fit(train_generator, epochs = epochs, validation_data = validation_generator,
def buildDataGeneretor(self, mainClassInstructionsDF, target_img_shape=(256, 256, 3), batch_size=64,
                       pathColName='ImgPath_Absolute'):
    """Build train/validation/test image generators from an instruction frame.

    The frame is split with ``splitDataSet_train_val_test`` (20% validation,
    10% test). Each split is wrapped in a ``flow_from_dataframe`` iterator in
    ``raw`` class mode whose targets are the columns named by
    ``self.classDictionary.values()``.

    Sets on ``self``: ``X_test``, ``y_test``, ``train_generator``,
    ``valid_generator``, ``test_generator``, ``STEP_SIZE_TRAIN``,
    ``STEP_SIZE_VALID``, ``STEP_SIZE_TEST`` and ``DataGenerator`` (set to 1).

    :param mainClassInstructionsDF: frame with an image-path column and the
        label columns.
    :param target_img_shape: image shape; only the first two entries are used
        as the resize target.
    :param batch_size: batch size of the train and validation generators (the
        test generator always uses 1).
    :param pathColName: name of the column holding the image paths.
    """
    # Sanity check on the first row only; a missing file is reported but does
    # not abort the build.
    testPath = mainClassInstructionsDF[pathColName].values[0]
    if not os.path.exists(testPath):
        print("Could not locate sample image from mainClassInstructionsDF")
        print("Img path: %s" % (testPath))

    dataset_train, dataset_val, dataset_test = splitDataSet_train_val_test(dataFrame=mainClassInstructionsDF,
                                                                           val_percent=20,
                                                                           test_percent=10)
    target_size = target_img_shape[0:2]
    self.X_test = dataset_test.filter(items=[pathColName]).values

    print("Build data generator")
    # https://vijayabhaskar96.medium.com/multi-label-image-classification-tutorial-with-keras-imagedatagenerator-cd541f8eaf24
    datagen = ImageDataGenerator(rescale=1. / 255.)
    test_datagen = ImageDataGenerator(rescale=1. / 255.)

    label_columns = list(self.classDictionary.values())

    def make_flow(image_datagen, frame, flow_batch_size, shuffle):
        # directory=None: pathColName is used as the full path to each image.
        return image_datagen.flow_from_dataframe(
            dataframe=frame,
            directory=None,
            x_col=pathColName,
            class_mode="raw",
            y_col=label_columns,
            batch_size=flow_batch_size,
            seed=42,
            shuffle=shuffle,
            target_size=target_size)

    train_generator = make_flow(datagen, dataset_train, batch_size, True)
    valid_generator = make_flow(test_datagen, dataset_val, batch_size, True)
    # Batch size 1 and no shuffling keep predictions aligned with y_test.
    test_generator = make_flow(test_datagen, dataset_test, 1, False)

    self.test_generator = test_generator
    self.valid_generator = valid_generator
    self.train_generator = train_generator
    self.y_test = self.test_generator.labels

    # Each step count is derived from its own generator (the train and test
    # counts were previously computed from each other's generator).
    self.STEP_SIZE_TRAIN = self.train_generator.n // self.train_generator.batch_size
    self.STEP_SIZE_VALID = self.valid_generator.n // self.valid_generator.batch_size
    self.STEP_SIZE_TEST = self.test_generator.n // self.test_generator.batch_size

    self.DataGenerator = 1
    print("test_generator: %s records. Shape: %s" % (self.test_generator.n, str(self.y_test.shape)))
    print("valid_generator: %s records" % (self.valid_generator.n))
    print("train_generator: %s records" % (self.train_generator.n))
# get the images
# classification_file maps each image filename to its class label.
with open(classification_file) as f:
    dic = json.load(f)

dataframe = pd.DataFrame(dic.items())
dataframe.rename(columns={0: 'filename', 1: 'class'}, inplace=True)
# Categorical class mode needs string labels.
dataframe["class"] = dataframe["class"].astype(str)
# Shuffle once with the shared seed: the training/validation subsets are cut
# by row position, so an unseeded shuffle would change the split on every run.
dataframe = dataframe.sample(frac=1, random_state=SEED)

train_gen = train_data_gen.flow_from_dataframe(dataframe,
                                               train_dir,
                                               batch_size=bs,
                                               target_size=(img_h, img_w),
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               subset='training')

valid_gen = valid_data_gen.flow_from_dataframe(dataframe,
                                               train_dir,
                                               batch_size=bs,
                                               target_size=(img_h, img_w),
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               subset='validation')

# Unlabelled test images live in dataset_dir/test. They are resized to the
# same shape as the training images (the default would be 256x256).
test_gen = test_data_gen.flow_from_directory(dataset_dir,
                                             classes=['test'],
                                             batch_size=bs,
                                             target_size=(img_h, img_w),
                                             class_mode=None,
                                             shuffle=False)
def load(name, dataset_dir, BATCH_SIZE, tfrecord):
    """Load the train/validation/test splits of a dataset.

    Only the ``"idrid"`` dataset is supported.

    Args:
        name: Dataset name; must be ``"idrid"``.
        dataset_dir: Directory holding either the ``*_image.tfrecords`` files
            or ``train_data.csv`` / ``test_data.csv`` plus the ``train`` and
            ``test`` image folders.
        BATCH_SIZE: Batch size for every returned dataset/generator.
        tfrecord: If truthy, read the TFRecord files; otherwise build Keras
            generators from the CSV files.

    Returns:
        A ``(train, validation, test)`` tuple of datasets (TFRecord path) or
        Keras dataframe iterators (CSV path).

    Raises:
        ValueError: If ``name`` is not a supported dataset.
    """
    if name != "idrid":
        # Previously this fell through and returned None, which only failed
        # later when the caller tried to unpack the result.
        raise ValueError(f"Unsupported dataset name: {name!r}")

    logging.info(f"Preparing dataset {name}...")

    # use tfrecord file
    if tfrecord:
        def load_preprocessed(file_name):
            # Read one TFRecord file and preprocess the image of every record.
            dataset = load_dataset(os.path.join(dataset_dir, file_name))
            return dataset.map(lambda x, y: (tf_preprocess(x), y),
                               num_parallel_calls=AUTOTUNE)

        # Only the training split is shuffled and augmented.
        train_dataset = aug_and_prepare(load_preprocessed('train_image.tfrecords'),
                                        BATCH_SIZE,
                                        shuffle=True,
                                        augment=True)
        valid_dataset = aug_and_prepare(load_preprocessed('valid_image.tfrecords'),
                                        BATCH_SIZE)
        test_dataset = aug_and_prepare(load_preprocessed('test_image.tfrecords'),
                                       BATCH_SIZE)
        return train_dataset, valid_dataset, test_dataset

    # use csv dataset
    train_df = pd.read_csv(os.path.join(dataset_dir, 'train_data.csv'))
    train_df['Image name'] = train_df['Image name'] + ".jpg"
    test_df = pd.read_csv(os.path.join(dataset_dir, 'test_data.csv'))
    test_df['Image name'] = test_df['Image name'] + ".jpg"

    validation_split = 0.15
    train_datagen = ImageDataGenerator(
        rotation_range=360,
        horizontal_flip=True,
        vertical_flip=True,
        validation_split=validation_split,
        preprocessing_function=preprocess,
        rescale=1 / 255.)
    # The validation subset must not be augmented. This generator uses the
    # same split fraction, so it selects exactly the rows that the training
    # subset excludes.
    val_datagen = ImageDataGenerator(
        validation_split=validation_split,
        preprocessing_function=preprocess,
        rescale=1 / 255.)
    test_datagen = ImageDataGenerator(
        preprocessing_function=preprocess,
        rescale=1 / 255.)

    train_dir = os.path.join(dataset_dir, 'train')
    train_generator = train_datagen.flow_from_dataframe(
        train_df,
        x_col='Image name',
        y_col='Retinopathy grade',
        directory=train_dir,
        target_size=(256, 256),
        batch_size=BATCH_SIZE,
        class_mode='raw',
        subset='training')
    val_generator = val_datagen.flow_from_dataframe(
        train_df,
        x_col='Image name',
        y_col='Retinopathy grade',
        directory=train_dir,
        target_size=(256, 256),
        batch_size=BATCH_SIZE,
        class_mode='raw',
        shuffle=False,
        subset='validation')
    test_generator = test_datagen.flow_from_dataframe(
        test_df,
        x_col='Image name',
        y_col='Retinopathy grade',
        directory=os.path.join(dataset_dir, 'test'),
        target_size=(256, 256),
        batch_size=BATCH_SIZE,
        class_mode='raw',
        shuffle=False)
    return train_generator, val_generator, test_generator
sns.countplot(train["label"]) plt.show() print(train.label.value_counts()) img_size = 128 data = ImageDataGenerator(validation_split=0.2) train_data = data.flow_from_dataframe( dataframe=train, directory='train_images', x_col='image_id', y_col='label', target_size=(img_size, img_size), batch_size=32, subset='training', shuffle = True, class_mode='categorical' ) valid_data = data.flow_from_dataframe( dataframe=train, directory='train_images', x_col='image_id', y_col='label', target_size=(img_size, img_size), batch_size=32, subset='validation', class_mode = 'categorical',
# Data generators
# Both splits are described by CSV files with "filename" and "label" columns.
train_df = pandas.read_csv(train_path)
validate_df = pandas.read_csv(validate_path)

# Training images are rescaled and randomly flipped; validation images are
# only rescaled.
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   horizontal_flip=True,
                                   vertical_flip=True)
val_datagen = ImageDataGenerator(rescale=1. / 255, )

# Options shared by both iterators.
_flow_options = dict(
    directory=image_dir,
    x_col="filename",
    y_col='label',
    target_size=(image_height, image_width),
    batch_size=batch_size,
    shuffle=True,
    class_mode="raw",
    color_mode="rgb",
)

train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,
                                                    **_flow_options)
val_generator = val_datagen.flow_from_dataframe(dataframe=validate_df,
                                                **_flow_options)
def generator_splitter_multi(model_name, train, test, imagepath):
    """
    Build train/validation/test iterators with `ImageDataGenerator`, so the
    dataset is streamed from disk rather than held in memory.

    The model-specific preprocessing function is applied to every image of
    all three splits. 20% of `train` is held out as the validation subset.

    Note: the `label` column of `train` is converted to `str` in place.

    :param model_name: one of 'vgg19', 'MobileNetV2', 'vgg16', 'ResNet50';
        any other value disables preprocessing
    :param train: data set (columns `files` and `label`)
    :param test: data set (column `files`)
    :param imagepath: path to the image folder
    :return: data split for train val and test
    """
    if model_name == 'vgg19':
        preprocessing = preprocess_input_VGG19
    elif model_name == 'MobileNetV2':
        preprocessing = preprocess_input_MNV2
    elif model_name == 'vgg16':
        preprocessing = preprocess_input_VGG16
    elif model_name == 'ResNet50':
        preprocessing = Preprocess_RESNET50
    else:
        preprocessing = None

    # Categorical class mode needs string labels.
    train['label'] = train['label'].astype(str)

    # preprocessing_function is an ImageDataGenerator option. Passed to
    # flow_from_dataframe it is silently ignored, so the images previously
    # reached the model unpreprocessed. featurewise_std_normalization was
    # ignored the same way; it is left out because it also needs a prior
    # fit() on sample data that this function never performed.
    img_gen = ImageDataGenerator(validation_split=0.2,
                                 preprocessing_function=preprocessing)
    img_gen_test = ImageDataGenerator(preprocessing_function=preprocessing)

    shared = dict(directory=imagepath,
                  x_col='files',
                  y_col='label',
                  batch_size=64,
                  target_size=(224, 224))

    # Train Set
    train_data = img_gen.flow_from_dataframe(train,
                                             class_mode='categorical',
                                             subset='training',
                                             **shared)
    # Validation Set
    valid_data = img_gen.flow_from_dataframe(train,
                                             class_mode='categorical',
                                             subset='validation',
                                             **shared)
    # Test Set: no labels are yielded; order is kept for prediction alignment.
    test_data = img_gen_test.flow_from_dataframe(test,
                                                 class_mode=None,
                                                 shuffle=False,
                                                 **shared)
    return train_data, valid_data, test_data
#     subset=None,
#     interpolation="nearest",
#     validate_filenames=True,
#     **kwargs
# )

# Read the training manifest; the CSV's first row is the header.
train_df = pd.read_csv(
    "E:\\ML Training Data\\" + "train.csv",
    header=0,
    index_col=None,
)

# Stream shuffled batches of 224x224 images with one-hot 'Common name' labels.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='Full Path',
    y_col='Common name',
    directory=None,  # Set to None because the absolute path is provided
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    seed=42,
    save_to_dir=None,  # "E:\\tmp"
)

#
# images, labels = next(train_generator)
# print(images.dtype, images.shape)
# print(labels.dtype, labels.shape)
# ds = tf.data.Dataset.from_generator(
#     train_generator,
#     output_types=(tf.float32, tf.float32),
#     output_shapes=([16, 224, 224, 3], [16, 44])
def create_dataset(config, val_split=0.2):
    """Build training and validation iterators for the PetImages dataset.

    Files under ``PetImages/Cat`` and ``PetImages/Dog`` whose first bytes do
    not contain a JFIF marker are deleted from disk. The remaining files are
    shuffled and the first fold of a 5-fold split is used, so one fifth of
    the images become the validation set.

    Args:
        config: Object with boolean attributes ``crossvalidation`` (only
            triggers a warning) and ``augmentation`` (enables random rotation
            of training images).
        val_split: NOTE(review): currently unused -- the validation fraction
            is fixed by the 5-fold split. Confirm whether it should drive
            ``n_splits``.

    Returns:
        Tuple ``(train_ds, val_ds)`` of Keras dataframe iterators.
    """
    log.info("Loading dataset...")

    class_folders = ("Cat", "Dog")
    jfif_marker = tf.compat.as_bytes("JFIF")

    num_skipped = 0
    for folder_name in class_folders:
        folder_path = os.path.join("PetImages", folder_name)
        for fname in os.listdir(folder_path):
            fpath = os.path.join(folder_path, fname)
            # The context manager closes the file and lets a failing open()
            # surface its own error (the old try/finally raised an
            # unbound-name error instead).
            with open(fpath, "rb") as fobj:
                is_jfif = jfif_marker in fobj.peek(10)
            if not is_jfif:
                num_skipped += 1
                # Delete corrupted image
                os.remove(fpath)
    print("Deleted %d images" % num_skipped)

    image_size = (180, 180)
    batch_size = 32

    if config.crossvalidation:
        log.warning(
            "Crossvalidation is used, but not implemented in this way, turn around now!"
        )
    kf = KFold(n_splits=5)

    # One row per remaining image: path relative to PetImages, plus its class.
    filenames = []
    labels = []
    for class_name in class_folders:
        for file in os.listdir(os.path.join('PetImages', class_name)):
            filenames.append(os.path.join(class_name, file))
            labels.append(class_name)
    alldata = pd.DataFrame({'filename': filenames, 'label': labels})
    alldata = alldata.sample(frac=1).reset_index(drop=True)
    Y = alldata[['label']]

    # optional augmentation applied through idg: random rotation is the only
    # difference between the two modes (0 is the generator's default).
    idg = ImageDataGenerator(width_shift_range=0.0,
                             height_shift_range=0.0,
                             zoom_range=0.0,
                             rotation_range=36 if config.augmentation else 0,
                             fill_mode='nearest',
                             horizontal_flip=True,
                             rescale=None)
    val_idg = keras.preprocessing.image.ImageDataGenerator()

    # Only the first fold is used.
    train_index, val_index = next(kf.split(np.zeros(len(Y)), Y))
    training_data = alldata.iloc[train_index]
    validation_data = alldata.iloc[val_index]

    train_ds = idg.flow_from_dataframe(training_data,
                                       target_size=image_size,
                                       batch_size=batch_size,
                                       directory='PetImages',
                                       x_col="filename",
                                       y_col="label",
                                       class_mode="categorical",
                                       shuffle=True)
    val_ds = val_idg.flow_from_dataframe(validation_data,
                                         target_size=image_size,
                                         batch_size=batch_size,
                                         directory='PetImages',
                                         x_col="filename",
                                         y_col="label",
                                         class_mode="categorical",
                                         shuffle=True)
    return (train_ds, val_ds)
def load_dataset_generators(self):
    """Create the training and validation generators and derived metadata.

    Depending on ``self.dataset_mode`` the generators read from directories
    (``'directory'``) or from CSV files (``'dataframe'``). Sets
    ``train_generator``, ``validation_generator``, ``num_train_samples``,
    ``num_valid_samples``, ``num_classes`` and ``class_weights`` on ``self``.

    Raises:
        ValueError: If ``self.dataset_mode`` is not ``'directory'`` or
            ``'dataframe'``.
        AssertionError: If the training and validation sets have a different
            number of classes.
    """
    # Create training generator and augment training data
    if self.augment_data is True:
        train_datagen = ImageDataGenerator(rescale=1. / 255,
                                           rotation_range=90,
                                           width_shift_range=0.4,
                                           height_shift_range=0.4,
                                           shear_range=0.4,
                                           zoom_range=0.4,
                                           horizontal_flip=True,
                                           fill_mode='nearest')
    else:
        train_datagen = ImageDataGenerator(rescale=1. / 255)

    # Don't augment data in the validation generator
    validation_datagen = ImageDataGenerator(rescale=1. / 255)

    if self.dataset_mode == 'directory':
        # Training generator
        self.train_generator = train_datagen.flow_from_directory(
            self.train_path,
            target_size=(self.height, self.width),
            batch_size=self.batch_size,
            class_mode=self.class_mode,
            shuffle=True)

        # Validation generator
        self.validation_generator = validation_datagen.flow_from_directory(
            self.valid_path,
            target_size=(self.height, self.width),
            batch_size=self.batch_size,
            class_mode=self.class_mode,
            shuffle=True)

        # Set number of classes
        num_classes_train = self.train_generator.num_classes
        num_classes_valid = self.validation_generator.num_classes

    elif self.dataset_mode == 'dataframe':
        # Training generator; every CSV column is read as str.
        self.train_generator = train_datagen.flow_from_dataframe(
            dataframe=pd.read_csv(self.train_path).astype('str'),
            directory=self.dataset_root,
            x_col=self.xcol_name,
            y_col=self.ycol_name,
            target_size=(self.height, self.width),
            batch_size=self.batch_size,
            class_mode=self.class_mode,
            shuffle=self.shuffle_generator)

        # Validation generator
        self.validation_generator = validation_datagen.flow_from_dataframe(
            dataframe=pd.read_csv(self.valid_path).astype('str'),
            directory=self.dataset_root,
            x_col=self.xcol_name,
            y_col=self.ycol_name,
            target_size=(self.height, self.width),
            batch_size=self.batch_size,
            class_mode=self.class_mode,
            shuffle=self.shuffle_generator)

        # Set number of classes
        num_classes_train = len(self.train_generator.class_indices)
        num_classes_valid = len(self.validation_generator.class_indices)

    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(
            "unsupported dataset_mode: {!r}".format(self.dataset_mode))

    # Set number of samples
    self.num_train_samples = self.train_generator.samples
    self.num_valid_samples = self.validation_generator.samples

    # Check if number of training classes == number of validation classes
    assert num_classes_train == num_classes_valid, "number of classes in training and validation sets do not match"

    # Set class-level number of classes
    self.num_classes = num_classes_train

    if self.class_weights == 'balanced':
        # Keyword arguments: recent scikit-learn releases reject the
        # positional form of compute_class_weight.
        self.class_weights = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(self.train_generator.classes),
            y=self.train_generator.classes)
    else:
        self.class_weights = None
# Download dataset from https://drive.google.com/file/d/1jwa16s2nZIQywKMdRkpRvdDifxGDxC3I/view?usp=sharing
dataframe = pd.read_csv('fried_noodles_dataset.csv', delimiter=',', header=0)

# Scale each ingredient amount by a fixed factor of 300.
dataframe["norm_meat"] = dataframe["meat"] / 300
dataframe["norm_veggie"] = dataframe["veggie"] / 300
dataframe["norm_noodle"] = dataframe["noodle"] / 300

# https://keras.io/api/preprocessing/image/
# https://machinelearningmastery.com/how-to-configure-image-data-augmentation-when-training-deep-learning-neural-networks/
datagen = ImageDataGenerator(rescale=1. / 3)

# Options common to both iterators: image location and regression targets.
_regression_flow = dict(
    directory='images',
    x_col='filename',
    y_col=['norm_meat', 'norm_veggie', 'norm_noodle'],
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='other',
)

# .loc slices by label and includes both end points: rows 0..1599 are used
# for training and rows 1600..1699 for validation.
train_generator = datagen.flow_from_dataframe(
    dataframe=dataframe.loc[0:1599],
    shuffle=True,
    **_regression_flow)

validation_generator = datagen.flow_from_dataframe(
    dataframe=dataframe.loc[1600:1699],
    shuffle=False,
    **_regression_flow)
# Compile each model model[j].compile(optimizer=Adam(lr=LR), loss='binary_crossentropy', metrics=['acc']) # All images will be rescaled by 1./255 train_validate_datagen = ImageDataGenerator(rescale=1/255, validation_split=SPLIT) # set validation split test_datagen = ImageDataGenerator(rescale=1/255) data_chunks = ensemble_data(cnn_networks, IMAGES_PATH) for j in range(cnn_networks): print('Net : {}'.format(j+1)) df_train = data_chunks[j].iloc[:-60] df_test = data_chunks[j].iloc[-60:] train_generator = train_validate_datagen.flow_from_dataframe( dataframe=df_train, directory=IMAGES_PATH, target_size=(255, 255), x_col='Images', y_col='Labels', batch_size=32, class_mode='binary', subset='training') validation_generator = train_validate_datagen.flow_from_dataframe( dataframe=df_train, directory=IMAGES_PATH, target_size=(255, 255), x_col='Images', y_col='Labels', batch_size=32, class_mode='binary', subset='validation')
def main(args):
    """Fine-tune a pre-trained encoder with a clustering loss and evaluate it.

    Loads the encoder from ``args.model``, wraps it in
    ``PretrainedDeepClusteringModel``, trains on batches drawn from the
    training flow for ``args.total_batches`` iterations, then fits a
    mini-batch k-means on encoder outputs and logs Hungarian-matched accuracy
    on the dev and test sets to Weights & Biases. The encoder is saved to
    ``encoder.dec.<run id>.hdf5``.

    The three flows are stateful iterators: the order and number of
    ``next()`` calls below determines which images each step sees.
    """
    print('[INFO] Starting clustering...')

    # Setup weights and biases
    setup_wandb(args)

    # Setup the pre-trained encoder from autoencoder.py.
    encoder = load_model(args.model)
    model = PretrainedDeepClusteringModel(backbone=encoder,
                                          n_clusters=args.n_clusters)
    optimizer = Adam(learning_rate=args.learning_rate,
                     beta_1=args.beta1,
                     beta_2=args.beta2)
    model.compile(optimizer=optimizer, loss='kld')

    # Print the summed difference between layer 1 of the standalone encoder
    # and layer 1 of the model's backbone.
    encoder_weights, encoder_biases = encoder.layers[1].get_weights()
    model_weights, model_biases = model.backbone.layers[1].get_weights()
    print("[INFO] Checking weights and biases equality before running...")
    print("[INFO] Weights ", np.sum(encoder_weights - model_weights))
    print("[INFO] Biases ", np.sum(encoder_biases - model_biases))

    # Load the images into memory.  Right now
    # I am not supporting loading from disk.
    train, dev, test = load_dataframes(args.base_dir, args.min_samples)

    # Use an image data generator to save memory.
    augs = dict(preprocessing_function=normalize, )
    gen = ImageDataGenerator(**augs)
    # class_mode=None: the flows yield image batches only, no labels.
    train_flow = gen.flow_from_dataframe(
        dataframe=train,
        directory=os.path.join(args.base_dir, 'train'),
        batch_size=args.batch_size,
        target_size=(args.pixels, args.pixels),
        shuffle=True,
        x_col='file',
        class_mode=None)

    # Setup a generator for dev
    # dev and test are not shuffled; the cluster predictions collected from
    # them below are compared against the dataframe label columns.
    dev_flow = gen.flow_from_dataframe(dataframe=dev,
                                       directory=os.path.join(
                                           args.base_dir, 'dev'),
                                       batch_size=args.batch_size,
                                       target_size=(args.pixels, args.pixels),
                                       shuffle=False,
                                       x_col='file',
                                       class_mode=None)
    test_flow = gen.flow_from_dataframe(dataframe=test,
                                        directory=os.path.join(
                                            args.base_dir, 'test'),
                                        batch_size=args.batch_size,
                                        target_size=(args.pixels, args.pixels),
                                        shuffle=False,
                                        x_col='file',
                                        class_mode=None)

    print('[INFO] Starting initialization of clusters')
    model.initialize_clusters_generator(
        train_flow,
        epochs=1,
        steps_per_epoch=int(np.ceil(len(train) / args.batch_size)))

    print('[INFO] Fitting autoencoder...')
    for layer in encoder.layers:
        layer.trainable = True

    # -----------------
    # Train here
    # -----------------
    loss = np.inf
    for ite in range(int(args.total_batches)):
        # Skip batches that do not hold exactly args.batch_size images.
        batch = next(train_flow)
        while len(batch) != args.batch_size:
            batch = next(train_flow)

        # The target distribution p is computed once per batch from the
        # current predictions q and reused for all repeats below.
        q = model.predict(batch, verbose=0)
        p = clustering_target_distribution(q)

        for _ in range(args.repeat_batch):
            encoder_weights, encoder_biases = encoder.layers[1].get_weights()
            model_weights, model_biases = model.backbone.layers[1].get_weights(
            )
            print("[INFO] Checking weights and biases equality...")
            print("[INFO] Weights ", np.sum(encoder_weights - model_weights))
            print("[INFO] Biases ", np.sum(encoder_biases - model_biases))

            # Train in slices of 32 samples.
            sub_batches = int(np.ceil(args.batch_size / 32))
            for i in range(sub_batches):
                loss = model.train_on_batch(x=batch[i * 32:(i + 1) * 32],
                                            y=p[i * 32:(i + 1) * 32])
                wandb.log({'kld_loss': loss})

    # Fit the sucker
    batches = int(np.ceil(len(train) / args.batch_size))
    dev_batches = int(np.ceil(len(dev) / args.batch_size))

    # This scaler is used to normalize before
    # doing clustering.  The online run is done
    # on the training data to collect statistics.
    # NOTE(review): the scaler is fitted here but is not referenced again in
    # this function -- confirm whether transform() was meant to be applied to
    # the encoder outputs before k-means.
    print('[INFO] Fitting the scaler.')
    scaler = StandardScaler()
    for batch in range(batches):
        x_batch = next(train_flow)
        scaler.partial_fit(encoder.predict(x_batch))

    # Encode string labels as integers; the encoder is fitted on train only.
    label_encoder = LabelEncoder()
    train['encoded_label'] = label_encoder.fit_transform(train['label'])
    dev['encoded_label'] = label_encoder.transform(dev['label'])
    test['encoded_label'] = label_encoder.transform(test['label'])

    # One cluster per distinct training label, fitted incrementally on
    # encoder outputs of training batches.
    kmeans = MiniBatchKMeans(n_clusters=train['label'].nunique())
    batches = int(np.ceil(len(train) / args.batch_size))
    for i in range(batches):
        kmeans.partial_fit(encoder.predict(next(train_flow)))

    # Collect cluster assignments for the dev and test flows.
    dev_clusters = []
    test_clusters = []
    batches = int(np.ceil(len(dev) / args.batch_size))
    for i in range(batches):
        dev_clusters.extend(kmeans.predict(encoder.predict(next(dev_flow))))

    batches = int(np.ceil(len(test) / args.batch_size))
    for i in range(batches):
        test_clusters.extend(kmeans.predict(encoder.predict(next(test_flow))))

    dev_clusters = np.array(dev_clusters)
    test_clusters = np.array(test_clusters)

    accuracy = hungarian_accuracy(dev['encoded_label'], dev_clusters)
    balanced_accuracy = hungarian_balanced_accuracy(dev['encoded_label'],
                                                    dev_clusters)
    wandb.log({
        "dev_accuracy": accuracy,
        "dev_balanced_accuracy": balanced_accuracy
    })

    accuracy = hungarian_accuracy(test['encoded_label'], test_clusters)
    balanced_accuracy = hungarian_balanced_accuracy(test['encoded_label'],
                                                    test_clusters)
    wandb.log({
        "test_accuracy": accuracy,
        "test_balanced_accuracy": balanced_accuracy
    })

    # NOTE(review): this batch is drawn but not used afterwards.
    x_batch = next(dev_flow)

    encoder.save("encoder.dec.{}.hdf5".format(wandb.run.id))
    print('[INFO] Finished!')
def train_model(cfg, data, callbacks, verbose=1):
    '''
    Train a model on the given data and evaluate it on the test set.

    The model's class-index mapping is written with dill to
    cfg['PATHS']['OUTPUT_CLASS_INDICES'].

    :param cfg: Project config (from config.yml)
    :param data: dict of partitioned dataset with 'TRAIN', 'VAL' and 'TEST'
                 frames; data['TRAIN'] is replaced by its oversampled version
                 when random oversampling is configured
    :param callbacks: list of callbacks for Keras model
    :param verbose: Verbosity mode to pass to model.fit_generator()
    :return: Trained model, dict of performance metrics on the test set, and
             the test set generator
    '''

    # If set in config file, oversample the minority class
    if cfg['TRAIN']['IMB_STRATEGY'] == 'random_oversample':
        data['TRAIN'] = random_minority_oversample(data['TRAIN'])

    # Create ImageDataGenerators; only the training set is augmented.
    train_img_gen = ImageDataGenerator(rotation_range=10, preprocessing_function=remove_text,
                                       samplewise_std_normalization=True, samplewise_center=True)
    val_img_gen = ImageDataGenerator(preprocessing_function=remove_text,
                                     samplewise_std_normalization=True, samplewise_center=True)
    test_img_gen = ImageDataGenerator(preprocessing_function=remove_text,
                                      samplewise_std_normalization=True, samplewise_center=True)

    # Create DataFrameIterators
    img_shape = tuple(cfg['DATA']['IMG_DIM'])
    y_col = 'label_str'
    class_mode = 'categorical'
    flow_options = dict(directory=cfg['PATHS']['RAW_DATA'], x_col="filename", y_col=y_col,
                        target_size=img_shape, batch_size=cfg['TRAIN']['BATCH_SIZE'],
                        class_mode=class_mode, validate_filenames=False)
    train_generator = train_img_gen.flow_from_dataframe(dataframe=data['TRAIN'], **flow_options)
    val_generator = val_img_gen.flow_from_dataframe(dataframe=data['VAL'], **flow_options)
    # The test set keeps its order so predictions line up with the frame.
    test_generator = test_img_gen.flow_from_dataframe(dataframe=data['TEST'], shuffle=False, **flow_options)

    # Save model's ordering of class indices. The context manager makes sure
    # the file is flushed and closed.
    with open(cfg['PATHS']['OUTPUT_CLASS_INDICES'], 'wb') as class_indices_file:
        dill.dump(test_generator.class_indices, class_indices_file)

    # Apply class imbalance strategy. We have many more X-rays negative for COVID-19 than positive.
    histogram = np.bincount(np.array(train_generator.labels).astype(int))  # Get class distribution
    class_weight = None
    if cfg['TRAIN']['IMB_STRATEGY'] == 'class_weight':
        class_multiplier = cfg['TRAIN']['CLASS_MULTIPLIER']
        # Reorder the configured multipliers to the generator's class order.
        class_multiplier = [class_multiplier[cfg['DATA']['CLASSES'].index(c)] for c in test_generator.class_indices]
        class_weight = get_class_weights(histogram, class_multiplier)

    # Define metrics.
    covid_class_idx = test_generator.class_indices['COVID-19']  # Get index of COVID-19 class
    thresholds = 1.0 / len(cfg['DATA']['CLASSES'])  # Binary classification threshold for a class
    metrics = ['accuracy', CategoricalAccuracy(name='accuracy'),
               Precision(name='precision', thresholds=thresholds, class_id=covid_class_idx),
               Recall(name='recall', thresholds=thresholds, class_id=covid_class_idx),
               AUC(name='auc'),
               F1Score(name='f1score', thresholds=thresholds, class_id=covid_class_idx)]

    # Define the model.
    class_names = list(test_generator.class_indices.keys())
    print('Training distribution: ',
          ['Class ' + class_names[i] + ': ' + str(histogram[i]) + '. '
           for i in range(len(histogram))])
    input_shape = cfg['DATA']['IMG_DIM'] + [3]
    num_gpus = cfg['TRAIN']['NUM_GPUS']
    if cfg['TRAIN']['MODEL_DEF'] == 'dcnn_resnet':
        model_def = dcnn_resnet
    elif cfg['TRAIN']['MODEL_DEF'] == 'resnet50v2':
        model_def = resnet50v2
    else:
        model_def = resnet101v2

    # Initial output bias: log-odds of each class in the training labels
    # (computed the same way for the binary and multiclass models).
    label_histogram = np.bincount(data['TRAIN']['label'].astype(int))
    output_bias = np.log(label_histogram / (np.sum(label_histogram) - label_histogram))
    if cfg['TRAIN']['CLASS_MODE'] == 'binary':
        model = model_def(cfg['NN']['DCNN_BINARY'], input_shape, metrics, 2, output_bias=output_bias, gpus=num_gpus)
    else:
        n_classes = len(cfg['DATA']['CLASSES'])
        model = model_def(cfg['NN']['DCNN_MULTICLASS'], input_shape, metrics, n_classes, output_bias=output_bias,
                          gpus=num_gpus)

    # Train the model. ceil() keeps the trailing partial batch in each epoch.
    steps_per_epoch = ceil(train_generator.n / train_generator.batch_size)
    val_steps = ceil(val_generator.n / val_generator.batch_size)
    history = model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=cfg['TRAIN']['EPOCHS'],
                                  validation_data=val_generator, validation_steps=val_steps, callbacks=callbacks,
                                  verbose=verbose, class_weight=class_weight)

    # Run the model on the test set and print the resulting performance metrics.
    test_results = model.evaluate_generator(test_generator, verbose=1)
    test_metrics = {}
    for metric, value in zip(model.metrics_names, test_results):
        test_metrics[metric] = value
        print(metric, ' = ', value)
    return model, test_metrics, test_generator
# Restore the saved model, then overwrite its weights with the best checkpoint.
model = keras.models.load_model('../working/model_step3.hdf5')
model.load_weights('best_model.hdf5')

# scores[1] is printed as the accuracy.
scores = model.evaluate_generator(test_generator, verbose=1)
print("Accuracy: %.2f%%" % (scores[1] * 100))

# # Prediction on the test data
from sklearn.metrics import accuracy_score

# Unlabelled, unshuffled iterator over the submission images.
test_sub_generator = test_datagen.flow_from_dataframe(
    sample_submission,
    DATA_PATH + 'test_upload/',
    x_col="Id",
    y_col=None,
    class_mode=None,
    shuffle=False,
    seed=RANDOM_SEED,
    batch_size=BATCH_SIZE,
    target_size=(IMG_SIZE, IMG_SIZE),
)
test_sub_generator.reset()

# Highest-scoring class index per image.
predictions = np.argmax(
    model.predict_generator(test_sub_generator,
                            steps=len(test_sub_generator),
                            verbose=1),
    axis=-1)  # multiple categories

# Invert class_indices (name -> index) to translate indices back to names.
label_map = {index: name
             for name, index in train_generator.class_indices.items()}
predictions = [label_map[index] for index in predictions]
image_size = 380

# Get Labels: every training column except the two identifier columns.
label_cols = df_train.columns.tolist()
for _id_column in ("StudyInstanceUID", "PatientID"):
    label_cols.remove(_id_column)

# Get Test Dataset Generator
# One image per batch, in frame order, without labels.
test_datagen = ImageDataGenerator()
_test_flow_options = dict(
    x_col="StudyInstanceUID",
    class_mode=None,
    color_mode="rgb",
    target_size=(image_size, image_size),
    interpolation="bilinear",
    batch_size=1,
    shuffle=False,
    seed=42,
)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=comp_dir + "test",  # Change this
    **_test_flow_options)
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size

# Load model from H5 Model
model = load_model("../input/ranzcr-clip-big-models/big_model.h5")

# Predict
pred = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=1)

# Create Submission df
except RuntimeError as e: print(e) training_set = pd.read_csv('/data/backup/pervinco_2020/datasets/custom_miml.csv') training_set["labels"] = training_set["labels"].apply(lambda x: x.split(",")) print(training_set.head()) img_dir = "/data/backup/pervinco_2020/datasets/multi_label_cls/images" data_generator = ImageDataGenerator(preprocessing_function=preprocess_input) train_generator = data_generator.flow_from_dataframe(dataframe = training_set, directory=img_dir, x_col="Filenames", y_col="labels", class_mode="categorical", classes=['1850', '3211', '3715', '5203', '5601', '8584'], target_size=(IMAGE_SIZE,IMAGE_SIZE), batch_size=32) cb_early_stopper = EarlyStopping(monitor='loss', patience=EARLY_STOP_PATIENCE) cb_checkpointer = ModelCheckpoint(filepath=saved_path + dataset_name + '/' + time + '/' + weight_file_name, monitor='accuracy', save_best_only=True, mode='auto') model = Sequential() model.add(InceptionResNetV2(include_top=False, pooling='avg', weights='imagenet')) model.add(Dense(6, activation='sigmoid')) model.layers[0].trainable = True model.summary() optimizer = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
'checkpoint-{epoch}.h5'))) # Set up ImageDataGenerators to do data augmentation for the training images. train_datagen = ImageDataGenerator(rotation_range=15, rescale=1. / 255, shear_range=0.1, zoom_range=0.2, horizontal_flip=True, width_shift_range=0.1, height_shift_range=0.1) train_datagen.mean = [123.68, 116.779, 103.939] train_generator = train_datagen.flow_from_dataframe(train_df, DATA_PATH, x_col='filename', y_col='category', target_size=IMAGE_SIZE, class_mode='binary', batch_size=BATCH_SIZE) if hvd.rank() == 0: mlctx.logger.info('classes:', train_generator.class_indices) validation_datagen = ImageDataGenerator(rescale=1. / 255) validation_datagen.mean = [123.68, 116.779, 103.939] validation_generator = validation_datagen.flow_from_dataframe( validate_df, DATA_PATH, x_col='filename', y_col='category', target_size=IMAGE_SIZE,
epochs = 500 # Image Generators train_datagen = ImageDataGenerator(rescale=1. / 255., rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True) train_generator = train_datagen.flow_from_dataframe(dataframe=model_train_data, x_col="filepath", y_col="class", batch_size=batch_size, shuffle=True, class_mode="categorical", target_size=IMAGE_SIZE, color_mode='grayscale', validate_filenames=False) valid_datagen = ImageDataGenerator(rescale=1. / 255.) valid_generator = valid_datagen.flow_from_dataframe(dataframe=model_val_data, x_col="filepath", y_col="class", batch_size=batch_size, shuffle=True, class_mode="categorical", target_size=IMAGE_SIZE, color_mode='grayscale',
def generadores(etapa, architecture, datos, pipeline, label_active, iteracion, models_info):
    """Build the training generator plus the evaluation generator for one stage.

    Args:
        etapa: Training stage; must be ``'train'`` (uses ``datos['df_train']``)
            or ``'train_EL'`` (uses ``datos['df_train_EL']``).
        architecture, iteracion, models_info: Forwarded unchanged to
            ``get_model``.
        datos: Mapping of dataframes. ``'df_test'`` is read when
            ``label_active`` is falsy, ``'df_batchset'`` when it is truthy.
        pipeline: Mapping providing ``x_col_name``, ``y_col_name``,
            ``img_height``, ``img_width`` and ``batch_size``.
        label_active: Selects which evaluation set is returued alongside the
            training generator (batch set when truthy, test set otherwise).

    Returns:
        ``(train_generator, eval_generator, eval_steps)`` where
        ``eval_steps`` is ``eval_generator.n // eval_generator.batch_size``.

    Raises:
        ValueError: If ``etapa`` is not one of the supported stages.
    """
    train_frames = {'train': 'df_train', 'train_EL': 'df_train_EL'}
    # Fail fast: previously an unknown stage only surfaced as an
    # UnboundLocalError at the final ``return``, after all generators had
    # already been built.
    if etapa not in train_frames:
        raise ValueError(
            f"Unknown etapa {etapa!r}; expected one of {sorted(train_frames)}")

    # Only the preprocessing function of the returned pair is needed here.
    _, preprocess_input = get_model(architecture, iteracion, models_info, pipeline)

    # Arguments shared by every flow_from_dataframe call below.
    flow_kwargs = dict(
        x_col=pipeline["x_col_name"],
        y_col=pipeline["y_col_name"],
        target_size=(pipeline['img_height'], pipeline['img_width']),
        class_mode='categorical',
        batch_size=pipeline["batch_size"],
        seed=42,
    )

    # Training images are augmented; evaluation images are only preprocessed.
    datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=40,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.01,
        zoom_range=[0.9, 1.25],
        horizontal_flip=True,
        vertical_flip=False,
        fill_mode='reflect',
        data_format='channels_last',
    )
    train_generator = datagen.flow_from_dataframe(
        dataframe=datos[train_frames[etapa]], shuffle=True, **flow_kwargs)

    # Build only the evaluation generator that is actually returned.
    eval_frame = 'df_batchset' if label_active else 'df_test'
    eval_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    eval_generator = eval_datagen.flow_from_dataframe(
        dataframe=datos[eval_frame], shuffle=False, **flow_kwargs)
    eval_steps = eval_generator.n // eval_generator.batch_size

    return train_generator, eval_generator, eval_steps
train_datagen = ImageDataGenerator( rescale=1./255., validation_split=0.1, horizontal_flip=True, rotation_range=10, brightness_range=(0.1,0.5), zoom_range=0.2, ) train_ds = train_datagen.flow_from_dataframe( dataframe=train_df, directory="/content/drive/MyDrive/Dataset/D-attentive-AI-satellite-image-classification/merged_data/train/", x_col = "Filename", y_col = "Labels", subset = "training", batch_size = train_btz, shuffle = True, class_mode = "categorical", target_size = img_sz ) val_ds = train_datagen.flow_from_dataframe( dataframe=train_df, directory="/content/drive/MyDrive/Dataset/D-attentive-AI-satellite-image-classification/merged_data/train/", x_col = "Filename", y_col = "Labels", subset = "validation", batch_size = val_btz, shuffle = False, class_mode = "categorical",
# Index -> weight mapping, one entry per position in ``classes_to_predict``.
class_weights_dict = {
    index: class_weights[index] for index, _ in enumerate(classes_to_predict)
}

training_batch_size = 32
validation_batch_size = 32
target_size = (216, 216)

# Settings common to the training and validation flows.
_shared_flow_args = dict(
    x_col='song_sample',
    y_col='bird',
    directory='.',
    target_size=target_size,
    class_mode='categorical',
)

# Both generators only multiply pixel values by 1/255; no augmentation.
train_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=training_df,
    batch_size=training_batch_size,
    shuffle=True,
    **_shared_flow_args,
)

validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_df,
    batch_size=validation_batch_size,
    shuffle=False,  # keep validation order stable
    **_shared_flow_args,
)
df_labels['label'] = df_labels['label'].astype(str) train_df = df_labels print(train_df.head()) #train_df = train_df.iloc[:160000] #Splitting data into train, val_set and test_set train_set, valid_set = train_test_split(train_df, test_size=0.2) train_set, test_set = train_test_split(train_set, test_size=0.2) Datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=True, vertical_flip=True) train_gen = Datagen.flow_from_dataframe(train_set, directory=None, x_col='Path', y_col='label', target_size=(96, 96), batch_size=128, class_mode='binary', shuffle=True) valDatgen = ImageDataGenerator(rescale=1. / 255) val_gen = valDatgen.flow_from_dataframe(valid_set, x_col='Path', y_col='label', target_size=(96, 96), batch_size=128, class_mode='binary', shuffle=False) testDatgen = ImageDataGenerator(rescale=1. / 255) test_gen = testDatgen.flow_from_dataframe(test_set, x_col='Path', y_col='label',
from tensorflow.keras.preprocessing.image import ImageDataGenerator from PIL import ImageFile ImageFile.LOAD_TRUNCATED_IMAGES = True target_size = (360, 480) batch_size = 16 from tensorflow.keras.applications import inception_resnet_v2 datagen = ImageDataGenerator() train_datagen_x = datagen.flow_from_dataframe(dataframe=train_gen_df, x_col="mean_file_name", y_col=label_columns, class_mode="other", target_size=target_size, batch_size=batch_size, shuffle=True) val_datagen_x = datagen.flow_from_dataframe(dataframe=val_gen_df, x_col="mean_file_name", y_col=label_columns, class_mode="other", target_size=target_size, batch_size=batch_size, shuffle=True) def back_mean_gen(df, flip=True): while True: sample = df.sample(n=8)
# "data/mergecropped1700image/image", "data/mergecropped256image/image", x_col='image', y_col='traveler', target_size=target_size, class_mode="raw", # for regression batch_size=batch_size, seed=seed_value) valid_datagen = ImageDataGenerator(rescale=1. / 255) valid_datagenerator = valid_datagen.flow_from_dataframe( test, # "data/testimage/image", # "data/mergeimage/image", # "data/mergecropped1700image/image", "data/mergecropped256image/image", x_col='image', y_col='traveler', target_size=target_size, class_mode="raw", # for regression batch_size=batch_size, seed=seed_value) inference_datagen = ImageDataGenerator( rotation_range=15, shear_range=0.2, horizontal_flip=True, vertical_flip=True, width_shift_range=0.1, height_shift_range=0.1, fill_mode='nearest', zca_whitening=True # ZCA白色化
BATCH_SIZE = 64

# Get test subset: rows of the label CSV whose 'subset' column is 'test'.
print('Loading image data...')
label_df = pd.read_csv(args["csv"])
test_df = label_df[label_df['subset'] == 'test']

# Preprocess test data
print('Preprocess test data...')
test_datagen = ImageDataGenerator(rescale=1. / 255,
                                  preprocessing_function=apply_grey)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory=args["dataset"],
    x_col='file_path',
    y_col='label',
    class_mode='categorical',
    target_size=TARGET_SIZE,
    shuffle=False,  # deterministic order for evaluation
    batch_size=BATCH_SIZE,
)

# Evaluate model
print('Load model...')
model = load_model(MODELS_PATH + MODEL_NAME + '.h5')
print('Calculate test accuracy...')
start_time = time.time()
# len(test_generator) counts the final partial batch too. The previous
# ``test_generator.n // BATCH_SIZE`` silently skipped up to BATCH_SIZE - 1
# samples (and evaluated nothing at all when n < BATCH_SIZE).
test_loss, test_acc = model.evaluate(test_generator,
                                     steps=len(test_generator),
                                     verbose=2)
print("Test accuracy:", test_acc)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

DATA_DIR = 'D:/kaggle/chineseMNIST/'
IMAGE_SIZE = (64, 64)

train_df = pd.read_csv(DATA_DIR + 'chinese_mnist.csv')

# Prepping the Data
# Build each image filename from the first three columns of its row.
# ``.iloc`` makes the positional access explicit (plain ``x[0]`` on a
# label-indexed Series is deprecated in recent pandas).
train_df['file'] = train_df.apply(
    lambda x: f'input_{x.iloc[0]}_{x.iloc[1]}_{x.iloc[2]}.jpg', axis=1)

# 80/20 train/test split, then 10% of the remaining train rows for validation,
# both stratified on the 'character' column.
train_df, test_df = train_test_split(train_df, test_size=0.2,
                                     stratify=train_df['character'].values)
train_df, val_df = train_test_split(train_df, test_size=0.1,
                                    stratify=train_df['character'].values)

# ``color_mode`` is a flow_from_dataframe argument, not an ImageDataGenerator
# one; passing it to the constructor raises TypeError.
train_generator = ImageDataGenerator(rescale=1. / 255, rotation_range=20)
test_generator = ImageDataGenerator(rescale=1. / 255)

# Shared flow settings so all three splits are loaded identically.
# NOTE(review): assumes the .jpg files sit directly under DATA_DIR - confirm.
# NOTE(review): the default class_mode='categorical' needs string labels in
# y_col; confirm the 'value' column dtype (or switch to 'character').
flow_kwargs = dict(
    directory=DATA_DIR,
    x_col='file',
    y_col='value',
    target_size=IMAGE_SIZE,
    color_mode='grayscale',
)

# Each flow now receives its dataframe; the original calls omitted it.
train_data = train_generator.flow_from_dataframe(train_df, **flow_kwargs)
val_data = test_generator.flow_from_dataframe(val_df, **flow_kwargs)
test_data = test_generator.flow_from_dataframe(test_df, **flow_kwargs)
width_shift_range=0.1, height_shift_range=0.1, brightness_range=(0.75, 1.25), horizontal_flip=True, vertical_flip=True, preprocessing_function=contrast_stretch, validation_split=val_split) # https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator # will not shuffle before split! Need to shuffle first spiral_train_generator = spiral_datagen.flow_from_dataframe( df_train_images, x_col='path', y_col='label', subset="training", target_size=(img_height, img_width), color_mode="grayscale", batch_size=batch_size, class_mode="binary", shuffle=True, seed=42) spiral_val_generator = spiral_datagen.flow_from_dataframe( df_train_images, x_col='path', y_col='label', subset="validation", target_size=(img_height, img_width), color_mode="grayscale", batch_size=batch_size, class_mode="binary", shuffle=True,
def main():
    """Train an InceptionV3 classifier and report its test-set performance.

    Reads ``train.csv``, ``val.csv`` and ``test.csv`` (columns ``filename``
    and ``label``), loads images from the ``img`` folder, trains with
    class-balanced loss weights, saves the best weights (lowest ``val_loss``)
    to ``model_weights.h5``, plots the training history, and prints a
    classification report for the test set.
    """
    directory = 'img'  # folder containing the images
    df_train = pd.read_csv('train.csv')  # training data description
    df_validation = pd.read_csv('val.csv')  # validation data description
    df_test = pd.read_csv('test.csv')  # test data description
    label_list = ['AMD', 'DR_DM', 'Gla', 'MH', 'Normal', 'RD', 'RP', 'RVO']  # class names
    image_size = (224, 224)  # input image size (height, width)
    classes = len(label_list)  # number of classes
    batch_size = 32
    epochs = 300
    loss = 'categorical_crossentropy'  # loss function
    optimizer = Adam(lr=0.00001, amsgrad=True)  # optimizer
    metrics = 'accuracy'  # evaluation metric

    # Augmentation parameters for the training ImageDataGenerator.
    aug_params = {'rotation_range': 5,
                  'width_shift_range': 0.05,
                  'height_shift_range': 0.05,
                  'shear_range': 0.1,
                  'zoom_range': 0.05,
                  'horizontal_flip': True,
                  'vertical_flip': True}

    # Save the model only when val_loss reaches a new minimum.
    mc_cb = ModelCheckpoint('model_weights.h5', monitor='val_loss', verbose=1,
                            save_best_only=True, mode='min')
    # Multiply the learning rate by 0.2 when the training loss stalls.
    rl_cb = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=3, verbose=1,
                              mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
    # Stop training once the training loss stops improving.
    es_cb = EarlyStopping(monitor='loss', min_delta=0, patience=5, verbose=1,
                          mode='auto')

    # Class weights: (largest class count) / (this class count), so rarer
    # classes contribute more to the loss.
    class_counts = {i: (df_train['label'] == label).sum()
                    for i, label in enumerate(label_list)}
    max_count = max(class_counts.values())
    weight_balanced = {i: max_count / count for i, count in class_counts.items()}
    print(weight_balanced)

    # Generators
    ## Training data generator (rescaled and augmented)
    train_datagen = ImageDataGenerator(rescale=1./255, **aug_params)
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=df_train,
        directory=directory,
        x_col='filename',
        y_col='label',
        target_size=image_size,
        class_mode='categorical',
        classes=label_list,
        batch_size=batch_size)
    step_size_train = train_generator.n // train_generator.batch_size

    ## Validation data generator (rescaled only)
    validation_datagen = ImageDataGenerator(rescale=1./255)
    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=df_validation,
        directory=directory,
        x_col='filename',
        y_col='label',
        target_size=image_size,
        class_mode='categorical',
        classes=label_list,
        batch_size=batch_size)
    step_size_validation = validation_generator.n // validation_generator.batch_size

    # Build the network: InceptionV3 backbone with a small dense head.
    base_model = InceptionV3(include_top=False, weights='imagenet', pooling='avg',
                             input_shape=(image_size[0], image_size[1], 3))
    x = Dense(256, kernel_initializer='he_normal')(base_model.output)
    x = Dense(classes, kernel_initializer='he_normal')(x)
    outputs = Activation('softmax')(x)
    model = Model(inputs=base_model.inputs, outputs=outputs)

    model.summary()
    model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

    # Training
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=step_size_train,
        epochs=epochs,
        verbose=1,
        callbacks=[mc_cb, rl_cb, es_cb],
        validation_data=validation_generator,
        validation_steps=step_size_validation,
        class_weight=weight_balanced,
        workers=3)

    # Save the learning curves
    plot_history(history)

    # Evaluate on the test data
    ## Load the best weights saved by the checkpoint callback
    model.load_weights('model_weights.h5')

    ## Inference
    X = df_test['filename'].values
    y_true = [label_list.index(label) for label in df_test['label'].values]
    y_pred = []
    # PIL expects (width, height); Keras target_size is (height, width).
    pil_size = (image_size[1], image_size[0])
    for file in tqdm(X, desc='pred'):
        # Mirror the training pipeline: the Keras generators load images as
        # RGB and resize with nearest-neighbour interpolation by default, so
        # do the same here. Without the RGB conversion a grayscale or RGBA
        # file would yield the wrong number of channels.
        with Image.open(f'{directory}/{file}') as raw:  # closes the file handle
            img = raw.convert('RGB').resize(pil_size, Image.NEAREST)
        img = np.array(img, dtype=np.float32)
        img *= 1./255
        img = np.expand_dims(img, axis=0)
        y_pred.append(np.argmax(model.predict(img)[0]))

    ## Evaluation
    print(classification_report(y_true, y_pred, target_names=label_list))