def train(batch_size=500, n=50, data=1):
    # Train a 6-character code recognizer (CNN feature extractor -> GRU ->
    # six per-position softmax heads) from images listed in a CSV.
    #   batch_size: generator batch size
    #   n: run tag used only in version/checkpoint/log names
    #   data: dataset index; also selects the early-stopping min_epoch
    dataset = f"train/data0{data}_train"
    version = f"data0{data}_{n}"
    checkpoint_path = f'checkpoint_{version}.hdf5'
    log_dir = f'logs/{version}'
    epochs = 100
    img_width = 200
    img_height = 60
    # 36-symbol alphabet: one softmax class per symbol per character slot.
    alphabet = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
    char_to_int = dict((c, i) for i, c in enumerate(alphabet))
    int_to_char = dict((i, c) for i, c in enumerate(alphabet))  # NOTE(review): unused here
    df = pd.read_csv(f'{dataset}.csv', delimiter=',')
    # Explode the 6-char 'code' string into code1..code6 columns ...
    df['code'] = df['code'].apply(lambda el: list(el))
    df[[f'code{i}' for i in range(1, 7)]] = pd.DataFrame(df['code'].to_list(),
                                                         index=df.index)
    # ... then one-hot encode each character position.
    for i in range(1, 7):
        df[f'code{i}'] = df[f'code{i}'].apply(
            lambda el: to_categorical(char_to_int[el], len(alphabet)))
    datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)
    train_generator = datagen.flow_from_dataframe(
        dataframe=df,
        directory=dataset,
        subset='training',
        x_col="filename",
        y_col=[f'code{i}' for i in range(1, 7)],
        class_mode="multi_output",
        target_size=(img_height, img_width),
        batch_size=batch_size)
    valid_generator = datagen.flow_from_dataframe(
        dataframe=df,
        directory=dataset,
        subset='validation',
        x_col="filename",
        y_col=[f'code{i}' for i in range(1, 7)],
        class_mode="multi_output",
        target_size=(img_height, img_width),
        batch_size=batch_size)
    # VGG-style convolutional stack.
    input_shape = (img_height, img_width, 3)
    main_input = Input(shape=input_shape)
    x = main_input
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = Conv2D(filters=64, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
    x = Dropout(0.2)(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = Conv2D(filters=128, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
    x = Dropout(0.2)(x)
    x = Conv2D(filters=256, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = Conv2D(filters=256, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
    x = Conv2D(filters=512, kernel_size=(3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x)
    # Repeat the pooled feature vector 6 times and run a GRU so each head
    # receives a position-dependent state.
    x = RepeatVector(6)(x)
    x = GRU(128, return_sequences=True)(x)
    # One softmax head per character; the Lambda slices GRU time step i.
    # NOTE(review): output_shape=(1,) + input_shape[2:] does not describe the
    # sliced GRU feature vector — confirm intent; Keras may ignore it here.
    out = [
        Dense(len(alphabet), name=f'digit{i + 1}', activation='softmax')(
            Lambda(lambda z: z[:, i, :],
                   output_shape=(1, ) + input_shape[2:])(x)) for i in range(6)
    ]
    model = Model(main_input, out)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(0.0001),
                  metrics=['accuracy'])
    checkpoint = ModelCheckpoint(checkpoint_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto')
    # Dataset 1 may stop training earlier than the others.
    if data == 1:
        earlystop = MinimumEpochEarlyStopping(monitor='val_loss',
                                              patience=10,
                                              verbose=1,
                                              mode='auto',
                                              min_epoch=5)
    else:
        earlystop = MinimumEpochEarlyStopping(monitor='val_loss',
                                              patience=10,
                                              verbose=1,
                                              mode='auto',
                                              min_epoch=10)
    tensorBoard = TensorBoard(log_dir=log_dir, histogram_freq=1)
    callbacks_list = [tensorBoard, earlystop, checkpoint]
    # callbacks_list = [tensorBoard]
    model.summary()
    train_history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.n // train_generator.batch_size,
        epochs=epochs,
        validation_data=valid_generator,
        validation_steps=valid_generator.n // valid_generator.batch_size,
        verbose=1,
        callbacks=callbacks_list)
    # Write metrics twice: at the best-val_loss epoch, then at the epoch with
    # the best digit6 validation accuracy, separated by a dashed line.
    with open(f"{version}.txt", "w") as file:
        loss_idx = np.argmin(train_history.history['val_loss'])
        digit6_idx = np.argmax(train_history.history['val_digit6_accuracy'])
        file.write(f"{train_history.history['val_loss'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit1_accuracy'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit2_accuracy'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit3_accuracy'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit4_accuracy'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit5_accuracy'][loss_idx]}\n")
        file.write(
            f"{train_history.history['val_digit6_accuracy'][loss_idx]}\n")
        file.write(f"{'-'*20}\n")
        file.write(f"{train_history.history['val_loss'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit1_accuracy'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit2_accuracy'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit3_accuracy'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit4_accuracy'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit5_accuracy'][digit6_idx]}\n")
        file.write(
            f"{train_history.history['val_digit6_accuracy'][digit6_idx]}\n")
    # Free the TF graph/session so repeated calls don't accumulate state.
    K.clear_session()
# Part 2 - Fitting the CNN to the images from keras_preprocessing.image import ImageDataGenerator from PIL import Image train_datagen = ImageDataGenerator(rescale=1. / 255., featurewise_std_normalization=True, validation_split=0.20) test_datagen = ImageDataGenerator(rescale=1. / 255.) training_set = train_datagen.flow_from_dataframe(dataframe=train_dataset, directory='./data/training/', x_col='image_names', y_col='labels', subset="training", class_mode="other", target_size=(480, 480), batch_size=30, shuffle=True) validation_set = train_datagen.flow_from_dataframe( dataframe=train_dataset, directory='./data/training/', x_col='image_names', y_col='labels', subset="validation", class_mode="other", target_size=(480, 480), batch_size=30, shuffle=True)
#get directory of input images and create array of images and store images in the directory to the array train_dir = "C:/pooled/Train" #get labels pickle and convert to dataframe then sort by the filename to go along with the images train_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Training_Input_Resized.pkl" train_labels = pd.read_pickle(train_labels_file) train_datagen = ImageDataGenerator(rescale=1. / 255) train_generator = train_datagen.flow_from_dataframe( dataframe=train_labels, directory=train_dir, target_size=(108, 192), x_col='Filename', y_col=[ 'Right Ankle x', 'Right Knee x', 'Right Hip x', 'Left Hip x', 'Left Knee x', 'Left Ankle x', 'Pelvis x', 'Thorax x', 'Upper Neck x', 'Head Top x', 'Right Wrist x', 'Right Elbow x', 'Right Shoulder x', 'Left Shoulder x', 'Left Elbow x', 'Left Wrist x', 'Right Ankle y', 'Right Knee y', 'Right Hip y', 'Left Hip y', 'Left Knee y', 'Left Ankle y', 'Pelvis y', 'Thorax y', 'Upper Neck y', 'Head Top y', 'Right Wrist y', 'Right Elbow y', 'Right Shoulder y', 'Left Shoulder y', 'Left Elbow y', 'Left Wrist y' ], class_mode='other', batch_size=16) #get directory of input images and create array of images and store images in the directory to the array test_dir = "C:/pooled/Test" #get labels pickle and convert to dataframe then sort by the filename to go along with the images test_labels_file = "C:/Users/panka/OneDrive/Desktop/Aditya/image data 2018-19/Testing_Input_Resized.pkl" test_labels = pd.read_pickle(test_labels_file)
from keras_preprocessing.image import ImageDataGenerator

train_data_path = 'data/train/'
# NOTE(review): identical to train_data_path — confirm this is not a typo
# for 'data/test/'.
test_data_path = 'data/train/'
wav_path = 'data/wav/'
# dtype=str keeps label columns as strings for categorical class_mode.
traindf = pd.read_csv('data/train.csv', dtype=str)
testdf = pd.read_csv('data/test.csv', dtype=str)
# Single generator with a 25% validation split; subset= picks the half.
datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.25)
train_generator = datagen.flow_from_dataframe(dataframe=traindf,
                                              directory=train_data_path,
                                              x_col="slice_file_name",
                                              y_col="Class",
                                              subset="training",
                                              batch_size=32,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=(64, 64))
valid_generator = datagen.flow_from_dataframe(dataframe=traindf,
                                              directory=train_data_path,
                                              x_col="slice_file_name",
                                              y_col="Class",
                                              subset="validation",
                                              batch_size=32,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=(64, 64))
# width_shift_range=0.2, # height_shift_range=0.2, # zoom_range=0.2, # shear_range=0.2, # rotation_range=40, # horizontal_flip = True, rescale=1./255.) test_datagen = ImageDataGenerator(rescale=1./255.) #set up train generator train_generator= datagen.flow_from_dataframe( dataframe= df[:train_size], directory= TRAIN_DATA_DIR, x_col= "filename", y_col= "labels", batch_size= BATCH_SIZE, seed= 42, shuffle= True, class_mode= "categorical", target_size= (IMAGE_SIZE,IMAGE_SIZE,3)) #set up dev generator valid_generator=test_datagen.flow_from_dataframe( dataframe=df[train_size:(train_size + dev_size) ], directory= TRAIN_DATA_DIR, x_col= "filename", y_col= "labels", batch_size= BATCH_SIZE, seed= 42, shuffle= True, class_mode= "categorical",
zca_whitening=True, rotation_range=5, ) datagen = ImageDataGenerator(rescale=1. / 255) BATCH_SIZE = 12 img_size = 224 dataframe_train = training_set steps_train = len(dataframe_train) / BATCH_SIZE steps_train = round(steps_train + 0.5) train_generator = train_dataGen.flow_from_dataframe(dataframe=dataframe_train, directory="", x_col="Path", y_col=types, class_mode="raw", color_mode="rgb", target_size=(img_size, img_size), batch_size=BATCH_SIZE, shuffle=True) test_set = valid_set dataframe_valid = test_set steps_valid = len(dataframe_valid) / BATCH_SIZE steps_valid = round(steps_valid + 0.5) valid_generator = datagen.flow_from_dataframe( dataframe=dataframe_valid, # dataframe = valid_set, directory="", x_col="Path", y_col=types,
def predict():
    """Run the saved model over the test images and write predictions.

    Side effects:
      - writes 'Predicted_labels.csv' (Filename, Predictions columns)
      - appends tab-separated rows to '../Output/Predicted_labels.txt'
    """
    # set up data generator (inference only: rescale, no augmentation)
    test_datagen = ImageDataGenerator(rescale=1. / 255.)
    # read file list; batch_size=1 + shuffle=False preserves filename order
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=gen(CONFIG.TEST_NUM, CONFIG.TEST_FORMAT),
        directory=CONFIG.TEST_DIR,
        x_col="filename",
        batch_size=1,
        seed=42,
        shuffle=False,
        class_mode=None,
        target_size=(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE))
    # load model together with its custom objective/metric functions
    model = load_model(CONFIG.MODEL,
                       custom_objects={
                           'arg': arg,
                           'arg2': multi_label_accu
                       })
    # to restore order
    test_generator.reset()
    # feed in data
    pred = model.predict_generator(test_generator,
                                   steps=test_generator.n //
                                   test_generator.batch_size,
                                   verbose=1)
    # numpy it
    pred = np.array(pred)
    # select the one with max prob per sample
    am = np.argmax(pred, axis=-1)
    # map class indices to label names via the dictionary
    labels = CONFIG.LABELS
    predictions = [labels[i] for i in am]
    filenames = test_generator.filenames
    results = pd.DataFrame({"Filename": filenames, "Predictions": predictions})
    results.to_csv("Predicted_labels.csv", index=False)
    # FIX: the output file was opened without a context manager and would be
    # leaked if re-reading the CSV raised; use `with` so both handles always
    # close, and skip the header row with next() instead of a counter.
    with open("../Output/Predicted_labels.txt", "a") as file1, \
            open('Predicted_labels.csv', 'r') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the CSV header line
        for row in reader:
            file1.write('{}\t{}\n'.format(row[0], row[1]))
#img = image.load_img(im, target_size = (8,8)) im_arr = image.img_to_array(im).astype(float) im_arr[:, :, 0] = im_arr[:, :, 0] im_arr[:, :, 1] = np.mean(im_arr, axis=2) im_arr[:, :, 2] = np.var(im_arr, axis=2) return im_arr datagen = ImageDataGenerator(rescale=1. / 255, preprocessing_function=ch_making_func, validation_split=0.25) train_generator = datagen.flow_from_dataframe(dataframe=pdf, directory="None", color_mode="rgb", x_col="filename", y_col=target_name, subset="training", class_mode="other", target_size=input_shape[:2], batch_size=32) valid_generator = datagen.flow_from_dataframe(dataframe=pdf, directory="None", color_mode="rgb", x_col="filename", y_col=target_name, subset="validation", class_mode="other", target_size=input_shape[:2], batch_size=32)
df.iloc[(i + 1) * (valset_size + testset_size):dataset_size, :] ], sort=True) df_train.reset_index(drop=True) df_val = df[i * (valset_size + testset_size):((i + 1) * (valset_size + testset_size)) - testset_size] df_test = df[i * (valset_size + testset_size) + valset_size:(i + 1) * (valset_size + testset_size)] train_generator = datagen.flow_from_dataframe( dataframe=df_train, directory=img_path, x_col="Überschrift der Spalte mit den Dateinamen", y_col="Überschrift der Spalte mit den Schadensklassen", classes=['Klasse 1', 'Klasse 2', 'Klasse 3', 'Klasse 4', 'Klasse 5'], batch_size=67, shuffle=False, class_mode="categorical", target_size=(224, 224)) validation_generator = test_datagen.flow_from_dataframe( dataframe=df_val, directory=img_path, x_col="Überschrift der Spalte mit den Dateinamen", y_col="Überschrift der Spalte mit den Schadensklassen", classes=['Klasse 1', 'Klasse 2', 'Klasse 3', 'Klasse 4', 'Klasse 5'], batch_size=10, shuffle=False, class_mode="categorical", target_size=(224, 224)) test_generator = test_datagen.flow_from_dataframe(
def CNN_for_tasks(task, number_of_task, shape, epochs):
    """Train a small CNN on one attribute column and write test predictions.

    task: dataframe column name used as the categorical target.
    number_of_task: output CSV path for the predictions.
    shape: number of output classes (units of the final Dense layer).
    epochs: number of training epochs.
    Returns 0 on completion; writes predictions to `number_of_task`.
    """
    #read the attribute_list.csv
    df = pd.read_csv(r"dataset/attribute_list.csv", skiprows=1)
    #initiate ImageDataGenerator while splitting validation to 25% of the training images
    datagen = ImageDataGenerator(validation_split=0.25, rescale=1. / 255)
    #Initializing train_generator using flow_from_dataframe method. I've turned on shuffle for more consistent result
    # using seed 42. the batch_size is 10 and target size is 32x32 pixel
    train_generator = datagen.flow_from_dataframe(dataframe=df,
                                                  directory="training",
                                                  x_col="file_name",
                                                  y_col=task,
                                                  class_mode="categorical",
                                                  has_ext=False,
                                                  shuffle=True,
                                                  seed=1,
                                                  subset='training',
                                                  target_size=(32, 32),
                                                  batch_size=10)
    valid_generator = datagen.flow_from_dataframe(dataframe=df,
                                                  directory="training",
                                                  x_col="file_name",
                                                  y_col=task,
                                                  has_ext=False,
                                                  class_mode="categorical",
                                                  shuffle=True,
                                                  seed=1,
                                                  subset='validation',
                                                  target_size=(32, 32),
                                                  batch_size=10)
    test_generator = datagen.flow_from_dataframe(dataframe=df,
                                                 directory="testing",
                                                 x_col="file_name",
                                                 y_col=task,
                                                 has_ext=False,
                                                 class_mode="categorical",
                                                 shuffle=True,
                                                 seed=1,
                                                 target_size=(32, 32),
                                                 batch_size=10)
    # Initialising CNN Model
    model = Sequential()
    # step 1 - convolution (legacy Keras 1 signature: 32 filters, 3x3 kernel)
    model.add(
        Convolution2D(32, 3, 3, input_shape=(32, 32, 3), activation='relu'))
    # step 2 - pooling
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # step 3 - Flattening
    model.add(Flatten())
    # step 4 - Full Connection
    model.add(Dense(output_dim=128, activation='relu'))
    model.add(Dense(output_dim=shape, activation='sigmoid'))
    # compile
    model.compile(optimizers.rmsprop(lr=0.0001),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    # Training the model through fit
    # steps_per_epoch should be (number of training images total / batch_size)
    # validation_steps should be (number of validation images total / batch_size)
    STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
    STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_generator,
                        validation_steps=STEP_SIZE_VALID,
                        epochs=epochs)
    # predicting the output
    test_generator.reset()  #reset test_generator first
    pred = model.predict_generator(test_generator,
                                   steps=len(test_generator),
                                   verbose=1)
    predicted_class_indices = np.argmax(pred, axis=1)
    #generating labels for predictions
    labels = (train_generator.class_indices)
    labels = dict((v, k) for k, v in labels.items())
    predictions = [labels[k] for k in predicted_class_indices]
    # Finally, save the results to a CSV file.
    filenames = test_generator.filenames
    results = pd.DataFrame({
        "file_name": filenames,
        "Predictions": predictions
    })
    results.to_csv(number_of_task, index=False)
    # removing the .png extension on task1 file_name
    # FIX: the original opened the file for reading and immediately rebound
    # the variable, leaking the handle; use context managers so both the read
    # and the rewrite handles are always closed.
    with open(number_of_task, "r") as fin:
        text = fin.read().replace(".png", "")
    with open(number_of_task, "w") as fout:
        fout.write(text)
    return 0
#1,2,13,15 NUM_GENRES = 20 PIXEL=100 #224 SIZE_TRAIN=30000 SIZE_VALIDATION = 8000 SIZE_TEST = 4000 datagen=ImageDataGenerator(rescale=1./255.) test_datagen=ImageDataGenerator(rescale=1./255.) train_generator=datagen.flow_from_dataframe( dataframe=df[:SIZE_TRAIN], # directory="poster/poster_images", directory="poster/transformed_poster_images", x_col="imdb_id", y_col=columns, batch_size=128, seed=42, shuffle=True, class_mode="other", target_size=(PIXEL,PIXEL)) valid_generator=test_datagen.flow_from_dataframe( dataframe=df[SIZE_TRAIN:SIZE_TRAIN+SIZE_VALIDATION], # directory="poster/poster_images", directory="poster/transformed_poster_images", x_col="imdb_id", y_col=columns, batch_size=128, seed=42, shuffle=True,
def train_model(audio_path, midi_path, epochs=20, batch_size=96, filters1=48,
                filters2=96, save=False, model_name=''):
    # Train a CNN that maps spectrogram images (audio_path) to 88-note
    # piano-roll targets stored as .npy arrays (midi_path).
    #   filters1/filters2: filter counts for the two conv stages
    #   save/model_name: optionally persist the model under ../models/
    # File lists are sorted so audio and midi files pair up by name order.
    midi_files_bin = []
    audio_files_bin = []
    for filename in sorted(os.listdir(midi_path)):
        midi_files_bin.append(filename)
    for filename in sorted(os.listdir(audio_path)):
        audio_files_bin.append(filename)
    # Stack the per-file label arrays into one (n_files, 88) target matrix.
    y_images = []
    for filename in midi_files_bin:
        array = np.load(os.path.join(midi_path, filename))
        y_images.append(array)
    y_train = np.array(y_images)
    df = pd.DataFrame()
    df['filenames'] = audio_files_bin
    # MIDI note numbers 21..108 = the 88 piano keys, used as column names.
    note_labels = np.arange(21, 109)
    df[note_labels] = y_train
    # Probe the first image to discover the input spectrogram dimensions.
    img = Image.open(f'{audio_path}/{df.iloc[0, 0]}')
    img_arr = np.asarray(img)
    input_rows = img_arr.shape[0]
    input_columns = img_arr.shape[1]
    print(input_rows, input_columns)
    df_train, df_test = train_test_split(df, test_size=0.20)
    # NOTE(review): gpus[0] raises IndexError when no GPU is present — confirm
    # this is only ever run on a GPU machine.
    gpus = tensorflow.config.experimental.list_physical_devices('GPU')
    tensorflow.config.experimental.set_memory_growth(gpus[0], True)
    train_datagen = ImageDataGenerator(rescale=1. / 255)
    train_gen = train_datagen.flow_from_dataframe(
        df_train,
        audio_path,
        x_col='filenames',
        y_col=note_labels,
        batch_size=batch_size,
        seed=42,
        shuffle=True,
        class_mode='raw',
        color_mode='grayscale',
        target_size=(input_rows, input_columns))
    valid_datagen = ImageDataGenerator(rescale=1. / 255)
    valid_gen = valid_datagen.flow_from_dataframe(
        df_test,
        audio_path,
        x_col='filenames',
        y_col=note_labels,
        batch_size=batch_size,
        seed=42,
        shuffle=True,
        class_mode='raw',
        color_mode='grayscale',
        target_size=(input_rows, input_columns))
    # Two conv stages (VGG-like pairs) followed by dense layers; sigmoid
    # output = independent per-note multi-label prediction.
    model = Sequential()
    model.add(
        Conv2D(input_shape=(input_rows, input_columns, 1),
               filters=filters1,
               kernel_size=(3, 3),
               padding="same",
               activation="relu"))
    model.add(
        Conv2D(filters=filters1,
               kernel_size=(3, 3),
               padding="same",
               activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(
        Conv2D(filters=filters2,
               kernel_size=(3, 3),
               padding="same",
               activation="relu"))
    model.add(
        Conv2D(filters=filters2,
               kernel_size=(3, 3),
               padding="same",
               activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dense(352, activation='relu'))
    model.add(Dense(352, activation='relu'))
    model.add(Dense(88, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='Adam',
                  metrics=[
                      'accuracy',
                      'binary_accuracy',
                      tensorflow.keras.metrics.Precision(),
                      tensorflow.keras.metrics.Recall(),
                  ])
    print(model.summary())
    model.fit(train_gen,
              steps_per_epoch=df_train.shape[0] / batch_size,
              epochs=epochs,
              validation_data=valid_gen,
              validation_steps=df_test.shape[0] / batch_size,
              verbose=1)
    if save:
        model.save(f'../models/{model_name}')
from sklearn.linear_model import LogisticRegression from sklearn.multiclass import OneVsRestClassifier types = ['No_Finding', 'Enlarged_Cardiomediastinum', 'Cardiomegaly', 'Lung_Opacity', 'Lung_Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural_Effusion', 'Pleural_Other', 'Fracture', 'Support_Devices'] BATCH_SIZE = 32 datagen = ImageDataGenerator(rescale=1. / 255) set = pd.read_csv("CheXpert-v1.0-small/csv/top/train_top_lateral.csv") generator = datagen.flow_from_dataframe( dataframe=set, directory="", x_col="Path", y_col=types, classes=types, class_mode="raw", color_mode="rgb", target_size=(224, 224), shuffle=True, batch_size=BATCH_SIZE) model = tf.keras.applications.DenseNet121(weights="imagenet", include_top=False) steps = len(set) / BATCH_SIZE feature_list = [] labels_list = [] cont = 1 for batch in generator: for i in range(len(batch[0])): img = batch[0][i]
'/home/juanp.montoya/NeuralNetworks/Final_Project/final-project-landandbuildingsatimg-ccny/data/raw/input' ) nRowsRead = None df2 = pd.read_csv('metadata.csv', delimiter=',', nrows=nRowsRead) df2.dataframeName = 'metadata.csv' nRow, nCol = df2.shape img_height = 2448 img_width = 2448 data_gen = ImageDataGenerator(rescale=1) batch_size = 50 mask_generator = data_gen.flow_from_dataframe( dataframe=df2.loc[df2['split'] == 'train'], directory="", x_col="mask_path", y_col="mask_path", class_mode=None, seed=0, target_size=(img_height, img_width), batch_size=batch_size) batch_n = None slices = 16 directory = '../../interim/Mask' MaskConverter.mask_to_label(mask_generator, directory, slices, img_height, batch_size, batch_n)
class ClassTester(ModelTester):
    # Tests the chest-X-ray multi-label classifier: loads model + metadata,
    # predicts on user-supplied image paths, and plots/prints results.
    core_idg = None   # lazily-built ImageDataGenerator shared by predictions
    # NOTE(review): mutable class-level default — shared across instances
    # until read_data() rebinds it; confirm single-instance usage.
    all_labels = []

    def load_model(self):
        # Load architecture + separately saved best weights from Models/.
        self.model = load_model(os.path.join('Models', 'classifier_model.h5'))
        self.model.load_weights(
            os.path.join('Models', 'classifier_model_weight.hdf5'))
        print("Loaded model from disk")

    def read_data(self):
        # Load the NIH metadata CSV and restrict it to the four classes the
        # model was trained on; build per-class 0/1 flag columns.
        self.df = pd.read_csv(os.path.join('.', 'Data', 'Data_Entry_2017.csv'))
        # For display purposes - actual vs predicted values. If not found, actual will be 'Unknown'
        file_paths = [os.path.abspath(x) for x in glob(class_img_paths)]
        all_image_paths = {os.path.basename(x): x for x in file_paths}
        self.df['path'] = self.df['Image Index'].map(all_image_paths.get)
        # Preserve only the following classes
        keep_classes = ['Effusion', 'Mass', 'Nodule', 'Pneumothorax']
        self.df = self.df[self.df['Finding Labels'].isin(keep_classes)]
        self.df['Patient Age'] = self.df['Patient Age'].map(lambda x: int(x))
        # Distinct labels found in the data, plus a catch-all 'Unknown'.
        self.all_labels = np.unique(
            list(
                chain(*self.df['Finding Labels'].map(
                    lambda x: x.split('|')).tolist())))
        self.all_labels = [x for x in self.all_labels if len(x) > 0]
        self.all_labels.append('Unknown')
        self.set_finding_flags(self.df)
        self.df['newLabel'] = self.df.apply(
            lambda x: x['Finding Labels'].split('|'), axis=1)
        print("Loaded the data for classification")

    def set_finding_flags(self, df):
        # Add one 1.0/0 indicator column per known label.
        for c_label in self.all_labels:
            if len(c_label) > 1:  # leave out empty labels
                df[c_label] = df['Finding Labels'].map(
                    lambda finding: 1.0 if c_label in finding else 0)

    def predict(self, file_paths):
        # Predict classes for the given image paths; depending on self.plot /
        # self.save_plot, shows or saves a matplotlib figure and returns its
        # path, otherwise prints match/non-match lines.
        plot_path = os.path.join(
            USER_UPLOAD,
            "classification_plot_{}.png".format(
                str(np.random.randint(100, 999))))
        if self.model is None:
            self.load_model()
        if self.df is None:
            self.read_data()
        # Build a dataframe of rows for the requested files; unknown files
        # borrow the first metadata row and are labeled 'Unknown'.
        valid_df = pd.DataFrame()
        for file_path in file_paths:
            file_name = os.path.basename(file_path)
            found_df = self.df.loc[self.df['Image Index'] == file_name]
            if found_df.empty:
                print("Path", file_name, "is not found. Mimicing properties.")
                found_df = self.df.head(1).copy(deep=True)
                found_df['Image Index'] = file_name
                found_df['Finding Labels'] = 'Unknown'
                found_df['newLabel'] = ['Unknown']
                self.set_finding_flags(found_df)
                found_df['path'] = file_path
            valid_df = valid_df.append(found_df)
        if valid_df.empty:
            print("Path(s) ", ','.join(file_paths), " is not found")
            return
        if self.core_idg is None:
            self.core_idg = ImageDataGenerator(
                samplewise_center=True,
                samplewise_std_normalization=True,
                horizontal_flip=True,
                vertical_flip=False,
                height_shift_range=0.05,
                width_shift_range=0.1,
                rotation_range=5,
                shear_range=0.1,
                fill_mode='reflect',
                zoom_range=0.15)
        # Pull one (large) batch covering all requested images.
        test_x, test_y = next(
            self.core_idg.flow_from_dataframe(dataframe=valid_df,
                                              directory=None,
                                              x_col='path',
                                              y_col='newLabel',
                                              class_mode='categorical',
                                              classes=self.all_labels,
                                              target_size=(IMAGE_SIZE,
                                                           IMAGE_SIZE),
                                              color_mode='grayscale',
                                              batch_size=1024))
        pred_y = self.model.predict(test_x, batch_size=32)
        # Image_name Actual_classes Predicted_classes
        sickest_idx = np.argsort(np.sum(test_y, 1) < 1)
        fig, m_axs = plt.subplots(len(sickest_idx), 2, figsize=(12, 4))
        fig.canvas.set_window_title(
            'Class prediction: ' +
            ','.join([os.path.basename(p) for p in file_paths]))
        fig.tight_layout(pad=3.0)
        axes = m_axs.flatten()
        act_infos, pred_infos = self.get_pred_string(test_y, pred_y)
        for idx in sickest_idx:
            act_info = act_infos[idx]
            pred_info = pred_infos[idx]
            pred_str = [
                '%s:%2.0f%%' % (n_class, p_score)
                for n_class, p_score in pred_info
            ]
            if self.plot or self.save_plot:
                ax_idx = idx * 2
                ax_img = axes[ax_idx]
                ax_plt = axes[ax_idx + 1]
                # Image display
                ax_img.imshow(test_x[idx, :, :, 0],
                              cmap='bone',
                              vmin=-1.5,
                              vmax=1.5)
                ax_img.set_title('Actual: ' + ', '.join(act_info) +
                                 '\nPredicted: ' + ', '.join(pred_str))
                ax_img.axis('off')
                # Result display
                ax_plt.set_xlim([0, 100])
                ax_plt.barh([val[0] for val in pred_info],
                            [val[1] for val in pred_info],
                            align='center')
                ax_plt.set_title('Predicted results')
            else:
                if act_info[0] == pred_info[0][0]:
                    print(file_paths[idx], 'match', act_info[0])
                else:
                    print(file_paths[idx], 'non match', act_info[0],
                          pred_info[0][0])
        if self.save_plot:
            if not os.path.exists(USER_UPLOAD):
                os.makedirs(USER_UPLOAD)
            fig.savefig(plot_path)
        if self.plot:
            plt.show()
        return plot_path

    def get_pred_string(self, test_y, pred_y):
        # For each sample, return (actual label names, top-4 [name, pct]
        # predicted pairs sorted by descending confidence).
        act_infos = []
        pred_infos = []
        for idx in range(0, len(test_y)):
            act_info = [
                n_class
                for n_class, n_score in zip(self.all_labels, test_y[idx])
                if n_score > 0.5
            ]
            pred_info = [[n_class, p_score * 100]
                         for n_class, n_score, p_score in zip(
                             self.all_labels, test_y[idx], pred_y[idx])
                         if (n_score > 0.5) or (p_score > 0)]
            pred_info.sort(key=lambda x: x[1], reverse=True)
            # Get top 4 classes only
            pred_info = pred_info[:4]
            act_infos.append(act_info)
            pred_infos.append(pred_info)
        return act_infos, pred_infos
data_dir = pathlib.Path(data_dir) train_dir = os.path.join(data_dir, "train") train_df = pd.read_json(os.path.join(data_dir, "train", "parameters.jsonl"), lines=True) train_df["filename"] = train_df["id"] + ".png" valid_dir = os.path.join(data_dir, "validation") valid_df = pd.read_json(os.path.join(valid_dir, "parameters.jsonl"), lines=True) valid_df["filename"] = valid_df["id"] + ".png" datagen = ImageDataGenerator(rescale=1. / 255) train_generator = datagen.flow_from_dataframe(dataframe=train_df, directory=train_dir, x_col="filename", y_col="label", batch_size=64) valid_generator = datagen.flow_from_dataframe(dataframe=valid_df, directory=valid_dir, x_col="filename", y_col="label", batch_size=64) STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size model_filepath = "lenet_" + str(dataset_name) model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=model_filepath, save_weights_only=False, monitor='val_accuracy',
total_train = train_df.shape[0]
total_validate = validate_df.shape[0]
batch_size = 15
# Training generator with augmentation (rotation/shear/zoom/flips/shifts).
train_datagen = ImageDataGenerator(rotation_range=15,
                                   rescale=1. / 255,
                                   shear_range=0.1,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1)
train_generator = train_datagen.flow_from_dataframe(train_df,
                                                    "../../data/train",
                                                    x_col='filename',
                                                    y_col='category',
                                                    target_size=IMAGE_SIZE,
                                                    class_mode='categorical',
                                                    batch_size=batch_size)
# Validation images are only rescaled — no augmentation.
validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    "../../data/train",
    x_col='filename',
    y_col='category',
    target_size=IMAGE_SIZE,
    class_mode='categorical',
    batch_size=batch_size)
# example_df = train_df.sample(n=1).reset_index(drop=True)
from keras_preprocessing.image import ImageDataGenerator
from configuration import validation_images_list_filename, validation_images_list_filename_just_faces, \
    model_filename, model_filename_just_faces

#%% evaluate model accuracy on holdout set without facial extraction
image_size = (32, 25)  # no facial extract
model_name = model_filename
validation_list = validation_images_list_filename
test_df = pd.read_csv(validation_list)
test_datagen = ImageDataGenerator(rescale=1. / 255.)
# shuffle=False keeps predictions aligned with the dataframe row order.
test_generator = test_datagen.flow_from_dataframe(dataframe=test_df,
                                                  directory=None,
                                                  x_col="name",
                                                  y_col="class",
                                                  target_size=image_size,
                                                  batch_size=32,
                                                  seed=42,
                                                  class_mode='categorical',
                                                  shuffle=False)
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
model = tf.keras.models.load_model(model_name)
scoring = model.evaluate_generator(generator=test_generator,
                                   steps=STEP_SIZE_TEST,
                                   verbose=0)
# scoring[1] pairs with metrics_names[1] (the first metric after loss).
print("No facial extraction", model.metrics_names[1], scoring[1])
#%% evaluate model accuracy on holdout set with facial extraction
image_size = (54, 72)  # facial extract
model_name = model_filename_just_faces
def train_model(model_paths, transfer=False):
    # Train a categorical image classifier from the CSVs/paths bundled in
    # model_paths; transfer=True uses the transfer-learning architecture.
    # Saves the best checkpoint during training and the final model at the end.
    print("SETTING UP TRAINING...")
    train_df = pd.read_csv(model_paths.augmented_train_solutions)
    valid_df = pd.read_csv(model_paths.valid_solutions)
    # Column 0 = filename column, column 1 = label column (by position).
    df_headers = list(train_df.columns)
    train_datagen = IDG(rescale=1. / 255.,
                        shear_range=0.2,
                        zoom_range=0.2,
                        horizontal_flip=True)
    valid_datagen = IDG(rescale=1. / 255.)
    # Create generators
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=model_paths.train_image_path,
        x_col=df_headers[0],
        y_col=df_headers[1],
        class_mode='categorical',
        shuffle=True,
        batch_size=24,
        seed=42,
        target_size=(200, 200))
    valid_generator = valid_datagen.flow_from_dataframe(
        dataframe=valid_df,
        directory=model_paths.valid_image_path,
        x_col=df_headers[0],
        y_col=df_headers[1],
        class_mode='categorical',
        shuffle=False,
        batch_size=24,
        seed=42,
        target_size=(200, 200))
    print("CLASS INDICES:", train_generator.class_indices)
    if transfer:
        model = construct_transfer_model()
    else:
        model = construct_model()
    # +1 so a partial final batch is still consumed each epoch.
    STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size + 1
    STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size + 1
    print("Training model...")
    checkpoint = ModelCheckpoint(model_paths.checkpoint_path,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    early_stopping = EarlyStopping(monitor='val_loss', patience=2)
    callbacks_list = [checkpoint, early_stopping]
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=STEP_SIZE_TRAIN,
                        validation_data=valid_generator,
                        validation_steps=STEP_SIZE_VALID,
                        callbacks=callbacks_list,
                        epochs=100)
    model.save(model_paths.checkpoint_overall_path, overwrite=True)
    print("Saved model to: " + model_paths.checkpoint_overall_path)
#declare the datagen options train_datagen = ImageDataGenerator(rescale=1. / 255, rotation_range=20, zoom_range=0.05, width_shift_range=0.05, height_shift_range=0.05, shear_range=0.05, horizontal_flip=True, fill_mode="nearest") #generate training dataset train_generator = train_datagen.flow_from_dataframe(dataframe=train, directory=None, x_col="Path", y_col="Pleural Effusion", class_mode="binary", color_mode="rgb", target_size=TARGET_SIZE, batch_size=batch_size) #set up the test data set valid_datagen = ImageDataGenerator(rescale=1. / 255) valid_generator = valid_datagen.flow_from_dataframe(dataframe=valid, directory=None, x_col="Path", y_col="Pleural Effusion", class_mode="binary", color_mode="rgb", batch_size=batch_size)
# Drop rows whose label for this finding is -1 (uncertain), then cast the
# remaining 0.0/1.0 labels to str for categorical flow_from_dataframe.
train_df = train_df[train_df[finding] != -1]
train_df[finding] = train_df[finding].astype(str)
val_df = val_df[val_df[finding] != -1]
val_df[finding] = val_df[finding].astype(str)
# Only the training generator augments (horizontal flip).
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   horizontal_flip=True)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
print('Initializing data generators')
class_list = ["0.0", "1.0"]
#directory='/home/steve/PycharmProjects/mimic-cxr/data',
directory = '/media/steve/Samsung_T5/MIMICCXR'
train_gen = train_datagen.flow_from_dataframe(dataframe=train_df,
                                              directory=directory,
                                              x_col='path',
                                              y_col=finding,
                                              target_size=(224, 224),
                                              color_mode='rgb',
                                              class_mode='categorical',
                                              batch_size=16,
                                              shuffle=True,
                                              classes=class_list)
val_gen = val_datagen.flow_from_dataframe(dataframe=val_df,
                                          directory=directory,
                                          x_col='path',
                                          y_col=finding,
                                          target_size=(224, 224),
                                          color_mode='rgb',
                                          class_mode='categorical',
                                          batch_size=16,
                                          shuffle=True,
                                          classes=class_list)
train_df = pd.read_csv(training_images_list_filename_just_faces)
# One generator; 25% of rows reserved for validation via subset=.
datagen = ImageDataGenerator(
    rescale=1. / 255.,
    validation_split=0.25,
    horizontal_flip=True
    # ,height_shift_range=1
    # ,width_shift_range=1
    # ,rotation_range=1
)
# Training batches are large (256); validation uses smaller batches (32).
train_generator = datagen.flow_from_dataframe(dataframe=train_df,
                                              directory=None,
                                              x_col="name",
                                              y_col="class",
                                              subset="training",
                                              batch_size=256,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=image_size)
valid_generator = datagen.flow_from_dataframe(dataframe=train_df,
                                              directory=None,
                                              x_col="name",
                                              y_col="class",
                                              subset="validation",
                                              batch_size=32,
                                              seed=42,
                                              shuffle=True,
                                              class_mode="categorical",
                                              target_size=image_size)
def arrangeData(self):
    """Build the train/validation/test image generators.

    Depending on ``self.directoryType``, data is read either from class
    directories ('FlowFromDirectory') or from a labels dataframe ('Flow').
    Training images are randomly augmented each epoch to reduce
    overfitting; validation and test images are only rescaled.
    """
    # Training image generator: rescale plus random zoom / rotation /
    # shift / shear / horizontal flip.
    # BUG FIX: validation_split=0.25 was missing here. The 'Flow' branch
    # below calls flow_from_dataframe(..., subset='training') on this
    # generator, which raises ValueError unless the generator was built
    # with a validation split. The 'FlowFromDirectory' branch passes no
    # subset, so the added split is ignored there (behavior unchanged).
    train_Image_generator = ImageDataGenerator(rescale=1. / 255,
                                               zoom_range=0.3,
                                               rotation_range=6,
                                               width_shift_range=0.2,
                                               height_shift_range=0.2,
                                               shear_range=0.2,
                                               horizontal_flip=True,
                                               fill_mode='nearest',
                                               validation_split=0.25)
    # Validation image generator: no augmentation; same 25% split so the
    # 'Flow' branch partitions the dataframe consistently with training.
    val_Image_generator = ImageDataGenerator(rescale=1. / 255,
                                             validation_split=0.25)
    # Test image generator: no augmentation, no split.
    test_Image_generator = ImageDataGenerator(rescale=1. / 255)

    if self.directoryType == 'FlowFromDirectory':
        # Training images, shuffled each iteration.
        self.train_data_gen = train_Image_generator.flow_from_directory(
            batch_size=self.batch_size,
            directory=self.train_dir,
            shuffle=True,
            color_mode="rgb",
            target_size=(self.img_height, self.img_width),
            class_mode=self.classMode)
        # Validation images from their own directory.
        self.validation_data_gen = val_Image_generator.flow_from_directory(
            batch_size=self.batch_size,
            directory=self.val_dir,
            color_mode="rgb",
            target_size=(self.img_height, self.img_width),
            class_mode=self.classMode)
        # Test images, unshuffled so predictions line up with filenames;
        # these are separate from train/validation and unseen by the model.
        self.test_data_gen = test_Image_generator.flow_from_directory(
            directory=self.test_dir,
            color_mode="rgb",
            target_size=(self.img_height, self.img_width),
            class_mode=self.classMode,
            shuffle=False)
        #save_to_dir = path+'\\testImagesPredicted',
        #save_format = 'jpeg')
    elif self.directoryType == 'Flow':
        # NOTE(review): x_col=self.target looks suspicious — x_col should
        # name the column holding image file paths, while y_col lists the
        # label columns. Confirm self.target really holds the filenames.
        self.train_data_dataframe = train_Image_generator.flow_from_dataframe(
            dataframe=self.trainLabels,
            directory=self.train_dir,
            validate_filenames=False,
            x_col=self.target,
            y_col=list(self.trainLabels.
                       loc[:, self.trainLabels.columns != self.target]),
            class_mode='raw',
            subset='training',
            batch_size=self.batch_size,
            target_size=(self.img_height, self.img_width),
            shuffle=True)
        self.val_data_dataframe = val_Image_generator.flow_from_dataframe(
            dataframe=self.trainLabels,
            directory=self.train_dir,
            validate_filenames=False,
            x_col=self.target,
            y_col=list(self.trainLabels.
                       loc[:, self.trainLabels.columns != self.target]),
            class_mode='raw',
            subset='validation',
            batch_size=self.batch_size,
            target_size=(self.img_height, self.img_width),
            shuffle=True)
print(df)
# NOTE(review): the German strings below ("Überschrift der Spalte mit den
# Schadensklassen" = "header of the damage-class column", "Ordner mit allen
# Bildern" = "folder with all images") are tutorial placeholders the user
# is expected to replace — they are runtime values, not comments.
columns = ["Überschrift der Spalte mit den Schadensklassen"]
# Training datagen: model-specific preprocessing plus random augmentation.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                             rotation_range=40,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             shear_range=0.2,
                             zoom_range=0.2,
                             horizontal_flip=True,
                             fill_mode='nearest')
# Validation/test datagen: preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
# Manual row-slice split: first 130 rows train, next 30 validation,
# remainder test. class_mode="other" yields raw y_col values (regression).
train_generator = datagen.flow_from_dataframe(
    dataframe=df[:130],
    directory="Ordner mit allen Bildern",
    x_col="Überschrift der Spalte mit den Dateinamen",
    y_col=columns,
    batch_size=25,
    shuffle=True,
    class_mode="other",
    target_size=(300, 300))
validation_generator = test_datagen.flow_from_dataframe(
    dataframe=df[130:160],
    directory="Ordner mit allen Bildern",
    x_col="Überschrift der Spalte mit den Dateinamen",
    y_col=columns,
    batch_size=32,
    shuffle=True,
    class_mode="other",
    target_size=(300, 300))
# Test generator over the remaining rows (call continues beyond this view).
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df[160:],
# Tail of a random assignment call (presumably np.random.choice — the call
# opens before this view): each row is independently assigned to
# train/validate/test with the configured proportions.
replace=True, p=[
    group_proportions['train'],
    group_proportions['validate'],
    group_proportions['test']
])
# Partition the data dictionary by the sampled group assignments.
train_data_df = data_dict_df.iloc[np.where(group_assignments == 'train')]
valid_data_df = data_dict_df.iloc[np.where(group_assignments == 'validate')]
test_data_df = data_dict_df.iloc[np.where(group_assignments == 'test')]
# class_mode='raw' yields the label columns as-is (multi-label / regression).
# NOTE(review): batch_size=1 is unusually small for training — confirm it
# is intentional and not a leftover from debugging.
train_generator = train_data_gen.flow_from_dataframe(
    dataframe=train_data_df  # subset to use for training
    ,
    directory=images_location,
    x_col='filename',
    y_col=label_colnames,
    batch_size=1,
    seed=69,
    shuffle=True,
    class_mode='raw',
    target_size=img_size_for_model)
# Validation generator (call continues beyond this view).
valid_generator = valid_test_data_gen.flow_from_dataframe(
    dataframe=valid_data_df  # subset to use for validation
    ,
    directory=images_location,
    x_col='filename',
    y_col=label_colnames,
    batch_size=1,
    seed=69,
    shuffle=True,
def main():
    """Train a convolutional autoencoder on normal chest X-ray images.

    Reads train/test CSVs from --csv_path, keeps only normal studies,
    builds (image, image) generators, and fits the CNN autoencoder,
    saving sample reconstructions via the SaveImageNModel callback.
    """
    ### Parse Arguments ###
    parser = ArgumentParser()
    parser.add_argument('--csv_path', type=str)
    parser.add_argument("--epoch", type=int, default=100)
    parser.add_argument('--output_path', type=str)
    parser.add_argument('--dropout_rate', type=float, default=0.4)
    parser.add_argument('--padding', type=str, default='same')
    parser.add_argument(
        '--imagedir',
        type=str,
        default='/media/nfs/CXR/NIH/chest_xrays/NIH/data/images_1024x1024/')
    args = parser.parse_args()
    print(args)

    ### Load the data frames ###
    train_path = os.path.join(args.csv_path, 'train.csv')
    valid_path = os.path.join(args.csv_path, 'test.csv')
    train = pd.read_csv(train_path)
    valid = pd.read_csv(valid_path)

    ### Select Normal Images ###
    train = select_normal(train)
    valid = select_normal(valid)

    # class_mode='input' makes the generator yield (image, image) pairs,
    # which is exactly what an autoencoder trains on.
    datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = datagen.flow_from_dataframe(dataframe=train,
                                                  directory=args.imagedir,
                                                  x_col='Image Index',
                                                  class_mode='input',
                                                  batch_size=32,
                                                  color_mode="grayscale",
                                                  target_size=(224, 224))
    valid_generator = datagen.flow_from_dataframe(dataframe=valid,
                                                  directory=args.imagedir,
                                                  x_col='Image Index',
                                                  class_mode='input',
                                                  batch_size=32,
                                                  color_mode="grayscale",
                                                  target_size=(224, 224))

    ### Load sample images for the image-saving callback ###
    imgs = []
    imgs_sample = train.sample(10)
    for idx, row in imgs_sample.iterrows():
        # BUG FIX: the image directory was hard-coded here, silently
        # ignoring --imagedir; build the path from args.imagedir instead.
        image = cv2.imread(
            os.path.join(args.imagedir, imgs_sample.loc[idx, "Image Index"]),
            0)
        # Match the generator preprocessing: resize to 224x224, scale to [0,1].
        image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_CUBIC)
        image = image / 255.
        imgs.append(np.array(image))
    imgs = np.array(imgs)
    print(imgs.shape)

    ### Train model ###
    AE = CNN_AE()
    model = AE.get_model()
    model.compile(loss=root_mean_squared_error,
                  optimizer='adadelta',
                  metrics=['accuracy'])
    # BUG FIX: print(model.summary) printed the bound method object; call
    # model.summary() so the architecture is actually printed.
    model.summary()
    modelCallback = SaveImageNModel(imgs)
    # NOTE(review): fit_generator is deprecated in TF2 in favor of
    # model.fit(...); kept here in case an older Keras is pinned.
    model.fit_generator(generator=train_generator,
                        steps_per_epoch=250,
                        epochs=args.epoch,
                        shuffle=True,
                        callbacks=[modelCallback],
                        validation_data=valid_generator,
                        validation_steps=15)
# Tail of a list literal opened before this view.
]
# Render one spectrogram image per audio file; the image name is derived
# from the file name. NOTE(review): split('/')[5] hard-codes the path
# depth — os.path.basename would be robust; confirm all paths share this
# depth before relying on it.
for i in range(len(extract)):
    create_spectrogram(extract[i], extract[i].split('/')[5].split('.')[0])
traindf = create_df(extract, 'train')
from keras_preprocessing.image import ImageDataGenerator
# Single datagen: rescale to [0,1] and reserve 25% of rows for validation.
# NOTE(review): any augmentation added later to this shared datagen would
# also apply to the validation subset.
datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.25)
train_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory="/home/shubrah/ml/pad/train/",
    x_col="filename",
    y_col="class",
    subset="training",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(64, 64))
# Validation generator over the held-out 25% (call continues beyond view).
valid_generator = datagen.flow_from_dataframe(
    dataframe=traindf,
    directory="/home/shubrah/ml/pad/train/",
    x_col="filename",
    y_col="class",
    subset="validation",
    batch_size=32,
    seed=42,
    shuffle=True,
# Tail of the train flow_from_dataframe call opened before this view.
# class_mode="other" yields the raw 'level' values (ordinal regression);
# has_ext=False means 'image' values lack file extensions.
directory='/data1/visionlab/data/EyePACS_2015/256-EyePACS-all',
x_col="image",
y_col="level",
has_ext=False,
batch_size=32,
seed=42,
shuffle=True,
class_mode="other",
target_size=(256, 256))
valid_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='/data1/visionlab/data/EyePACS_2015/256-EyePACS-all',
    x_col="image",
    y_col="level",
    has_ext=False,
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="other",
    target_size=(256, 256))
# Whole-epoch step counts (floor division drops the final partial batch).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size
# Balanced per-class weights held in a Keras variable so callbacks can
# update/read them during training.
# NOTE(review): positional arguments to compute_class_weight were removed
# in scikit-learn 0.24+ — newer versions require
# compute_class_weight('balanced', classes=..., y=...); confirm the pinned
# sklearn version.
balance_weights = K.variable(
    class_weight.compute_class_weight('balanced', np.unique(train_df.level),
                                      train_df.level))
# Callback list (continues beyond this view).
callbacks = [
    Resample_Iterator(balance_weights),
# Tail of the training ImageDataGenerator constructor opened before this
# view: random flips/shears/shifts for training-time augmentation.
vertical_flip=True,
fill_mode='nearest',
shear_range=0.1,
height_shift_range=0.1,
width_shift_range=0.1)
# Validation datagen: no augmentation, 20% internal split.
validation_idg = ImageDataGenerator(validation_split=0.2)
# Stratified K-fold cross-validation over the label column.
# NOTE(review): passing subset="training"/"validation" on top of the manual
# skf fold split looks wrong — subset="validation" here keeps only the last
# 20% of the already-held-out fold, discarding most validation rows, and
# subset="training" requires train_idg to have been built with a
# validation_split (constructor not visible here). Confirm the intent.
for train_index, val_index in skf.split(np.zeros(n), Y):
    training_data = train_labels.iloc[train_index]
    validation_data = train_labels.iloc[val_index]
    train_generator = train_idg.flow_from_dataframe(
        training_data,
        directory=os.path.join(WORK_DIR, "train_images"),
        subset="training",
        x_col="image_id",
        y_col="label",
        target_size=(TARGET_SIZE, TARGET_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="sparse")
    validation_generator = validation_idg.flow_from_dataframe(
        validation_data,
        directory=os.path.join(WORK_DIR, "train_images"),
        subset="validation",
        x_col="image_id",
        y_col="label",
        target_size=(TARGET_SIZE, TARGET_SIZE),
        batch_size=BATCH_SIZE,
        class_mode="sparse")
    # Fresh model per fold.
    model = create_model()
    model.summary()
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import load_img
#print("intento cnn mia")
#print("intento cnn1")
print("TODO")
# NOTE(review): 'meld' is loaded but not used in this section — presumably
# used further down the file; verify before removing.
meld = pd.read_csv("mel.csv")
df = pd.read_csv("./traindir/newtrain.csv")
dftrain = pd.read_csv("newsampl.csv")
# One column per ISIC 2018 lesion class (multi-label one-hot targets).
columns = ["MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC"]
# Both datagens only rescale — no augmentation anywhere.
datagen = ImageDataGenerator(rescale=1. / 255.)
test_datagen = ImageDataGenerator(rescale=1. / 255.)
# class_mode="other" yields the raw y_col vectors; deprecated in newer
# keras-preprocessing in favor of class_mode="raw".
train_generator = datagen.flow_from_dataframe(
    dataframe=dftrain,  ###dftrain
    directory="./traindir/train",
    x_col="image",
    y_col=columns,
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="other",
    target_size=(350, 350))
# NOTE(review): validation rows come from a different CSV (df) than the
# training rows (dftrain); confirm the two do not overlap, otherwise
# validation leaks training images.
valid_generator = test_datagen.flow_from_dataframe(
    dataframe=df[4500:5500],
    directory="./traindir/train",
    x_col="image",
    y_col=columns,
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="other",
    target_size=(350, 350))