def diagnose():
    """Classify every file in the test image folder and return the result as JSON.

    The module-level ``model`` predicts one class index per file found in the
    hard-coded test directory. The predictions are also written to
    ``submission_test1.csv`` (columns ``id`` and ``label``), and the JSON
    string is printed before being returned.

    Returns:
        str: JSON array of ``{"filename": ..., "category": ...}`` records.
    """
    image_size = (128, 128)
    batch_size = 15
    # Single source of truth for the folder that is both listed and read from.
    test_dir = "/home/sheila/Desktop/Learning_data_science/crop check/images/test1/"

    test_df = pd.DataFrame({'filename': os.listdir(test_dir)})
    nb_samples = test_df.shape[0]

    test_gen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_gen.flow_from_dataframe(
        test_df,
        test_dir,
        x_col='filename',
        y_col=None,
        class_mode=None,
        target_size=image_size,
        batch_size=batch_size,
        # Keep file order so predictions line up with test_df rows.
        shuffle=False)

    # np.ceil returns a float; the step count is handed over as a plain int.
    steps = int(np.ceil(nb_samples / batch_size))
    predict = model.predict_generator(test_generator, steps=steps)
    test_df['category'] = np.argmax(predict, axis=-1)

    # Serialise before building the submission frame so the JSON keeps the
    # original 'filename' / 'category' keys.
    j = json.dumps(test_df.to_dict(orient='records'))

    submission_df = pd.DataFrame({
        'id': test_df['filename'].str.split('.').str[0],
        'label': test_df['category'],
    })
    submission_df.to_csv('submission_test1.csv', index=False)

    print(j)
    return j
def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True,
                        batch_size=8, seed=1, target_w=320, target_h=320):
    """Return a training iterator that normalizes each image individually.

    Args:
        df (dataframe): dataframe specifying training data.
        image_dir (str): directory where image files are held.
        x_col (str): name of column in df that holds filenames.
        y_cols (list): list of strings that hold y labels for images.
        shuffle (bool): whether to shuffle the rows.
        batch_size (int): images per batch.
        seed (int): random seed.
        target_w (int): final width of input images.
        target_h (int): final height of input images.

    Returns:
        generator (DataFrameIterator): iterator over the training set.
    """
    print("getting train generator...")

    # Per-sample normalization: each image is centered and scaled on its own.
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True)

    # Flow from the dataframe with the requested batch size and image size.
    generator = image_generator.flow_from_dataframe(
        dataframe=df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
        # Keras expects target_size as (height, width).
        target_size=(target_h, target_w))

    return generator
def input_loads():
    """Build the categorical training iterator from the label CSV.

    Reads the CSV at the module-level ``label_dir``, turns ``file_id`` into a
    ``<id>.png`` filename, casts ``accent`` to string, prints the first rows
    and returns an iterator over the images found under ``train_dir``.
    """
    labels = pd.read_csv(label_dir)
    # File ids become the PNG filenames that the generator looks up.
    labels['file_id'] = labels['file_id'].astype('str') + '.png'
    labels['accent'] = labels['accent'].astype('str')
    print(labels.head())

    flow_options = dict(
        dataframe=labels,
        directory=train_dir,
        x_col='file_id',
        y_col='accent',
        class_mode='categorical',
        target_size=(128, 174),
        batch_size=32,
    )
    rescaler = ImageDataGenerator(rescale=1. / 255)
    return rescaler.flow_from_dataframe(**flow_options)
def trainGenerator(batch_size, train_path, data_frame, aug_dict,
                   image_color_mode="grayscale", mask_color_mode="grayscale",
                   flag_multi_class=False, save_to_dir=None,
                   target_size=(256, 256), seed=1):
    '''
    Build an augmented image iterator from ``data_frame``.

    Filenames come from the 'field' column (resolved under ``train_path``) and
    targets from the 'lai' column. ``aug_dict`` is expanded into the
    ``ImageDataGenerator`` constructor. ``mask_color_mode`` and
    ``flag_multi_class`` are accepted but not used here.

    if you want to visualize the results of generator, set save_to_dir = "your path"
    '''
    flow_options = dict(
        directory=train_path,
        x_col='field',
        y_col='lai',
        target_size=target_size,
        color_mode=image_color_mode,
        class_mode='other',
        batch_size=batch_size,
        seed=seed,
        save_to_dir=save_to_dir,
    )
    augmenter = ImageDataGenerator(**aug_dict)
    return augmenter.flow_from_dataframe(data_frame, **flow_options)
def test_model(data, model, class_cols, model_path):
    """Tests models that have already been initialized and trained.

    The loss and accuracy are appended to ``test.results`` inside
    ``model_path``.

    Parameters
    ----------
    data : :obj:`pandas.core.frame.DataFrame`
        The data frame containing testing data formatted for use with
        :func:`ImageDataGenerator.flow_from_dataframe`
    model : :obj:`keras.models.Model`
        The model to test.
    class_cols : list of str
        The dataframe columns containing the classes. For use with
        multi-output models.
    model_path : str
        Path where model is saved. Used to store test results with model.

    Returns
    -------
    score : list of float
        The model's test scores.
    """
    test_datagen = ImageDataGenerator()
    test_iterator = test_datagen.flow_from_dataframe(
        data,
        directory=MODULE_PATH,
        x_col='imgpath',
        y_col=class_cols,
        # The keyword is `batch_size`; the former `batchsize` spelling never
        # reached the iterator, so BATCH_SIZE was not applied.
        batch_size=BATCH_SIZE,
        target_size=INPUT_DIM,
        class_mode='other')

    # Ceiling division: include the final partial batch and never ask for
    # zero steps when there are fewer samples than one batch.
    test_step_size = -(-test_iterator.n // test_iterator.batch_size)
    score = model.evaluate_generator(test_iterator,
                                     steps=test_step_size,
                                     verbose=1)

    with open(os.path.join(model_path, 'test.results'), 'a') as f_res:
        print("loss: {0:.4f}".format(score[0]), file=f_res)
        print("acc: {0:.4f}".format(score[1]), file=f_res)
    return score
def get_train_generator(df, image_dir, x_col, y_cols, shuffle=True,
                        batch_size=8, seed=1, target_w=320, target_h=320):
    """
    Return generator for training set, normalizing each image with its own
    mean and standard deviation (samplewise normalization).

    Args:
      df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      shuffle (bool): whether to shuffle the rows.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.

    Returns:
        train_generator (DataFrameIterator): iterator over training set
    """
    print("getting train generator...")

    # normalize images (per sample)
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True)

    # flow from dataframe with specified batch size and target image size
    generator = image_generator.flow_from_dataframe(
        dataframe=df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
        # Keras expects target_size as (height, width).
        target_size=(target_h, target_w))

    return generator
def extract_train_features(pitcher, sample_count, train_df, pitch_type):
    """Run augmented training images through ``conv_base`` and collect features.

    Images listed in ``train_df['file_name']`` are read from
    ``../data/<pitcher>/image/combined/`` with binary labels taken from the
    ``pitch_type`` column. Exactly ``sample_count`` rows are filled; a batch
    that would overflow the output arrays is trimmed to fit.

    Args:
        pitcher: name used to build the image directory path.
        sample_count (int): number of samples to extract.
        train_df (dataframe): dataframe with ``file_name`` and label columns.
        pitch_type: name of the label column.

    Returns:
        tuple: ``(features, labels)`` where ``features`` has shape
        ``(sample_count, 8 * 8 * 512)`` and ``labels`` has shape
        ``(sample_count,)``.

    Raises:
        ValueError: if the generator yields an empty batch.
    """
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest')

    batch_size = 20

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=f'../data/{pitcher}/image/combined/',
        x_col='file_name',
        y_col=f'{pitch_type}',
        class_mode='binary',
        batch_size=batch_size,
        shuffle=False)

    # (8, 8, 512) must match the output shape of conv_base.
    features = np.zeros(shape=(sample_count, 8, 8, 512))
    labels = np.zeros(shape=(sample_count))

    filled = 0
    if sample_count > 0:
        # The generator yields batches indefinitely, so stop explicitly once
        # sample_count rows have been written.
        for inputs_batch, labels_batch in train_generator:
            take = min(len(inputs_batch), sample_count - filled)
            if take == 0:
                raise ValueError("generator yielded an empty batch")
            features_batch = conv_base.predict(inputs_batch)
            # Trim the batch so a short or overflowing batch never triggers a
            # shape mismatch on assignment.
            features[filled:filled + take] = features_batch[:take]
            labels[filled:filled + take] = labels_batch[:take]
            filled += take
            if filled >= sample_count:
                break

    features = np.reshape(features, (sample_count, 8*8*512))
    return features, labels
def get_data_generator(dataframe, x_col, y_col, subset=None, shuffle=True,
                       batch_size=16, class_mode="categorical"):
    """Return an augmented iterator over the images listed in ``dataframe``.

    The output size comes from the module-level ``width`` and ``height``;
    ``subset`` is forwarded unchanged to ``flow_from_dataframe``.
    """
    augmentation = dict(
        rotation_range=15,
        rescale=1./255,
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=False,
        width_shift_range=0.1,
        height_shift_range=0.1,
    )
    augmenter = ImageDataGenerator(**augmentation)
    return augmenter.flow_from_dataframe(
        dataframe=dataframe,
        x_col=x_col,
        y_col=y_col,
        subset=subset,
        target_size=(width, height),
        class_mode=class_mode,
        batch_size=batch_size,
        shuffle=shuffle,
    )
def df_to_generators(self, df: DataFrame, path: str, mode: str):
    """Create the image iterator for the given mode.

    In 'test' mode the iterator carries no labels and is not shuffled; in any
    other mode labels come from the 'class' column as categorical targets.
    Only 'train' mode applies the random-eraser preprocessing function.
    """
    is_test = mode == 'test'
    if is_test:
        label_column, label_mode = None, None
    else:
        label_column, label_mode = 'class', 'categorical'

    eraser = None
    if mode == 'train':
        eraser = get_random_eraser(v_l=0, v_h=255)

    datagen = ImageDataGenerator(preprocessing_function=eraser)
    return datagen.flow_from_dataframe(
        dataframe=df,
        directory=os.path.abspath(path),
        x_col='img_file',
        y_col=label_column,
        target_size=self.img_size,
        color_mode='rgb',
        class_mode=label_mode,
        batch_size=self.batch_size,
        seed=self.seed,
        shuffle=not is_test,
    )
def test(args, ckpt_file):
    """Evaluate the checkpoint ``ckpt_file`` on the test subset of fold ``FOLD``.

    Args:
        args (dict): configuration; reads ``OUTPUT_DIRECTORY``,
            ``LABEL_DIRECTORY``, ``IMG_DIRECTORY``, ``IMG_SIZE``,
            ``BATCH_SIZE`` and ``CLASSES_STR``.
        ckpt_file (str): model file name inside the fold's output directory.

    Returns:
        dict: ``{"predictions": y_pred, "labels": y_true}``.
    """
    print("Currently processing fold ", FOLD)
    output_directory = "{}{}/".format(args["OUTPUT_DIRECTORY"], FOLD)
    test_label_file = "{}test_subset{}.csv".format(args["LABEL_DIRECTORY"],
                                                   FOLD)
    test_dataframe = pd.read_csv(test_label_file)
    # NOTE(review): the column already contains IMG_DIRECTORY and the same
    # directory is passed again below — presumably IMG_DIRECTORY is an
    # absolute path; confirm.
    test_dataframe["Absolutefilename"] = (args["IMG_DIRECTORY"] +
                                          test_dataframe["Filename"])

    # No testing image augmentation (except for converting pixel values to floats)
    test_datagen = ImageDataGenerator(rescale=1.0 / 255)

    # Load test images in batches and apply rescaling; order is kept so that
    # predictions line up with `classes`.
    test_data_generator = test_datagen.flow_from_dataframe(
        test_dataframe,
        args["IMG_DIRECTORY"],
        x_col="Absolutefilename",
        y_col="Label",
        target_size=args["IMG_SIZE"],
        batch_size=args["BATCH_SIZE"],
        shuffle=False,
        classes=args["CLASSES_STR"],
        class_mode="categorical",
    )

    # Load the last best model
    model = load_model(output_directory + ckpt_file)

    # Ceiling division: `count // batch + 1` ran one step too many whenever
    # the count was an exact multiple of the batch size, which made the
    # generator wrap around and return more predictions than labels.
    batch_size = args["BATCH_SIZE"]
    steps = (test_data_generator.n + batch_size - 1) // batch_size

    # Evaluate model on test subset for kth fold
    predictions = model.predict_generator(test_data_generator, steps)
    y_true = test_data_generator.classes
    y_pred = np.argmax(predictions, axis=1)
    # Assign predictions worse than random guess to negative class
    y_pred[np.max(predictions, axis=1) < 1 / 9] = 8
    return {"predictions": y_pred, "labels": y_true}
def inference(csv_file):
    """Score the saved isolation model against the labels in ``csv_file``.

    The CSV has no header; its two columns are read as ``id`` (image path
    relative to ``./``) and ``label``. Boolean-like labels are mapped to
    'isolation' / 'noaction' before comparison.

    Args:
        csv_file: path of the CSV file to evaluate.

    Returns:
        dict: ``accuracy``, ``recall`` and ``precision`` (macro-averaged).
    """
    # Context manager guarantees the architecture file is closed.
    with open('model_isolation.json', 'r') as json_file:
        loaded_model = model_from_json(json_file.read())
    loaded_model.load_weights("model_wieghts_isolation.h5")

    df = pd.read_csv(csv_file, header=None, names=["id", "label"], dtype=str)
    df = df.replace({
        'true': 'isolation',
        'false': 'noaction',
        'True': 'isolation',
        'False': 'noaction'
    })

    test_datagen = ImageDataGenerator(rescale=1. / 255)
    valid_generator = test_datagen.flow_from_dataframe(
        dataframe=df,
        directory="./",
        x_col="id",
        y_col="label",
        class_mode="categorical",
        batch_size=1,
        target_size=(320, 180),
        # Predictions are compared with df['label'] row by row, so the
        # generator must keep the dataframe order (the default shuffles).
        shuffle=False)

    loaded_model.compile(loss='binary_crossentropy',
                         optimizer=optimizers.RMSprop(lr=2e-5),
                         metrics=['acc'])
    probabilities = loaded_model.predict_generator(
        valid_generator, steps=valid_generator.samples)

    # Output index 0 is 'isolation'; every other index is 'noaction'.
    result = [
        'isolation' if class_index == 0 else 'noaction'
        for class_index in probabilities.argmax(axis=1)
    ]

    accuracy = (df['label'].values == result).mean()
    ans = precision_recall_fscore_support(df['label'].values,
                                          result,
                                          average='macro')
    return {'accuracy': accuracy, 'recall': ans[1], 'precision': ans[0]}
def train_generator_from_dataframe(dataframe_keras_master, batch_size, classes):
    """Return ``[datagen, training_iterator]`` for the master dataframe.

    The datagen reserves 20% of the rows for validation; the iterator returned
    here is its "training" subset. Paths are read from the 'origin' column and
    are not validated on disk.
    """
    augmenter = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        validation_split=0.2,
    )
    flow_options = dict(
        dataframe=dataframe_keras_master,
        directory=None,
        x_col="origin",
        y_col="class_name",
        classes=classes,
        batch_size=batch_size,
        target_size=(299, 299),
        class_mode="categorical",
        subset="training",
        shuffle=True,
        validate_filenames=False,
    )
    training_iterator = augmenter.flow_from_dataframe(**flow_options)
    return [augmenter, training_iterator]
def img_flow(csv_file, base_path):
    """Return a shuffled, augmented categorical iterator for ``csv_file``.

    The CSV supplies 'Filename' and 'Label' columns; files are resolved under
    ``base_path``. Image size, batch size and class names come from the
    module-level ``RAW_IMG_SIZE``, ``BATCH_SIZE`` and ``CLASS_NAMES``.
    """
    augmenter = ImageDataGenerator(
        rescale=1. / 255,
        fill_mode="reflect",
        shear_range=0.2,
        horizontal_flip=True,
        rotation_range=10,
        channel_shift_range=10,
        brightness_range=(0.85, 1.15),
    )
    frame = pd.read_csv(csv_file)
    iterator = augmenter.flow_from_dataframe(
        dataframe=frame,
        directory=base_path,
        x_col='Filename',
        y_col='Label',
        class_mode='categorical',
        target_size=RAW_IMG_SIZE,
        batch_size=BATCH_SIZE,
        classes=CLASS_NAMES,
        shuffle=True,
        seed=123,
    )
    return iterator
def get_prediction(filepath):
    """Predict the emotion for the single image at ``filepath``.

    Loads ``./model.h5``, feeds it the image rescaled to 64x64, and maps the
    eight model outputs onto the emotion names in a fixed order.

    Args:
        filepath: path of the image file.

    Returns:
        tuple: ``(top_emotion, mapped_emotions)`` where ``mapped_emotions``
        maps every emotion name to ``round_pred`` of its score.
    """
    # `dir` would shadow the builtin, hence the explicit names.
    image_dir = os.path.dirname(filepath)
    file_name = os.path.basename(filepath)

    df = pd.DataFrame([{'file_path': file_name}])
    loaded_model = load_model(os.path.abspath('./model.h5'))

    test_datagen = ImageDataGenerator(rescale=1. / 255.)
    test_gen = test_datagen.flow_from_dataframe(dataframe=df,
                                                directory=image_dir,
                                                x_col="file_path",
                                                y_col=None,
                                                batch_size=1,
                                                class_mode=None,
                                                target_size=(64, 64))
    pred = loaded_model.predict_generator(test_gen, steps=1, verbose=1)

    # Order must match the model's output units.
    emotions = [
        'angry', 'fearful', 'disgust', 'sad', 'surprised', 'happy', 'calm',
        'neutral'
    ]
    # Derived from `emotions` so the names and the indices cannot drift apart.
    mapped_emotions = {
        emotion: round_pred(pred[0][index])
        for index, emotion in enumerate(emotions)
    }
    return emotions[np.argmax(pred, axis=1)[0]], mapped_emotions
def generate_validation_iterator(self):
    """Return a categorical iterator over the ILSVRC2012 validation images.

    Image files are listed in sorted order and paired by position with the
    lines of the ground-truth file; each line's integer is translated through
    the module-level ``in_classes`` table.
    """
    validation_images_path = './res/ILSVRC2012_img_val/val/'
    validations_labels_path = './res/ILSVRC2012_devkit_t12/data/ILSVRC2012_validation_ground_truth.txt'

    # IMAGES: plain files only, sorted by name.
    image_names = sorted(
        entry for entry in listdir(validation_images_path)
        if isfile(join(validation_images_path, entry)))

    # LABELS: one integer per line; surrounding whitespace is stripped.
    with open(validations_labels_path) as label_file:
        raw_labels = label_file.readlines()
    class_names = [in_classes[int(raw.strip())][0] for raw in raw_labels]

    # Pair the i-th image with the i-th label.
    rows = [[name, class_names[position]]
            for position, name in enumerate(image_names)]
    frame = pd.DataFrame(rows, columns=['x', 'y'])

    return ImageDataGenerator().flow_from_dataframe(
        dataframe=frame,
        directory=validation_images_path,
        x_col='x',
        y_col='y',
        target_size=TARGET_SIZE,
        class_mode="categorical",
        color_mode='rgb',
        batch_size=BATCH_SIZE)
def image_feature_extraction(df):
    '''
    Extract InceptionV3 features for the images listed in ``df`` and save
    them to ``../data/image_features.npy``.

    df: dataframe whose "image" column holds filenames found under
        ``../data/train_images/``. Images are resized to 299x299, the input
        size InceptionV3 requires when its classification top is included.

    If the feature file already exists the user is asked whether to
    recompute; answering "n" returns without doing anything.
    '''
    if os.path.exists("../data/image_features.npy"):
        ans = input("Feature already exists,do you want to re run it?(y/n)\n")
        if ans == "n":
            return

    file_list = os.listdir('../data/train_images')
    print(len(file_list))

    batch_size = 8
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_dataframe(
        dataframe=df,
        directory="../data/train_images/",
        x_col="image",
        y_col=None,
        batch_size=batch_size,
        shuffle=False,
        seed=123,
        class_mode=None,
        # Was (229, 229): a typo for the 299x299 input the network expects.
        target_size=(299, 299))

    nb_samples = len(test_generator.filenames)
    print(nb_samples)

    model = keras.applications.inception_v3.InceptionV3(include_top=True,
                                                        weights='imagenet',
                                                        pooling='avg')
    # Take the layer just before the classifier. Indexing with -2 avoids
    # `model.layers.pop()`, which does not remove the layer on Keras versions
    # where `layers` returns a copy, leaving the softmax as the output.
    model = Model(model.input, model.layers[-2].output)

    image_features = model.predict_generator(
        test_generator,
        steps=int(np.ceil(nb_samples / batch_size)),
        use_multiprocessing=False,
        verbose=1)
    print(image_features.shape)
    np.save("../data/image_features.npy", image_features)
def get_predictions(model_path, draws_path):
    """Predict every drawing in ``draws_path`` and return a text report.

    For each file the report holds the filename, the index of the highest
    output, one line per output with its value as a percentage (two
    decimals), and a trailing blank line.
    """
    model = load_model(model_path)
    draw_files = list(os.listdir(draws_path))

    df_draws = pd.DataFrame({
        'filename': draw_files,
        'category': [''] * len(draw_files),
    })
    test_datagen = ImageDataGenerator(rescale=1. / 255)

    report_parts = []
    for row in range(df_draws.shape[0]):
        # One-row frame -> one-image generator; Keras chatter is silenced.
        with suppress_stdout():
            df_draw = df_draws.iloc[[row]]
            draw_generator = test_datagen.flow_from_dataframe(
                df_draw,
                draws_path,
                x_col='filename',
                y_col='category',
                target_size=(28, 28),
                class_mode='categorical',
                color_mode='grayscale')
            predict = model.predict_generator(draw_generator, steps=1)

        prediction = predict.argmax()
        balance = ''.join(
            '{:0.2f}'.format(score * 100) + '\n' for score in predict[0])
        report_parts.append(
            df_draw.iloc[0, 0] + '\n' + str(prediction) + '\n' + balance + '\n')

    return ''.join(report_parts)
def create_image_generator(self,
                           df,
                           x_col,
                           base_directory,
                           batch_size=32,
                           target_size=(64, 64),
                           rotation_range=15,
                           width_shift_range=0.05,
                           height_shift_range=0.05,
                           shear_range=0.05,
                           zoom_range=0.1,
                           horizontal_flip=True):
    """Return an augmented 'raw' iterator over the images listed in ``df``.

    ``x_col`` names the filename column; every other column of ``df`` is used
    as a target. Files are resolved under ``base_directory``.
    """
    augmentation = dict(
        rescale=1. / 255.,
        rotation_range=rotation_range,
        width_shift_range=width_shift_range,
        height_shift_range=height_shift_range,
        shear_range=shear_range,
        zoom_range=zoom_range,
        horizontal_flip=horizontal_flip,
        fill_mode='nearest',
    )
    augmenter = ImageDataGenerator(**augmentation)

    # All columns except the filename column are targets.
    target_columns = df.columns.tolist()
    target_columns.remove(x_col)

    return augmenter.flow_from_dataframe(
        dataframe=df,
        directory=base_directory,
        x_col=x_col,
        y_col=target_columns,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='raw')
def predictGenerator(batch_size,
                     dataframe,
                     x_col='file_gfp',
                     image_color_mode="grayscale",
                     target_size=(256, 256)):
    '''
    Return an unshuffled, label-free image iterator for prediction.

    File paths are read from ``dataframe[x_col]`` as given (no base
    directory) and pixel values are rescaled by 1/255.
    '''
    flow_options = dict(
        directory=None,
        class_mode=None,
        x_col=x_col,
        color_mode=image_color_mode,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=False,
    )
    rescaler = ImageDataGenerator(rescale=1. / 255)
    return rescaler.flow_from_dataframe(dataframe, **flow_options)
def get_test_and_valid_generator(valid_df,
                                 test_df,
                                 train_df,
                                 image_dir,
                                 x_col,
                                 y_cols,
                                 sample_size=100,
                                 batch_size=8,
                                 seed=1,
                                 target_w=320,
                                 target_h=320):
    """
    Return generators for the validation set and the test set, normalized with
    statistics estimated from a sample of the training set.

    Args:
      valid_df (dataframe): dataframe specifying validation data.
      test_df (dataframe): dataframe specifying test data.
      train_df (dataframe): dataframe specifying training data.
      image_dir (str): directory where image files are held.
      x_col (str): name of column in df that holds filenames.
      y_cols (list): list of strings that hold y labels for images.
      sample_size (int): size of sample to use for normalization statistics.
      batch_size (int): images per batch to be fed into model during training.
      seed (int): random seed.
      target_w (int): final width of input images.
      target_h (int): final height of input images.

    Returns:
        valid_generator, test_generator (DataFrameIterator): iterators over
        the validation set and the test set, in that order.
    """
    print("getting train and valid generators...")

    # Keras expects target_size as (height, width).
    target_size = (target_h, target_w)

    # Sample the training set using the caller's directory and columns (not
    # module-level globals); seeded so the statistics are reproducible.
    raw_train_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=train_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=sample_size,
        shuffle=True,
        seed=seed,
        target_size=target_size)

    # First batch = the data sample; element 0 holds the images.
    data_sample = next(raw_train_generator)[0]

    # Fit mean and std on the training sample only.
    image_generator = ImageDataGenerator(featurewise_center=True,
                                         featurewise_std_normalization=True)
    image_generator.fit(data_sample)

    def _flow(frame):
        # Same settings for both evaluation sets; order is preserved.
        return image_generator.flow_from_dataframe(
            dataframe=frame,
            directory=image_dir,
            x_col=x_col,
            y_col=y_cols,
            class_mode="raw",
            batch_size=batch_size,
            shuffle=False,
            seed=seed,
            target_size=target_size)

    valid_generator = _flow(valid_df)
    test_generator = _flow(test_df)
    return valid_generator, test_generator
from keras.preprocessing.image import ImageDataGenerator

# Number of rows per artist; the index gives the list of artists.
all_data_info_true300_count = all_data_info_true300.groupby('artist').count()
print(all_data_info_true300_count.shape)
artist_list = all_data_info_true300_count.index.values.tolist()

## Image processing to get the starting data for training the model
df = all_data_info_true300.loc[:, ['artist', 'new_filename']]

# Only the training images are randomly flipped.
train_datagen = ImageDataGenerator(horizontal_flip=True)
valid_datagen = ImageDataGenerator(horizontal_flip=False)
# featurewise_center=True would 0-center the data
# featurewise_std_normalization would normalize it

# Defaults relied on below: color_mode='rgb', class_mode='categorical',
# batch_size=32. `has_ext` is deprecated (filenames already include their
# extension), so it is no longer passed.
train_generator = train_datagen.flow_from_dataframe(
    df,
    "data/train",
    target_size=(224, 224),
    x_col='new_filename',
    y_col='artist',
    seed=100)
# Found 13680 images belonging to 57 classes.

# NOTE(review): the same dataframe is used for both directories — presumably
# rows whose file is absent from the directory are skipped; confirm.
valid_generator = valid_datagen.flow_from_dataframe(
    df,
    "data/valid",
    target_size=(224, 224),
    x_col='new_filename',
    y_col='artist',
    seed=100)
# Found 1710 images belonging to 57 classes.

STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# Shared settings for the generators created in the split loop below.
shuffle = True
batch_size = 16
img_size = 224
bntk_input = (img_size, img_size, 3)
kelas = len(np.unique(label))

# A single stratified 80/20 split.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)

for train_index, val_index in sss.split(filename, label):
    training_data = imgData.iloc[train_index]
    validation_data = imgData.iloc[val_index]

    clear_session()

    # NOTE(review): no target_size is passed here, so img_size is not applied
    # to the generated images — confirm this is intended.
    train_generator = image_generator.flow_from_dataframe(
        training_data,
        batch_size=batch_size,
        x_col="filepaths",
        y_col="labels",
        class_mode="categorical",
        shuffle=shuffle)

    validation_generator = image_generator2.flow_from_dataframe(
        validation_data,
        batch_size=batch_size,
        x_col="filepaths",
        y_col="labels",
        class_mode="categorical",
        shuffle=shuffle)

    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')
import numpy as np
from keras.layers import InputLayer

traindf = pd.read_csv("Horror_Test.csv", dtype=str)

valid_datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.05)
test_datagen = ImageDataGenerator(rescale=1. / 255., validation_split=0.15)
# The training datagen must carry the same validation_split as valid_datagen:
# without it, subset="training" covers every row, so the validation images
# would also be trained on.
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.05)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=traindf,
    directory="Horror_images",
    x_col='Image_cropped',
    y_col='Emotion',
    subset="training",
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(299, 299))

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=traindf,
    directory="Horror_images",
    x_col="Image_cropped",
    y_col="Emotion",
    subset="validation",
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(299, 299))
model.add(Dropout(0.5))
model.add(Dense(3))
model.add(Activation('softmax'))

# Three mutually exclusive classes with a softmax output and one-hot labels
# (class_mode='categorical') call for categorical cross-entropy, not the
# binary variant.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

train_datagen = ImageDataGenerator(fill_mode='nearest', rescale=1. / 255)
test_datagen = ImageDataGenerator(fill_mode='nearest', rescale=1. / 255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=csv,
    directory="./dataset",
    x_col="image",
    y_col="category",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')

# Mapping from class name to output index, printed for reference.
label_map = (train_generator.class_indices)
print(label_map)

# NOTE(review): validation reads the same dataframe and directory as
# training, so it does not measure generalisation — confirm intent.
validation_generator = test_datagen.flow_from_dataframe(
    dataframe=csv,
    directory="./dataset",
    x_col="image",
    y_col="category",
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical')
def _add_conv_block(model, filters, kernel_size, leakiness, pool_size=None,
                    **conv_kwargs):
    """Append Conv2D -> BatchNormalization -> LeakyReLU (-> MaxPooling2D)."""
    model.add(
        Conv2D(filters, (kernel_size, kernel_size),
               strides=(1, 1),
               padding='same',
               **conv_kwargs))
    model.add(BatchNormalization(axis=-1))
    model.add(LeakyReLU(alpha=leakiness))
    if pool_size is not None:
        model.add(
            MaxPooling2D(pool_size=(pool_size, pool_size), strides=(2, 2)))


def cnn_model(channels, nb_epoch, batch_size, nb_classes, nb_gpus, cl_weights,
              leakiness, w_regu, b_regu, initializer, img_height, img_width,
              labels_train, train_data_dir):
    '''
    Build, compile and train the CNN classifier.

    Images are read from ``train_data_dir`` using the 'train_image_name' and
    'level' columns of ``labels_train``; 20% of the rows are held out for
    validation. ``nb_gpus``, ``w_regu``, ``b_regu`` and ``initializer`` are
    accepted but not used by this function.

    Returns the trained model, the validation generator and the training
    generator, in that order.
    '''
    # Conv2D takes a 4D tensor, but Keras handles the batch dimension itself,
    # so only (img_height, img_width, channels) is passed as input_shape.
    input_shape = (img_height, img_width, channels)

    # (filters, kernel size, max-pool size or None) for each conv block.
    conv_blocks = [
        (32, 3, 2),
        (64, 3, 2),
        (128, 3, None),
        (64, 1, None),
        (128, 3, 2),
        (256, 3, None),
        (128, 1, None),
        (256, 3, 3),
        (512, 3, 3),
    ]

    model = Sequential()
    for position, (filters, kernel_size, pool_size) in enumerate(conv_blocks):
        # Only the first layer declares the input shape.
        extra = {'input_shape': input_shape} if position == 0 else {}
        _add_conv_block(model, filters, kernel_size, leakiness,
                        pool_size=pool_size, **extra)

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=leakiness))
        model.add(Dropout(0.3))
    # The output width comes from the nb_classes argument.
    model.add(Dense(nb_classes, activation='softmax'))
    model.summary()

    sgd = SGD(lr=0.0001, momentum=0.9, decay=0, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='mean_squared_error',
                  metrics=['accuracy'])

    tensorboard = TensorBoard(log_dir='log/',
                              histogram_freq=0,
                              write_graph=True,
                              write_images=True)
    stop = EarlyStopping(monitor='loss', patience=0, verbose=2, mode='auto')
    model_chkpt = ModelCheckpoint(
        filepath=
        'saved_model/best_model/best_model_weights_Epoch_{epoch:02d}-ValLoss_{val_loss:.2f}.h5',
        monitor='val_loss',
        save_best_only=True)

    # One datagen provides both subsets so the 80/20 split is consistent.
    train_datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)
    flow_options = dict(
        x_col='train_image_name',
        y_col='level',
        target_size=(img_height, img_width),
        class_mode='categorical',
        batch_size=batch_size,
    )
    train_generator = train_datagen.flow_from_dataframe(
        labels_train, train_data_dir, subset='training', **flow_options)
    validation_generator = train_datagen.flow_from_dataframe(
        labels_train, train_data_dir, subset='validation', **flow_options)

    step_size_train = train_generator.n // train_generator.batch_size
    step_size_valid = validation_generator.n // validation_generator.batch_size

    model.fit_generator(train_generator,
                        steps_per_epoch=step_size_train,
                        validation_data=validation_generator,
                        validation_steps=step_size_valid,
                        epochs=nb_epoch,
                        class_weight=cl_weights,
                        verbose=1,
                        callbacks=[stop, tensorboard, model_chkpt])
    return model, validation_generator, train_generator
batch_size = 15 # In[15]: # training generator train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.1, zoom_range=0.2, rotation_range=15, horizontal_flip=True, width_shift_range=0.1, height_shift_range=0.1) train_generator = train_datagen.flow_from_dataframe(train_df, 'data/train/train/', x_col='filename', y_col='category', target_size=(64, 64), class_mode='categorical', batch_size=15) # In[16]: # Validation generator validate_datagen = ImageDataGenerator(rescale=1. / 225) validate_generator = validate_datagen.flow_from_dataframe( validate_df, directory='data/train/train/', x_col='filename', y_col='category', target_size=(64, 64), batch_size=15,
# The `image_generator` you created above will act to adjust your image data
# such that the new mean of the data will be zero, and the standard deviation
# of the data will be 1.
#
# In other words, the generator will replace each pixel value in the image
# with a new value calculated by subtracting the mean and dividing by the
# standard deviation.
#
# $$\frac{x_i - \mu}{\sigma}$$
#
# Run the next cell to pre-process your data using the `image_generator`.
# In this step you will also be reducing the image size down to 320x320 pixels.

# In[12]:

# Flow from the dataframe with the specified batch size and target image size
generator = image_generator.flow_from_dataframe(
    dataframe=train_df,
    directory="nih/images-small/",
    x_col="Image",  # features
    y_col=['Mass'],  # labels; the 'Mass' column should be in train_df
    class_mode="raw",
    batch_size=1,  # images per batch
    shuffle=False,  # keep the row order
    target_size=(320, 320)  # size of the output images
)

# Run the next cell to plot an example of a pre-processed image

# In[13]:

# Plot a processed image: the first image of the first batch
sns.set_style("white")
generated_image, label = generator[0]
plt.imshow(generated_image[0], cmap='gray')
plt.colorbar()
plt.title('Raw Chest X Ray Image')
pd_1 = pd.DataFrame(list( zip(col_1, col_3, col_4, col_5, col_6, col_7, col_8)), columns=['id_1', 'x', 'y', 'z', 'qx', 'qy', 'qz']) pd_2 = pd.DataFrame(list(zip(col_2, col_3)), columns=['id_2', 'labels']) return pd_1, pd_2 df_1, df_2 = create_df() in_gen_1 = ImageDataGenerator() in_gen_2 = ImageDataGenerator() in_gen_1 = in_gen_1.flow_from_dataframe( df_1, directory='/home/diego/my_project_dir/my_proj_env/flowfrom/images/', x_col="id_1", y_col=['x', 'y', 'z', 'qx', 'qy', 'qz'], target_size=(100, 300), batch_size=2, shuffle=False, class_mode='multi_output', color_mode='rgb') in_gen_2 = in_gen_2.flow_from_dataframe( df_2, directory='/home/diego/my_project_dir/my_proj_env/flowfrom/images/', x_col="id_2", y_col='labels', target_size=(100, 300), batch_size=2, shuffle=False, class_mode='raw', color_mode='rgb')
def main(config=None):
    """Train the count-regression model and save it under ``output/<trial>/``.

    When ``config`` is None the run configuration is taken from wandb;
    otherwise ``config`` is handed to ``wandb.init``. An existing model file
    is reloaded when ``config['continue_training']`` is truthy; otherwise a
    new ResNet50V2-based model is built, compiled and saved before training.
    The model is saved again once training finishes.
    """
    trial_name = os.path.splitext(__file__)[0]
    checkpoint_folder = os.path.sep.join(["output", trial_name])
    model_filename = os.path.sep.join(["output", trial_name, "model.h5"])

    from pathlib import Path
    Path(checkpoint_folder).mkdir(parents=True, exist_ok=True)

    import pandas as pd
    from keras.models import Sequential, load_model
    from keras.layers import Dense, Flatten, Dropout
    from keras.preprocessing.image import ImageDataGenerator
    from keras.optimizers import Adam
    from keras.applications import ResNet50V2
    from keras.applications.resnet_v2 import preprocess_input

    import tensorflow as tf
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    if physical_devices:
        # Enable memory growth on the first GPU.
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    import wandb
    from wandb.keras import WandbCallback

    if config is not None:
        wandb.init(project="minibar", config=config)
    else:
        wandb.init(project="minibar")
        config = wandb.config

    df_train = pd.read_csv('data/train_labels.csv')

    from helpers.decouple import decouple
    matrix_train, _ = decouple(df_train)

    from helpers.matrix_to_df import matrix_to_dfcount
    df_train_agg = matrix_to_dfcount(matrix_train)

    # A single datagen provides both subsets of the 80/20 split.
    train_datagen = ImageDataGenerator(validation_split=0.2,
                                       horizontal_flip=True,
                                       preprocessing_function=preprocess_input)

    input_height = config['input_shape_height']
    input_width = config['input_shape_width']
    flow_options = dict(
        dataframe=df_train_agg,
        directory='data/train',
        x_col='filename',
        y_col='count',
        target_size=(input_height, input_width),
        batch_size=config['batch_size'],
        class_mode='raw',
    )
    train_generator = train_datagen.flow_from_dataframe(subset="training",
                                                        **flow_options)
    validation_generator = train_datagen.flow_from_dataframe(
        subset="validation", **flow_options)

    if os.path.isfile(model_filename) and config['continue_training']:
        model = load_model(model_filename)
    else:
        model = Sequential()
        model.add(
            ResNet50V2(include_top=False,
                       input_shape=(input_height, input_width, 3)))
        model.add(Flatten())
        model.add(Dense(units=512, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(units=1))

        model.compile(optimizer=Adam(learning_rate=config['learning_rate']),
                      loss='mean_squared_error',
                      metrics=['accuracy'])
        model.save(model_filename)

    # construct the set of callbacks
    from helpers.epochcheckpoint import EpochCheckpoint
    callbacks = [
        EpochCheckpoint(checkpoint_folder, every=1, startAt=0),
        WandbCallback(save_model=False)
    ]

    model.fit(train_generator,
              epochs=config['epoch'],
              validation_data=validation_generator,
              callbacks=callbacks,
              verbose=1,
              initial_epoch=config['initial_epoch'])

    model.save(model_filename)
def createModel(df, dir, savename, columns, types_num, epo=10, batch=32):
    """Train a VGG19-based classifier in two stages and save it.

    Stage one trains a dense head on top of the frozen VGG19 base; stage two
    unfreezes the base from 'block5_conv1' onwards and trains again with a
    small learning rate.

    Args:
        df: dataframe listing the images and their classes.
        dir: directory holding the image files.
        savename: path the trained model is saved to.
        columns: ``columns[0]`` is the filename column, ``columns[1]`` the
            class column.
        types_num (int): number of output classes.
        epo (int): epochs per training stage.
        batch (int): batch size.

    Returns:
        The history of the fine-tuning stage, or None when an exception was
        raised while creating the generators or training (the exception is
        printed, not re-raised).
    """
    DATASET_LOCATION = dir
    BATCH_SIZE = batch
    IMAGE_SIZE = (128, 128)
    INPUT_SHAPE = (128, 128, 3)
    EPOCHS = epo

    # Build the base from the convolutional model without its top classifier.
    conv_base = VGG19(weights='imagenet',
                      include_top=False,
                      input_shape=INPUT_SHAPE)
    # Frozen base for the first training stage.
    conv_base.trainable = False
    for layer in conv_base.layers:
        layer.trainable = False

    model = Sequential()
    model.add(conv_base)
    # Convert the 3D feature maps to 1D feature vectors.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(types_num, activation='softmax'))

    model.compile(
        loss=keras.losses.categorical_crossentropy,
        # `Adam` is the optimizer class; the lowercase `adam` alias is not
        # available in every Keras version.
        optimizer=keras.optimizers.Adam(),
        metrics=["accuracy"],
    )
    print('Initialized model\n')

    # separate in training and testing
    train_df, test_df = train_test_split(df, test_size=0.35, random_state=40)

    # data augmentation - to provide more samples
    train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    # validation data must not be augmented, only rescaled
    test_datagen = ImageDataGenerator(rescale=1. / 255)
    print(
        'Created data augmentation method, now we have more data! Cool huh?\n')

    # read the files of a directory using flow_from_dataframe:
    # first for training, then for validation
    try:
        train_generator = train_datagen.flow_from_dataframe(
            train_df,
            DATASET_LOCATION,
            x_col=columns[0],
            y_col=columns[1],
            target_size=IMAGE_SIZE,
            class_mode="categorical",
            batch_size=BATCH_SIZE,
        )
        print('Created set for teaching\n')

        test_generator = test_datagen.flow_from_dataframe(
            test_df,
            DATASET_LOCATION,
            x_col=columns[0],
            y_col=columns[1],
            target_size=IMAGE_SIZE,
            class_mode="categorical",
            batch_size=BATCH_SIZE,
        )
        print('Created set for validation\n')

        # Stage one: train the dense head on the frozen base.
        history = model.fit_generator(
            train_generator,
            epochs=EPOCHS,
            validation_data=test_generator,
            validation_steps=test_df.shape[0] // BATCH_SIZE,
            steps_per_epoch=train_df.shape[0] // BATCH_SIZE,
            verbose=1,
        )
        print('Trained frozen model. Now unfroze some\n')

        # Stage two: the base itself has to be marked trainable again,
        # otherwise the per-layer flags below have no effect and nothing is
        # fine-tuned. Layers before 'block5_conv1' stay frozen.
        conv_base.trainable = True
        set_trainable = False
        for layer in conv_base.layers:
            if layer.name == 'block5_conv1':
                set_trainable = True
            layer.trainable = set_trainable

        # Recompile so the changed trainable flags take effect.
        model.compile(
            loss=keras.losses.categorical_crossentropy,
            optimizer=keras.optimizers.RMSprop(lr=1e-5),
            metrics=["accuracy"],
        )

        history = model.fit_generator(
            train_generator,
            epochs=EPOCHS,
            validation_data=test_generator,
            validation_steps=test_df.shape[0] // BATCH_SIZE,
            steps_per_epoch=train_df.shape[0] // BATCH_SIZE,
            verbose=1,
        )
        print('Trained model\n')

        # save model and architecture to single file
        model.save(savename)
        print("Saved model to disk\n")
        return history
    except Exception as e:
        # Best effort: report the failure and fall through (returns None).
        print(e)