import os
import json

# Keras / TensorFlow imports assumed by the functions below; helper utilities
# such as set_distinguisher, setwise_preprocessing, csv_preprocessing,
# classwise_preprocessing, already_processed, logger, clearLog, and
# generate_id come from this package's own modules.
from tensorflow.keras.models import Sequential, Model, model_from_json
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense,
                                     Dropout, GlobalAveragePooling2D)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import (VGG16, VGG19, ResNet50, ResNet101,
                                           ResNet152)


def get_image_data(data_path, read_mode=None, training_ratio=0.8):
    training_path = "/proc_training_set"
    testing_path = "/proc_testing_set"
    read_type = set_distinguisher(data_path, read_mode)['read_mode']
    process_info = setwise_preprocessing(data_path, True)

    input_shape = (process_info["height"], process_info["width"], 3)
    input_single = (process_info["height"], process_info["width"])
    num_classes = process_info["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    train_data = ImageDataGenerator(rescale=1. / 255,
                                    shear_range=0.2,
                                    zoom_range=0.2,
                                    horizontal_flip=True)
    test_data = ImageDataGenerator(rescale=1. / 255)

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if process_info["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if process_info["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    return (X_train, X_test, process_info['height'], process_info['width'],
            num_classes)
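
# A minimal usage sketch for get_image_data, assuming a "setwise" dataset
# laid out under a hypothetical "./images" directory with training_set/ and
# testing_set/ sub-folders.
def _demo_get_image_data():
    X_train, X_test, height, width, num_classes = get_image_data("./images")
    print(X_train.n, "training images,", X_test.n, "testing images,",
          num_classes, "classes")
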
def convolutional(instruction=None,
                  read_mode=None,
                  preprocess=True,
                  verbose=0,
                  data_path=os.getcwd(),
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True,
                  epochs=10,
                  height=None,
                  width=None):
    '''
    Body of the convolutional function that the neural network query calls
    when the data is presented as images.
    :param instruction, read_mode, ...: control preprocessing, augmentation,
        and the architecture and training of the convolutional neural network.
    :return: dictionary that holds all the information for the finished model.
    '''

    logger("Generating datasets for classes")

    if preprocess:
        read_mode_info = set_distinguisher(data_path, read_mode)
        read_mode = read_mode_info["read_mode"]
        training_path = "/proc_training_set"
        testing_path = "/proc_testing_set"

        if read_mode == "setwise":
            processInfo = setwise_preprocessing(data_path, new_folders,
                                                height, width)
            if not new_folders:
                training_path = "/training_set"
                testing_path = "/testing_set"

        # if image dataset in form of csv
        elif read_mode == "csvwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException("Training ratio must be between 0 and 1.")
            processInfo = csv_preprocessing(read_mode_info["csv_path"],
                                            data_path, instruction,
                                            image_column, training_ratio,
                                            height, width)

        # if image dataset in form of one folder containing class folders
        elif read_mode == "classwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException("Training ratio must be between 0 and 1.")
            processInfo = classwise_preprocessing(data_path, training_ratio,
                                                  height, width)
    else:
        training_path = "/training_set"
        testing_path = "/testing_set"
        processInfo = already_processed(data_path)

    num_channels = 3
    color_mode = 'rgb'
    if processInfo["gray_scale"]:
        num_channels = 1
        color_mode = 'grayscale'
    input_shape = (processInfo["height"], processInfo["width"], num_channels)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    logger("Creating convolutional neural network dynamically")

    # Convolutional Neural Network
    model = Sequential()
    model.add(
        Conv2D(filters=64,
               kernel_size=5,
               activation="relu",
               input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(units=256, activation="relu"))
    model.add(Dropout(0.25))
    model.add(Dense(units=num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])

    logger("Located image data")

    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)
        logger('Dataset augmented through zoom, shear, flip, and rescale')
    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()

    logger("->", "Optimal image size identified: {}".format(input_shape))

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(32 if processInfo["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(32 if processInfo["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    if epochs <= 0:
        raise BaseException("Number of epochs has to be greater than 0.")

    logger('Training image model')
    # model.fit accepts Python generators directly (fit_generator is deprecated)
    history = model.fit(
        X_train,
        steps_per_epoch=X_train.n // X_train.batch_size,
        validation_data=X_test,
        validation_steps=X_test.n // X_test.batch_size,
        epochs=epochs,
        verbose=verbose)

    logger('->', 'Final training accuracy: {}'.format(
        history.history['accuracy'][-1]))
    logger('->', 'Final validation accuracy: {}'.format(
        history.history['val_accuracy'][-1]))

    # store values in the model dictionary
    logger("Stored model under 'convolutional_NN' key")
    clearLog()
    return {
        'id': generate_id(),
        'data_type': read_mode,
        'data_path': data_path,
        'data': {
            'train': X_train,
            'test': X_test
        },
        'shape': input_shape,
        'model': model,
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        },
        'num_classes': (2 if num_classes == 1 else num_classes),
        'data_sizes': {
            'train_size': processInfo['train_size'],
            'test_size': processInfo['test_size']
        }
    }
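
# A minimal usage sketch of the function above, assuming a classwise dataset
# (one sub-folder of images per class); the "./flowers" path is hypothetical.
def _demo_convolutional_basic():
    model_info = convolutional(data_path="./flowers",
                               read_mode="classwise",
                               epochs=5,
                               verbose=1)
    print("final validation accuracy:",
          model_info['accuracy']['validation_accuracy'][-1])
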
def convolutional(instruction=None,
                  read_mode=None,
                  preprocess=True,
                  data_path=None,
                  verbose=0,
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True,
                  custom_arch=None,
                  pretrained=None,
                  epochs=10,
                  height=None,
                  width=None):
    '''
    Body of the convolutional function that the neural network query calls
    when the data is presented as images.
    :param instruction, read_mode, ...: control preprocessing, augmentation,
        and the architecture and training of the convolutional neural network.
    :return: dictionary that holds all the information for the finished model.
    '''

    logger("Generating datasets for classes")

    if pretrained:
        if not height:
            height = 224
        if not width:
            width = 224
        if height != 224 or width != 224:
            raise ValueError(
                "For pretrained models, both 'height' and 'width' must be 224."
            )

    if preprocess:
        if custom_arch:
            raise ValueError(
                "If 'custom_arch' is not None, 'preprocess' must be set to false."
            )

        read_mode_info = set_distinguisher(data_path, read_mode)
        read_mode = read_mode_info["read_mode"]
        training_path = "/proc_training_set"
        testing_path = "/proc_testing_set"

        if read_mode == "setwise":
            processInfo = setwise_preprocessing(data_path, new_folders,
                                                height, width)
            if not new_folders:
                training_path = "/training_set"
                testing_path = "/testing_set"

        # if image dataset in form of csv
        elif read_mode == "csvwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException("Training ratio must be between 0 and 1.")
            processInfo = csv_preprocessing(read_mode_info["csv_path"],
                                            data_path, instruction,
                                            image_column, training_ratio,
                                            height, width)

        # if image dataset in form of one folder containing class folders
        elif read_mode == "classwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException("Training ratio must be between 0 and 1.")
            processInfo = classwise_preprocessing(data_path, training_ratio,
                                                  height, width)
    else:
        training_path = "/training_set"
        testing_path = "/testing_set"
        processInfo = already_processed(data_path)

    num_channels = 3
    color_mode = 'rgb'
    if processInfo["gray_scale"]:
        num_channels = 1
        color_mode = 'grayscale'
    input_shape = (processInfo["height"], processInfo["width"], num_channels)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""
    output_layer_activation = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
        output_layer_activation = "softmax"
    elif num_classes == 2:
        num_classes = 1
        loss_func = "binary_crossentropy"
        output_layer_activation = "sigmoid"

    logger("Creating convolutional neural network dynamically")

    # Convolutional Neural Network
    # Build model based on custom_arch configuration if given
    if custom_arch:
        with open(custom_arch, "r") as f:
            custom_arch_dict = json.load(f)
            custom_arch_json_string = json.dumps(custom_arch_dict)
            model = model_from_json(custom_arch_json_string)

    # Build an existing state-of-the-art model
    elif pretrained:

        arch_lower = pretrained.get('arch').lower()

        # If the user specifies pretrained['weights'] as 'imagenet',
        # weights pretrained on ImageNet will be used
        if 'weights' in pretrained and pretrained.get('weights') == 'imagenet':
            # Load ImageNet pretrained weights
            if arch_lower == "vggnet16":
                base_model = VGG16(include_top=False,
                                   weights='imagenet',
                                   input_shape=input_shape)
                x = Flatten()(base_model.output)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "vggnet19":
                base_model = VGG19(include_top=False,
                                   weights='imagenet',
                                   input_shape=input_shape)
                x = Flatten()(base_model.output)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet50":
                base_model = ResNet50(include_top=False,
                                      weights='imagenet',
                                      input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet101":
                base_model = ResNet101(include_top=False,
                                       weights='imagenet',
                                       input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet152":
                base_model = ResNet152(include_top=False,
                                       weights='imagenet',
                                       input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            else:
                raise ModuleNotFoundError("arch '" + pretrained.get('arch') +
                                          "' not supported.")

        else:
            # Randomly initialized weights
            if arch_lower == "vggnet16":
                model = VGG16(include_top=True,
                              weights=None,
                              classes=num_classes,
                              classifier_activation=output_layer_activation)
            elif arch_lower == "vggnet19":
                model = VGG19(include_top=True,
                              weights=None,
                              classes=num_classes,
                              classifier_activation=output_layer_activation)
            elif arch_lower == "resnet50":
                model = ResNet50(include_top=True,
                                 weights=None,
                                 classes=num_classes)
            elif arch_lower == "resnet101":
                model = ResNet101(include_top=True,
                                  weights=None,
                                  classes=num_classes)
            elif arch_lower == "resnet152":
                model = ResNet152(include_top=True,
                                  weights=None,
                                  classes=num_classes)
            else:
                raise ModuleNotFoundError("arch '" + pretrained.get('arch') +
                                          "' not supported.")

    else:
        model = Sequential()
        model.add(
            Conv2D(filters=64,
                   kernel_size=5,
                   activation="relu",
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(units=256, activation="relu"))
        model.add(Dropout(0.25))
        # sigmoid head for binary classification, softmax for multiclass
        model.add(Dense(units=num_classes,
                        activation=output_layer_activation))

    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])

    logger("Located image data")

    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)
        logger('Dataset augmented through zoom, shear, flip, and rescale')
    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()

    logger("->", "Optimal image size identified: {}".format(input_shape))

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(16 if processInfo["train_size"] >= 16 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(16 if processInfo["test_size"] >= 16 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    if epochs <= 0:
        raise BaseException("Number of epochs has to be greater than 0.")

    logger('Training image model')
    # model.fit accepts Python generators directly (fit_generator is deprecated)
    history = model.fit(
        X_train,
        steps_per_epoch=X_train.n // X_train.batch_size,
        validation_data=X_test,
        validation_steps=X_test.n // X_test.batch_size,
        epochs=epochs,
        verbose=verbose)

    logger('->', 'Final training accuracy: {}'.format(
        history.history['accuracy'][-1]))
    logger('->', 'Final validation accuracy: {}'.format(
        history.history['val_accuracy'][-1]))

    # store values in the model dictionary
    logger("Stored model under 'convolutional_NN' key")
    clearLog()
    return {
        'id': generate_id(),
        'data_type': read_mode,
        'data_path': data_path,
        'data': {
            'train': X_train,
            'test': X_test
        },
        'shape': input_shape,
        'model': model,
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        },
        'num_classes': (2 if num_classes == 1 else num_classes),
        'data_sizes': {
            'train_size': processInfo['train_size'],
            'test_size': processInfo['test_size']
        }
    }
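
# A minimal usage sketch of the pretrained path above: an ImageNet-initialized
# ResNet50 backbone with a fresh classification head. The "./xrays" path is
# hypothetical; height and width default to the required 224.
def _demo_convolutional_pretrained():
    model_info = convolutional(data_path="./xrays",
                               pretrained={'arch': 'resnet50',
                                           'weights': 'imagenet'},
                               epochs=3)
    model_info['model'].summary()
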
def convolutional(instruction=None,
                  read_mode=None,
                  text=None,
                  data_path=os.getcwd(),
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True):
    logger("Generating datasets for classes...")

    read_mode_info = set_distinguisher(data_path, read_mode)
    read_mode = read_mode_info["read_mode"]
    training_path = "/proc_training_set"
    testing_path = "/proc_testing_set"

    if read_mode == "setwise":
        processInfo = setwise_preprocessing(data_path, new_folders)
        if not new_folders:
            training_path = "/training_set"
            testing_path = "/testing_set"

    # if image dataset in form of csv
    elif read_mode == "pathwise" or read_mode == "namewise":
        processInfo = csv_preprocessing(read_mode_info["csv_path"], data_path,
                                        instruction, image_column,
                                        training_ratio)

    # if image dataset in form of one folder containing class folders
    elif read_mode == "classwise":
        processInfo = classwise_preprocessing(data_path, training_ratio)

    input_shape = (processInfo["height"], processInfo["width"], 3)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    logger("Creating convolutional neural network dynamically...")

    # Convolutional Neural Network
    model = Sequential()
    model.add(
        Conv2D(64, kernel_size=3, activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])

    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)
    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if processInfo["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if processInfo["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    history = model.fit(X_train,
                        steps_per_epoch=X_train.n // X_train.batch_size,
                        validation_data=X_test,
                        validation_steps=X_test.n // X_test.batch_size,
                        epochs=1)

    # store values in the model dictionary
    return {
        'id': generate_id(),
        'model': model,
        'num_classes': (2 if num_classes == 1 else num_classes),
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        }
    }
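
# A minimal usage sketch of this early version, assuming a setwise dataset at
# a hypothetical "./cats_and_dogs" directory.
def _demo_convolutional_early():
    result = convolutional(data_path="./cats_and_dogs")
    print(result['num_classes'], "classes; final validation accuracy:",
          result['accuracy']['validation_accuracy'][-1])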