Example #1
def get_image_data(data_path, read_mode=None, training_ratio=0.8):
    training_path = "/proc_training_set"
    testing_path = "/proc_testing_set"

    read_type = set_distinguisher(data_path, read_mode)['read_mode']

    process_info = setwise_preprocessing(data_path, True)

    input_shape = (process_info["height"], process_info["width"], 3)
    input_single = (process_info["height"], process_info["width"])
    num_classes = process_info["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    train_data = ImageDataGenerator(rescale=1. / 255,
                                    shear_range=0.2,
                                    zoom_range=0.2,
                                    horizontal_flip=True)
    test_data = ImageDataGenerator(rescale=1. / 255)

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if process_info["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if process_info["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    return X_train, X_test, process_info['height'], process_info[
        'width'], num_classes
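A minimal usage sketch for this helper follows; the ./pets dataset path, its proc_training_set/ and proc_testing_set/ class subfolders, and the availability of the preprocessing helpers it calls are assumptions for illustration, not part of the example above.

# Hypothetical call -- assumes ./pets/proc_training_set/<class>/ and
# ./pets/proc_testing_set/<class>/ already exist and that set_distinguisher,
# setwise_preprocessing, and ImageDataGenerator are importable.
X_train, X_test, height, width, num_classes = get_image_data("./pets")
print("image size:", height, "x", width, "classes:", num_classes)
print("training batches per epoch:", len(X_train))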
Example #2
def convolutional(instruction=None,
                  read_mode=None,
                  preprocess=True,
                  verbose=0,
                  data_path=os.getcwd(),
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True,
                  epochs=10,
                  height=None,
                  width=None):
    '''
    Body of the convolutional function, called by the neural network query
    when the data is presented as images.
    :param many parameters: used to preprocess, tune, plot, and parameterize the convolutional neural network being trained.
    :return: dictionary holding all the information for the finished model.
    '''

    logger("Generating datasets for classes")

    if preprocess:
        read_mode_info = set_distinguisher(data_path, read_mode)
        read_mode = read_mode_info["read_mode"]

        training_path = "/proc_training_set"
        testing_path = "/proc_testing_set"

        if read_mode == "setwise":
            processInfo = setwise_preprocessing(data_path, new_folders, height,
                                                width)
            if not new_folders:
                training_path = "/training_set"
                testing_path = "/testing_set"

        # if image dataset in form of csv
        elif read_mode == "csvwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException(f"Test ratio must be between 0 and 1.")
            processInfo = csv_preprocessing(read_mode_info["csv_path"],
                                            data_path, instruction,
                                            image_column, training_ratio,
                                            height, width)

        # if image dataset in form of one folder containing class folders
        elif read_mode == "classwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException(f"Test ratio must be between 0 and 1.")
            processInfo = classwise_preprocessing(data_path, training_ratio,
                                                  height, width)

    else:
        training_path = "/training_set"
        testing_path = "/testing_set"
        processInfo = already_processed(data_path)

    num_channels = 3
    color_mode = 'rgb'
    if processInfo["gray_scale"]:
        num_channels = 1
        color_mode = 'grayscale'

    input_shape = (processInfo["height"], processInfo["width"], num_channels)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    logger("Creating convolutional neural network dynamically")
    # Convolutional Neural Network
    model = Sequential()
    # model.add(
    #     Conv2D(
    #         64,
    #         kernel_size=3,
    #         activation="relu",
    #         input_shape=input_shape))
    # model.add(MaxPooling2D(pool_size=(2, 2)))
    # model.add(Conv2D(64, kernel_size=3, activation="relu"))
    # model.add(MaxPooling2D(pool_size=(2, 2)))
    # model.add(Flatten())
    # model.add(Dense(num_classes, activation="softmax"))
    # model.compile(
    #     optimizer="adam",
    #     loss=loss_func,
    #     metrics=['accuracy'])
    model.add(
        Conv2D(filters=64,
               kernel_size=5,
               activation="relu",
               input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(units=256, activation="relu"))
    model.add(Dropout(0.25))
    model.add(Dense(units=num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])
    logger("Located image data")

    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)

        logger('Dataset augmented through zoom, shear, flip, and rescale')
    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()

    logger("->", "Optimal image size identified: {}".format(input_shape))
    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(32 if processInfo["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(32 if processInfo["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    if epochs <= 0:
        raise BaseException("Number of epochs has to be greater than 0.")
    logger('Training image model')
    history = model.fit_generator(
        X_train,
        steps_per_epoch=X_train.n // X_train.batch_size,
        validation_data=X_test,
        validation_steps=X_test.n // X_test.batch_size,
        epochs=epochs,
        verbose=verbose)

    logger('->', 'Final training accuracy: {}'.format(
        history.history['accuracy'][-1]))
    logger('->', 'Final validation accuracy: {}'.format(
        history.history['val_accuracy'][-1]))
    # store values in the model dictionary

    logger("Stored model under 'convolutional_NN' key")
    clearLog()
    return {
        'id': generate_id(),
        'data_type': read_mode,
        'data_path': data_path,
        'data': {
            'train': X_train,
            'test': X_test
        },
        'shape': input_shape,
        "model": model,
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        },
        'num_classes': (2 if num_classes == 1 else num_classes),
        'data_sizes': {
            'train_size': processInfo['train_size'],
            'test_size': processInfo['test_size']
        }
    }
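A hedged usage sketch for this version of convolutional(); the ./flowers path and its class-folder layout are assumptions, and the call only works where the libra-style helpers used above (logger, set_distinguisher, the preprocessing functions) are importable.

# Hypothetical call -- ./flowers is assumed to hold one subfolder per class.
result = convolutional(data_path="./flowers", epochs=5, verbose=1)

model = result["model"]                        # trained Keras model
print(result["num_classes"], result["shape"])  # e.g. 5, (height, width, 3)
print("val acc:", result["accuracy"]["validation_accuracy"][-1])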
Example #3
def convolutional(instruction=None,
                  read_mode=None,
                  preprocess=True,
                  data_path=None,
                  verbose=0,
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True,
                  custom_arch=None,
                  pretrained=None,
                  epochs=10,
                  height=None,
                  width=None):
    '''
    Body of the convolutional function, called by the neural network query
    when the data is presented as images.
    :param many parameters: used to preprocess, tune, plot, and parameterize the convolutional neural network being trained.
    :return: dictionary holding all the information for the finished model.
    '''

    # data_path = get_folder_dir()

    logger("Generating datasets for classes")

    if pretrained:
        if not height:
            height = 224
        if not width:
            width = 224
        if height != 224 or width != 224:
            raise ValueError(
                "For pretrained models, both 'height' and 'width' must be 224."
            )

    if preprocess:
        if custom_arch:
            raise ValueError(
                "If 'custom_arch' is not None, 'preprocess' must be set to false."
            )

        read_mode_info = set_distinguisher(data_path, read_mode)
        read_mode = read_mode_info["read_mode"]

        training_path = "/proc_training_set"
        testing_path = "/proc_testing_set"

        if read_mode == "setwise":
            processInfo = setwise_preprocessing(data_path, new_folders, height,
                                                width)
            if not new_folders:
                training_path = "/training_set"
                testing_path = "/testing_set"

        # if image dataset in form of csv
        elif read_mode == "csvwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException(f"Test ratio must be between 0 and 1.")
            processInfo = csv_preprocessing(read_mode_info["csv_path"],
                                            data_path, instruction,
                                            image_column, training_ratio,
                                            height, width)

        # if image dataset in form of one folder containing class folders
        elif read_mode == "classwise":
            if training_ratio <= 0 or training_ratio >= 1:
                raise BaseException(f"Test ratio must be between 0 and 1.")
            processInfo = classwise_preprocessing(data_path, training_ratio,
                                                  height, width)

    else:
        training_path = "/training_set"
        testing_path = "/testing_set"
        processInfo = already_processed(data_path)

    num_channels = 3
    color_mode = 'rgb'
    if processInfo["gray_scale"]:
        num_channels = 1
        color_mode = 'grayscale'

    input_shape = (processInfo["height"], processInfo["width"], num_channels)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""
    output_layer_activation = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
        output_layer_activation = "softmax"
    elif num_classes == 2:
        num_classes = 1
        loss_func = "binary_crossentropy"
        output_layer_activation = "sigmoid"

    logger("Creating convolutional neural netwwork dynamically")

    # Convolutional Neural Network

    # Build model based on custom_arch configuration if given
    if custom_arch:
        with open(custom_arch, "r") as f:
            custom_arch_dict = json.load(f)
            custom_arch_json_string = json.dumps(custom_arch_dict)
            model = model_from_json(custom_arch_json_string)

    # Build an existing state-of-the-art model
    elif pretrained:

        arch_lower = pretrained.get('arch').lower()

        # If user specifies value of pretrained['weights'] as 'imagenet', weights pretrained on ImageNet will be used
        if 'weights' in pretrained and pretrained.get('weights') == 'imagenet':
            # Load ImageNet pretrained weights
            if arch_lower == "vggnet16":
                base_model = VGG16(include_top=False,
                                   weights='imagenet',
                                   input_shape=input_shape)
                x = Flatten()(base_model.output)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "vggnet19":
                base_model = VGG19(include_top=False,
                                   weights='imagenet',
                                   input_shape=input_shape)
                x = Flatten()(base_model.output)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                x = Dense(4096)(x)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet50":
                base_model = ResNet50(include_top=False,
                                      weights='imagenet',
                                      input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet101":
                base_model = ResNet101(include_top=False,
                                       weights='imagenet',
                                       input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            elif arch_lower == "resnet152":
                base_model = ResNet152(include_top=False,
                                       weights='imagenet',
                                       input_shape=input_shape)
                x = GlobalAveragePooling2D()(base_model.output)
                x = Dropout(0.5)(x)
                pred = Dense(num_classes,
                             activation=output_layer_activation)(x)
                model = Model(base_model.input, pred)
            else:
                raise ModuleNotFoundError("arch \'" + pretrained.get('arch') +
                                          "\' not supported.")

        else:
            # Randomly initialized weights
            if arch_lower == "vggnet16":
                model = VGG16(include_top=True,
                              weights=None,
                              classes=num_classes,
                              classifier_activation=output_layer_activation)
            elif arch_lower == "vggnet19":
                model = VGG19(include_top=True,
                              weights=None,
                              classes=num_classes,
                              classifier_activation=output_layer_activation)
            elif arch_lower == "resnet50":
                model = ResNet50(include_top=True,
                                 weights=None,
                                 classes=num_classes)
            elif arch_lower == "resnet101":
                model = ResNet101(include_top=True,
                                  weights=None,
                                  classes=num_classes)
            elif arch_lower == "resnet152":
                model = ResNet152(include_top=True,
                                  weights=None,
                                  classes=num_classes)
            else:
                raise ModuleNotFoundError("arch \'" + pretrained.get('arch') +
                                          "\' not supported.")
    else:
        model = Sequential()
        # model.add(
        #     Conv2D(
        #         64,
        #         kernel_size=3,
        #         activation="relu",
        #         input_shape=input_shape))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Conv2D(64, kernel_size=3, activation="relu"))
        # model.add(MaxPooling2D(pool_size=(2, 2)))
        # model.add(Flatten())
        # model.add(Dense(num_classes, activation="softmax"))
        # model.compile(
        #     optimizer="adam",
        #     loss=loss_func,
        #     metrics=['accuracy'])
        model.add(
            Conv2D(filters=64,
                   kernel_size=5,
                   activation="relu",
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Conv2D(filters=64, kernel_size=3, activation="relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        model.add(Dense(units=256, activation="relu"))
        model.add(Dropout(0.25))
        model.add(Dense(units=num_classes, activation="softmax"))

    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])

    logger("Located image data")

    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)

        logger('Dataset augmented through zoom, shear, flip, and rescale')
    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()

    logger("->", "Optimal image size identified: {}".format(input_shape))
    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(16 if processInfo["train_size"] >= 16 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode=color_mode,
        batch_size=(16 if processInfo["test_size"] >= 16 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    if epochs <= 0:
        raise BaseException("Number of epochs has to be greater than 0.")
    logger('Training image model')
    history = model.fit_generator(
        X_train,
        steps_per_epoch=X_train.n // X_train.batch_size,
        validation_data=X_test,
        validation_steps=X_test.n // X_test.batch_size,
        epochs=epochs,
        verbose=verbose)

    logger('->', 'Final training accuracy: {}'.format(
        history.history['accuracy'][-1]))
    logger('->', 'Final validation accuracy: {}'.format(
        history.history['val_accuracy'][-1]))
    # store values in the model dictionary

    logger("Stored model under 'convolutional_NN' key")
    clearLog()
    return {
        'id': generate_id(),
        'data_type': read_mode,
        'data_path': data_path,
        'data': {
            'train': X_train,
            'test': X_test
        },
        'shape': input_shape,
        "model": model,
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        },
        'num_classes': (2 if num_classes == 1 else num_classes),
        'data_sizes': {
            'train_size': processInfo['train_size'],
            'test_size': processInfo['test_size']
        }
    }
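The pretrained argument is consumed as a dict with an 'arch' key and an optional 'weights' key, and custom_arch as a path to a Keras model-config JSON. A sketch of both call styles follows; the ./xrays path and the JSON filename are assumptions.

# Transfer learning from ImageNet weights; images are forced to 224x224
# by the height/width check at the top of the function.
result = convolutional(data_path="./xrays",
                       pretrained={"arch": "resnet50", "weights": "imagenet"},
                       epochs=3)

# Custom architecture loaded with model_from_json; preprocess must be False
# in this path, so ./xrays is expected to already contain training_set/ and
# testing_set/ folders.
result = convolutional(data_path="./xrays",
                       custom_arch="model_config.json",
                       preprocess=False)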
Example #4
def convolutional(instruction=None,
                  read_mode=None,
                  text=None,
                  data_path=os.getcwd(),
                  new_folders=True,
                  image_column=None,
                  training_ratio=0.8,
                  augmentation=True):

    logger("Generating datasets for classes...")

    read_mode_info = set_distinguisher(data_path, read_mode)
    read_mode = read_mode_info["read_mode"]

    training_path = "/proc_training_set"
    testing_path = "/proc_testing_set"

    if read_mode == "setwise":
        processInfo = setwise_preprocessing(data_path, new_folders)
        if not new_folders:
            training_path = "/training_set"
            testing_path = "/testing_set"

    # if image dataset in form of csv
    elif read_mode == "pathwise or namewise":
        processInfo = csv_preprocessing(read_mode_info["csv_path"], data_path,
                                        instruction, image_column,
                                        training_ratio)

    # if image dataset in form of one folder containing class folders
    elif read_mode == "classwise":
        processInfo = classwise_preprocessing(data_path, training_ratio)

    input_shape = (processInfo["height"], processInfo["width"], 3)
    input_single = (processInfo["height"], processInfo["width"])
    num_classes = processInfo["num_categories"]
    loss_func = ""

    if num_classes > 2:
        loss_func = "categorical_crossentropy"
    elif num_classes == 2:
        loss_func = "binary_crossentropy"

    logger("Creating convolutional neural network dynamically...")
    # Convolutional Neural Network
    model = Sequential()
    model.add(
        Conv2D(64, kernel_size=3, activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=3, activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(optimizer="adam", loss=loss_func, metrics=['accuracy'])
    if augmentation:
        train_data = ImageDataGenerator(rescale=1. / 255,
                                        shear_range=0.2,
                                        zoom_range=0.2,
                                        horizontal_flip=True)
        test_data = ImageDataGenerator(rescale=1. / 255)

    else:
        train_data = ImageDataGenerator()
        test_data = ImageDataGenerator()
        """
        trainingImages = []
        train_labels = []
        validationImages = []
        test_labels = []

        for path in imgPaths:
        classLabel = path.split(os.path.sep)[-2]
        classes.add(classLabel)
        img = img_to_array(load_img(path, target_size=(64, 64)))

        if path.split(os.path.sep)[-3] == 'training_set':
            trainingImages.append(img)
            train_labels.append(classLabel)
        else:
            validationImages.append(img)
            test_labels.append(classLabel)

        trainingImages = np.array(trainingImages)
        train_labels = to_categorical(np.array(train_labels))
        validationImages = np.array(validationImages)
        test_labels = to_categorical(np.array(test_labels))
        model.compile(loss=’categorical_crossentropy’,
                  optimizer=’sgd’,
                  metrics=[‘accuracy’])
        history=model.fit(train_images, train_labels,
                  batch_size=100,
                  epochs=5,
                  verbose=1)
        """

    X_train = train_data.flow_from_directory(
        data_path + training_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if processInfo["train_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])
    X_test = test_data.flow_from_directory(
        data_path + testing_path,
        target_size=input_single,
        color_mode='rgb',
        batch_size=(32 if processInfo["test_size"] >= 32 else 1),
        class_mode=loss_func[:loss_func.find("_")])

    # print(X_train)
    history = model.fit(X_train,
                        steps_per_epoch=X_train.n // X_train.batch_size,
                        validation_data=X_test,
                        validation_steps=X_test.n // X_test.batch_size,
                        epochs=1)
    # store values in the model dictionary
    return {
        'id': generate_id(),
        "model": model,
        'num_classes': (2 if num_classes == 1 else num_classes),
        'losses': {
            'training_loss': history.history['loss'],
            'val_loss': history.history['val_loss']
        },
        'accuracy': {
            'training_accuracy': history.history['accuracy'],
            'validation_accuracy': history.history['val_accuracy']
        }
    }
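For completeness, a setwise call to this earlier variant might look like the sketch below; the ./dataset path and its folder names are assumptions, and note that this version hard-codes rgb images and a single training epoch.

# Hypothetical setwise layout:
#   ./dataset/training_set/<class_name>/*.jpg
#   ./dataset/testing_set/<class_name>/*.jpg
# With new_folders=True the preprocessing writes resized copies into
# proc_training_set/ and proc_testing_set/ before the generators read them.
result = convolutional(data_path="./dataset", new_folders=True)
print(result["accuracy"]["validation_accuracy"])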