# Example #1
def main():
    """Train a Simpsons-character classifier and predict one test image.

    Counts images per character directory, keeps the 10 best-represented
    characters, builds and normalizes the training data with caer, trains
    a CNN via canaro, then prints the predicted character for a single
    test image.
    """
    IMG_SIZE = (80, 80)  # target (width, height) for every image
    channels = 1         # grayscale
    char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

    # Map each character directory name to the number of images it holds.
    char_dict = {}
    for char in os.listdir(char_path):
        char_dict[char] = len(os.listdir(os.path.join(char_path, char)))

    # Sort in descending order of image count.
    char_dict = caer.sort_dict(char_dict, descending=True)

    # Keep the 10 characters with the most images.  sort_dict yields
    # (name, count) pairs, so take only the name from each.
    characters = [name for name, _ in char_dict[:10]]

    # Create the training data
    train = caer.preprocess_from_dir(char_path,
                                     characters,
                                     channels=channels,
                                     IMG_SIZE=IMG_SIZE,
                                     isShuffle=True)

    # Separate feature set and labels
    featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

    # Normalize the featureSet into (0, 1) and one-hot encode the labels.
    featureSet = caer.normalize(featureSet)
    labels = to_categorical(labels, len(characters))

    x_train, x_test, y_train, y_test = caer.train_val_split(featureSet,
                                                            labels,
                                                            val_ratio=0.2)

    # Free the large intermediate arrays before training.
    del train
    del featureSet
    del labels
    gc.collect()

    BATCH_SIZE = 32
    EPOCHS = 10

    # Create new data generator
    data_gen = canaro.generators.imageDataGenerator()
    train_gen = data_gen.flow(np.array(x_train),
                              np.array(y_train),
                              batch_size=BATCH_SIZE)

    # Create the model.  FIX: this is a 10-class problem with one-hot
    # labels, so the loss must be categorical_crossentropy; the original
    # binary_crossentropy treats each output independently and reports
    # misleadingly high accuracy on multi-class one-hot targets.
    model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE,
                                              channels=channels,
                                              output_dim=len(characters),
                                              loss='categorical_crossentropy',
                                              decay=1e-6,
                                              learning_rate=0.001,
                                              momentum=0.9,
                                              nesterov=True)

    callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

    # NOTE(review): validation_steps with in-memory array validation data
    # is redundant in recent Keras versions — confirm against the Keras
    # version in use before removing.
    training = model.fit(train_gen,
                         steps_per_epoch=len(x_train) // BATCH_SIZE,
                         epochs=EPOCHS,
                         validation_data=(np.array(x_test), np.array(y_test)),
                         validation_steps=len(y_test) // BATCH_SIZE,
                         callbacks=callbacks_list)

    test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
    img = cv2.imread(test_path)

    # `prepare` (defined elsewhere) presumably resizes/reshapes the image
    # to the model's input — TODO confirm.
    predictions = model.predict(prepare(img))

    # Index of the highest softmax score maps back to a character name.
    print(characters[np.argmax(predictions[0])])
# Example #2
# Notebook fragment: char_path, characters, channels, IMG_SIZE and the
# imports (caer, plt, to_categorical, gc) come from earlier cells.
# Build the shuffled training set from the character image directories.
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)

# Number of training samples
len(train)

# Visualizing the data (OpenCV doesn't display well in Jupyter notebooks)
plt.figure(figsize=(30,30))
plt.imshow(train[0][0], cmap='gray')
plt.show()

# Separating the array and corresponding labels
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)


# Normalize the featureSet ==> (0,1)
featureSet = caer.normalize(featureSet)
# Converting numerical labels to binary class vectors (one-hot encoding)
labels = to_categorical(labels, len(characters))

# Creating train and validation data (80/20 split)
x_train, x_val, y_train, y_val = caer.train_val_split(featureSet, labels, val_ratio=.2)

# Deleting variables to save memory
del train
del featureSet
del labels 
gc.collect()

# Useful variables when training
BATCH_SIZE = 32
EPOCHS = 10
# Create the training data.
# NOTE(review): this variant re-runs preprocessing using different names
# (base_path / image_size) — presumably defined in an earlier cell; verify.
train = caer.preprocess_from_dir(DIR=base_path,
                                 classes=characters,
                                 IMG_SIZE=image_size,
                                 channels=channels,
                                 isShuffle=True)

print(f'Number of images used for training: {len(train)}')

# Separate features and labels.
features, labels = caer.sep_train(data=train,
                                  IMG_SIZE=image_size,
                                  channels=channels)

# Normalize the features into (0, 1) and one-hot encode the integer labels.
features = caer.normalize(features)
labels = to_categorical(y=labels, num_classes=len(characters))
# 80/20 train/validation split.
# NOTE(review): skm is presumably sklearn.model_selection — confirm import.
split_data = skm.train_test_split(features, labels, test_size=.2)
x_train, x_val, y_train, y_val = (np.array(item) for item in split_data)

# Free the large intermediates to save memory.
del train
del features
del labels

# image data generator
Batch_SIZE = 32
datagen = canaro.generators.imageDataGenerator()
train_gen = datagen.flow(x_train, y_train, batch_size=Batch_SIZE)
# creation of model
model = canaro.models.createSimpsonsModel(IMG_SIZE=image_size,