Example #1
0
def main():
    """Train a CNN to classify the top-10 Simpsons characters, then predict one image.

    Relies on the `caer`/`canaro` helper libraries and expects the Kaggle
    "Simpsons characters" dataset layout under ``char_path``. Has no return
    value; prints the predicted character name as a side effect.
    """
    IMG_SIZE = (80, 80)
    channels = 1  # grayscale input
    char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

    # Count the number of images available per character folder.
    char_dict = {}
    for char in os.listdir(char_path):
        char_dict[char] = len(os.listdir(os.path.join(char_path, char)))

    # Sort in descending order of image count.
    char_dict = caer.sort_dict(char_dict, descending=True)

    # Keep the 10 characters with the most images (sort_dict yields
    # (name, count) pairs, hence the [0] to extract the name).
    characters = []
    count = 0
    for i in char_dict:
        characters.append(i[0])
        count += 1
        if count >= 10:
            break

    # Create the training data (resized, grayscale, shuffled).
    train = caer.preprocess_from_dir(char_path,
                                     characters,
                                     channels=channels,
                                     IMG_SIZE=IMG_SIZE,
                                     isShuffle=True)

    # Separate feature set and labels.
    featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

    # Normalize the featureSet into (0, 1) and one-hot encode the labels.
    featureSet = caer.normalize(featureSet)
    labels = to_categorical(labels, len(characters))

    x_train, x_test, y_train, y_test = caer.train_val_split(featureSet,
                                                            labels,
                                                            val_ratio=0.2)

    # Free the large intermediates before training to reduce peak memory.
    del train
    del featureSet
    del labels
    gc.collect()

    BATCH_SIZE = 32
    EPOCHS = 10

    # Create a new data generator for training-time augmentation/batching.
    data_gen = canaro.generators.imageDataGenerator()
    train_gen = data_gen.flow(np.array(x_train),
                              np.array(y_train),
                              batch_size=BATCH_SIZE)

    # Create the model.
    # BUG FIX: the original passed loss='binary_crossentropy', which is wrong
    # for a 10-class problem with one-hot labels (see to_categorical above);
    # categorical_crossentropy matches the label encoding.
    model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE,
                                              channels=channels,
                                              output_dim=len(characters),
                                              loss='categorical_crossentropy',
                                              decay=1e-6,
                                              learning_rate=0.001,
                                              momentum=0.9,
                                              nesterov=True)

    callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

    training = model.fit(train_gen,
                         steps_per_epoch=len(x_train) // BATCH_SIZE,
                         epochs=EPOCHS,
                         validation_data=(np.array(x_test), np.array(y_test)),
                         validation_steps=len(y_test) // BATCH_SIZE,
                         callbacks=callbacks_list)

    test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
    img = cv2.imread(test_path)

    # NOTE(review): `prepare` is not defined anywhere in this file — it must
    # resize/reshape `img` to (1, 80, 80, 1) for the model; confirm its source.
    predictions = model.predict(prepare(img))

    print(characters[np.argmax(predictions[0])])
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler


# Script-style training-data preparation (notebook export).
IMG_SIZE = (80, 80)
channels = 1  # grayscale images
char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

# Map each character folder to the number of images it contains.
char_dict = {
    char: len(os.listdir(os.path.join(char_path, char)))
    for char in os.listdir(char_path)
}

# Sort in descending order of image count.
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict  # notebook-style display; no effect in a plain script

# Keep the 10 categories with the most images; sort_dict yields
# (name, count) pairs, so entry[0] is the character name.
characters = []
for idx, entry in enumerate(char_dict):
    if idx == 10:
        break
    characters.append(entry[0])
characters  # notebook-style display; no effect in a plain script

# Create the training data (resized, grayscale, shuffled).
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)
# Second (trial) preparation pass over a local copy of the dataset.
image_size = (80, 80)
# gray scale image, hence only 1 channel
channels = 1
# NOTE(review): 'simposons' looks like a typo for 'simpsons' — confirm the
# actual folder name on disk before changing it.
base_path = 'simposons/simpsons_dataset'

# Count images per character folder using glob.
trial_dict = {}
for file in os.listdir(base_path):
    subfile_path = os.path.join(base_path, file)
    subfiles = glob('{}/*'.format(subfile_path))
    trial_dict[file] = len(subfiles)

print(trial_dict)
# sort in descending order
trial_dict = caer.sort_dict(unsorted_dict=trial_dict, descending=True)
print(trial_dict)

# Take the 10 most-populated categories.
# BUG FIX: the original condition was `count > 10`, which collected 11
# characters; `>= 10` matches the stated intent and the identical loop
# earlier in this file.
characters = []
count = 0
for keys in trial_dict:
    characters.append(keys[0])
    count += 1
    if count >= 10:
        break

print(characters)

# create a training data
train = caer.preprocess_from_dir(DIR=base_path,
                                 classes=characters,