Example #1
import os
import gc

import numpy as np
import cv2
import caer
import canaro
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler


def main():
    IMG_SIZE = (80, 80)
    channels = 1
    char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

    char_dict = {}
    for char in os.listdir(char_path):
        char_dict[char] = len(os.listdir(os.path.join(char_path, char)))

    # Sort in descending order
    char_dict = caer.sort_dict(char_dict, descending=True)

    # Get top 10 characters
    characters = []
    count = 0
    for i in char_dict:
        characters.append(i[0])
        count += 1
        if count >= 10:
            break

    # Create the training data
    train = caer.preprocess_from_dir(char_path,
                                     characters,
                                     channels=channels,
                                     IMG_SIZE=IMG_SIZE,
                                     isShuffle=True)

    # Separate feature set and labels
    featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

    # Normalize the featureSet to the (0, 1) range
    featureSet = caer.normalize(featureSet)
    labels = to_categorical(labels, len(characters))

    x_train, x_test, y_train, y_test = caer.train_val_split(featureSet,
                                                            labels,
                                                            val_ratio=0.2)

    del train
    del featureSet
    del labels
    gc.collect()

    BATCH_SIZE = 32
    EPOCHS = 10

    # Create new data generator
    data_gen = canaro.generators.imageDataGenerator()
    train_gen = data_gen.flow(np.array(x_train),
                              np.array(y_train),
                              batch_size=BATCH_SIZE)

    # Create a model; with 10 mutually exclusive one-hot classes,
    # categorical cross-entropy is the appropriate loss
    model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE,
                                              channels=channels,
                                              output_dim=len(characters),
                                              loss='categorical_crossentropy',
                                              decay=1e-6,
                                              learning_rate=0.001,
                                              momentum=0.9,
                                              nesterov=True)

    callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]

    # validation_steps is only needed when validation_data is a generator or
    # tf.data dataset; it can be omitted for NumPy arrays
    training = model.fit(train_gen,
                         steps_per_epoch=len(x_train) // BATCH_SIZE,
                         epochs=EPOCHS,
                         validation_data=(np.array(x_test), np.array(y_test)),
                         callbacks=callbacks_list)
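
    # NOTE: prepare() is used below but never defined in the original listing.
    # A minimal sketch, assuming the test image should mirror the training
    # preprocessing: grayscale, resized to IMG_SIZE, scaled into (0, 1), and
    # reshaped to a single-sample batch of shape (1, *IMG_SIZE, channels).
    def prepare(image):
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, IMG_SIZE)
        return image.reshape(1, *IMG_SIZE, channels) / 255.0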

    test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
    img = cv2.imread(test_path)

    predictions = model.predict(prepare(img))

    print(characters[np.argmax(predictions[0])])
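
The original listing never calls main(); a standard entry-point guard (not part of the source) makes it runnable as a script:

if __name__ == '__main__':
    main()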
Example #2
# (Assumes the setup from Example #1: char_path, IMG_SIZE, channels, and the
# char_dict of per-character image counts.)
import caer
import matplotlib.pyplot as plt

# Sort in descending order
char_dict = caer.sort_dict(char_dict, descending=True)
char_dict

# Get the 10 categories with the most images
characters = []
count = 0
for i in char_dict:
    characters.append(i[0])
    count += 1
    if count >= 10:
        break
characters

# Create the training data
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)

# Number of training samples
len(train)

# Visualizing the data (OpenCV doesn't display well in Jupyter notebooks)
plt.figure(figsize=(30,30))
plt.imshow(train[0][0], cmap='gray')
plt.show()

# Separating the array and corresponding labels
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)


# Normalize the featureSet ==> (0,1)
featureSet = caer.normalize(featureSet)
Example #3

# (Assumes a dataset setup like Example #1: base_path pointing at the Simpsons
# dataset, image_size, channels, and trial_dict holding the sorted
# per-character image counts.)
import numpy as np
import caer
import sklearn.model_selection as skm
from tensorflow.keras.utils import to_categorical

print(trial_dict)

characters = []
count = 0
for keys in trial_dict:
    characters.append(keys[0])
    count += 1
    if count >= 10:  # stop once the top 10 characters are collected
        break

print(characters)

# Create the training data
train = caer.preprocess_from_dir(DIR=base_path,
                                 classes=characters,
                                 IMG_SIZE=image_size,
                                 channels=channels,
                                 isShuffle=True)

print(f'Number of images used for training: {len(train)}')

# Separate the features and labels
features, labels = caer.sep_train(data=train,
                                  IMG_SIZE=image_size,
                                  channels=channels)

# Normalize the features and one-hot encode the integer labels
features = caer.normalize(features)
labels = to_categorical(y=labels, num_classes=len(characters))

# Split into training and validation sets and convert each piece to a NumPy array
split_data = skm.train_test_split(features, labels, test_size=.2)
x_train, x_val, y_train, y_val = (np.array(item) for item in split_data)
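
The snippet ends at the split; as a minimal continuation sketch, these arrays could feed the same canaro model used in Example #1 (the batch size and epoch count below are illustrative assumptions, not from the source):

import canaro
from tensorflow.keras.callbacks import LearningRateScheduler

model = canaro.models.createSimpsonsModel(IMG_SIZE=image_size,
                                          channels=channels,
                                          output_dim=len(characters),
                                          loss='categorical_crossentropy',
                                          decay=1e-6,
                                          learning_rate=0.001,
                                          momentum=0.9,
                                          nesterov=True)

model.fit(x_train, y_train,
          batch_size=32,    # illustrative assumption
          epochs=10,        # illustrative assumption
          validation_data=(x_val, y_val),
          callbacks=[LearningRateScheduler(canaro.lr_schedule)])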