def main():
    """Train a top-10 Simpsons-character CNN and classify one test image.

    Pipeline: count images per character directory, keep the 10 largest
    classes, preprocess with caer, train a canaro model, then predict the
    class of a single held-out test image and print the character name.

    Side effects: reads the dataset from disk, trains a model (slow),
    prints one character name. Returns None.
    """
    IMG_SIZE = (80, 80)
    channels = 1  # grayscale
    char_path = r'../input/the-simpsons-characters-dataset/simpsons_dataset'

    # Map each character directory to its image count.
    char_dict = {}
    for char in os.listdir(char_path):
        char_dict[char] = len(os.listdir(os.path.join(char_path, char)))

    # Sort in descending order of image count.
    char_dict = caer.sort_dict(char_dict, descending=True)

    # Keep the 10 characters with the most training images.
    characters = []
    count = 0
    for i in char_dict:
        characters.append(i[0])
        count += 1
        if count >= 10:
            break

    # Create the training data (resized, channel-reduced, shuffled).
    train = caer.preprocess_from_dir(char_path, characters, channels=channels,
                                     IMG_SIZE=IMG_SIZE, isShuffle=True)

    # Separate feature set and labels.
    featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

    # Normalize the featureSet into (0, 1) and one-hot encode the labels.
    featureSet = caer.normalize(featureSet)
    labels = to_categorical(labels, len(characters))

    x_train, x_test, y_train, y_test = caer.train_val_split(featureSet, labels,
                                                            val_ratio=0.2)

    # Free the large intermediates before training to reduce peak memory.
    del train
    del featureSet
    del labels
    gc.collect()

    BATCH_SIZE = 32
    EPOCHS = 10

    # Data generator feeding augmented batches to model.fit.
    data_gen = canaro.generators.imageDataGenerator()
    train_gen = data_gen.flow(np.array(x_train), np.array(y_train),
                              batch_size=BATCH_SIZE)

    # BUG FIX: labels are one-hot vectors over len(characters)=10 classes and
    # the prediction below picks argmax, i.e. this is multi-class (softmax)
    # classification — 'binary_crossentropy' is the wrong loss here and
    # inflates reported accuracy; 'categorical_crossentropy' is correct.
    model = canaro.models.createSimpsonsModel(IMG_SIZE=IMG_SIZE,
                                              channels=channels,
                                              output_dim=len(characters),
                                              loss='categorical_crossentropy',
                                              decay=1e-6,
                                              learning_rate=0.001,
                                              momentum=0.9,
                                              nesterov=True)

    callbacks_list = [LearningRateScheduler(canaro.lr_schedule)]
    training = model.fit(train_gen,
                         steps_per_epoch=len(x_train) // BATCH_SIZE,
                         epochs=EPOCHS,
                         validation_data=(np.array(x_test), np.array(y_test)),
                         validation_steps=len(y_test) // BATCH_SIZE,
                         callbacks=callbacks_list)

    test_path = r'../input/the-simpsons-characters-dataset/kaggle_simpson_testset/kaggle_simpson_testset/charles_montgomery_burns_0.jpg'
    img = cv2.imread(test_path)
    # NOTE(review): `prepare` is not defined in this chunk — presumably a
    # helper elsewhere that resizes/reshapes img to (1, *IMG_SIZE, channels)
    # to match the network input; confirm it exists before running.
    predictions = model.predict(prepare(img))
    print(characters[np.argmax(predictions[0])])
# NOTE(review): notebook-style script fragment. The `break` below is the tail
# of a "collect top-10 character names" loop whose header lies outside this
# view, so this chunk is not runnable on its own.
        break

# Bare expression — displays the selected character names in a notebook cell.
characters

# Create the training data
train = caer.preprocess_from_dir(char_path, characters, channels=channels, IMG_SIZE=IMG_SIZE, isShuffle=True)

# Number of training samples (bare expression — notebook display only)
len(train)

# Visualizing the data (OpenCV doesn't display well in Jupyter notebooks)
plt.figure(figsize=(30,30))
plt.imshow(train[0][0], cmap='gray')
plt.show()

# Separating the array and corresponding labels
featureSet, labels = caer.sep_train(train, IMG_SIZE=IMG_SIZE)

# Normalize the featureSet ==> (0,1)
featureSet = caer.normalize(featureSet)

# Converting numerical labels to binary class vectors (one-hot)
labels = to_categorical(labels, len(characters))

# Creating train and validation data (80/20 split)
x_train, x_val, y_train, y_val = caer.train_val_split(featureSet, labels, val_ratio=.2)

# Deleting variables to save memory before training
del train
del featureSet
del labels
gc.collect()
# NOTE(review): alternate notebook-style version of the same preprocessing
# pipeline. The `break` below belongs to a character-selection loop whose
# header lies outside this view, so this chunk is not runnable on its own.
# It also references `base_path`, `image_size`, and `skm` (presumably
# sklearn.model_selection) defined elsewhere — confirm before running.
        break

print(characters)

# create a training data
train = caer.preprocess_from_dir(DIR=base_path, classes=characters, IMG_SIZE=image_size, channels=channels, isShuffle=True)
print(f'Number of images used for training: {len(train)}')

# separate features and labels
features, labels = caer.sep_train(data=train, IMG_SIZE=image_size, channels=channels)

# normalize the features and convert labels from numerical integers to one-hot vectors
features = caer.normalize(features)
labels = to_categorical(y=labels, num_classes=len(characters))

# 80/20 train/validation split; each of the four pieces is converted to a
# numpy array via the generator expression on the right-hand side.
split_data = skm.train_test_split(features, labels, test_size=.2)
x_train, x_val, y_train, y_val = (np.array(item) for item in split_data)

# release large intermediates to save memory
del train
del features
del labels

# image data generator
Batch_SIZE = 32
datagen = canaro.generators.imageDataGenerator()