def predict():
    """Classify the images in ``random_images/`` and return the first label.

    An 8-class ``Classifier`` is created and ``models/model.h5`` is loaded
    into it.  The prepared images are flattened to ``data_dimension**2``
    float32 values per sample before prediction.

    Returns:
        str: the arg-max class index of the first prepared image.
    """
    model = Classifier(number_of_classes=8)
    model.load_model('models/model.h5')

    # Only the feature array is needed here; the second value is ignored.
    features, _images = model.prepare_images_from_dir('random_images/')
    sample_count = features.shape[0]
    flat_features = features.reshape(
        (sample_count, data_dimension**2)).astype(np.float32)

    predicted_labels = model.predict(flat_features).argmax(1)
    return str(predicted_labels[0])
# Report the shapes of the training and held-out arrays before flattening.
print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

# Flatten every sample to data_dimension**2 float32 values.
X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2)).astype(np.float32)

# Load models/model.h5, fit on the training arrays, and save back to the
# same path.
classifier = Classifier(number_of_classes=8)
classifier.load_model('models/model.h5')
parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None,
}
classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

# Other callers unpack prepare_images_from_dir as (features, images); calling
# .reshape on that tuple would raise AttributeError, so accept either form.
# NOTE(review): confirm the return type against the Classifier in use.
prepared = classifier.prepare_images_from_dir('random_images/')
sample_X = prepared[0] if isinstance(prepared, tuple) else prepared
sample_X = sample_X.reshape(
    (sample_X.shape[0], data_dimension**2)).astype(np.float32)
print(classifier.predict(sample_X).argmax(1))
def _load_grayscale_pixels(path):
    """Return the image at ``path`` as a scaled grayscale pixel array.

    The image is converted to mode ``'L'``, resized to
    ``data_dimension x data_dimension`` and divided by 255.  The result has
    shape ``(data_dimension, data_dimension, 1)`` and is indexed ``[x][y]``
    (column first), matching PIL's ``pixels[x, y]`` access order.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(path) as source:
        resized = source.convert('L').resize((data_dimension, data_dimension))
        # np.asarray yields [row][column]; transpose to keep the [x][y] layout.
        pixels = np.asarray(resized).T
    return pixels.reshape((data_dimension, data_dimension, 1)) / 255


def train(classes):
    """Build the dataset from ``natural_images/``, then train and save a model.

    Each sub-directory of ``natural_images/`` is one class; its position in
    the sorted directory listing is the integer label.  The images are split
    60/40 into train/test sets, written to ``processed_data/`` as ``.npy``
    files, reloaded, flattened and used to fit a new ``Classifier``, which is
    saved to ``models/model.h5``.

    Args:
        classes: NOTE(review): this argument is overwritten with
            ``int(request.view_args['classes'])`` before it is used, so the
            value passed in has no effect; confirm this is intended.

    Returns:
        str: the literal ``"Training is complete"``.
    """
    dir_path = 'natural_images/'
    output_path = 'processed_data/'

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # label <-> directory mapping stable from one run to the next.
    sub_dir_list = sorted(os.listdir(dir_path))
    images = []
    labels = []
    for label, sub_dir in enumerate(sub_dir_list):
        class_dir = os.path.join(dir_path, sub_dir)
        for image_name in os.listdir(class_dir):
            images.append(
                _load_grayscale_pixels(os.path.join(class_dir, image_name)))
            labels.append(label)
        print(str(label) + " : " + sub_dir)

    x = np.array(images)
    y = np.array(
        keras.utils.to_categorical(np.array(labels),
                                   num_classes=len(sub_dir_list)))
    train_features, test_features, train_labels, test_labels = train_test_split(
        x, y, test_size=0.4)

    # np.save does not create missing directories.
    os.makedirs(output_path, exist_ok=True)
    np.save('{}x.npy'.format(output_path), train_features)
    np.save('{}y.npy'.format(output_path), train_labels)
    np.save('{}test_x.npy'.format(output_path), test_features)
    np.save('{}test_y.npy'.format(output_path), test_labels)

    # Reload the arrays that were just written to disk.
    X = np.load('processed_data/x.npy')
    Y = np.load('processed_data/y.npy')
    test_X = np.load('processed_data/test_x.npy')
    test_Y = np.load('processed_data/test_y.npy')
    print(X.shape)
    print(Y.shape)
    print(test_X.shape)
    print(test_Y.shape)

    # Flatten every sample to data_dimension**2 float32 values.
    X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
    test_X = test_X.reshape(
        (test_X.shape[0], data_dimension**2)).astype(np.float32)

    classes = int(request.view_args['classes'])
    classifier = Classifier(number_of_classes=classes)
    # The model file is written once before fitting and again afterwards.
    classifier.save_model('models/model.h5')
    parameters = {
        'batch_size': 250,
        'epochs': 10,
        'callbacks': None,
        'val_data': None,
    }
    classifier.fit(X, Y, hyperparameters=parameters)
    classifier.save_model('models/model.h5')

    loss, accuracy = classifier.evaluate(test_X, test_Y)
    print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

    # Print predictions for the sample images as a quick sanity check.
    sample_X, _sample_images = classifier.prepare_images_from_dir(
        'random_images/')
    sample_X = sample_X.reshape(
        (sample_X.shape[0], data_dimension**2)).astype(np.float32)
    print(classifier.predict(sample_X).argmax(1))
    return "Training is complete"
from Model import Classifier
import numpy as np

# Side length of the square inputs; it also selects the data directory name.
data_dimension = 28

# Training and held-out arrays live under data<data_dimension>/.
X = np.load('data{}/x.npy'.format(data_dimension))
Y = np.load('data{}/y.npy'.format(data_dimension))
test_X = np.load('data{}/test_x.npy'.format(data_dimension))
test_Y = np.load('data{}/test_y.npy'.format(data_dimension))

# Flatten every sample to data_dimension**2 float32 values.
X = X.reshape((len(X), data_dimension**2)).astype(np.float32)
test_X = test_X.reshape((len(test_X), data_dimension**2)).astype(np.float32)

# Load models/model.h5 into a 2-class Classifier, fit it, and save the
# result under a separate file name.
classifier = Classifier(number_of_classes=2)
classifier.load_model('models/model.h5')

parameters = dict(batch_size=250, epochs=10, callbacks=None, val_data=None)
classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/0001.h5')

# Report the held-out metrics, then the predicted class index per test sample.
loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
print(classifier.predict(test_X).argmax(axis=1))
def run_cv_model_by_batch(args, train, test, folds, batch_col, feats,
                          sample_submission, nn_epochs, nn_batch_size):
    """Train one model per group-wise CV fold and save all predictions.

    For every fold a model is built with ``Classifier(shape_, args)``, fitted
    on the training groups and used to predict both the validation groups
    (accumulated into an out-of-fold matrix) and the test set (averaged over
    the folds).  Files written to the working directory:

    * ``train_tr_<i>.npz``: compressed target arrays (main + multitask)
    * ``model-wavenet_fold<k>.h5``: the fitted model of each fold
    * ``submission_wavenet.csv``: arg-max test predictions
    * ``Training_by_Epoch.csv``: per-epoch training history of every fold
    * ``Train_probs.npz`` / ``Test_probs.npz``: float16 class probabilities

    Args:
        args: dict read for the keys ``'Seed'``, ``'Folds'`` and
            ``'Multitask'`` (an iterable of row shifts); it is also passed
            to ``Classifier``.
        train: DataFrame with ``open_channels``, ``group`` and the ``feats``
            columns.
        test: DataFrame with ``group`` and the ``feats`` columns.
        folds: not used here; the fold count is taken from ``args['Folds']``.
        batch_col: not used here.
        feats: list of feature column names fed to the model.
        sample_submission: DataFrame whose ``open_channels`` column is
            overwritten with the predictions before it is written to CSV.
        nn_epochs: number of epochs passed to ``model.fit``.
        nn_batch_size: batch size passed to ``model.fit``.

    Returns:
        None.
    """
    training_time = time()
    seed_everything(args['Seed'])
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=config)
    tf.compat.v1.keras.backend.set_session(sess)

    # Out-of-fold and test probability matrices with 11 columns, one per
    # target class (open_channels 0 to 10).
    oof_ = np.zeros((len(train), 11))
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    group = train['group']

    # GroupKFold's constructor takes the number of folds as ``n_splits``.
    kf = GroupKFold(n_splits=args['Folds'])
    splits = list(kf.split(train, train[target], group))
    # Per fold: training group ids, validation group ids, and the original
    # validation row indices (used to place predictions in ``oof_``).
    new_splits = [
        [np.unique(group[tr_rows]), np.unique(group[val_rows]), val_rows]
        for tr_rows, val_rows in splits
    ]

    # Pivot the target column into one-hot columns so the net is trained as
    # a multiclass classifier (a single sparse vector with
    # SparseCategoricalCrossentropy would also work).
    target_cols = ['target_' + str(i) for i in range(11)]
    tr = pd.concat([pd.get_dummies(train.open_channels), train[['group']]],
                   axis=1)
    tr.columns = target_cols + ['group']

    def _targets_by_group(frame):
        """Stack the one-hot targets of each group into a float32 array."""
        return np.array(
            list(frame.groupby('group').apply(
                lambda g: g[target_cols].values))).astype(np.float32)

    # First entry: the unshifted targets for the main prediction task.
    train_tr_list = [_targets_by_group(tr)]

    # One additional target array per multitask shift.
    for shift_ in args['Multitask']:
        # Shift the targets by ``shift_`` rows; rows shifted in become 0.
        tr_copy = tr.copy()
        tr_copy[target_cols] = tr_copy.loc[:, target_cols].shift(
            shift_).fillna(0)
        train_tr_list.append(_targets_by_group(tr_copy))
    gc.collect()

    start = time()
    n_targets = len(train_tr_list)
    # Index-based loop so no loop variable keeps an array alive after the
    # list is deleted below.
    for i in range(n_targets):
        np.savez_compressed('train_tr_{}'.format(i), a=train_tr_list[i])
    print(f'Took {time() - start} to clock')
    del train_tr_list
    gc.collect()

    # Reopen exactly the files written above (savez_compressed appends
    # ``.npz``); they are read back through their ``'a'`` key when sliced.
    train_tr = [
        np.load('train_tr_{}.npz'.format(i)) for i in range(n_targets)
    ]

    train = np.array(
        list(train.groupby('group').apply(lambda x: x[feats].values)))
    test = np.array(
        list(test.groupby('group').apply(lambda x: x[feats].values)))

    Training_df = []
    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x = train[tr_idx]
        train_y = [train_tr[i]['a'][tr_idx] for i in range(len(train_tr))]
        valid_x = train[val_idx]
        valid_y = [train_tr[i]['a'][val_idx] for i in range(len(train_tr))]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')
        gc.collect()

        # The input width is the number of features (dimension 2 of 0, 1, 2).
        shape_ = (None, train_x.shape[2])
        model = Classifier(shape_, args)
        # Learning rate follows the lr_schedule function.
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        H = model.fit(
            train_x,
            train_y,
            epochs=nn_epochs,
            callbacks=[
                cb_lr_schedule,
                # Custom evaluation metric computed for each epoch.
                MacroF1(model, train_x, train_y, valid_x, valid_y),
            ],
            batch_size=nn_batch_size,
            verbose=2,
            validation_data=(valid_x, valid_y))

        # Only the first model output is kept for the out-of-fold matrix.
        preds_f = model.predict(valid_x)
        preds_f = preds_f[0]
        print('Training fold {} completed. macro f1 score : {:1.5f}'.format(
            n_fold + 1, H.history['F1_val'][-1]))
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx, :] += preds_f

        te_preds = model.predict(test)
        te_preds = te_preds[0]
        model.save("model-wavenet_fold{}.h5".format(n_fold + 1))
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])
        # Average the test predictions over the folds.
        preds_ += te_preds / args['Folds']

        # Keep the training dynamics of this fold.
        df = pd.DataFrame.from_dict(H.history)
        df['Fold'] = [n_fold] * df.shape[0]
        Training_df.append(df)

        # Free memory before the next fold.
        del model, train_x, train_y, valid_x, valid_y
        gc.collect()

    print('Training completed...')
    print(f'Training time: {time() - training_time}')

    # Calculate the out-of-fold macro F1 score.
    print('Collection final submissions...')
    # axis 2 for the 3-dimensional target array and axis 1 for the
    # 2-dimensional prediction array (extracting the best class).
    f1_score_ = f1_score(
        np.argmax(train_tr[0]['a'], axis=2).reshape(-1),
        np.argmax(oof_, axis=1),
        average='macro')
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')
    sample_submission['open_channels'] = np.argmax(preds_, axis=1).astype(int)
    sample_submission.to_csv('submission_wavenet.csv',
                             index=False,
                             float_format='%.4f')

    # Create the dataframe for graphing training dynamics.
    Training_dynamics = pd.concat(Training_df)
    Training_dynamics.to_csv('Training_by_Epoch.csv', index=False)

    # Reduce the data footprint, then save the validation and test softmax
    # probabilities as compressed numpy files.
    save_start = time()
    oof_ = oof_.astype(np.float16)
    preds_ = preds_.astype(np.float16)

    # Saving the validation predictions and test predictions for a stacknet.
    print('Saving Validation Probs and test Probs to npz')
    np.savez_compressed("Train_probs.npz", train_probs=oof_)
    np.savez_compressed("Test_probs.npz", test_probs=preds_)
    print('Done Saving. Took {} seconds'.format(time() - save_start))