def predict():
    """Classify the images in ``random_images/`` and return the first result.

    An 8-class ``Classifier`` is created and its weights are restored from
    ``models/model.h5``.  The prepared image batch is flattened so that each
    sample becomes a float32 vector of ``data_dimension**2`` values before it
    is passed to the model.

    Returns:
        str: the arg-max class index of the first sample, as a string.
    """
    model = Classifier(number_of_classes=8)
    model.load_model('models/model.h5')

    # The second value returned by prepare_images_from_dir is not used here.
    batch, _ = model.prepare_images_from_dir('random_images/')
    flat_shape = (batch.shape[0], data_dimension**2)
    batch = batch.reshape(flat_shape).astype(np.float32)

    scores = model.predict(batch)
    predicted_classes = scores.argmax(1)
    return str(predicted_classes[0])
# Continue training the saved 8-class model on the pre-processed arrays,
# overwrite the checkpoint and report loss/accuracy on the test arrays.

# The paths contain no replacement fields, so the former
# ``.format(data_dimension)`` calls were no-ops and have been dropped.
X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

# Flatten every sample to a float32 vector of data_dimension**2 values.
X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2)).astype(np.float32)

classifier = Classifier(number_of_classes=8)
# Start from the existing checkpoint rather than from fresh weights.
classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None,
}
classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
# Script fragment: sets a TensorFlow logging environment variable, loads the
# pre-processed train/test arrays, prints their shapes and constructs a
# Classifier with number_of_classes=2 and maxlen=35.
import os
import datetime

# NOTE(review): presumably lowers TensorFlow's native log verbosity - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

classifier = Classifier(number_of_classes=2, maxlen=35)
# NOTE(review): in the source as received, everything from the '#' below to
# the end of the physical line is comment text, so the `parameters` dict is
# never executed.  The dict is also unterminated (no closing brace) and no
# fit/save call follows.  This looks like a line-collapsed, truncated copy of
# a longer script - restore it from the original before relying on it.
# classifier.load_model( 'models/model.h5' )
# parameters = {
#     'batch_size': 100,
#     'epochs': 100,
#     'callbacks': [
#         TensorBoard(log_dir=os.path.join(
#             "logs",
#             "fit",
#             datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
#         ))
#     ],
#     'val_data': (test_X, test_Y)
from Model import Classifier
import numpy as np

# Train a new Classifier on the pre-processed arrays, validating against the
# test arrays, and store the result in models/model.h5.

# Arrays are loaded in the order x, y, test_x, test_y.
X, Y, test_X, test_Y = (
    np.load(array_path)
    for array_path in (
        'processed_data/x.npy',
        'processed_data/y.npy',
        'processed_data/test_x.npy',
        'processed_data/test_y.npy',
    )
)

# One shape per line, in the same order as the arrays were loaded.
print(*(loaded.shape for loaded in (X, Y, test_X, test_Y)), sep='\n')

# The model's input length is taken from the second axis of X.
classifier = Classifier(input_length=X.shape[1])

parameters = {
    'batch_size': 500,
    'epochs': 10,
    'callbacks': None,
    'val_data': (test_X, test_Y),
}

classifier.fit(X, Y, parameters)
classifier.save_model('models/model.h5')
# Evaluate the saved 10-class model on the pre-processed test arrays.
# Training and re-saving are currently disabled (see the commented-out calls).
num_channels = 3

# Arrays are loaded in the order x, y, test_x, test_y.
X, Y, test_X, test_Y = (
    np.load(array_path)
    for array_path in (
        'processed_data/x.npy',
        'processed_data/y.npy',
        'processed_data/test_x.npy',
        'processed_data/test_y.npy',
    )
)

# One shape per line, in the same order as the arrays were loaded.
print(*(loaded.shape for loaded in (X, Y, test_X, test_Y)), sep='\n')

# Flatten every sample to data_dimension**2 * num_channels float32 values.
X = np.reshape(
    X, (X.shape[0], data_dimension**2 * num_channels)).astype(np.float32)
test_X = np.reshape(
    test_X,
    (test_X.shape[0], data_dimension**2 * num_channels)).astype(np.float32)

classifier = Classifier(number_of_classes=10)
classifier.load_model('models/model.h5')

# Only referenced by the disabled fit() call below.
parameters = {
    'batch_size': 120,
    'epochs': 3,
    'callbacks': None,  # [ TensorBoard( log_dir='logs/{}'.format( time.time() ) ) ] ,
    'val_data': (test_X, test_Y),
}

# classifier.fit( X , Y , hyperparameters=parameters )
# classifier.save_model( 'models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
def train(classes):
    """Build the dataset from ``natural_images/``, train a classifier, save it.

    The function

    1. walks every sub-directory of ``natural_images/`` (one label per
       sub-directory, numbered in ``os.listdir`` order), converts each image
       to greyscale, resizes it to ``data_dimension`` x ``data_dimension`` and
       scales the pixel values by 1/255;
    2. splits the samples 60/40 with ``train_test_split`` and writes the four
       arrays to ``processed_data/``;
    3. trains a new ``Classifier`` for 10 epochs, saving it to
       ``models/model.h5`` before and after fitting;
    4. prints the loss/accuracy on the test split and the predictions for the
       images in ``random_images/``.

    Args:
        classes: number of output classes for the classifier.  It is
            converted with ``int``.  Earlier versions ignored this argument
            and re-read the same value from ``request.view_args``.

    Returns:
        str: the literal ``"Training is complete"``.
    """
    dir_path = 'natural_images/'
    output_path = 'processed_data/'

    sub_dir_list = os.listdir(dir_path)
    images = []
    labels = []
    for label, sub_dir in enumerate(sub_dir_list):
        class_dir = dir_path + sub_dir
        for image_name in os.listdir(class_dir):
            path = class_dir + "/" + image_name
            # The context manager closes the underlying file; convert() and
            # resize() return independent images, so nothing is lost.
            with Image.open(path) as source:
                resized = source.convert('L').resize(
                    (data_dimension, data_dimension))
            # The previous pixel loop stored pixel (x, y) at [x][y], i.e. the
            # transpose of the row-major array NumPy produces.  Keep that
            # layout so the data matches what the model has been fed so far.
            pixels = np.asarray(resized).T
            images.append(
                pixels.reshape((data_dimension, data_dimension, 1)) / 255)
            labels.append(label)
        print(str(label) + " : " + sub_dir)

    x = np.array(images)
    y = np.array(
        keras.utils.to_categorical(np.array(labels),
                                   num_classes=len(sub_dir_list)))
    train_features, test_features, train_labels, test_labels = train_test_split(
        x, y, test_size=0.4)

    np.save('{}x.npy'.format(output_path), train_features)
    np.save('{}y.npy'.format(output_path), train_labels)
    np.save('{}test_x.npy'.format(output_path), test_features)
    np.save('{}test_y.npy'.format(output_path), test_labels)

    # Read the arrays back from disk (the paths have no replacement fields,
    # so the former no-op ``.format(data_dimension)`` calls were removed).
    X = np.load('processed_data/x.npy')
    Y = np.load('processed_data/y.npy')
    test_X = np.load('processed_data/test_x.npy')
    test_Y = np.load('processed_data/test_y.npy')
    print(X.shape)
    print(Y.shape)
    print(test_X.shape)
    print(test_Y.shape)

    # Flatten every sample to a float32 vector of data_dimension**2 values.
    X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
    test_X = test_X.reshape(
        (test_X.shape[0], data_dimension**2)).astype(np.float32)

    classifier = Classifier(number_of_classes=int(classes))
    # Saved once before training, as before, and again after fitting.
    classifier.save_model('models/model.h5')

    parameters = {
        'batch_size': 250,
        'epochs': 10,
        'callbacks': None,
        'val_data': None,
    }
    classifier.fit(X, Y, hyperparameters=parameters)
    classifier.save_model('models/model.h5')

    loss, accuracy = classifier.evaluate(test_X, test_Y)
    print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

    # Smoke-test the trained model on the sample images.
    sample_X, _ = classifier.prepare_images_from_dir('random_images/')
    sample_X = sample_X.reshape(
        (sample_X.shape[0], data_dimension**2)).astype(np.float32)
    print(classifier.predict(sample_X).argmax(1))
    return "Training is complete"
from Model import Classifier
import numpy as np

# Continue training the saved 2-class model on the arrays stored under
# data<data_dimension>/, save the result as models/0001.h5, then report the
# test loss/accuracy and the predicted class of every test sample.
data_dimension = 28

# Arrays are loaded in the order x, y, test_x, test_y.
X, Y, test_X, test_Y = (
    np.load(path_template.format(data_dimension))
    for path_template in (
        'data{}/x.npy',
        'data{}/y.npy',
        'data{}/test_x.npy',
        'data{}/test_y.npy',
    )
)

# Flatten every sample to a float32 vector of data_dimension**2 values.
X = np.reshape(X, (X.shape[0], data_dimension**2)).astype(np.float32)
test_X = np.reshape(
    test_X, (test_X.shape[0], data_dimension**2)).astype(np.float32)

classifier = Classifier(number_of_classes=2)
classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None,
}
classifier.fit(X, Y, hyperparameters=parameters)
# The result goes to a different file from the checkpoint that was loaded.
classifier.save_model('models/0001.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

test_scores = classifier.predict(test_X)
print(test_scores.argmax(axis=1))
def _evaluate_loader(model, loader, criterion, use_cuda):
    """Return ``(summed_loss, correct, seen)`` for one pass over *loader*.

    Gradient tracking is left to the caller (wrap the call in
    ``torch.no_grad()``).
    """
    summed_loss, correct, seen = 0.0, 0, 0
    for images, labels in loader:
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()
        outputs = model(images).squeeze(dim=1)
        summed_loss += criterion(outputs, labels.double()).item()
        # One logit per sample: sigmoid above 0.5 counts as the positive class.
        predicted = torch.sigmoid(outputs) > 0.5
        seen += labels.size(0)
        correct += (predicted == labels).sum().item()
    return summed_loss, correct, seen


def TrainClassifier(dataloaders: dict, model: Classifier, criterion: nn.Module,
                    test_loader=None, num_of_epochs: int = 25):
    """Train *model* with Adam and track accuracy/loss per epoch.

    Each epoch runs one optimisation pass over ``dataloaders['train']`` and
    one evaluation pass over ``dataloaders['val']`` (plus ``test_loader`` when
    given).  Predictions are ``sigmoid(output) > 0.5``.  Whenever the
    validation accuracy improves, the model is stored through
    ``SaveModel('classifier', model)``.  The learning rate starts at 1e-3 and
    is multiplied by 0.4 every 5 epochs.

    Args:
        dataloaders: mapping with the keys ``'train'`` and ``'val'``; each
            value yields ``(images, labels)`` batches.
        model: network to train.  Batches are moved to CUDA when it is
            available; the model itself is not moved here.
        criterion: loss called as ``criterion(outputs, labels.double())``.
        test_loader: optional extra loader evaluated every epoch.  When it is
            ``None`` the returned test metrics stay empty.
        num_of_epochs: number of epochs to run.

    Returns:
        tuple: ``(train_metrics, val_metrics, test_metrics)``, each a dict
        with the keys ``'ACC'`` and ``'LOSS'`` holding one value per epoch.
        ``'LOSS'`` is the summed batch loss divided by the number of batches
        of the corresponding loader.
    """
    # float('-inf') instead of np.Inf, which NumPy 2.0 removed.
    best_acc = float('-inf')
    metrics_val_dict = {'ACC': [], 'LOSS': []}
    metrics_train_dict = {'ACC': [], 'LOSS': []}
    metrics_test_dict = {'ACC': [], 'LOSS': []}

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    lr_sched = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.4)
    use_cuda = torch.cuda.is_available()

    for _ in range(num_of_epochs):
        # Counters restart at the beginning of every epoch.
        train_loss, train_correct, train_seen = 0.0, 0, 0

        model = model.train()
        for images, labels in dataloaders['train']:
            if use_cuda:
                images = images.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            outputs = model(images).squeeze(dim=1)
            loss = criterion(outputs, labels.double())
            loss.backward()
            optimizer.step()

            # .item() detaches the value; summing the tensor itself would keep
            # every batch's autograd graph alive until the end of the epoch.
            train_loss += loss.item()
            predicted = torch.sigmoid(outputs) > 0.5
            train_seen += images.size(0)
            train_correct += (predicted == labels).sum().item()

        metrics_train_dict['ACC'].append(train_correct / float(train_seen))
        metrics_train_dict['LOSS'].append(
            train_loss / len(dataloaders['train']))

        model = model.eval()
        with torch.no_grad():
            val_loss, val_correct, val_seen = _evaluate_loader(
                model, dataloaders['val'], criterion, use_cuda)
            if test_loader is not None:
                test_loss, test_correct, test_seen = _evaluate_loader(
                    model, test_loader, criterion, use_cuda)

        metrics_val_dict['ACC'].append(val_correct / float(val_seen))
        # Average over the validation batches (previously divided by the
        # number of training batches).
        metrics_val_dict['LOSS'].append(val_loss / len(dataloaders['val']))

        # Without a test loader there is nothing to average; recording test
        # metrics unconditionally used to raise on the first epoch.
        if test_loader is not None:
            metrics_test_dict['ACC'].append(test_correct / float(test_seen))
            metrics_test_dict['LOSS'].append(test_loss / len(test_loader))

        # Keep the checkpoint with the best validation accuracy so far.
        if best_acc < metrics_val_dict['ACC'][-1]:
            best_acc = metrics_val_dict['ACC'][-1]
            SaveModel('classifier', model)

        lr_sched.step()

    return metrics_train_dict, metrics_val_dict, metrics_test_dict
def run_cv_model_by_batch(args, train, test, folds, batch_col, feats,
                          sample_submission, nn_epochs, nn_batch_size):
    """Run grouped K-fold training and write predictions and diagnostics.

    For every fold a new ``Classifier`` is fitted on the training groups, its
    first output is used to fill the out-of-fold matrix and to accumulate the
    averaged test prediction, and the model is saved as
    ``model-wavenet_fold<k>.h5``.  Afterwards the function prints the
    out-of-fold macro F1 and writes ``submission_wavenet.csv``,
    ``Training_by_Epoch.csv``, ``Train_probs.npz`` and ``Test_probs.npz``.

    Args:
        args: settings mapping; the keys read here are ``'Seed'``,
            ``'Folds'`` (number of folds) and ``'Multitask'`` (iterable of
            row shifts, one auxiliary target per shift).  It is also passed
            to ``Classifier``.
        train: DataFrame with the columns ``'group'``, ``'open_channels'``
            and ``feats``.
        test: DataFrame with the columns ``'group'`` and ``feats``.
        folds: unused; the fold count comes from ``args['Folds']``.
        batch_col: unused.
        feats: feature column names fed to the model.
        sample_submission: DataFrame whose ``'open_channels'`` column is
            overwritten in place with the predicted class.
        nn_epochs: epochs per fold.
        nn_batch_size: batch size passed to ``model.fit``.

    Returns:
        None.

    NOTE(review): ``GroupKFold`` is assumed to be scikit-learn's, whose
    keyword is ``n_splits``; the previous ``folds=`` keyword is rejected by
    that class.  Confirm against the file's imports.
    """
    training_time = time()
    seed_everything(args['Seed'])
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=config)
    tf.compat.v1.keras.backend.set_session(sess)

    # 11 columns: one per target class (0 to 10).
    oof_ = np.zeros((len(train), 11))
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    group = train['group']

    kf = GroupKFold(n_splits=args['Folds'])
    # Per fold: (training group ids, validation group ids, validation rows).
    new_splits = [
        [np.unique(group[tr_rows]), np.unique(group[val_rows]), val_rows]
        for tr_rows, val_rows in kf.split(train, train[target], group)
    ]

    # One-hot encode the target so the net is trained as a multiclass model.
    tr = pd.concat([pd.get_dummies(train.open_channels), train[['group']]],
                   axis=1)
    tr.columns = ['target_' + str(i) for i in range(11)] + ['group']
    target_cols = ['target_' + str(i) for i in range(11)]

    def grouped_targets(frame):
        """Stack the per-group target matrices of *frame* as float32."""
        return np.array(
            list(frame.groupby('group').apply(
                lambda x: x[target_cols].values))).astype(np.float32)

    # Main target first, then one shifted copy per multitask entry.
    train_tr_list = [grouped_targets(tr)]
    for shift_ in args['Multitask']:
        tr_copy = tr.copy()
        tr_copy[target_cols] = tr_copy.loc[:, target_cols].shift(
            shift_).fillna(0)
        train_tr_list.append(grouped_targets(tr_copy))
        del tr_copy
    gc.collect()

    # Park the targets on disk in compressed form to free memory.
    start = time()
    n_targets = len(train_tr_list)
    for i in range(n_targets):
        np.savez_compressed('train_tr_{}'.format(i), a=train_tr_list[i])
    print(f'Took {time() - start} to clock')
    del train_tr_list
    gc.collect()

    # Reload exactly the files written above, from the same working directory
    # (previously a fixed count of 4 under a hard-coded /kaggle/working/).
    train_tr = [
        np.load('train_tr_{}.npz'.format(i)) for i in range(n_targets)
    ]

    train = np.array(
        list(train.groupby('group').apply(lambda x: x[feats].values)))
    test = np.array(
        list(test.groupby('group').apply(lambda x: x[feats].values)))

    Training_df = []
    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x = train[tr_idx]
        train_y = [archive['a'][tr_idx] for archive in train_tr]
        valid_x = train[val_idx]
        valid_y = [archive['a'][val_idx] for archive in train_tr]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')
        gc.collect()

        # Second entry: number of features (last axis of train_x).
        shape_ = (None, train_x.shape[2])
        model = Classifier(shape_, args)
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        H = model.fit(
            train_x,
            train_y,
            epochs=nn_epochs,
            # MacroF1 is the custom per-epoch evaluation callback.
            callbacks=[
                cb_lr_schedule,
                MacroF1(model, train_x, train_y, valid_x, valid_y)
            ],
            batch_size=nn_batch_size,
            verbose=2,
            validation_data=(valid_x, valid_y))

        # Only the first model output is used for the predictions.
        preds_f = model.predict(valid_x)[0]
        print('Training fold {} completed. macro f1 score : {:1.5f}'.format(
            n_fold + 1, H.history['F1_val'][-1]))
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx, :] += preds_f

        te_preds = model.predict(test)[0]
        model.save("model-wavenet_fold{}.h5".format(n_fold + 1))
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])
        preds_ += te_preds / args['Folds']

        # Keep the training history of this fold for later plotting.
        df = pd.DataFrame.from_dict(H.history)
        df['Fold'] = [n_fold] * df.shape[0]
        Training_df.append(df)

        # Release the fold's arrays before the next fold allocates its own.
        del model, train_x, train_y, valid_x, valid_y
        gc.collect()

    print('Training completed...')
    print(f'Training time: {time() - training_time}')

    print('Collection final submissions...')
    # axis=2 for the 3-D target array, axis=1 for the 2-D out-of-fold matrix.
    f1_score_ = f1_score(
        np.argmax(train_tr[0]['a'], axis=2).reshape(-1),
        np.argmax(oof_, axis=1),
        average='macro')
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')

    # The archives are not needed any more; close their file handles.
    for archive in train_tr:
        archive.close()

    sample_submission['open_channels'] = np.argmax(preds_,
                                                   axis=1).astype(int)
    sample_submission.to_csv('submission_wavenet.csv',
                             index=False,
                             float_format='%.4f')

    Training_dynamics = pd.concat(Training_df)
    Training_dynamics.to_csv('Training_by_Epoch.csv', index=False)

    # Store the class probabilities as float16 to reduce the file size.
    save_start = time()
    oof_ = oof_.astype(np.float16)
    preds_ = preds_.astype(np.float16)
    print('Saving Validation Probs and test Probs to npz')
    np.savez_compressed("Train_probs.npz", train_probs=oof_)
    np.savez_compressed("Test_probs.npz", test_probs=preds_)
    print('Done Saving. Took {} seconds'.format(time() - save_start))