Example #1
from Model import Classifier  # assumed project module, imported explicitly in Example #4
import numpy as np

data_dimension = 28  # assumed image side length; Example #7 uses 28


def predict():
    # Load the trained model and classify the images found in 'random_images/'.
    classifier = Classifier(number_of_classes=8)
    classifier.load_model('models/model.h5')
    input_X, images = classifier.prepare_images_from_dir('random_images/')
    # Flatten each image into a vector of data_dimension**2 pixel values.
    input_X = input_X.reshape(
        (input_X.shape[0], data_dimension**2)).astype(np.float32)
    result_arr = classifier.predict(input_X).argmax(1)
    return str(result_arr[0])
X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2)).astype(np.float32)

classifier = Classifier(number_of_classes=8)
classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None
}

classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
Example #3
import os
import datetime

import numpy as np
from Model import Classifier  # assumed project module, imported explicitly in Example #4
from tensorflow.keras.callbacks import TensorBoard  # used by the callback below

# Silence TensorFlow's INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

classifier = Classifier(number_of_classes=2, maxlen=35)
# classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 100,
    'epochs': 100,
    'callbacks': [
        # Write training curves to a timestamped directory for TensorBoard.
        TensorBoard(log_dir=os.path.join(
            "logs",
            "fit",
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
        ))
    ],
    'val_data': (test_X, test_Y)
}
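
With the callback above writing to a timestamped directory under logs/fit, the training curves can be viewed with TensorBoard's standard command line:

tensorboard --logdir logs/fit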
Example #4
from Model import Classifier
import numpy as np

X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

classifier = Classifier(input_length=X.shape[1])
parameters = {
    'batch_size': 500,
    'epochs': 10,
    'callbacks': None,
    'val_data': (test_X, test_Y)
}
classifier.fit(X, Y, parameters)
classifier.save_model('models/model.h5')
Example #5
from Model import Classifier  # assumed project module, imported explicitly in Example #4
import numpy as np

data_dimension = 32  # assumption: this excerpt never defines the image side length
num_channels = 3

X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

# Flatten each H x W x C image into a single feature vector.
X = X.reshape((X.shape[0], data_dimension**2 * num_channels)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2 * num_channels)).astype(np.float32)

classifier = Classifier(number_of_classes=10)
classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 120,
    'epochs': 3,
    'callbacks': None,  # [TensorBoard(log_dir='logs/{}'.format(time.time()))]
    'val_data': (test_X, test_Y)
}

# classifier.fit(X, Y, hyperparameters=parameters)
# classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
Example #6
# Assumed imports for this excerpt; Classifier is the project's model wrapper
# and the function is served as a Flask view (it reads request.view_args).
import os

import keras
import numpy as np
from flask import request
from PIL import Image
from sklearn.model_selection import train_test_split
from Model import Classifier

data_dimension = 28  # assumed image side length; Example #7 uses 28


def train(classes):
    dir_path = 'natural_images/'
    output_path = 'processed_data/'

    sub_dir_list = os.listdir(dir_path)
    images = list()
    labels = list()
    for i in range(len(sub_dir_list)):
        label = i
        image_names = os.listdir(dir_path + sub_dir_list[i])
        for image_path in image_names:
            path = dir_path + sub_dir_list[i] + "/" + image_path
            # Convert to grayscale and resize to a data_dimension square.
            image = Image.open(path).convert('L')
            resize_image = image.resize((data_dimension, data_dimension))
            pixels = resize_image.load()
            array = list()
            for x in range(data_dimension):
                sub_array = list()
                for y in range(data_dimension):
                    sub_array.append(pixels[x, y])
                array.append(sub_array)
            image_data = np.array(array)
            # Add a channel axis and scale pixel values to [0, 1].
            image = np.reshape(image_data,
                               (data_dimension, data_dimension, 1)) / 255
            images.append(image)
            labels.append(label)
        print(str(label) + " : " + sub_dir_list[i])

    x = np.array(images)
    y = np.array(
        keras.utils.to_categorical(np.array(labels),
                                   num_classes=len(sub_dir_list)))

    train_features, test_features, train_labels, test_labels = train_test_split(
        x, y, test_size=0.4)

    np.save('{}x.npy'.format(output_path), train_features)
    np.save('{}y.npy'.format(output_path), train_labels)
    np.save('{}test_x.npy'.format(output_path), test_features)
    np.save('{}test_y.npy'.format(output_path), test_labels)

    X = np.load('processed_data/x.npy')
    Y = np.load('processed_data/y.npy')
    test_X = np.load('processed_data/test_x.npy')
    test_Y = np.load('processed_data/test_y.npy')

    print(X.shape)
    print(Y.shape)
    print(test_X.shape)
    print(test_Y.shape)

    X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
    test_X = test_X.reshape(
        (test_X.shape[0], data_dimension**2)).astype(np.float32)
    # Re-read the class count from the Flask route arguments.
    classes = int(request.view_args['classes'])
    classifier = Classifier(number_of_classes=classes)
    classifier.save_model('models/model.h5')

    parameters = {
        'batch_size': 250,
        'epochs': 10,
        'callbacks': None,
        'val_data': None
    }

    classifier.fit(X, Y, hyperparameters=parameters)
    classifier.save_model('models/model.h5')

    loss, accuracy = classifier.evaluate(test_X, test_Y)
    print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

    sample_X, images = classifier.prepare_images_from_dir('random_images/')
    sample_X = sample_X.reshape(
        (sample_X.shape[0], data_dimension**2)).astype(np.float32)
    print(classifier.predict(sample_X).argmax(1))
    return "Training is complete"
Example #7
from Model import Classifier
import numpy as np

data_dimension = 28

X = np.load('data{}/x.npy'.format(data_dimension))
Y = np.load('data{}/y.npy'.format(data_dimension))
test_X = np.load('data{}/test_x.npy'.format(data_dimension))
test_Y = np.load('data{}/test_y.npy'.format(data_dimension))

X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2)).astype(np.float32)

classifier = Classifier(number_of_classes=2)
classifier.load_model('models/model.h5')

parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None
}

classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/0001.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
print(classifier.predict(test_X).argmax(axis=1))
Example #8
# Assumed imports for this excerpt; SaveModel is a checkpoint helper defined
# elsewhere in the project.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler


def TrainClassifier(dataloaders: dict, model: nn.Module,
                    criterion: nn.Module, test_loader=None,
                    num_of_epochs: int = 25):
    # `model` is the project's Classifier, assumed to be an nn.Module subclass.

    best_acc = -np.inf
    metrics_val_dict = {'ACC': [], 'LOSS': []}
    metrics_train_dict = {'ACC': [], 'LOSS': []}
    metrics_test_dict = {'ACC': [], 'LOSS': []}

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    # Decay the learning rate by a factor of 0.4 every 5 epochs.
    lr_sched = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.4)
    
    
    for _ in range(num_of_epochs):

        # Reset the running counters at the start of each epoch.
        correct = 0
        total_train = 0
        total_train_loss = 0.0
        total_val_loss = 0.0
        total_test_loss = 0.0
        total_test = 0
        model = model.train()
        for images, labels in dataloaders['train']:

            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()
            outputs = model(images).squeeze(dim=1)
            loss = criterion(outputs, labels.double())
            total_train_loss += loss.item()
            loss.backward()
            optimizer.step()
            # Threshold the sigmoid output at 0.5 for binary predictions.
            predicted = torch.sigmoid(outputs) > 0.5

            total_train += images.size(0)
            correct += (predicted == labels).sum().item()

        train_acc = correct / float(total_train)
        metrics_train_dict['ACC'].append(train_acc)
        metrics_train_dict['LOSS'].append(total_train_loss / len(dataloaders['train']))

        model = model.eval()
        with torch.no_grad():
            correct_test = 0
            correct = 0
            total = 0
            for images, labels in dataloaders['val']:

                if torch.cuda.is_available():
                    images = images.cuda()
                    labels = labels.cuda()

                outputs = model(images).squeeze(dim=1)
                loss = criterion(outputs, labels.double())
                total_val_loss += loss.item()
                predicted = torch.sigmoid(outputs) > 0.5
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            if test_loader is not None:
                for images, labels in test_loader:

                    if torch.cuda.is_available():
                        images = images.cuda()
                        labels = labels.cuda()

                    outputs = model(images).squeeze(dim=1)
                    loss = criterion(outputs, labels.double())
                    total_test_loss += loss.item()
                    predicted = torch.sigmoid(outputs) > 0.5
                    total_test += labels.size(0)
                    correct_test += (predicted == labels).sum().item()

        metrics_val_dict['ACC'].append(correct / float(total))
        metrics_val_dict['LOSS'].append(total_val_loss / len(dataloaders['val']))

        # Only record test metrics when a test loader was provided.
        if test_loader is not None:
            metrics_test_dict['ACC'].append(correct_test / float(total_test))
            metrics_test_dict['LOSS'].append(total_test_loss / len(test_loader))


        # Save a checkpoint whenever validation accuracy improves.
        if best_acc < metrics_val_dict['ACC'][-1]:
            best_acc = metrics_val_dict['ACC'][-1]
            f_name = 'classifier'
            SaveModel(f_name, model)

        lr_sched.step()

    return metrics_train_dict, metrics_val_dict, metrics_test_dict
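
A minimal usage sketch with fake data, since the project's real Classifier and SaveModel are not shown on this page; TinyBinaryNet and the SaveModel stub below are stand-ins, and BCEWithLogitsLoss is chosen because it pairs with the sigmoid thresholding inside TrainClassifier:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


class TinyBinaryNet(nn.Module):
    # Stand-in for the project's Classifier: flatten, then a single logit.
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 1))

    def forward(self, x):
        return self.net(x)


def SaveModel(name, model):
    # Stub for the project's checkpoint helper.
    torch.save(model.state_dict(), name + '.pt')


# Double-precision fake data, matching the labels.double() cast above.
X = torch.randn(128, 3, 32, 32, dtype=torch.float64)
y = torch.randint(0, 2, (128,), dtype=torch.float64)
loaders = {'train': DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True),
           'val': DataLoader(TensorDataset(X, y), batch_size=32)}

model = TinyBinaryNet().double()
train_m, val_m, test_m = TrainClassifier(loaders, model,
                                         nn.BCEWithLogitsLoss(),
                                         num_of_epochs=2)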
Example #9
# Assumed imports for this excerpt (a Kaggle-notebook-style pipeline);
# Classifier, MacroF1, lr_schedule, and seed_everything are defined elsewhere
# in the notebook.
import gc
from time import time

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import GroupKFold
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import LearningRateScheduler


def run_cv_model_by_batch(args, train, test, folds, batch_col, feats,
                          sample_submission, nn_epochs, nn_batch_size):
    training_time = time()
    seed_everything(args['Seed'])
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    oof_ = np.zeros((len(train), 11))  # out-of-fold matrix, one column per target class (0-10)
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    group = train['group']
    kf = GroupKFold(n_splits=args['Folds'])
    splits = [x for x in kf.split(train, train[target], group)]

    new_splits = []
    for sp in splits:
        new_split = []
        new_split.append(np.unique(group[sp[0]]))
        new_split.append(np.unique(group[sp[1]]))
        new_split.append(sp[1])
        new_splits.append(new_split)
    # Pivot the target column to one-hot form so the net trains as a multiclass
    # classifier (you could also keep a single vector and use a sparse
    # categorical cross-entropy loss).
    # Build the list of correct channel targets for the predictions.
    train_tr_list = []
    tr = pd.concat([pd.get_dummies(train.open_channels), train[['group']]],
                   axis=1)
    tr.columns = ['target_' + str(i) for i in range(11)] + ['group']
    target_cols = ['target_' + str(i) for i in range(11)]
    train_tr = np.array(
        list(tr.groupby('group').apply(
            lambda x: x[target_cols].values))).astype(np.float32)
    train_tr_list.append(train_tr)
    del train_tr
    # Build the shifted channel targets for the multitask heads.
    for shift_ in args['Multitask']:
        # Shift the targets by the requested amount.
        tr_copy = tr.copy()
        tr_copy[target_cols] = tr_copy.loc[:, target_cols].shift(shift_).fillna(0)
        train_tr = np.array(
            list(
                tr_copy.groupby('group').apply(
                    lambda x: x[target_cols].values))).astype(np.float32)
        train_tr_list.append(train_tr)
        del train_tr
        gc.collect()

    start = time()
    for i in range(len(train_tr_list)):
        np.savez_compressed('train_tr_{}'.format(i), a=train_tr_list[i])
    print(f'Took {time() - start} seconds to compress and save the targets')
    del train_tr_list
    gc.collect()
    # Reload the compressed targets from disk to keep the memory footprint low.
    train_tr = [
        np.load('/kaggle/working/train_tr_{}.npz'.format(i)) for i in range(4)
    ]

    train = np.array(
        list(train.groupby('group').apply(lambda x: x[feats].values)))
    test = np.array(
        list(test.groupby('group').apply(lambda x: x[feats].values)))
    Training_df = []
    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x = train[tr_idx]
        train_y = [train_tr[i]['a'][tr_idx] for i in range(len(train_tr))]
        valid_x = train[val_idx]
        valid_y = [train_tr[i]['a'][val_idx] for i in range(len(train_tr))]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')

        gc.collect()
        shape_ = (
            None, train_x.shape[2]
        )  # the model input is (timesteps, features); dim 2 of train_x is the feature count
        model = Classifier(shape_, args)
        # using our lr_schedule function
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        H = model.fit(
            train_x,
            train_y,
            epochs=nn_epochs,
            callbacks=[
                cb_lr_schedule,
                MacroF1(model, train_x, train_y, valid_x, valid_y)
            ],  # adding custom evaluation metric for each epoch
            batch_size=nn_batch_size,
            verbose=2,
            validation_data=(valid_x, valid_y))
        preds_f = model.predict(valid_x)
        preds_f = preds_f[0]
        #f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),  np.argmax(preds_f, axis=2).reshape(-1), average = 'macro') # need to get the class with the biggest probability
        print('Training fold {} completed. macro f1 score : {:1.5f}'.format(
            n_fold + 1, H.history['F1_val'][-1]))
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx, :] += preds_f
        te_preds = model.predict(test)
        te_preds = te_preds[0]
        model.save("model-wavenet_fold{}.h5".format(n_fold + 1))
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])
        preds_ += te_preds / args['Folds']

        #Creating a dataframe of the training dynamics of this fold
        df = pd.DataFrame.from_dict(H.history)
        df['Fold'] = [n_fold] * df.shape[0]
        Training_df.append(df)

        #Getting some space in memory
        del ([model, train_x, train_y, valid_x, valid_y])
        gc.collect()
    print('Training completed...')
    print(f'Training time: {time() - training_time}')
    # calculate the oof macro f1_score
    print('Collecting final submissions...')
    f1_score_ = f1_score(
        np.argmax(train_tr[0]['a'], axis=2).reshape(-1),
        np.argmax(oof_, axis=1),
        average='macro'
    )  # axis 2 for the 3-D target array, axis 1 for the 2-D oof array (extracting the best class)
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')
    sample_submission['open_channels'] = np.argmax(preds_, axis=1).astype(int)
    sample_submission.to_csv('submission_wavenet.csv',
                             index=False,
                             float_format='%.4f')

    # Create the dataframe for graphing training dynamics.
    Training_dynamics = pd.concat(Training_df)
    Training_dynamics.to_csv('Training_by_Epoch.csv', index=False)

    #Reducing the data footprint, compressing, and saving softmax probs
    # of val and test data as numpy compressed files
    save_start = time()
    oof_ = oof_.astype(np.float16)
    preds_ = preds_.astype(np.float16)
    #Saving the validation predictions and test predictions for a stacknet
    print('Saving Validation Probs and test Probs to npz')
    np.savez_compressed("Train_probs.npz", train_probs=oof_)
    np.savez_compressed("Test_probs.npz", test_probs=preds_)
    print('Done saving. Took {} seconds'.format(time() - save_start))
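
The function above calls seed_everything() and lr_schedule() (and a custom MacroF1 callback) without defining them. A common Kaggle-style sketch of the first two, offered as an assumption rather than the author's exact code:

import os
import random

import numpy as np
import tensorflow as tf


def seed_everything(seed):
    # Pin every relevant RNG so the folds are reproducible.
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


def lr_schedule(epoch):
    # Step decay: start at 1e-3 and halve every 10 epochs (illustrative values).
    return 1e-3 * (0.5 ** (epoch // 10))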