import numpy as np

# X, Y, test_X and test_Y are assumed to have been loaded earlier in the
# original script (see Example 2 for how they are produced); data_dimension
# and the Classifier class are likewise assumed to be defined elsewhere.
print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

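# Flatten each data_dimension x data_dimension image into a single feature
# vector so it can be fed to a dense (fully connected) classifier.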
X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
test_X = test_X.reshape(
    (test_X.shape[0], data_dimension**2)).astype(np.float32)

classifier = Classifier(number_of_classes=8)
classifier.load_model('models/model.h5')

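# Training hyperparameters; callbacks and validation data are left unset here.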
parameters = {
    'batch_size': 250,
    'epochs': 10,
    'callbacks': None,
    'val_data': None
}

classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

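# Run inference on unlabeled images; argmax(1) maps each row of softmax
# probabilities to its predicted class index.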
sample_X = classifier.prepare_images_from_dir('random_images/')
sample_X = sample_X.reshape(
    (sample_X.shape[0], data_dimension**2)).astype(np.float32)
print(classifier.predict(sample_X).argmax(1))
Example 2

import os
import numpy as np
import keras
from PIL import Image
from sklearn.model_selection import train_test_split

# data_dimension and the Classifier class are assumed to be defined
# elsewhere in the original script.
def train(classes):
    dir_path = 'natural_images/'
    output_path = 'processed_data/'

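    # Each subdirectory of dir_path is treated as one class; its position in
    # sub_dir_list becomes the integer label.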
    sub_dir_list = os.listdir(dir_path)
    images = list()
    labels = list()
    for i in range(len(sub_dir_list)):
        label = i
        image_names = os.listdir(dir_path + sub_dir_list[i])
        for image_path in image_names:
            path = dir_path + sub_dir_list[i] + "/" + image_path
            image = Image.open(path).convert('L')
            resize_image = image.resize((data_dimension, data_dimension))
            pixels = resize_image.load()  # load the pixel access object once
            array = list()
            for x in range(data_dimension):
                sub_array = list()
                for y in range(data_dimension):
                    sub_array.append(pixels[x, y])
                array.append(sub_array)
            image_data = np.array(array)
            image = np.reshape(image_data,
                               (data_dimension, data_dimension, 1)) / 255
            images.append(image)
            labels.append(label)
        print(str(label) + " : " + sub_dir_list[i])

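    # Stack the images into a single array and one-hot encode the labels.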
    x = np.array(images)
    y = np.array(
        keras.utils.to_categorical(np.array(labels),
                                   num_classes=len(sub_dir_list)))

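    # Hold out 40% of the samples as the test split.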
    train_features, test_features, train_labels, test_labels = train_test_split(
        x, y, test_size=0.4)

    np.save('{}x.npy'.format(output_path), train_features)
    np.save('{}y.npy'.format(output_path), train_labels)
    np.save('{}test_x.npy'.format(output_path), test_features)
    np.save('{}test_y.npy'.format(output_path), test_labels)

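    # Reload the arrays from disk so the training step below consumes exactly
    # what was just persisted.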
    X = np.load('{}x.npy'.format(output_path))
    Y = np.load('{}y.npy'.format(output_path))
    test_X = np.load('{}test_x.npy'.format(output_path))
    test_Y = np.load('{}test_y.npy'.format(output_path))

    print(X.shape)
    print(Y.shape)
    print(test_X.shape)
    print(test_Y.shape)

    X = X.reshape((X.shape[0], data_dimension**2)).astype(np.float32)
    test_X = test_X.reshape(
        (test_X.shape[0], data_dimension**2)).astype(np.float32)
    # classes already arrives as the function argument, so there is no need
    # to re-read it from Flask's request.view_args; the model is saved only
    # after training below.
    classifier = Classifier(number_of_classes=classes)

    parameters = {
        'batch_size': 250,
        'epochs': 10,
        'callbacks': None,
        'val_data': None
    }

    classifier.fit(X, Y, hyperparameters=parameters)
    classifier.save_model('models/model.h5')

    loss, accuracy = classifier.evaluate(test_X, test_Y)
    print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))

    sample_X, images = classifier.prepare_images_from_dir('random_images/')
    sample_X = sample_X.reshape(
        (sample_X.shape[0], data_dimension**2)).astype(np.float32)
    print(classifier.predict(sample_X).argmax(1))
    return "Training is complete"
Example 3

import os
import datetime
import numpy as np
from keras.callbacks import TensorBoard

# The Classifier class is assumed to be defined elsewhere; depending on the
# original setup, TensorBoard may come from tensorflow.keras.callbacks instead.
X = np.load('processed_data/x.npy')
Y = np.load('processed_data/y.npy')
test_X = np.load('processed_data/test_x.npy')
test_Y = np.load('processed_data/test_y.npy')

print(X.shape)
print(Y.shape)
print(test_X.shape)
print(test_Y.shape)

classifier = Classifier(number_of_classes=2, maxlen=35)
# classifier.load_model( 'models/model.h5' )

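# Train for 100 epochs, logging to a timestamped TensorBoard run directory
# and validating on the test split after every epoch.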
parameters = {
    'batch_size': 100,
    'epochs': 100,
    'callbacks': [
        TensorBoard(log_dir=os.path.join(
            "logs",
            "fit",
            datetime.datetime.now().strftime("%Y%m%d-%H%M%S"),
        ))
    ],
    'val_data': (test_X, test_Y)
}
classifier.fit(X, Y, hyperparameters=parameters)
classifier.save_model('models/model.h5')

loss, accuracy = classifier.evaluate(test_X, test_Y)
print("Loss of {}".format(loss), "Accuracy of {} %".format(accuracy * 100))
Example 4

import gc
from time import time

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import GroupKFold
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import LearningRateScheduler

# Classifier, MacroF1, seed_everything and lr_schedule are helpers defined
# elsewhere in the original kernel (a hedged sketch of the last two follows
# at the end of this example).
def run_cv_model_by_batch(args, train, test, folds, batch_col, feats,
                          sample_submission, nn_epochs, nn_batch_size):
    training_time = time()
    seed_everything(args['Seed'])
    K.clear_session()
    config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                      inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=config)
    tf.compat.v1.keras.backend.set_session(sess)
    oof_ = np.zeros(
        (len(train), 11)
    )  # out-of-fold prediction matrix with 11 columns, one per target class (0 to 10)
    preds_ = np.zeros((len(test), 11))
    target = ['open_channels']
    group = train['group']
    kf = GroupKFold(n_splits=args['Folds'])
    splits = list(kf.split(train, train[target], group))

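    # Convert the row-level fold indices into group-level indices so whole
    # groups stay in one fold; keep the original validation row indices for
    # writing out-of-fold predictions back in place.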
    new_splits = []
    for sp in splits:
        new_split = []
        new_split.append(np.unique(group[sp[0]]))
        new_split.append(np.unique(group[sp[1]]))
        new_split.append(sp[1])
        new_splits.append(new_split)
    # Pivot the target column into one column per class so the net can be
    # trained as a multiclass classifier (you could also keep a single vector
    # and use a sparse categorical cross-entropy loss).
    # Getting the list of correct channels for the predictions
    train_tr_list = []
    tr = pd.concat([pd.get_dummies(train.open_channels), train[['group']]],
                   axis=1)
    tr.columns = ['target_' + str(i) for i in range(11)] + ['group']
    target_cols = ['target_' + str(i) for i in range(11)]
    train_tr = np.array(
        list(tr.groupby('group').apply(
            lambda x: x[target_cols].values))).astype(np.float32)
    train_tr_list.append(train_tr)
    del train_tr
    # Getting the list of correct channels for the multitask predictions
    for shift_ in args['Multitask']:
        # Shift the targets by the configured amount
        tr_copy = tr.copy()
        tr_copy[target_cols] = tr_copy.loc[:,
                                           target_cols].shift(shift_).fillna(0)
        train_tr = np.array(
            list(
                tr_copy.groupby('group').apply(
                    lambda x: x[target_cols].values))).astype(np.float32)
        train_tr_list.append(train_tr)
        del train_tr
        gc.collect()

    start = time()
    n_targets = len(train_tr_list)  # main target plus one per multitask shift
    for i in range(n_targets):
        np.savez_compressed('train_tr_{}'.format(i), a=train_tr_list[i])
    print(f'Took {time() - start:.2f} seconds to save the compressed targets')
    del train_tr_list
    gc.collect()
    train_tr = [
        np.load('train_tr_{}.npz'.format(i)) for i in range(n_targets)
    ]  # the compressed targets

    train = np.array(
        list(train.groupby('group').apply(lambda x: x[feats].values)))
    test = np.array(
        list(test.groupby('group').apply(lambda x: x[feats].values)))
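    # train and test are now 3-D arrays of shape (n_groups, group_length,
    # n_features): one sequence per group for the sequence model.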
    Training_df = []
    for n_fold, (tr_idx, val_idx, val_orig_idx) in enumerate(new_splits):
        train_x = train[tr_idx]
        train_y = [train_tr[i]['a'][tr_idx] for i in range(len(train_tr))]
        valid_x = train[val_idx]
        valid_y = [train_tr[i]['a'][val_idx] for i in range(len(train_tr))]
        print(f'Our training dataset shape is {train_x.shape}')
        print(f'Our validation dataset shape is {valid_x.shape}')

        gc.collect()
        shape_ = (
            None, train_x.shape[2]
        )  # input shape: variable sequence length by the number of features
        model = Classifier(shape_, args)
        # using our lr_schedule function
        cb_lr_schedule = LearningRateScheduler(lr_schedule)
        H = model.fit(
            train_x,
            train_y,
            epochs=nn_epochs,
            callbacks=[
                cb_lr_schedule,
                MacroF1(model, train_x, train_y, valid_x, valid_y)
            ],  # adding custom evaluation metric for each epoch
            batch_size=nn_batch_size,
            verbose=2,
            validation_data=(valid_x, valid_y))
        preds_f = model.predict(valid_x)
        preds_f = preds_f[0]
        #f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),  np.argmax(preds_f, axis=2).reshape(-1), average = 'macro') # need to get the class with the biggest probability
        print('Training fold {} completed. macro f1 score : {:1.5f}'.format(
            n_fold + 1, H.history['F1_val'][-1]))
        preds_f = preds_f.reshape(-1, preds_f.shape[-1])
        oof_[val_orig_idx, :] += preds_f
        te_preds = model.predict(test)
        te_preds = te_preds[0]
        model.save("model-wavenet_fold{}.h5".format(n_fold + 1))
        te_preds = te_preds.reshape(-1, te_preds.shape[-1])
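        # Average the test-set predictions across folds.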
        preds_ += te_preds / args['Folds']

        #Creating a dataframe of the training dynamics of this fold
        df = pd.DataFrame.from_dict(H.history)
        df['Fold'] = [n_fold] * df.shape[0]
        Training_df.append(df)

        #Getting some space in memory
        del model, train_x, train_y, valid_x, valid_y
        gc.collect()
    print('Training completed...')
    print(f'Training time: {time() - training_time}')
    # calculate the oof macro f1_score
    print('Collecting final submissions...')
    f1_score_ = f1_score(
        np.argmax(train_tr[0]['a'], axis=2).reshape(-1),
        np.argmax(oof_, axis=1),
        average='macro'
    )  # axis=2 for the 3-D target array and axis=1 for the 2-D oof array (extracting the best class)
    print(f'Training completed. oof macro f1 score : {f1_score_:1.5f}')
    sample_submission['open_channels'] = np.argmax(preds_, axis=1).astype(int)
    sample_submission.to_csv('submission_wavenet.csv',
                             index=False,
                             float_format='%.4f')

    # create the dataframe for graphing training dynamics
    Training_dynamics = pd.concat(Training_df)
    Training_dynamics.to_csv('Training_by_Epoch.csv', index=False)

    #Reducing the data footprint, compressing, and saving softmax probs
    # of val and test data as numpy compressed files
    save_start = time()
    oof_ = oof_.astype(np.float16)
    preds_ = preds_.astype(np.float16)
    #Saving the validation predictions and test predictions for a stacknet
    print('Saving Validation Probs and test Probs to npz')
    np.savez_compressed("Train_probs.npz", train_probs=oof_)
    np.savez_compressed("Test_probs.npz", test_probs=preds_)
    print('Done Saving.  Took {} seconds'.format(time() - save_start))
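
# --- Hedged sketch --------------------------------------------------------
# seed_everything and lr_schedule are referenced above but not defined in
# this snippet. Below is a minimal, assumed implementation consistent with
# how they are called; the original kernel's definitions may differ.
import os
import random

import numpy as np
import tensorflow as tf


def seed_everything(seed):
    # Fix the common sources of randomness for reproducible runs (assumed).
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


def lr_schedule(epoch):
    # Simple step decay returning the learning rate for a given epoch
    # (assumed; the actual schedule used by the kernel is not shown).
    if epoch < 30:
        return 1e-3
    if epoch < 40:
        return 5e-4
    return 1e-4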