コード例 #1
0
ファイル: train.py プロジェクト: dwyatte/data-sci-bowl-2016
def train():
    """
    Train the systole and diastole models, checkpointing after every iteration.

    Each iteration re-augments the training images (rotation, then shift),
    fits both models for ``epochs_per_iter`` epoch(s) on their own target
    column, optionally prints CRPS for the train and test splits, and then
    saves the current weights, the best-so-far weights and the best
    validation losses into the working directory.
    """
    separator = '-' * 50

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # the test part doubles as the validation set passed to fit()
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # column 0 is fed to the systole model, column 1 to the diastole model
    systole_train, diastole_train = y_train[:, 0], y_train[:, 1]
    systole_test, diastole_test = y_test[:, 0], y_test[:, 1]

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    # CRPS is evaluated every calc_crps-th iteration; 0 switches it off
    calc_crps = 1

    # lowest validation losses seen so far (later reused as sigmas for the submission)
    best_systole = sys.float_info.max
    best_diastole = sys.float_info.max

    print(separator)
    print('Training...')
    print(separator)

    for iteration in range(1, nb_iter + 1):
        print(separator)
        print('Iteration {0}/{1}'.format(iteration, nb_iter))
        print(separator)

        # fresh random augmentation for every pass over the data
        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(
            augmented, systole_train,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, systole_test))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            augmented, diastole_train,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, diastole_test))

        # last recorded loss values; they are used as sigmas when building the CDFs
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        crps_due = calc_crps > 0 and (iteration - 1) % calc_crps == 0
        if crps_due:
            print('Evaluating CRPS...')
            # predictions are made on the un-augmented training images
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # reference CDFs built from the true values
            true_train = np.concatenate((systole_train, diastole_train))
            cdf_train = real_to_cdf(true_train)
            true_test = np.concatenate((systole_test, diastole_test))
            cdf_test = real_to_cdf(true_test)

            # predicted CDFs, using the matching loss as sigma
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # CRPS on the training split
            cdf_pred_train = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
            crps_train = crps(cdf_train, cdf_pred_train)
            print('CRPS(train) = {0}'.format(crps_train))

            # CRPS on the test split
            cdf_pred_test = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
            crps_test = crps(cdf_test, cdf_pred_test)
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # always keep the most recent weights
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # additionally keep the weights of the best iteration per model
        if val_loss_systole < best_systole:
            best_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < best_diastole:
            best_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # rewrite the best validation losses (systole first, then diastole)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(best_systole) + '\n' + str(best_diastole))
コード例 #2
0
def train():
    """
    Train a single model that is fitted on systole and diastole targets jointly.

    The two target columns of ``y`` are each converted with
    ``from_values_to_step_probability`` (600 bins) and concatenated along
    axis 1, systole first. Every epoch the training images are re-augmented,
    one pass is made over the training minibatches with ``train_fn`` and one
    over the test minibatches with ``val_fn``. The current weights, the
    weights of the epoch with the lowest summed validation error, and that
    error are written to the working directory.

    CRPS evaluation is off by default (``calc_crps = 0``).
    """
    n_range = 600  # number of bins per target

    # Compile training and testing functions
    model, train_fn, val_fn, predict_fn = get_model()

    # Load training data
    print('Loading training data...')
    X, y = load_train_data()

    # The preprocess(X) call is disabled in this variant; the message is kept as-is.
    print('Pre-processing images...')

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)
    y_train_systole = from_values_to_step_probability(y_train[:, 0], n_range=n_range)
    y_train_diastole = from_values_to_step_probability(y_train[:, 1], n_range=n_range)
    y_test_systole = from_values_to_step_probability(y_test[:, 0], n_range=n_range)
    y_test_diastole = from_values_to_step_probability(y_test[:, 1], n_range=n_range)

    # concatenate systole and diastole outputs (systole bins first)
    y_train = np.concatenate((y_train_systole, y_train_diastole), axis=1)
    y_test = np.concatenate((y_test_systole, y_test_diastole), axis=1)

    nb_epoch = 200
    batch_size = 32
    calc_crps = 0  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    print('-'*50)
    print('Training...')
    print('-'*50)

    min_val_err = sys.float_info.max

    for i in range(nb_epoch):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_epoch))
        print('-'*50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        # In each epoch, we do a full pass over the training data:
        print('Fitting model...')
        train_err = 0
        train_batches = 0
        for inputs, targets in iterate_minibatches(X_train_aug, y_train, batch_size, shuffle=True):
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_batches = 0
        for inputs, targets in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
            val_err += val_fn(inputs, targets)
            val_batches += 1

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            train_pred = predict_fn(X_train)
            val_pred = predict_fn(X_test)

            # NOTE(review): assumes predict_fn returns (n_samples, 2 * n_range) with
            # the systole bins first, mirroring the target layout built above.
            # Confirm against get_model() before enabling calc_crps.
            pred_systole = train_pred[:, :n_range]
            pred_diastole = train_pred[:, n_range:]
            val_pred_systole = val_pred[:, :n_range]
            val_pred_diastole = val_pred[:, n_range:]

            # CDF for train and test data (the step-function targets)
            cdf_train = np.concatenate((y_train_systole, y_train_diastole))
            cdf_test = np.concatenate((y_test_systole, y_test_diastole))

            # CDF for train and test prediction
            pred_systole = np.cumsum(pred_systole, axis=1)
            pred_diastole = np.cumsum(pred_diastole, axis=1)
            val_pred_systole = np.cumsum(val_pred_systole, axis=1)
            val_pred_diastole = np.cumsum(val_pred_diastole, axis=1)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((pred_systole, pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((val_pred_systole, val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        np.savez('weights.hdf5.npz', *get_all_param_values(model))

        # for best (lowest) val losses, save weights
        if val_err < min_val_err:
            min_val_err = val_err
            np.savez('weights_best.hdf5.npz', *get_all_param_values(model))

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_err))
コード例 #3
0
def train():
    """
    Train the systole and diastole models, reporting progress through ``logging``.

    Each iteration re-augments the training images (rotation, then shift),
    fits both models for ``epochs_per_iter`` epoch(s), optionally logs CRPS
    for the train and test splits, saves the current and best-so-far weights
    under ``../models/weights/`` and rewrites ``./logs/val_loss.txt`` with the
    best validation losses.
    """
    separator = '-' * 50

    logging.info('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    logging.info('Loading training data...')
    X, y = load_train_data()

    logging.info('Pre-processing images...')
    X = preprocess(X)

    # the test part doubles as the validation set passed to fit()
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.15)

    # column 0 is fed to the systole model, column 1 to the diastole model
    systole_train, diastole_train = y_train[:, 0], y_train[:, 1]
    systole_test, diastole_test = y_test[:, 0], y_test[:, 1]

    nb_iter = 600
    epochs_per_iter = 1
    batch_size = 32
    # CRPS is evaluated every calc_crps-th iteration; 0 switches it off
    calc_crps = 1

    # lowest validation losses seen so far (later reused as sigmas for the submission)
    best_systole = sys.float_info.max
    best_diastole = sys.float_info.max

    logging.info(separator)
    logging.info('Training...')
    logging.info(separator)

    for iteration in range(1, nb_iter + 1):
        logging.info(separator)
        logging.info('Iteration {0}/{1}'.format(iteration, nb_iter))
        logging.info(separator)

        # fresh random augmentation for every pass over the data
        logging.info('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 20)
        logging.info('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        logging.info('Fitting systole model...')
        hist_systole = model_systole.fit(
            augmented, systole_train,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, systole_test))

        logging.info('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            augmented, diastole_train,
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, diastole_test))

        # last recorded loss values; they are used as sigmas when building the CDFs
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        crps_due = calc_crps > 0 and (iteration - 1) % calc_crps == 0
        if crps_due:
            logging.info('Evaluating CRPS...')
            # predictions are made on the un-augmented training images
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # reference CDFs built from the true values
            true_train = np.concatenate((systole_train, diastole_train))
            cdf_train = real_to_cdf(true_train)
            true_test = np.concatenate((systole_test, diastole_test))
            cdf_test = real_to_cdf(true_test)

            # predicted CDFs, using the matching loss as sigma
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # CRPS on the training split
            cdf_pred_train = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
            crps_train = crps(cdf_train, cdf_pred_train)
            logging.info('CRPS(train) = {0}'.format(crps_train))

            # CRPS on the test split
            cdf_pred_test = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
            crps_test = crps(cdf_test, cdf_pred_test)
            logging.info('CRPS(test) = {0}'.format(crps_test))

        logging.info('Saving weights...')
        # always keep the most recent weights
        model_systole.save_weights('../models/weights/weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('../models/weights/weights_diastole.hdf5', overwrite=True)

        # additionally keep the weights of the best iteration per model
        if val_loss_systole < best_systole:
            best_systole = val_loss_systole
            model_systole.save_weights('../models/weights/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < best_diastole:
            best_diastole = val_loss_diastole
            model_diastole.save_weights('../models/weights/weights_diastole_best.hdf5', overwrite=True)

        # rewrite the best validation losses (systole first, then diastole)
        with open('./logs/val_loss.txt', mode='w+') as f:
            f.write(str(best_systole) + '\n' + str(best_diastole))
コード例 #4
0
def train():
    """
    Train separate systole and diastole models with explicit minibatch loops.

    ``get_model()`` is called twice and each call yields a model together
    with its train, validation and predict functions. The two target columns
    are converted with ``from_values_to_step_probability`` (600 bins). Every
    epoch the training images are re-augmented, each model gets one pass over
    the training minibatches and one over the test minibatches, and the
    current weights, best-so-far weights and best summed validation errors
    are written to the working directory.
    """
    separator = '-' * 50

    # Build both models along with their compiled functions
    model_sys, train_fn_sys, val_fn_sys, predict_fn_sys = get_model()
    model_dia, train_fn_dia, val_fn_dia, predict_fn_dia = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    # The preprocess(X) call is disabled in this variant; the message is kept as-is.
    print('Pre-processing images...')

    # split, then turn each target column into its 600-bin step encoding
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)
    y_train_systole = from_values_to_step_probability(y_train[:, 0], n_range=600)
    y_train_diastole = from_values_to_step_probability(y_train[:, 1], n_range=600)
    y_test_systole = from_values_to_step_probability(y_test[:, 0], n_range=600)
    y_test_diastole = from_values_to_step_probability(y_test[:, 1], n_range=600)

    nb_epoch = 200
    batch_size = 32
    # CRPS is evaluated every calc_crps-th epoch; 0 switches it off
    calc_crps = 0

    print(separator)
    print('Training...')
    print(separator)

    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    for epoch in range(1, nb_epoch + 1):
        print(separator)
        print('Iteration {0}/{1}'.format(epoch, nb_epoch))
        print(separator)

        # fresh random augmentation for every epoch
        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # --- systole: one pass over the training data ...
        print('Fitting systole model...')
        train_err_sys = 0
        n_train_batches_sys = 0
        for inputs, targets in iterate_minibatches(augmented, y_train_systole, batch_size, shuffle=True):
            train_err_sys += train_fn_sys(inputs, targets)
            n_train_batches_sys += 1

        # ... followed by one pass over the validation data
        val_err_sys = 0
        n_val_batches_sys = 0
        for inputs, targets in iterate_minibatches(X_test, y_test_systole, batch_size, shuffle=False):
            val_err_sys += val_fn_sys(inputs, targets)
            n_val_batches_sys += 1

        print('Systole model error evaluation : {0}'.format(val_err_sys))

        # --- diastole: same two passes with the second model
        print('Fitting diastole model...')
        train_err_dia = 0
        n_train_batches_dia = 0
        for inputs, targets in iterate_minibatches(augmented, y_train_diastole, batch_size, shuffle=True):
            train_err_dia += train_fn_dia(inputs, targets)
            n_train_batches_dia += 1

        val_err_dia = 0
        n_val_batches_dia = 0
        for inputs, targets in iterate_minibatches(X_test, y_test_diastole, batch_size, shuffle=False):
            val_err_dia += val_fn_dia(inputs, targets)
            n_val_batches_dia += 1

        print('Diastole model error evaluation : {0}'.format(val_err_dia))

        crps_due = calc_crps > 0 and (epoch - 1) % calc_crps == 0
        if crps_due:
            print('Evaluating CRPS...')
            raw_train_sys = predict_fn_sys(X_train)
            raw_train_dia = predict_fn_dia(X_train)
            raw_test_sys = predict_fn_sys(X_test)
            raw_test_dia = predict_fn_dia(X_test)

            # reference CDFs: the step encodings, systole rows followed by diastole rows
            cdf_train = np.concatenate((y_train_systole, y_train_diastole))
            cdf_test = np.concatenate((y_test_systole, y_test_diastole))

            # predicted CDFs: running sum of each prediction row
            pred_systole = np.cumsum(raw_train_sys, axis=1)
            pred_diastole = np.cumsum(raw_train_dia, axis=1)
            val_pred_systole = np.cumsum(raw_test_sys, axis=1)
            val_pred_diastole = np.cumsum(raw_test_dia, axis=1)

            # CRPS on the training split
            stacked_train = np.concatenate((pred_systole, pred_diastole))
            crps_train = crps(cdf_train, stacked_train)
            print('CRPS(train) = {0}'.format(crps_train))

            # CRPS on the test split
            stacked_test = np.concatenate((val_pred_systole, val_pred_diastole))
            crps_test = crps(cdf_test, stacked_test)
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # always keep the most recent parameters
        np.savez('weights_systole.hdf5.npz', *get_all_param_values(model_sys))
        np.savez('weights_diastole.hdf5.npz', *get_all_param_values(model_dia))

        # additionally keep the parameters of the best epoch per model
        if val_err_sys < best_val_systole:
            best_val_systole = val_err_sys
            np.savez('weights_systole_best.hdf5.npz', *get_all_param_values(model_sys))

        if val_err_dia < best_val_diastole:
            best_val_diastole = val_err_dia
            np.savez('weights_diastole_best.hdf5.npz', *get_all_param_values(model_dia))

        # rewrite the best validation errors (systole first, then diastole)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(best_val_systole) + '\n' + str(best_val_diastole))
コード例 #5
0
ファイル: train.py プロジェクト: tfjgeorge/kaggle-heart
def train():
    """
    Train a single model with early stopping on the summed validation error.

    Every epoch the training images are re-augmented (rotation, then shift),
    one pass is made over the training minibatches with ``train_fn`` and one
    over the test minibatches with ``val_fn``. The parameters are saved to
    ``weights_best.npz`` whenever the validation error improves, every
    epoch's validation error is appended to ``val_loss.txt``, and training
    stops once 8 consecutive epochs bring no improvement.
    """
    separator = '-' * 50

    # Build the model along with its compiled functions
    model, train_fn, val_fn, predict_fn = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    # (pre-processing is not applied in this variant)

    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_epoch = 200
    batch_size = 32
    # placeholder for periodic CRPS evaluation, which this variant does not implement
    calc_crps = 0

    print(separator)
    print('Training...')
    print(separator)

    best_val_err = sys.float_info.max
    stale_epochs = 0  # epochs in a row without a new best validation error
    for epoch in range(1, nb_epoch + 1):
        print(separator)
        print('Iteration {0}/{1}'.format(epoch, nb_epoch))
        print(separator)

        # fresh random augmentation for every epoch
        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # one pass over the training data
        print('Fitting model...')
        train_err = 0
        n_train_batches = 0
        for inputs, targets in iterate_minibatches(augmented, y_train, batch_size, shuffle=True):
            train_err += train_fn(inputs, targets)
            n_train_batches += 1

        # one pass over the validation data
        val_err = 0
        n_val_batches = 0
        for inputs, targets in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
            val_err += val_fn(inputs, targets)
            n_val_batches += 1

        print('Saving weights...')
        # only the best parameters are stored; the patience counter restarts on improvement
        improved = val_err < best_val_err
        if not improved:
            stale_epochs += 1
        else:
            stale_epochs = 0
            best_val_err = val_err
            np.savez('weights_best.npz', *get_all_param_values(model))

        print('error on validation set: ' + str(val_err))
        print('patience variable is: ' + str(stale_epochs))
        print('\n')

        # append this epoch's validation error to the running log
        with open('val_loss.txt', mode='a') as f:
            f.write(str(val_err) + '\n')

        # early stopping
        if stale_epochs == 8:
            break
コード例 #6
0
def train():
    """
    Train the systole and diastole models for a fixed number of iterations.

    Per iteration: augment the training images, fit each model for
    ``epochs_per_iter`` epoch(s), optionally print CRPS for both splits, save
    the latest weights, save the best-so-far weights when the validation loss
    improves, and rewrite ``val_loss.txt`` with the two best validation losses.
    """
    bar = '-' * 50

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # evaluate CRPS every calc_crps-th iteration; 0 disables it
    lowest_val_systole = sys.float_info.max
    lowest_val_diastole = sys.float_info.max

    print(bar)
    print('Training...')
    print(bar)

    for it in range(nb_iter):
        print(bar)
        print('Iteration {0}/{1}'.format(it + 1, nb_iter))
        print(bar)

        print('Augmenting images - rotations')
        rotated = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(rotated, 0.1, 0.1)

        print('Fitting systole model...')
        systole_validation = (X_test, y_test[:, 0])
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0],
                                         shuffle=True,
                                         nb_epoch=epochs_per_iter,
                                         batch_size=batch_size,
                                         validation_data=systole_validation)

        print('Fitting diastole model...')
        diastole_validation = (X_test, y_test[:, 1])
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1],
                                           shuffle=True,
                                           nb_epoch=epochs_per_iter,
                                           batch_size=batch_size,
                                           validation_data=diastole_validation)

        # most recent loss values of each fit; passed to real_to_cdf below
        systole_history = hist_systole.history
        diastole_history = hist_diastole.history
        loss_systole = systole_history['loss'][-1]
        loss_diastole = diastole_history['loss'][-1]
        val_loss_systole = systole_history['val_loss'][-1]
        val_loss_diastole = diastole_history['val_loss'][-1]

        if calc_crps > 0 and it % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDFs of the true values (systole entries followed by diastole entries)
            train_truth = np.concatenate((y_train[:, 0], y_train[:, 1]))
            cdf_train = real_to_cdf(train_truth)
            test_truth = np.concatenate((y_test[:, 0], y_test[:, 1]))
            cdf_test = real_to_cdf(test_truth)

            # CDFs of the predictions
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            train_cdfs = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
            crps_train = crps(cdf_train, train_cdfs)
            print('CRPS(train) = {0}'.format(crps_train))

            test_cdfs = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
            crps_test = crps(cdf_test, test_cdfs)
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # latest weights
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # best weights, per model
        if val_loss_systole < lowest_val_systole:
            lowest_val_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < lowest_val_diastole:
            lowest_val_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # best validation losses, systole on the first line and diastole on the second
        with open('val_loss.txt', mode='w+') as f:
            f.write('\n'.join((str(lowest_val_systole), str(lowest_val_diastole))))
コード例 #7
0
def train():
    """
    Continue training ONE model that is fitted on both target columns at once.

    The model is built with ``get_model3()`` and starts from the weights in
    ``weights_systole_best.hdf5``. The training images are augmented once,
    before the loop. Each iteration fits the model for ``epochs_per_iter``
    epoch(s), appends the loss values to text files, dumps predictions on
    every ``calc``-th iteration, saves the latest weights, saves the best
    weights when the validation loss improves, and rewrites
    ``min_val_loss.txt``.
    """
    def dump_line(path, value, mode='a'):
        # write str(value) followed by a newline to the given file
        with open(path, mode=mode) as f:
            f.write(str(value) + '\n')

    separator = '-' * 50

    print('Loading and compiling models...')
    model_systole = get_model3()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # keep a record of the test targets
    dump_line('y_test.txt', y_test, mode='w+')

    nb_iter = 20
    epochs_per_iter = 1
    batch_size = 32

    # predictions are dumped every calc-th iteration (0 disables this)
    calc = 4

    # augmentation happens once here, not per iteration
    print('Augmenting images - rotations')
    X_train_aug = rotation_augmentation(X_train, 15)

    print('Augmenting images - shifts')
    X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

    # lowest validation loss seen so far
    best_val_loss = sys.float_info.max

    print(separator)
    print('Training...')
    print(separator)

    for iteration in range(1, nb_iter + 1):
        print(separator)
        print('Iteration {0}/{1}'.format(iteration, nb_iter))
        print(separator)

        print('Fitting diastole/systole model...')
        hist_systole = model_systole.fit(
            X_train_aug, y_train[:, :],
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, y_test[:, :]))

        history = hist_systole.history

        # last recorded value of each loss series
        loss_last = history['loss'][-1]
        val_loss_last = history['val_loss'][-1]

        # full copies of both loss series of this fit() call
        loss = history['loss'][:]
        val_loss = history['val_loss'][:]

        dump_line('loss_last.txt', loss_last)
        dump_line('val_loss_last.txt', val_loss_last)
        dump_line('loss.txt', loss)
        dump_line('val_loss.txt', val_loss)

        predictions_due = calc > 0 and (iteration - 1) % calc == 0
        if predictions_due:
            print('Getting predictions...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)

            # append the string form of both prediction arrays
            dump_line('pred_systole.txt', pred_systole)
            dump_line('val_pred_systole.txt', val_pred_systole)

        # latest weights, so they can be loaded later
        print('Saving weights...')
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)

        # best weights, whenever the validation loss improves
        if val_loss_last < best_val_loss:
            best_val_loss = val_loss_last
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        # rewrite the best validation loss
        dump_line('min_val_loss.txt', best_val_loss, mode='w+')
コード例 #8
0
def train():
    """
    Training systole and diastole models.

    Each iteration re-augments the training images, fits both models for
    ``epochs_per_iter`` epoch(s), optionally evaluates CRPS, saves the latest
    and best-so-far weights plus the best validation losses under ``MODELS``,
    appends one row of RMSE/CRPS values to ``STATS + 'RMSE_CRPS.txt'`` and
    calls ``write_images(STATS)``. After every 100th iteration the best
    weights and ``val_loss.txt`` are copied into a submission folder and
    ``submission.py`` is run on it.

    Iterations in which CRPS is not evaluated record ``nan`` in the CRPS
    columns of the stats file.
    """
    print('Loading and compiling models...')
    model_systole = get_model(img_size)
    model_diastole = get_model(img_size)

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 300
    epochs_per_iter = 1
    batch_size = 64
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    if not os.path.exists(STATS):
        os.makedirs(STATS)

    # start a fresh stats file with a header row
    with open(STATS + 'RMSE_CRPS.txt', 'w') as f:
        names = ['train_RMSE_d', 'train_RMSE_s', 'test_RMSE_d', 'test_RMSE_s', 'train_crps', 'test_crps']
        f.write('\t'.join(names) + '\n')

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        # Reset per iteration so the stats row below never hits an undefined
        # name (calc_crps == 0) or repeats a stale value (calc_crps > 1).
        crps_train = float('nan')
        crps_test = float('nan')

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter,
                                           batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')

        # save weights so they can be loaded later
        model_systole.save_weights(MODELS + 'weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights(MODELS + 'weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open(MODELS + 'val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        with open(STATS + 'RMSE_CRPS.txt', 'a') as f:
            # train_RMSE_d train_RMSE_s test_RMSE_d test_RMSE_s train_crps test_crps
            rmse_values = [loss_diastole, loss_systole, val_loss_diastole, val_loss_systole]
            crps_values = [crps_train, crps_test]
            f.write('\t'.join([str(val) for val in rmse_values + crps_values]) + '\n')

        print('Saving stats images...')
        write_images(STATS)

        # every 100th iteration: snapshot the best weights and generate a submission
        if (i + 1) % 100 == 0:
            print('Submitting learned model....')
            SUBMISSION_FOLDER = SUBMISSION + preproc_type + "/" + model_name + "/" + get_name() + "_ITERS" + str(i + 1) + "/"
            if not os.path.exists(SUBMISSION_FOLDER):
                os.makedirs(SUBMISSION_FOLDER)
            copyfile(MODELS + 'weights_systole_best.hdf5', SUBMISSION_FOLDER + 'weights_systole_best.hdf5')
            copyfile(MODELS + 'weights_diastole_best.hdf5', SUBMISSION_FOLDER + 'weights_diastole_best.hdf5')
            copyfile(MODELS + 'val_loss.txt', SUBMISSION_FOLDER + 'val_loss.txt')
            os.system('python submission.py %s %s %s' % (preproc_type, model_name, SUBMISSION_FOLDER))
コード例 #9
0
def train():
    """
    Train the systole and diastole models.

    Loads and pre-processes the training data, holds out a validation split
    (``split_ratio=0.2``) and then runs ``nb_iter`` iterations.  Each
    iteration re-augments the training images (rotation, then shift), fits
    both models for ``epochs_per_iter`` epochs and, every ``calc_crps``-th
    iteration, prints the predictions and the CRPS on the training and
    validation data.

    Files written to the current working directory:

    * ``training_systole.log`` / ``training_diastole.log`` -- ``CSVLogger``
      output, appended to on every fit.
    * ``sysloss_history.txt`` / ``dialoss_history.txt`` -- training loss of
      every epoch run so far, one value per line.
    * ``weights_systole.hdf5`` / ``weights_diastole.hdf5`` -- weights after
      the most recent iteration.
    * ``weights_systole_best.hdf5`` / ``weights_diastole_best.hdf5`` --
      weights of the iteration with the lowest validation loss so far.
    * ``val_loss.txt`` -- lowest validation loss of the systole model, then
      of the diastole model (rewritten every iteration).
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # The number of iterations used to be 200.
    nb_iter = 250

    epochs_per_iter = 1

    # The batch size used to be 32; it was lowered to 12 for test #3 because
    # the processing runs on a laptop.
    batch_size = 12
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    # Training losses of every epoch run so far.  They are accumulated here
    # because each fit() call only reports the losses of its own epochs;
    # saving that per-call history alone would overwrite the history files
    # with nothing but the latest values.
    sysloss_history = []
    dialoss_history = []

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        # append=True keeps the log rows of earlier iterations.
        csv_logger_diastole = CSVLogger('training_diastole.log',
                                        append=True,
                                        separator=';')
        csv_logger_systole = CSVLogger('training_systole.log',
                                       append=True,
                                       separator=';')

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug,
                                         y_train[:, 0],
                                         shuffle=True,
                                         nb_epoch=epochs_per_iter,
                                         batch_size=batch_size,
                                         validation_data=(X_test, y_test[:, 0]),
                                         callbacks=[csv_logger_systole])

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug,
                                           y_train[:, 1],
                                           shuffle=True,
                                           nb_epoch=epochs_per_iter,
                                           batch_size=batch_size,
                                           validation_data=(X_test, y_test[:, 1]),
                                           callbacks=[csv_logger_diastole])

        # Extend the running histories and rewrite the files with everything
        # collected so far, so they always hold the complete loss curve.
        dialoss_history.extend(hist_diastole.history["loss"])
        sysloss_history.extend(hist_systole.history["loss"])
        np.savetxt("dialoss_history.txt",
                   np.array(dialoss_history),
                   delimiter=",")
        np.savetxt("sysloss_history.txt",
                   np.array(sysloss_history),
                   delimiter=",")

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train,
                                                 batch_size=batch_size,
                                                 verbose=1)
            pred_diastole = model_diastole.predict(X_train,
                                                   batch_size=batch_size,
                                                   verbose=1)
            val_pred_systole = model_systole.predict(X_test,
                                                     batch_size=batch_size,
                                                     verbose=1)
            val_pred_diastole = model_diastole.predict(X_test,
                                                       batch_size=batch_size,
                                                       verbose=1)

            # The four prediction arrays above are the results; dump them for
            # inspection.
            print("Pred_diastole:")
            print(pred_diastole)

            print("Pred_systole:")
            print(pred_systole)

            print("Val_pred_sysyole:")
            print(val_pred_systole)

            print("Val_pred_diastole:")
            print(val_pred_diastole)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                               val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                                val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(
                cdf_train,
                np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

            # NOTE: plotting of the results was planned at this point but is
            # not implemented.

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5',
                                       overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5',
                                        overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
コード例 #10
0
ファイル: train.py プロジェクト: tfjgeorge/kaggle-heart
def train():
    """
    Fit the model with per-epoch augmentation and early stopping.

    Runs at most 200 epochs.  In every epoch the training images are
    re-augmented (rotation, then shift), the model is trained on all
    minibatches and the validation error is summed over all validation
    minibatches.  Whenever that summed error is a new minimum, the model
    parameters are stored in ``weights_best.npz``.  The summed validation
    error of every epoch is appended to ``val_loss.txt``.  Training stops
    early after 8 consecutive epochs without a new minimum.
    """

    # Build the network and its compiled training / validation / prediction
    # functions (the prediction function is not used here).
    model, train_fn, val_fn, predict_fn = get_model()

    # Load training data; image pre-processing is not applied in this variant.
    print('Loading training data...')
    X, y = load_train_data()

    # Hold out part of the data for validation.
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    total_epochs = 200
    minibatch_size = 32
    calc_crps = 0  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed) NOT IMPLEMENTED YET

    separator = '-' * 50
    print(separator)
    print('Training...')
    print(separator)

    best_val_err = sys.float_info.max
    stale_epochs = 0  # epochs since the validation error last improved
    for epoch in range(1, total_epochs + 1):
        print(separator)
        print('Iteration {0}/{1}'.format(epoch, total_epochs))
        print(separator)

        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # One full pass over the (augmented) training data.  The summed
        # training error is accumulated but currently not reported.
        print('Fitting model...')
        train_err = 0
        for inputs, targets in iterate_minibatches(augmented, y_train,
                                                   minibatch_size,
                                                   shuffle=True):
            train_err += train_fn(inputs, targets)

        # One full pass over the validation data, summing the batch errors.
        val_err = 0
        for inputs, targets in iterate_minibatches(X_test, y_test,
                                                   minibatch_size,
                                                   shuffle=False):
            val_err += val_fn(inputs, targets)

        print('Saving weights...')
        # Only the best parameters (lowest summed validation error) are kept.
        improved = val_err < best_val_err
        if not improved:
            stale_epochs += 1
        else:
            stale_epochs = 0
            best_val_err = val_err
            np.savez('weights_best.npz', *get_all_param_values(model))

        print('error on validation set: ' + str(val_err))
        print('patience variable is: ' + str(stale_epochs))
        print('\n')

        # Append this epoch's validation error to the log file.
        with open('val_loss.txt', mode='a') as log_file:
            log_file.write(str(val_err) + '\n')

        # Early stopping: give up after 8 epochs in a row without improvement.
        if stale_epochs == 8:
            break