Example #1
0
def submission():
    """
    Generate submission file for the trained models.

    Loads the systole and diastole weights, predicts on the validation data,
    turns the predictions into CDFs (the two values stored in 'val_loss.txt'
    are passed to real_to_cdf as sigmas) and writes 'submission.csv' with one
    row per data row of 'data/sample_submission_validate.csv'.

    Rows whose study id has no accumulated prediction contain only the id,
    and a 'Miss <id>' message is printed for them.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole23.hdf5')
    model_diastole.load_weights('weights_diastole50.hdf5')

    # load val losses to use as sigmas for CDF
    # (first line: systole, second line: diastole)
    with open('val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids = load_validation_data()

    # NOTE: X is fed to the models exactly as returned by
    # load_validation_data(); preprocess() is not applied here.

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('data/sample_submission_validate.csv') as sample_file:
        with open('submission.csv', 'w') as out_file:
            fi = csv.reader(sample_file)
            fo = csv.writer(out_file, lineterminator='\n')
            # Copy the header. next(fi) works on Python 2 and 3, whereas
            # fi.next() only exists on Python 2.
            fo.writerow(next(fi))
            for line in fi:
                # ids look like '<study>_<target>'
                idx = line[0]
                key, target = idx.split('_')
                key = int(key)
                out = [idx]
                if key in sub_systole:
                    if target == 'Diastole':
                        out.extend(list(sub_diastole[key][0]))
                    else:
                        out.extend(list(sub_systole[key][0]))
                else:
                    print('Miss {0}'.format(idx))
                fo.writerow(out)

    print('Done.')
Example #2
0
def train():
    """
    Fit the systole and diastole models for a fixed number of iterations.

    Each iteration re-augments the training images (rotation, then shift),
    trains both models for ``epochs_per_iter`` epoch(s), optionally prints the
    CRPS on the training and held-out data, saves the current weights, and
    keeps the weights of the iteration with the lowest validation loss.
    The lowest validation losses are rewritten to 'val_loss.txt' every
    iteration (systole on the first line, diastole on the second).
    """
    separator = '-'*50

    print('Loading and compiling models...')
    net_systole = get_model()
    net_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # hold out part of the data for validation
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # CRPS is evaluated every calc_crps-th iteration; 0 disables it

    # lowest validation losses seen so far, later used as sigmas for submission
    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    for text in (separator, 'Training...', separator):
        print(text)

    # keyword arguments shared by both fit() calls
    fit_options = dict(shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size)

    for step in range(1, nb_iter + 1):
        for text in (separator, 'Iteration {0}/{1}'.format(step, nb_iter), separator):
            print(text)

        print('Augmenting images - rotations')
        X_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_aug = shift_augmentation(X_aug, 0.1, 0.1)

        # column 0 of y holds the systole target, column 1 the diastole target
        print('Fitting systole model...')
        hist_systole = net_systole.fit(X_aug, y_train[:, 0],
                                       validation_data=(X_test, y_test[:, 0]),
                                       **fit_options)

        print('Fitting diastole model...')
        hist_diastole = net_diastole.fit(X_aug, y_train[:, 1],
                                         validation_data=(X_test, y_test[:, 1]),
                                         **fit_options)

        # the last-epoch losses double as sigmas for the predicted CDFs
        systole_log = hist_systole.history
        diastole_log = hist_diastole.history
        loss_systole, loss_diastole = systole_log['loss'][-1], diastole_log['loss'][-1]
        val_loss_systole = systole_log['val_loss'][-1]
        val_loss_diastole = diastole_log['val_loss'][-1]

        if calc_crps > 0 and (step - 1) % calc_crps == 0:
            print('Evaluating CRPS...')
            train_pred_sys = net_systole.predict(X_train, batch_size=batch_size, verbose=1)
            train_pred_dia = net_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            test_pred_sys = net_systole.predict(X_test, batch_size=batch_size, verbose=1)
            test_pred_dia = net_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # reference CDFs built from the true targets (systole first, then diastole)
            true_cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            true_cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # predicted CDFs, in the same systole-then-diastole order
            train_cdfs = (real_to_cdf(train_pred_sys, loss_systole),
                          real_to_cdf(train_pred_dia, loss_diastole))
            test_cdfs = (real_to_cdf(test_pred_sys, val_loss_systole),
                         real_to_cdf(test_pred_dia, val_loss_diastole))

            train_score = crps(true_cdf_train, np.concatenate(train_cdfs))
            print('CRPS(train) = {0}'.format(train_score))

            test_score = crps(true_cdf_test, np.concatenate(test_cdfs))
            print('CRPS(test) = {0}'.format(test_score))

        print('Saving weights...')
        # always keep the latest weights so they can be loaded later
        net_systole.save_weights('weights_systole.hdf5', overwrite=True)
        net_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # additionally keep the weights of the best (lowest val. loss) iteration
        if val_loss_systole < best_val_systole:
            best_val_systole = val_loss_systole
            net_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < best_val_diastole:
            best_val_diastole = val_loss_diastole
            net_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # persist the best val losses (read back when generating the submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write('\n'.join((str(best_val_systole), str(best_val_diastole))))
Example #3
0
def train(data_prefix, prefix, seed, run):
    """
    Training systole and diastole models.

    :param data_prefix: forwarded unchanged to load_train_data()
    :param prefix: string prepended to every output file name
        (weight checkpoints and 'val_loss.txt')
    :param seed: forwarded unchanged to load_train_data()
    :param run: selects what is trained: 0 trains both models and evaluates
        CRPS, 1 trains only the systole model, 2 trains only the diastole
        model. For any other value nothing is trained and an empty
        'val_loss.txt' is written.
    """
    print('Loading training data...')
    X, y = load_train_data(data_prefix, seed)

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    batch_size = 32
    calc_crps = 1  # set to 0 if CRPS estimation is not needed

    train_systole = run == 0 or run == 1
    train_diastole = run == 0 or run == 2

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    # Augmentation only: rotations, shifts and flips. All normalisation
    # options are switched off.
    datagen = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=15,  # degrees
        width_shift_range=0.1,  # fraction of total width
        height_shift_range=0.1,  # fraction of total height
        horizontal_flip=True,
        vertical_flip=True)

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Per model: one checkpoint written every epoch and one that is only
    # overwritten when the monitored value improves.
    systole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_systole_best.hdf5",
        verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(
        filepath=prefix + "weights_diastole_best.hdf5",
        verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_systole.hdf5",
        verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(
        filepath=prefix + "weights_diastole.hdf5",
        verbose=1, save_best_only=False)

    # NOTE(review): both models are fitted against target column 2, while the
    # CRPS evaluation below compares against columns 0 and 1 -- confirm that
    # this is intended.
    if train_systole:
        print('Fitting Systole Shapes')
        hist_systole = model_systole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[systole_checkpointer, systole_checkpointer_best],
            nb_worker=4)

    if train_diastole:
        print('Fitting Diastole Shapes')
        hist_diastole = model_diastole.fit_generator(
            datagen.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[diastole_checkpointer, diastole_checkpointer_best],
            nb_worker=4)

    # CRPS needs both models, hence only for run == 0
    if calc_crps > 0 and run == 0:
        # last-epoch losses are passed to real_to_cdf as sigmas
        loss_systole = hist_systole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train,
                                             batch_size=batch_size,
                                             verbose=1)
        val_pred_systole = model_systole.predict(X_test,
                                                 batch_size=batch_size,
                                                 verbose=1)

        pred_diastole = model_diastole.predict(X_train,
                                               batch_size=batch_size,
                                               verbose=1)
        val_pred_diastole = model_diastole.predict(X_test,
                                                   batch_size=batch_size,
                                                   verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)

        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                            val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(
            cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(
            cdf_test,
            np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file (to be later used for generating
    # submission). With run == 2 the file holds only the diastole loss, on
    # its first line.
    with open(prefix + 'val_loss.txt', mode='w+') as f:
        if train_systole:
            f.write(str(min(hist_systole.history['val_loss'])))
            f.write('\n')
        if train_diastole:
            f.write(str(min(hist_diastole.history['val_loss'])))
Example #4
0
def train():
    """
    Iteratively train the systole and diastole models, reporting via logging.

    Per iteration: augment the training images (rotation, then shift), fit
    both models for ``epochs_per_iter`` epoch(s), optionally log the CRPS on
    training and held-out data, store the current weights, and store the
    weights of the best (lowest validation loss) iteration separately.
    The best validation losses are rewritten to './logs/val_loss.txt' on
    every iteration (systole first, diastole second).
    """
    log = logging.info
    rule = '-'*50

    log('Loading and compiling models...')
    systole_model = get_model()
    diastole_model = get_model()

    log('Loading training data...')
    X, y = load_train_data()

    log('Pre-processing images...')
    X = preprocess(X)

    # keep part of the data aside for validation
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.15)

    nb_iter = 600
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # evaluate CRPS every n-th iteration (0 switches it off)

    # best validation losses so far; they become the sigmas for submission
    lowest_val_systole = sys.float_info.max
    lowest_val_diastole = sys.float_info.max

    log(rule)
    log('Training...')
    log(rule)

    weights_dir = '../models/weights/'

    for i in range(nb_iter):
        log(rule)
        log('Iteration {0}/{1}'.format(i + 1, nb_iter))
        log(rule)

        log('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 20)
        log('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # y[:, 0] is the systole target, y[:, 1] the diastole target
        log('Fitting systole model...')
        systole_history = systole_model.fit(
            augmented, y_train[:, 0],
            shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size,
            validation_data=(X_test, y_test[:, 0])).history

        log('Fitting diastole model...')
        diastole_history = diastole_model.fit(
            augmented, y_train[:, 1],
            shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size,
            validation_data=(X_test, y_test[:, 1])).history

        # last-epoch loss values, reused as sigmas for the predicted CDFs
        loss_systole = systole_history['loss'][-1]
        loss_diastole = diastole_history['loss'][-1]
        val_loss_systole = systole_history['val_loss'][-1]
        val_loss_diastole = diastole_history['val_loss'][-1]

        crps_due = calc_crps > 0 and i % calc_crps == 0
        if crps_due:
            log('Evaluating CRPS...')
            predict_options = dict(batch_size=batch_size, verbose=1)
            sys_on_train = systole_model.predict(X_train, **predict_options)
            dia_on_train = diastole_model.predict(X_train, **predict_options)
            sys_on_test = systole_model.predict(X_test, **predict_options)
            dia_on_test = diastole_model.predict(X_test, **predict_options)

            # CDFs of the true targets (systole followed by diastole)
            reference_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            reference_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDFs of the predictions
            cdf_sys_train = real_to_cdf(sys_on_train, loss_systole)
            cdf_dia_train = real_to_cdf(dia_on_train, loss_diastole)
            cdf_sys_test = real_to_cdf(sys_on_test, val_loss_systole)
            cdf_dia_test = real_to_cdf(dia_on_test, val_loss_diastole)

            # CRPS on the training data
            score_train = crps(reference_train, np.concatenate((cdf_sys_train, cdf_dia_train)))
            log('CRPS(train) = {0}'.format(score_train))

            # CRPS on the held-out data
            score_test = crps(reference_test, np.concatenate((cdf_sys_test, cdf_dia_test)))
            log('CRPS(test) = {0}'.format(score_test))

        log('Saving weights...')
        # latest weights, overwritten every iteration
        systole_model.save_weights(weights_dir + 'weights_systole.hdf5', overwrite=True)
        diastole_model.save_weights(weights_dir + 'weights_diastole.hdf5', overwrite=True)

        # weights of the best iteration so far, per model
        if val_loss_systole < lowest_val_systole:
            lowest_val_systole = val_loss_systole
            systole_model.save_weights(weights_dir + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < lowest_val_diastole:
            lowest_val_diastole = val_loss_diastole
            diastole_model.save_weights(weights_dir + 'weights_diastole_best.hdf5', overwrite=True)

        # store the best val losses (used later when generating the submission)
        with open('./logs/val_loss.txt', mode='w+') as f:
            f.write(str(lowest_val_systole) + '\n' + str(lowest_val_diastole))
Example #5
0
# Script fragment: restore previously saved best weights (when present),
# predict on a fixed ten-sample slice of the training data and dump the
# predicted and ground-truth CDFs to .npy files.
# model_systole / model_diastole are expected to be defined before this point.
if os.path.isfile('/data/run2/weights_systole_best.hdf5'):
    print('loading weights')
    model_systole.load_weights('/data/run2/weights_systole_best.hdf5')

# Same for the diastole model (no message is printed in this case).
if os.path.isfile('/data/run2/weights_diastole_best.hdf5'):
    model_diastole.load_weights('/data/run2/weights_diastole_best.hdf5')

print('Loading training data...')
X, y, metadata = load_train_data()

# Debug output: shapes produced by three ways of indexing the first metadata row.
print(metadata[0, :].shape, metadata[0].shape, metadata[0:1, :].shape)
# Predict on samples 40..49 only. The models take a dict with the image input
# ('input1'), the metadata input ('input2') and the target ('output'), and
# return a dict from which the 'output' entry is read.
pred_systole = model_systole.predict({'input1':X[40:50, :, : , :], 'input2':metadata[40:50, :], 'output':y[40:50, 0]})['output']
pred_diastole = model_diastole.predict({'input1':X[40:50,:,:,:], 'input2':metadata[40:50, :], 'output':y[40:50, 1]})['output']

# Ground-truth CDFs for the same ten samples (column 0 targets followed by
# column 1 targets); real_to_cdf is called without a second argument here --
# presumably yielding a step function, TODO confirm against real_to_cdf.
cdf_train = real_to_cdf(np.concatenate((y[40:50, 0], y[40:50, 1])))

# CDF for predicted data
# NOTE(review): the second arguments are hard-coded -- presumably sigmas taken
# from the losses of an earlier training run; confirm before reuse.
cdf_pred_systole = real_to_cdf(pred_systole, 27.7407048805)
cdf_pred_diastole = real_to_cdf(pred_diastole, 38.5512729527)


# Persist the results to the current working directory.
np.save('systolecdf.npy', cdf_pred_systole)
np.save('diastolecdf.npy', cdf_pred_diastole)
np.save('gt.npy', cdf_train)




# print(cdf_pred_diastole)
# # evaluate CRPS on training data
Example #6
0
def build_submission(config):
    """
    Predict on the validation data and write the submission CSV.

    :param config: object providing the attributes read below:
        ``systole_weights`` / ``diastole_weights`` (weight files to load),
        ``val_loss_systole`` / ``val_loss_diastole`` (text files whose first
        line is the value passed to real_to_cdf as sigma) and ``submission``
        (path of the CSV file to write).

    One row is written per data row of '/data/sample_submission_validate.csv'.
    Rows whose study id has no accumulated prediction contain only the id,
    and a 'Miss <id>' message is printed for them.
    """
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')

    model_systole.load_weights(config.systole_weights)
    model_diastole.load_weights(config.diastole_weights)

    # load val losses to use as sigmas for CDF
    with open(config.val_loss_systole, 'r') as f:
        val_loss_systole = float(f.readline())

    with open(config.val_loss_diastole, 'r') as f:
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids, mult = load_validation_data()

    batch_size = 32
    print('Predicting on validation data...')

    pred_normed_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_normed_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    print('Normed_systole:', pred_normed_systole.shape)
    print('Normed_diastole:', pred_normed_diastole.shape)

    print('mult:', mult.shape)

    # Scale the first output column of each model by the per-sample
    # multiplier returned by load_validation_data().
    pred_systole = pred_normed_systole[:, 0] * mult
    pred_diastole = pred_normed_diastole[:, 0] * mult

    print('systole:', pred_systole.shape)
    print('diastole:', pred_diastole.shape)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('/data/sample_submission_validate.csv') as sample_file:
        with open(config.submission, 'w') as out_file:
            fi = csv.reader(sample_file)
            fo = csv.writer(out_file, lineterminator='\n')
            fo.writerow(next(fi))  # copy the header row
            for line in fi:
                # ids look like '<study>_<target>'
                idx = line[0]
                key, target = idx.split('_')
                key = int(key)
                out = [idx]
                if key in sub_systole:
                    if target == 'Diastole':
                        out.extend(list(sub_diastole[key][0]))
                    else:
                        out.extend(list(sub_systole[key][0]))
                else:
                    print('Miss {0}'.format(idx))
                fo.writerow(out)

    print('Done.')
Example #7
0
def train():
    """
    Training systole and diastole models.

    Both models take two inputs (images as 'input1', metadata as 'input2')
    and are fed through dicts that also carry the target as 'output'.
    Existing best weights under '/data/run2/' are loaded before training.

    Per iteration the best weights and best validation losses are saved and
    one tab-separated line is appended to '/data/run2/loss.txt':
    iteration, loss (systole, diastole), val loss (systole, diastole),
    CRPS (train, test). CRPS is only evaluated every ``calc_crps``-th
    iteration; for all other iterations 'nan' is logged in its place.
    """
    def graph_data(images, meta, target):
        # Build the name -> array dict used by fit() and predict().
        return {'input1': images, 'input2': meta, 'output': target}

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # import best model if it exists
    if os.path.isfile('/data/run2/weights_systole_best.hdf5'):
        print('loading weights')
        model_systole.load_weights('/data/run2/weights_systole_best.hdf5')

    if os.path.isfile('/data/run2/weights_diastole_best.hdf5'):
        model_diastole.load_weights('/data/run2/weights_diastole_best.hdf5')

    print('Loading training data...')
    X, y, metadata = load_train_data()

    # split to training and test
    X_train, y_train, X_test, y_test, metadata_train, metadata_test = split_data(
        X, y, metadata, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 8
    calc_crps = 5  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(0, nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        # Reset per iteration so that loss.txt never repeats the CRPS of an
        # earlier iteration, and so that calc_crps = 0 does not end in a
        # NameError when the log line is written.
        crps_train = float('nan')
        crps_test = float('nan')

        # augmentation is currently disabled: train on the images as they are
        X_train_aug = X_train

        print('Fitting systole model...')
        hist_systole = model_systole.fit(
            graph_data(X_train_aug, metadata_train, y_train[:, 0]),
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=graph_data(X_test, metadata_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            graph_data(X_train_aug, metadata_train, y_train[:, 1]),
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=graph_data(X_test, metadata_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(
                graph_data(X_train, metadata_train, y_train[:, 0]),
                batch_size=batch_size,
                verbose=1)['output']
            pred_diastole = model_diastole.predict(
                graph_data(X_train, metadata_train, y_train[:, 1]),
                batch_size=batch_size,
                verbose=1)['output']
            val_pred_systole = model_systole.predict(
                graph_data(X_test, metadata_test, y_test[:, 0]),
                batch_size=batch_size,
                verbose=1)['output']
            val_pred_diastole = model_diastole.predict(
                graph_data(X_test, metadata_test, y_test[:, 1]),
                batch_size=batch_size,
                verbose=1)['output']

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(
                np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                               val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                                val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(
                cdf_train,
                np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('/data/run2/weights_systole_best.hdf5',
                                       overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(
                '/data/run2/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('/data/run2/val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        # append this iteration's metrics to the running loss log
        with open("/data/run2/loss.txt", "a+") as myfile:
            myfile.write('\t'.join(
                (str(i + 1), str(loss_systole), str(loss_diastole),
                 str(val_loss_systole), str(val_loss_diastole),
                 str(crps_train), str(crps_test))))
            myfile.write('\n')
Example #8
0
def c3d_submission():
    """
    Generate the submission file 'c3d_channel_15.csv' from the C3D models.

    Loads the pickled weights of the systole and diastole networks, predicts
    on the validation data, averages the first half of the predictions with
    the second half, converts the result to CDFs (the two values stored in
    'val_loss.txt' are passed to real_to_cdf as sigmas) and writes one row
    per data row of 'data/sample_submission_validate.csv'.

    Rows whose study id has no accumulated prediction contain only the id,
    and a 'Miss <id>' message is printed for them.
    """
    print('Loading and compiling models...')
    model_systole = c3d_sys()
    model_diastole = c3d_dia()

    print('Loading models weights...')
    set_weights(model_systole, 'sys_best_c3d.pkl')
    set_weights(model_diastole, 'dia_best_c3d.pkl')

    # load val losses to use as sigmas for CDF
    # (first line: systole, second line: diastole)
    with open('val_loss.txt', mode='r') as f:
        val_loss_systole = float(f.readline())
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids = load_validation_data()

    # NOTE: the message is kept for log compatibility, but preprocess() is
    # not applied; X is used as returned by load_validation_data().
    print('Pre-processing images...')

    print('Predicting on validation data...')
    # insert a new axis at position 1 before feeding the networks
    pred_systole = model_systole.predict(X[:, np.newaxis, :, :, :])
    pred_diastole = model_diastole.predict(X[:, np.newaxis, :, :, :])

    # Average the first half of the predictions with the second half.
    # Floor division keeps the slice index an int on Python 3, where
    # len(...) / 2 is a float and cannot be used for slicing.
    # NOTE(review): presumably the validation set holds two variants of
    # every sample stacked one after the other -- confirm.
    half_systole = len(pred_systole) // 2
    pred_systole = (pred_systole[:half_systole] + pred_systole[half_systole:]) * 0.5

    half_diastole = len(pred_diastole) // 2
    pred_diastole = (pred_diastole[:half_diastole] + pred_diastole[half_diastole:]) * 0.5

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    # Context managers close both files even if a row fails to parse.
    with open('data/sample_submission_validate.csv') as sample_file:
        with open('c3d_channel_15.csv', "w+") as out_file:
            fi = csv.reader(sample_file)
            fo = csv.writer(out_file, lineterminator='\n')
            # Copy the header. next(fi) works on Python 2 and 3, whereas
            # fi.next() only exists on Python 2.
            fo.writerow(next(fi))
            for line in fi:
                # ids look like '<study>_<target>'
                idx = line[0]
                key, target = idx.split('_')
                key = int(key)
                out = [idx]
                if key in sub_systole:
                    if target == 'Diastole':
                        out.extend(list(sub_diastole[key][0]))
                    else:
                        out.extend(list(sub_systole[key][0]))
                else:
                    print('Miss {0}'.format(idx))
                fo.writerow(out)

    print('Done.')
Example #9
0
def train(data_prefix, prefix, seed, run):
    """
    Fit the systole and/or diastole model using generator-based augmentation.

    data_prefix, seed -- forwarded unchanged to load_train_data().
    prefix -- string prepended to every output path (checkpoint files and
              val_loss.txt).
    run -- 0 fits both models and then reports CRPS, 1 fits only the systole
           model, 2 fits only the diastole model. Any other value fits
           nothing and leaves val_loss.txt empty.
    """
    print('Loading training data...')
    X, y = load_train_data(data_prefix, seed)

    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # hold out a validation part (split_ratio is handed to split_data as-is)
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200  # handed to fit_generator as nb_epoch
    batch_size = 32
    calc_crps = 1  # any value > 0 enables the CRPS report (only when run == 0)

    # The three values below are assigned but never read in this function.
    epochs_per_iter = 1
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    want_systole = run == 0 or run == 1
    want_diastole = run == 0 or run == 2

    rule = '-' * 50
    print(rule)
    print('Training...')
    print(rule)

    augmenter = ImageDataGenerator(
        # all centering / normalisation / whitening options are switched off
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        # geometric augmentation: rotation, shifts and both flips
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=True)

    # lets the generator compute any dataset statistics it needs
    augmenter.fit(X_train)

    # "*_best" files keep only the best epoch, the others are rewritten
    # after every epoch
    best_systole_ckpt = ModelCheckpoint(filepath=prefix + "weights_systole_best.hdf5",
                                        verbose=1, save_best_only=True)
    best_diastole_ckpt = ModelCheckpoint(filepath=prefix + "weights_diastole_best.hdf5",
                                         verbose=1, save_best_only=True)
    last_systole_ckpt = ModelCheckpoint(filepath=prefix + "weights_systole.hdf5",
                                        verbose=1, save_best_only=False)
    last_diastole_ckpt = ModelCheckpoint(filepath=prefix + "weights_diastole.hdf5",
                                         verbose=1, save_best_only=False)

    # NOTE(review): both models are fitted on target column 2, while the CRPS
    # section below compares against columns 0 and 1 -- confirm this is
    # intended.
    if want_systole:
        print('Fitting Systole Shapes')
        hist_systole = model_systole.fit_generator(
            augmenter.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[last_systole_ckpt, best_systole_ckpt],
            nb_worker=4)

    if want_diastole:
        print('Fitting Diastole Shapes')
        hist_diastole = model_diastole.fit_generator(
            augmenter.flow(X_train, y_train[:, 2], batch_size=batch_size),
            samples_per_epoch=X_train.shape[0],
            nb_epoch=nb_iter,
            show_accuracy=False,
            validation_data=(X_test, y_test[:, 2]),
            callbacks=[last_diastole_ckpt, best_diastole_ckpt],
            nb_worker=4)

    # losses of the final epoch; used as sigmas by real_to_cdf below
    if want_systole:
        systole_log = hist_systole.history
        loss_systole = systole_log['loss'][-1]
        val_loss_systole = systole_log['val_loss'][-1]

    if want_diastole:
        diastole_log = hist_diastole.history
        loss_diastole = diastole_log['loss'][-1]
        val_loss_diastole = diastole_log['val_loss'][-1]

    if calc_crps > 0 and run == 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)

        pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

        # reference CDFs built from the true values (systole rows first,
        # then diastole rows)
        train_targets = np.concatenate((y_train[:, 0], y_train[:, 1]))
        cdf_train = real_to_cdf(train_targets)
        test_targets = np.concatenate((y_test[:, 0], y_test[:, 1]))
        cdf_test = real_to_cdf(test_targets)

        # predicted CDFs, each using the matching final-epoch loss
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)

        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        train_cdfs = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
        crps_train = crps(cdf_train, train_cdfs)
        print('CRPS(train) = {0}'.format(crps_train))

        test_cdfs = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
        crps_test = crps(cdf_test, test_cdfs)
        print('CRPS(test) = {0}'.format(crps_test))

    # store the lowest validation loss of each fitted model (later read back
    # when generating a submission)
    with open(prefix + 'val_loss.txt', mode='w+') as out:
        if want_systole:
            out.write(str(min(hist_systole.history['val_loss'])))
            out.write('\n')
        if want_diastole:
            out.write(str(min(hist_diastole.history['val_loss'])))
def train():
    """
    Train the systole and diastole models for a fixed number of iterations.

    Every iteration re-augments the training images, fits each model for
    `epochs_per_iter` epoch(s), optionally prints CRPS on the train and test
    splits, saves the current weights, saves the best-so-far weights, and
    rewrites val_loss.txt with the lowest validation losses seen so far.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # CRPS is evaluated every calc_crps-th iteration; 0 disables it

    # lowest validation losses seen so far
    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    rule = '-' * 50
    print(rule)
    print('Training...')
    print(rule)

    for it in range(nb_iter):
        print(rule)
        print('Iteration {0}/{1}'.format(it + 1, nb_iter))
        print(rule)

        # fresh augmentation each iteration: rotate first, then shift
        print('Augmenting images - rotations')
        augmented = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        augmented = shift_augmentation(augmented, 0.1, 0.1)

        # target column 0 drives the systole model, column 1 the diastole one
        print('Fitting systole model...')
        hist_systole = model_systole.fit(
            augmented, y_train[:, 0],
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(
            augmented, y_train[:, 1],
            shuffle=True,
            nb_epoch=epochs_per_iter,
            batch_size=batch_size,
            validation_data=(X_test, y_test[:, 1]))

        # last-epoch losses, passed to real_to_cdf as its second argument
        systole_log = hist_systole.history
        diastole_log = hist_diastole.history
        loss_systole = systole_log['loss'][-1]
        loss_diastole = diastole_log['loss'][-1]
        val_loss_systole = systole_log['val_loss'][-1]
        val_loss_diastole = diastole_log['val_loss'][-1]

        if calc_crps > 0 and it % calc_crps == 0:
            print('Evaluating CRPS...')
            # predictions are made on the un-augmented training images
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # reference CDFs from the true values (systole rows, then diastole)
            train_targets = np.concatenate((y_train[:, 0], y_train[:, 1]))
            cdf_train = real_to_cdf(train_targets)
            test_targets = np.concatenate((y_test[:, 0], y_test[:, 1]))
            cdf_test = real_to_cdf(test_targets)

            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            train_cdfs = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
            crps_train = crps(cdf_train, train_cdfs)
            print('CRPS(train) = {0}'.format(crps_train))

            test_cdfs = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
            crps_test = crps(cdf_test, test_cdfs)
            print('CRPS(test) = {0}'.format(crps_test))

        # always keep the most recent weights on disk
        print('Saving weights...')
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # additionally keep the weights of the best iteration so far
        if val_loss_systole < best_val_systole:
            best_val_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < best_val_diastole:
            best_val_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # two lines: best systole loss, then best diastole loss
        with open('val_loss.txt', mode='w+') as out:
            out.write('\n'.join((str(best_val_systole), str(best_val_diastole))))
Example #11
0
def train():
    """
    Training systole and diastole models (image + metadata inputs).

    Resumes from the best weights under /data/run2/ when those files exist,
    then runs `nb_iter` iterations. Each iteration fits both models for
    `epochs_per_iter` epoch(s), optionally evaluates CRPS, saves the weights
    whenever the validation loss improves, rewrites val_loss.txt with the
    lowest validation losses seen so far and appends one tab-separated line
    to loss.txt:

        iteration, loss_systole, loss_diastole, val_loss_systole,
        val_loss_diastole, crps_train, crps_test

    The CRPS columns hold the most recently computed values; they are
    ``nan`` until the first CRPS evaluation has run.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    # import best model if it exists
    if os.path.isfile('/data/run2/weights_systole_best.hdf5'):
        print('loading weights')
        model_systole.load_weights('/data/run2/weights_systole_best.hdf5')

    if os.path.isfile('/data/run2/weights_diastole_best.hdf5'):
        model_diastole.load_weights('/data/run2/weights_diastole_best.hdf5')

    print('Loading training data...')
    X, y, metadata = load_train_data()

    # split to training and test
    X_train, y_train, X_test, y_test, metadata_train, metadata_test = split_data(X, y, metadata, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 8
    calc_crps = 5  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    # The per-iteration log line below always includes the CRPS values, but
    # they are only computed on some iterations (or never, when calc_crps is
    # 0). Start from NaN so the logging cannot hit an undefined name.
    crps_train = float('nan')
    crps_test = float('nan')

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(0, nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        # augmentation is disabled in this variant: train on the raw split
        X_train_aug = X_train

        print('Fitting systole model...')
        hist_systole = model_systole.fit({'input1':X_train_aug, 'input2':metadata_train, 'output':y_train[:, 0]}, shuffle=True, nb_epoch=epochs_per_iter,
                                         batch_size=batch_size, validation_data={'input1':X_test,'input2':metadata_test, 'output':y_test[:, 0]})

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit({'input1':X_train_aug, 'input2':metadata_train, 'output':y_train[:, 1]}, shuffle=True, nb_epoch=epochs_per_iter,
                                           batch_size=batch_size, validation_data={'input1':X_test, 'input2':metadata_test, 'output':y_test[:, 1]})

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            # predict() returns a dict here; the prediction is under 'output'
            pred_systole = model_systole.predict({'input1':X_train, 'input2':metadata_train, 'output':y_train[:, 0]}, batch_size=batch_size, verbose=1)['output']
            pred_diastole = model_diastole.predict({'input1':X_train, 'input2':metadata_train, 'output':y_train[:, 1]}, batch_size=batch_size, verbose=1)['output']
            val_pred_systole = model_systole.predict({'input1':X_test, 'input2':metadata_test, 'output':y_test[:, 0]}, batch_size=batch_size, verbose=1)['output']
            val_pred_diastole = model_diastole.predict({'input1':X_test, 'input2':metadata_test, 'output':y_test[:, 1]}, batch_size=batch_size, verbose=1)['output']

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('/data/run2/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('/data/run2/weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('/data/run2/val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        # append this iteration's metrics to the running log
        with open("/data/run2/loss.txt", "a+") as myfile:
            myfile.write('\t'.join((str(i+1), str(loss_systole),str(loss_diastole),str(val_loss_systole),str(val_loss_diastole), str(crps_train), str(crps_test))))
            myfile.write('\n')
Example #12
0
def train():
    """
    Train the systole and diastole VGG models batch by batch.

    Both models start from the "best" weight files under vgg16_weights_112/.
    Each epoch makes four passes over batches produced by the image
    generator (systole train, systole test, diastole train, diastole test),
    optionally prints CRPS, saves the current weights, saves the best-so-far
    weights and rewrites val_loss.txt with the lowest validation losses.
    """
    print('Loading and compiling models...')
    model_systole = VGG_16_112('vgg16_weights_112/weights_systole_best.hdf5')
    model_diastole = VGG_16_112('vgg16_weights_112/weights_diastole_best.hdf5')
    print('Loading training data...')
    X, y = load_train_data()

    # split to training and test; no separate pre-processing or offline
    # augmentation step is applied in this variant
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_epoch = 100
    epochs_per_iter = 1  # assigned but not read in this function
    batch_size = 32      # only used for the predict() calls below
    calc_crps = 1        # CRPS every calc_crps-th epoch; 0 disables it

    # lowest validation losses seen so far
    best_val_systole = sys.float_info.max
    best_val_diastole = sys.float_info.max

    generator = ImageDataGenerator(
        # dataset-wide centering and std normalisation are enabled
        featurewise_center=True,
        samplewise_center=False,
        featurewise_std_normalization=True,
        samplewise_std_normalization=False,
        zca_whitening=False,
        # geometric augmentation: rotation, shifts, horizontal flip only
        rotation_range=90,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        vertical_flip=False)

    generator.fit(X_train)

    def sweep(step, images, targets, label):
        """Run `step` on every generated batch; return the last batch's loss.

        Returns 0 when the generator yields no batch at all.
        """
        last_loss = 0
        progbar = generic_utils.Progbar(images.shape[0])
        for X_batch, Y_batch in generator.flow(images, targets):
            last_loss = step(X_batch, Y_batch)[-1]
            progbar.add(X_batch.shape[0], values=[(label, last_loss)])
        return last_loss

    for i in range(nb_epoch):
        print('-'*40)
        print('Epoch', i)
        print('-'*40)

        # NOTE: the recorded losses are those of the final batch of each
        # pass, and the test passes also draw their batches from the
        # generator.
        print("Training systole...")
        loss_systole = sweep(model_systole.train_on_batch, X_train, y_train[:,0], "train loss")

        print("Testing systole...")
        val_loss_systole = sweep(model_systole.test_on_batch, X_test, y_test[:,0], "test loss")

        print("Training diastole...")
        loss_diastole = sweep(model_diastole.train_on_batch, X_train, y_train[:,1], "train loss")

        print("Testing diastole...")
        val_loss_diastole = sweep(model_diastole.test_on_batch, X_test, y_test[:,1], "test loss")

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # reference CDFs from the true values (systole rows, then diastole)
            train_targets = np.concatenate((y_train[:, 0], y_train[:, 1]))
            cdf_train = real_to_cdf(train_targets)
            test_targets = np.concatenate((y_test[:, 0], y_test[:, 1]))
            cdf_test = real_to_cdf(test_targets)

            # predicted CDFs, with the matching loss as second argument
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            train_cdfs = np.concatenate((cdf_pred_systole, cdf_pred_diastole))
            crps_train = crps(cdf_train, train_cdfs)
            print('CRPS(train) = {0}'.format(crps_train))

            test_cdfs = np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))
            crps_test = crps(cdf_test, test_cdfs)
            print('CRPS(test) = {0}'.format(crps_test))

        # most recent weights, overwritten every epoch
        print('Saving weights...')
        model_systole.save_weights('vgg16_weights_112/weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('vgg16_weights_112/weights_diastole.hdf5', overwrite=True)

        # weights of the best epoch so far
        if val_loss_systole < best_val_systole:
            best_val_systole = val_loss_systole
            model_systole.save_weights('vgg16_weights_112/weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < best_val_diastole:
            best_val_diastole = val_loss_diastole
            model_diastole.save_weights('vgg16_weights_112/weights_diastole_best.hdf5', overwrite=True)

        # two lines: best systole loss, then best diastole loss (to be used
        # later when generating a submission)
        with open('val_loss.txt', mode='w+') as out:
            out.write('\n'.join((str(best_val_systole), str(best_val_diastole))))
Example #13
0
def train():
    """
    Training systole and diastole models, with stats logging and periodic
    submission.

    Each iteration re-augments the training images, fits both models for
    `epochs_per_iter` epoch(s), optionally evaluates CRPS, saves current and
    best weights under MODELS, rewrites MODELS + 'val_loss.txt', appends one
    tab-separated line of metrics to STATS + 'RMSE_CRPS.txt' and calls
    write_images(STATS). After every 100th iteration the best weights and
    val_loss.txt are copied into a fresh submission folder and
    ``submission.py`` is launched on it.

    The CRPS columns of the stats file hold the most recently computed
    values; they are ``nan`` until the first CRPS evaluation has run.
    """
    print('Loading and compiling models...')
    model_systole = get_model(img_size)
    model_diastole = get_model(img_size)

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 300
    epochs_per_iter = 1
    batch_size = 64
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    # The stats line written every iteration includes the CRPS values, which
    # are only assigned inside the optional CRPS branch. Start from NaN so
    # the logging still works when calc_crps is changed.
    crps_train = float('nan')
    crps_test = float('nan')

    if not os.path.exists(STATS):
        os.makedirs(STATS)

    # (re)create the stats file with its header row
    with open(STATS + 'RMSE_CRPS.txt', 'w') as f:
        names = ['train_RMSE_d', 'train_RMSE_s', 'test_RMSE_d', 'test_RMSE_s', 'train_crps', 'test_crps']
        f.write('\t'.join([str(name) for name in names]) + '\n')

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')

        # save weights so they can be loaded later
        model_systole.save_weights(MODELS + 'weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights(MODELS + 'weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open(MODELS + 'val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        with open(STATS + 'RMSE_CRPS.txt', 'a') as f:
            # train_RMSE_d train_RMSE_s test_RMSE_d test_RMSE_s train_crps test_crps
            rmse_values = [loss_diastole, loss_systole, val_loss_diastole, val_loss_systole]
            crps_values = [crps_train, crps_test]
            f.write('\t'.join([str(val) for val in rmse_values + crps_values]) + '\n')

        print('Saving stats images...')
        write_images(STATS)

        # after every 100th iteration, snapshot the best model and submit it
        if i != 0 and (i + 1) % 100 == 0:
            print('Submitting learned model....')
            SUBMISSION_FOLDER = SUBMISSION + preproc_type + "/" + model_name + "/" + get_name() + "_ITERS" + str(i + 1) + "/"
            if not os.path.exists(SUBMISSION_FOLDER):
                os.makedirs(SUBMISSION_FOLDER)
            copyfile(MODELS + 'weights_systole_best.hdf5', SUBMISSION_FOLDER + 'weights_systole_best.hdf5')
            copyfile(MODELS + 'weights_diastole_best.hdf5', SUBMISSION_FOLDER + 'weights_diastole_best.hdf5')
            copyfile(MODELS + 'val_loss.txt', SUBMISSION_FOLDER + 'val_loss.txt')
            # NOTE: the command line is assembled by string formatting and run
            # through the shell; the three values must not contain spaces or
            # shell metacharacters.
            os.system('python submission.py %s %s %s' % (preproc_type, model_name, SUBMISSION_FOLDER))
Example #14
0
import pickle
import pandas as pd
from train import extract_features
from utils import real_to_cdf

if __name__ == '__main__':
    # Build submission.csv from the two pickled regressors: one
    # "<Id>_Diastole" row and one "<Id>_Systole" row per study, each
    # followed by the 600 CDF values P0..P599.
    metadata = pd.read_csv('data/metadata_validate.csv')
    features = extract_features(metadata).set_index('Id').sort_index()

    # Pickle data is binary, so the files are opened in 'rb' mode and closed
    # as soon as the model is loaded.
    # NOTE: unpickling can execute arbitrary code -- only load model files
    # that were produced locally by the training step.
    with open('diastole.pkl', 'rb') as model_file:
        diastole_model = pickle.load(model_file)
    with open('systole.pkl', 'rb') as model_file:
        systole_model = pickle.load(model_file)

    diastole = diastole_model.predict(features)
    systole = systole_model.predict(features)

    # turn each point prediction into a CDF row; sigma is kept tiny
    systole_cdf = real_to_cdf(systole, sigma=1e-10)
    diastole_cdf = real_to_cdf(diastole, sigma=1e-10)

    submission = pd.DataFrame(columns=['Id'] + ['P%d' % i for i in range(600)])

    # two consecutive output rows per study: diastole first, then systole
    for row, study_id in enumerate(features.index):
        diastole_id = '%d_Diastole' % study_id
        systole_id = '%d_Systole' % study_id
        submission.loc[2 * row, :] = [diastole_id] + diastole_cdf[row, :].tolist()
        submission.loc[2 * row + 1, :] = [systole_id] + systole_cdf[row, :].tolist()

    submission.to_csv('submission.csv', index=False)
Example #15
0
def build_submission(config):
    """
    Generate the submission file for a pair of trained models.

    Reads from `config`:
      systole_weights, diastole_weights -- weight files loaded into the models
      val_loss_systole, val_loss_diastole -- text files whose first line is
          the validation loss passed to real_to_cdf as its second argument
      submission -- path of the CSV file to write

    The output follows the row order of /data/sample_submission_validate.csv.
    A row whose study id has no accumulated prediction is written with the id
    only, and a 'Miss <id>' message is printed.
    """
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading models weights...')

    model_systole.load_weights(config.systole_weights)
    model_diastole.load_weights(config.diastole_weights)

    # load val losses to use as sigmas for CDF
    with open(config.val_loss_systole, 'r') as f:
        val_loss_systole = float(f.readline())

    with open(config.val_loss_diastole, 'r') as f:
        val_loss_diastole = float(f.readline())

    print('Loading validation data...')
    X, ids, mult = load_validation_data()

    batch_size = 32
    print('Predicting on validation data...')

    pred_normed_systole = model_systole.predict(X,
                                                batch_size=batch_size,
                                                verbose=1)
    pred_normed_diastole = model_diastole.predict(X,
                                                  batch_size=batch_size,
                                                  verbose=1)

    print('Normed_systole:', pred_normed_systole.shape)
    print('Normed_diastole:', pred_normed_diastole.shape)

    print('mult:', mult.shape)

    # scale the first output column of each model by `mult`
    pred_systole = pred_normed_systole[:, 0] * mult
    pred_diastole = pred_normed_diastole[:, 0] * mult

    print('systole:', pred_systole.shape)
    print('diastole:', pred_diastole.shape)

    # real predictions to CDF
    cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole)
    cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file; both handles are closed even if a row fails
    print('Writing submission to file...')
    with open('/data/sample_submission_validate.csv') as template, \
            open(config.submission, 'w') as out_file:
        fi = csv.reader(template)
        fo = csv.writer(out_file, lineterminator='\n')
        fo.writerow(next(fi))  # copy the header row unchanged
        for line in fi:
            idx = line[0]
            # ids look like "<study>_<Diastole|Systole>"
            key, target = idx.split('_')
            key = int(key)
            out = [idx]
            if key in sub_systole:
                if target == 'Diastole':
                    out.extend(list(sub_diastole[key][0]))
                else:
                    out.extend(list(sub_systole[key][0]))
            else:
                print('Miss {0}'.format(idx))
            fo.writerow(out)

    print('Done.')
Example #16
0
def train():
    """
    Train the systole and diastole models.

    Two models are built with ``get_model()`` and trained side by side on
    the same images: the systole model is fitted against column 0 of the
    targets and the diastole model against column 1.

    Training runs for ``nb_iter`` iterations.  Each iteration:

    1. builds a freshly augmented copy of the training images
       (rotation, then shift);
    2. fits each model for ``epochs_per_iter`` epoch(s), logging through a
       ``CSVLogger`` callback;
    3. writes the training losses of that fit call to
       ``dialoss_history.txt`` / ``sysloss_history.txt``;
    4. optionally evaluates and prints CRPS on the training and held-out
       data (controlled by ``calc_crps``);
    5. saves the current weights, and separately the weights of the
       iteration with the lowest validation loss so far;
    6. rewrites ``val_loss.txt`` with the lowest validation losses seen so
       far (systole on the first line, diastole on the second).

    Takes no arguments; all results are produced as files in the current
    working directory and as console output.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    # NOTE(review): split_ratio=0.2 presumably is the held-out fraction;
    # confirm against split_data().
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # Number of iterations was previously 200.
    nb_iter = 250

    epochs_per_iter = 1

    # Batch size was 32; reduced to 12 because of processing on a laptop
    # (done for test #3).
    batch_size = 12
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-' * 50)
    print('Training...')
    # NOTE(review): `start` is not read anywhere in the visible part of this
    # function.
    start = datetime.now()

    print('-' * 50)

    for i in range(nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        # Augmentation is recomputed from the original X_train on every
        # iteration; the shift is applied on top of the rotated images.
        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        # New logger objects are created each iteration; append=True is
        # passed so the log files are extended rather than restarted.
        csv_logger_diastole = CSVLogger('training_diastole.log',
                                        append=True,
                                        separator=';')
        csv_logger_systole = CSVLogger('training_systole.log',
                                       append=True,
                                       separator=';')

        # Systole model: target is column 0 of y.
        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug,
                                         y_train[:, 0],
                                         shuffle=True,
                                         nb_epoch=epochs_per_iter,
                                         batch_size=batch_size,
                                         validation_data=(X_test, y_test[:,
                                                                         0]),
                                         callbacks=[csv_logger_systole])

        # Diastole model: target is column 1 of y.
        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug,
                                           y_train[:, 1],
                                           shuffle=True,
                                           nb_epoch=epochs_per_iter,
                                           batch_size=batch_size,
                                           validation_data=(X_test, y_test[:,
                                                                           1]),
                                           callbacks=[csv_logger_diastole])

        dialoss_history = hist_diastole.history["loss"]
        sysloss_history = hist_systole.history["loss"]

        # NOTE(review): both files are rewritten on every iteration with the
        # history of this single fit() call only, so they never accumulate
        # losses across iterations -- confirm whether that is intended.
        # NOTE(review): function-local import repeated inside the loop.
        import numpy
        numpy_dialoss_history = numpy.array(dialoss_history)
        numpy_sysloss_history = numpy.array(sysloss_history)
        numpy.savetxt("dialoss_history.txt",
                      numpy_dialoss_history,
                      delimiter=",")
        numpy.savetxt("sysloss_history.txt",
                      numpy_sysloss_history,
                      delimiter=",")

        # sigmas for predicted data, actually loss function values (RMSE)
        # ([-1] takes the last epoch of this fit() call)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            # Predictions are made on the un-augmented training images and
            # on the held-out images.
            pred_systole = model_systole.predict(X_train,
                                                 batch_size=batch_size,
                                                 verbose=1)
            pred_diastole = model_diastole.predict(X_train,
                                                   batch_size=batch_size,
                                                   verbose=1)
            val_pred_systole = model_systole.predict(X_test,
                                                     batch_size=batch_size,
                                                     verbose=1)
            val_pred_diastole = model_diastole.predict(X_test,
                                                       batch_size=batch_size,
                                                       verbose=1)

            ## THE VALUES ABOVE ARE THE RESULTS ##

            ## try

            ##    accuracy_systole = pred_systole - val_pred_systole

            # Debug output: dump the raw predictions to the console.
            print("Pred_diastole:")
            print(pred_diastole)

            print("Pred_systole:")
            print(pred_systole)

            print("Val_pred_sysyole:")
            print(val_pred_systole)

            print("Val_pred_diastole:")
            print(val_pred_diastole)

            # CDF for train and test data (actually a step function)
            # Systole targets come first, then diastole targets; the
            # predicted CDFs below are concatenated in the same order.
            cdf_train = real_to_cdf(
                np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:,
                                                                        1])))

            # CDF for predicted data
            # The second argument is the loss value used as sigma (see the
            # comment where the loss values are read above).
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole,
                                               val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole,
                                                val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(
                cdf_train, np.concatenate(
                    (cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(
                cdf_test,
                np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))
            """ BEGIN PLOTTING RESULTS """

            # NOTE(review): `plt` is imported but not used in the visible
            # part of this function; the plotting code appears unfinished.
            import matplotlib.pyplot as plt
            import numpy

        #    score = model_systole.evaluate(X_test, Y_test, verbose=0)
        #    print("Score systole")
        #    print(score)
        """ EIND PLOTTING RESULT """

        print('Saving weights...')
        # save weights so they can be loaded later
        # (these two files always hold the most recent iteration)
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        # The two models are tracked independently, so their "best" weight
        # files can come from different iterations.
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5',
                                       overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5',
                                        overwrite=True)

            ##Start accuracy plot for systole and diastole

            ## pyplot.plot(history.history['acc'])
            ##pyplot.show()

        # save best (lowest) val losses in file (to be later used for generating submission)
        # Rewritten on every iteration: line 1 is the systole minimum,
        # line 2 the diastole minimum.
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))