Example #1
0
def submission():
    """
    Generate submission file for the trained models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    model_diastole.load_weights('weights_diastole_best.hdf5')

    print('Loading validation data...')
    X, ids = load_validation_data()

    print('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = pred_systole.cumsum(axis=-1)
    cdf_pred_diastole = pred_diastole.cumsum(axis=-1)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    fi = csv.reader(open('/data/heart/sample_submission_test.csv'))
    f = open('submission.csv', 'w')
    fo = csv.writer(f, lineterminator='\n')
    fo.writerow(fi.next())
    for line in fi:
        idx = line[0]
        key, target = idx.split('_')
        key = int(key)
        out = [idx]
        if key in sub_systole:
            if target == 'Diastole':
                out.extend(list(sub_diastole[key][0]))
            else:
                out.extend(list(sub_systole[key][0]))
        else:
            print('Miss {0}'.format(idx))
        fo.writerow(out)
    f.close()

    print('Done.')
Example #2
0
def submission():
    """
    Generate submission file for the trained models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading models weights...')
    model_systole.load_weights('weights_systole_best.hdf5')
    model_diastole.load_weights('weights_diastole_best.hdf5')

    print('Loading validation data...')
    X, ids = load_validation_data()

    print('Pre-processing images...')
    X = preprocess(X)

    batch_size = 32
    print('Predicting on validation data...')
    pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1)
    pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1)

    # real predictions to CDF
    cdf_pred_systole = pred_systole.cumsum(axis=-1) 
    cdf_pred_diastole = pred_diastole.cumsum(axis=-1)

    print('Accumulating results...')
    sub_systole = accumulate_study_results(ids, cdf_pred_systole)
    sub_diastole = accumulate_study_results(ids, cdf_pred_diastole)

    # write to submission file
    print('Writing submission to file...')
    fi = csv.reader(open('/data/heart/sample_submission_test.csv'))
    f = open('submission.csv', 'w')
    fo = csv.writer(f, lineterminator='\n')
    fo.writerow(fi.next())
    for line in fi:
        idx = line[0]
        key, target = idx.split('_')
        key = int(key)
        out = [idx]
        if key in sub_systole:
            if target == 'Diastole':
                out.extend(list(sub_diastole[key][0]))
            else:
                out.extend(list(sub_systole[key][0]))
        else:
            print('Miss {0}'.format(idx))
        fo.writerow(out)
    f.close()

    print('Done.')
Example #3
0
def train(train_prefix_dir="/data/heart"):
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading training data...')
    X, y = load_train_data(train_prefix_dir)

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    # Create Image Augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Create model checkpointers for systole and diastole
    systole_checkpointer_best = ModelCheckpoint(filepath="weights_systole_best.hdf5", verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(filepath="weights_diastole_best.hdf5", verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(filepath="weights_systole.hdf5", verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(filepath="weights_diastole.hdf5", verbose=1, save_best_only=False)

    # Create 600-dimentional y cdfs from observations
    y_syst_train = np.array([(i < np.arange(600)) for i in y_train[:, 0]], dtype=np.uint8)
    y_syst_test = np.array([(i < np.arange(600)) for i in y_test[:, 0]], dtype=np.uint8)
    y_diast_train = np.array([(i < np.arange(600)) for i in y_train[:, 1]], dtype=np.uint8)
    y_diast_test = np.array([(i < np.arange(600)) for i in y_test[:, 1]], dtype=np.uint8)

    print('Fitting Systole Shapes')
    hist_systole = model_systole.fit_generator(datagen.flow(X_train, y_syst_train, batch_size=batch_size),
                                               samples_per_epoch=X_train.shape[0],
                                               nb_epoch=nb_iter, show_accuracy=False,
                                               validation_data=(X_test, y_syst_test),
                                               callbacks=[systole_checkpointer, systole_checkpointer_best],
                                               nb_worker=1)
    
    print('Fitting Diastole Shapes')
    hist_diastole = model_diastole.fit_generator(datagen.flow(X_train, y_diast_train, batch_size=batch_size),
                                                 samples_per_epoch=X_train.shape[0],
                                                 nb_epoch=nb_iter, show_accuracy=False,
                                                 validation_data=(X_test, y_diast_test),
                                                 callbacks=[diastole_checkpointer, diastole_checkpointer_best],
                                                 nb_worker=1)
   
    loss_systole = hist_systole.history['loss'][-1]
    loss_diastole = hist_diastole.history['loss'][-1]
    val_loss_systole = hist_systole.history['val_loss'][-1]
    val_loss_diastole = hist_diastole.history['val_loss'][-1]

    if calc_crps > 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file (to be later used for generating submission)
    with open('val_loss.txt', mode='w+') as f:
        f.write(str(min(hist_systole.history['val_loss'])))
        f.write('\n')
        f.write(str(min(hist_diastole.history['val_loss'])))
        
    """