def optimize_potential(dev_pats, model1, model2, stratified, fscores=None,
                       do_plot_predictions=False, resolution=1, load_hog=False):
    n_labels = 4
    potentials = []
    factors = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]
    #factors = [0.00001, 0.0001, 0.001, 0.01, 0.02, 0.05, 0.1]
    # Quadratic potential
    order = [2,1,3,4]
    pot_mat = np.zeros((n_labels, n_labels))
    for i in range(len(order)):
        for j in range(len(order)):
            pot_mat[i,j] = np.abs(order[i] - order[j])**2
    max_val = np.max(pot_mat[:])
    pot_mat = (max_val - pot_mat) / max_val
    for f in factors:
        #potentials.append(f * np.eye(n_labels))
        potentials.append(f * pot_mat)
    n_pots = len(potentials)

    yde = np.zeros(0)
    predde = np.zeros((0, n_pots))
    predde_no_pp = np.zeros(0)
    patient_idxs_de = [0]
    print "Development users:"
    # Iterate over dev users
    for de_idx, de_pat in enumerate(dev_pats):
        print "Development patient number %d" % (de_idx+1)
        x, y, coord, dim = dp.load_patient(de_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)
        yde = np.concatenate((yde, y))
        patient_idxs_de.append(len(yde))

        pred = model1.predict(x)
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True)

        tumor_idxs = pp_pred > 0
        pred_probs2 = model2.predict_proba(x[tumor_idxs,:])
        pred2 = np.argmax(pred_probs2, axis=1) + 1
        pp_pred[tumor_idxs] = pred2

        pp_pred15 = np.array(pp_pred)
        print "\nConfusion matrix (dev):"
        cm = confusion_matrix(y, pp_pred15)
        print cm
        dice_scores(y, pp_pred15, label='Dice scores (dev, no MRF):')
        predde_no_pp = np.concatenate((predde_no_pp, pp_pred15))
        predde_part = np.zeros((len(pp_pred15), 0))

        edges = dp.create_graph(coord[tumor_idxs,:])
        for pi, pot in enumerate(potentials):
            print "  Patient %d, potential %d." % (de_idx+1, pi+1)
            pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges, potential=pot) + 1

            print "\nConfusion matrix (MRF-%d):" % (pi+1)
            cm = confusion_matrix(y, pp_pred)
            print cm

            predde_part = np.hstack((predde_part, pp_pred.reshape(len(pp_pred),1)))

            dice_scores(y, pp_pred, label='Dice scores (pp):')

            if do_plot_predictions or de_idx < 5:
                # Plot the patient
                pif = os.path.join('plots', 'validation2', 'pat%d_slices_2S_MRF-%d.png' % (de_pat,pi+1))
                pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred, fname=pif)
                #if pred_fname is not None:
                #    extras.save_predictions(coord, dim_list[0], pred, yte, pred_fname)
        predde = np.vstack((predde, predde_part))

    dice_scores(yde, predde_no_pp, patient_idxs=patient_idxs_de,
                label='Overall dice scores (two-stage, no MRF):', fscores=fscores)

    best_potential = potentials[0]
    best_score = -1
    for i in range(n_pots):
        print "\nOverall confusion matrix (%d):" % i
        cm = confusion_matrix(yde, predde[:,i])
        print cm

        ds = dice_scores(yde, predde[:,i], patient_idxs=patient_idxs_de,
                         label='Overall dice scores (two-stage, MRF-%d):' % i,
                         fscores=fscores)
        score = sum(ds)
        if score > best_score:
            best_score = score
            best_potential = potentials[i]
    print "Best potential (score=%f):" % (best_score)
    print best_potential
    return best_potential
Example #2
0
def optimize_potential(dev_pats,
                       model1,
                       model2,
                       stratified,
                       fscores=None,
                       do_plot_predictions=False,
                       resolution=1,
                       load_hog=False):
    n_labels = 4
    potentials = []
    factors = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]
    #factors = [0.00001, 0.0001, 0.001, 0.01, 0.02, 0.05, 0.1]
    # Quadratic potential
    order = [2, 1, 3, 4]
    pot_mat = np.zeros((n_labels, n_labels))
    for i in range(len(order)):
        for j in range(len(order)):
            pot_mat[i, j] = np.abs(order[i] - order[j])**2
    max_val = np.max(pot_mat[:])
    pot_mat = (max_val - pot_mat) / max_val
    for f in factors:
        #potentials.append(f * np.eye(n_labels))
        potentials.append(f * pot_mat)
    n_pots = len(potentials)

    yde = np.zeros(0)
    predde = np.zeros((0, n_pots))
    predde_no_pp = np.zeros(0)
    patient_idxs_de = [0]
    print "Development users:"
    # Iterate over dev users
    for de_idx, de_pat in enumerate(dev_pats):
        print "Development patient number %d" % (de_idx + 1)
        x, y, coord, dim = dp.load_patient(de_pat,
                                           n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)
        yde = np.concatenate((yde, y))
        patient_idxs_de.append(len(yde))

        pred = model1.predict(x)
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True)

        tumor_idxs = pp_pred > 0
        pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
        pred2 = np.argmax(pred_probs2, axis=1) + 1
        pp_pred[tumor_idxs] = pred2

        pp_pred15 = np.array(pp_pred)
        print "\nConfusion matrix (dev):"
        cm = confusion_matrix(y, pp_pred15)
        print cm
        dice_scores(y, pp_pred15, label='Dice scores (dev, no MRF):')
        predde_no_pp = np.concatenate((predde_no_pp, pp_pred15))
        predde_part = np.zeros((len(pp_pred15), 0))

        edges = dp.create_graph(coord[tumor_idxs, :])
        for pi, pot in enumerate(potentials):
            print "  Patient %d, potential %d." % (de_idx + 1, pi + 1)
            pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges, potential=pot) + 1

            print "\nConfusion matrix (MRF-%d):" % (pi + 1)
            cm = confusion_matrix(y, pp_pred)
            print cm

            predde_part = np.hstack(
                (predde_part, pp_pred.reshape(len(pp_pred), 1)))

            dice_scores(y, pp_pred, label='Dice scores (pp):')

            if do_plot_predictions or de_idx < 5:
                # Plot the patient
                pif = os.path.join(
                    'plots', 'validation2',
                    'pat%d_slices_2S_MRF-%d.png' % (de_pat, pi + 1))
                pp.plot_predictions(coord,
                                    dim,
                                    pp_pred15,
                                    y,
                                    pp_pred,
                                    fname=pif)
                #if pred_fname is not None:
                #    extras.save_predictions(coord, dim_list[0], pred, yte, pred_fname)
        predde = np.vstack((predde, predde_part))

    dice_scores(yde,
                predde_no_pp,
                patient_idxs=patient_idxs_de,
                label='Overall dice scores (two-stage, no MRF):',
                fscores=fscores)

    best_potential = potentials[0]
    best_score = -1
    for i in range(n_pots):
        print "\nOverall confusion matrix (%d):" % i
        cm = confusion_matrix(yde, predde[:, i])
        print cm

        ds = dice_scores(yde,
                         predde[:, i],
                         patient_idxs=patient_idxs_de,
                         label='Overall dice scores (two-stage, MRF-%d):' % i,
                         fscores=fscores)
        score = sum(ds)
        if score > best_score:
            best_score = score
            best_potential = potentials[i]
    print "Best potential (score=%f):" % (best_score)
    print best_potential
    return best_potential
def predict_two_stage(train_pats, test_pats, fscores=None,
                      do_plot_predictions=False, stratified=False, n_trees=30,
                      dev_pats=[], use_mrf=True, resolution=1, n_voxels=30000,
                      mat_dir=None, fresh_models=False, load_hog=False):
    """
    Predict tumor voxels for given test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results.
    """
    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    model1_fname = os.path.join('models', 'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' %
                                (seed, len(train_pats), len(test_pats), n_trees, n_voxels, model_str))
    model2_fname = os.path.join('models', 'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' %
                                (seed, len(train_pats), len(test_pats), n_trees, n_voxels, model_str))

    # Load models if available
    if not fresh_models and os.path.isfile(model1_fname) and \
            os.path.isfile(model2_fname):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
        min_voxels = 3000
    else:
        xtr, ytr, coordtr, patient_idxs_tr, dims_tr = dp.load_patients(
                train_pats, stratified, resolution=resolution,
                n_voxels=n_voxels, load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1>0] = 1
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print "Class frequencies (model 1):", class_freqs*100
            # Class frequencies in the sample
            sample_counts = np.histogram(ytr, [0,1,5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print "Sample frequencies:", sample_freqs*100
            weights = np.ones(len(ytr))
            for i in range(2):
                weights[ytr==i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr, ytr1, n_trees=n_trees,
                                sample_weight=weights, fname=model1_fname)

        # Compute minimum number of tumor voxels in a train patient
        min_voxels = 3000#get_min_voxels(ytr, patient_idxs_tr)
        print "Minimum number of voxels in a tumor: %d" % min_voxels

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs,:])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print "Class frequencies (model 2):", class_freqs*100
            # Class frequencies in the sample
            sample_counts = np.histogram(ytr, range(1,6))[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print "Sample frequencies:", sample_freqs*100
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2==i+1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2, ytr2, n_trees=n_trees,
                                sample_weight=weights, fname=model2_fname)

    print "\n----------------------------------\n"

    if len(dev_pats) > 0:
        best_potential = optimize_potential(
                dev_pats, model1, model2, stratified, fscores,
                do_plot_predictions, resolution=resolution, load_hog=load_hog)
        best_radius = optimize_closing(dev_pats, model1, stratified, fscores,
                                       resolution=resolution, load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        best_radius = 6
        best_th = 0.6
        best_potential = np.array([[0.04, 0.03555556, 0.03555556, 0.02222222],
                                   [0.03555556, 0.04, 0.02222222, 0.],
                                   [0.03555556, 0.02222222, 0.04, 0.03555556],
                                   [0.02222222, 0., 0.03555556, 0.04]])


    yte = np.zeros(0)
    predte = np.zeros(0)
    predte_no_pp = np.zeros(0)
    patient_idxs_te = [0]
    print "Test users:"
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print "Test patient number %d" % (te_idx+1)
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        #pred = model1.predict(x)
        pred_probs = model1.predict_proba(x)
        #pred = np.argmax(pred_probs, axis=1)
        pred = pred_probs[:,1] >= best_th
        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        if sum(pred > 0) < min_voxels:
            print "Patient having too few voxels (%d < %d)" % (sum(pred > 0), min_voxels)
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:,1])[-min_voxels:]
            pred[new_idxs] = 1
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True,
                                  radius=best_radius)

        tumor_idxs = pp_pred > 0
        if sum(tumor_idxs) > 0:
            pred_probs2 = model2.predict_proba(x[tumor_idxs,:])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2

        pp_pred15 = np.array(pp_pred)
        print "\nConfusion matrix:"
        cm = confusion_matrix(y, pp_pred15)
        print cm
        dice_scores(y, pp_pred15, label='Dice scores:')

        if use_mrf:
            # MRF post processing
            if sum(tumor_idxs) > 0:
                edges = dp.create_graph(coord[tumor_idxs,:])
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=best_potential) + 1
            method = 'MRF'
        else:
            # Closing post processing
            if sum(tumor_idxs) > 0:
                pp_pred[tumor_idxs] = dp.post_process(coord[tumor_idxs,:], dim,
                                                      pred2, remove_components=False,
                                                      radius=best_radius)
            method = 'closing'

        print "\nConfusion matrix (pp):"
        cm = confusion_matrix(y, pp_pred)
        print cm

        yte = np.concatenate((yte, y))
        patient_idxs_te.append(len(yte))
        predte = np.concatenate((predte, pp_pred))
        predte_no_pp = np.concatenate((predte_no_pp, pp_pred15))

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results', 'pat%d_slices_2S_%s.png' % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred, fname=pif,
                                fmat=fmat)
            #if pred_fname is not None:
            #    extras.save_predictions(coord, dim_list[0], pred, yte, pred_fname)

    print "\nOverall confusion matrix:"
    cm = confusion_matrix(yte, predte)
    print cm

    dice_scores(yte, predte_no_pp, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):', fscores=fscores)

    dice_scores(yte, predte, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):', fscores=fscores)
Example #4
0
def predict_two_stage(train_pats,
                      test_pats,
                      fscores=None,
                      do_plot_predictions=False,
                      stratified=False,
                      n_trees=30,
                      dev_pats=[],
                      use_mrf=True,
                      resolution=1,
                      n_voxels=30000,
                      mat_dir=None,
                      fresh_models=False,
                      load_hog=False):
    """
    Predict tumor voxels for given test patients.

    Input:
        train_pats -- list of patient IDs used for training a model.
        test_pats -- list of patient IDs used for testing a model.
        fscores -- An opened output file to which we write the results.
    """
    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    model1_fname = os.path.join(
        'models', 'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' %
        (seed, len(train_pats), len(test_pats), n_trees, n_voxels, model_str))
    model2_fname = os.path.join(
        'models', 'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' %
        (seed, len(train_pats), len(test_pats), n_trees, n_voxels, model_str))

    # Load models if available
    if not fresh_models and os.path.isfile(model1_fname) and \
            os.path.isfile(model2_fname):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
        min_voxels = 3000
    else:
        xtr, ytr, coordtr, patient_idxs_tr, dims_tr = dp.load_patients(
            train_pats,
            stratified,
            resolution=resolution,
            n_voxels=n_voxels,
            load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1 > 0] = 1
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print "Class frequencies (model 1):", class_freqs * 100
            # Class frequencies in the sample
            sample_counts = np.histogram(ytr, [0, 1, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print "Sample frequencies:", sample_freqs * 100
            weights = np.ones(len(ytr))
            for i in range(2):
                weights[ytr == i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr,
                                ytr1,
                                n_trees=n_trees,
                                sample_weight=weights,
                                fname=model1_fname)

        # Compute minimum number of tumor voxels in a train patient
        min_voxels = 3000  #get_min_voxels(ytr, patient_idxs_tr)
        print "Minimum number of voxels in a tumor: %d" % min_voxels

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs, :])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print "Class frequencies (model 2):", class_freqs * 100
            # Class frequencies in the sample
            sample_counts = np.histogram(ytr, range(1, 6))[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print "Sample frequencies:", sample_freqs * 100
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2 == i + 1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2,
                                ytr2,
                                n_trees=n_trees,
                                sample_weight=weights,
                                fname=model2_fname)

    print "\n----------------------------------\n"

    if len(dev_pats) > 0:
        best_potential = optimize_potential(dev_pats,
                                            model1,
                                            model2,
                                            stratified,
                                            fscores,
                                            do_plot_predictions,
                                            resolution=resolution,
                                            load_hog=load_hog)
        best_radius = optimize_closing(dev_pats,
                                       model1,
                                       stratified,
                                       fscores,
                                       resolution=resolution,
                                       load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        best_radius = 6
        best_th = 0.6
        best_potential = np.array([[0.04, 0.03555556, 0.03555556, 0.02222222],
                                   [0.03555556, 0.04, 0.02222222, 0.],
                                   [0.03555556, 0.02222222, 0.04, 0.03555556],
                                   [0.02222222, 0., 0.03555556, 0.04]])

    yte = np.zeros(0)
    predte = np.zeros(0)
    predte_no_pp = np.zeros(0)
    patient_idxs_te = [0]
    print "Test users:"
    # Iterate over test users
    for te_idx, te_pat in enumerate(test_pats):
        print "Test patient number %d" % (te_idx + 1)
        x, y, coord, dim = dp.load_patient(te_pat,
                                           n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        #pred = model1.predict(x)
        pred_probs = model1.predict_proba(x)
        #pred = np.argmax(pred_probs, axis=1)
        pred = pred_probs[:, 1] >= best_th
        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        if sum(pred > 0) < min_voxels:
            print "Patient having too few voxels (%d < %d)" % (sum(pred > 0),
                                                               min_voxels)
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:, 1])[-min_voxels:]
            pred[new_idxs] = 1
        pp_pred = dp.post_process(coord,
                                  dim,
                                  pred,
                                  binary_closing=True,
                                  radius=best_radius)

        tumor_idxs = pp_pred > 0
        if sum(tumor_idxs) > 0:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2

        pp_pred15 = np.array(pp_pred)
        print "\nConfusion matrix:"
        cm = confusion_matrix(y, pp_pred15)
        print cm
        dice_scores(y, pp_pred15, label='Dice scores:')

        if use_mrf:
            # MRF post processing
            if sum(tumor_idxs) > 0:
                edges = dp.create_graph(coord[tumor_idxs, :])
                pp_pred[tumor_idxs] = dp.mrf(
                    pred_probs2, edges, potential=best_potential) + 1
            method = 'MRF'
        else:
            # Closing post processing
            if sum(tumor_idxs) > 0:
                pp_pred[tumor_idxs] = dp.post_process(coord[tumor_idxs, :],
                                                      dim,
                                                      pred2,
                                                      remove_components=False,
                                                      radius=best_radius)
            method = 'closing'

        print "\nConfusion matrix (pp):"
        cm = confusion_matrix(y, pp_pred)
        print cm

        yte = np.concatenate((yte, y))
        patient_idxs_te.append(len(yte))
        predte = np.concatenate((predte, pp_pred))
        predte_no_pp = np.concatenate((predte_no_pp, pp_pred15))

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results',
                               'pat%d_slices_2S_%s.png' % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord,
                                dim,
                                pp_pred15,
                                y,
                                pp_pred,
                                fname=pif,
                                fmat=fmat)
            #if pred_fname is not None:
            #    extras.save_predictions(coord, dim_list[0], pred, yte, pred_fname)

    print "\nOverall confusion matrix:"
    cm = confusion_matrix(yte, predte)
    print cm

    dice_scores(yte,
                predte_no_pp,
                patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):',
                fscores=fscores)

    dice_scores(yte,
                predte,
                patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):',
                fscores=fscores)