def optimize_potential(dev_pats, model1, model2, stratified, fscores=None,
                       do_plot_predictions=False, resolution=1,
                       load_hog=False):
    """Select the MRF pairwise potential that scores best on dev patients.

    A family of candidate potentials is built by scaling one normalised
    quadratic similarity matrix with a fixed list of factors.  Every
    development patient is segmented with the two-stage pipeline (model1
    followed by post-processing, then model2 on the voxels kept as tumor),
    and the MRF smoothing is applied once per candidate.  The candidate whose
    summed overall dice scores are highest is returned.

    Input:
    dev_pats -- iterable of patient IDs used for the search.
    model1 -- fitted first-stage classifier; only ``predict`` is called.
    model2 -- fitted second-stage classifier; only ``predict_proba`` is
              called, on the rows selected as tumor.
    stratified -- not used by this function; kept so existing callers that
                  pass it positionally keep working.
    fscores -- forwarded to ``dice_scores`` for the overall scores.
    do_plot_predictions -- plot every patient; the first five patients are
                           plotted regardless of this flag.
    resolution, load_hog -- forwarded to ``dp.load_patient``.

    Returns the best potential (a 4x4 numpy array).  With no development
    patients the first candidate is still the starting value of the search.
    """
    factors = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]

    # Quadratic potential: squared distance between the positions of the
    # labels in `order`, flipped and normalised so that identical labels get
    # 1.0 and the most distant pair gets 0.0.
    order = np.array([2, 1, 3, 4], dtype=float)
    pot_mat = (order[:, np.newaxis] - order[np.newaxis, :]) ** 2
    max_val = pot_mat.max()
    pot_mat = (max_val - pot_mat) / max_val
    potentials = [f * pot_mat for f in factors]
    n_pots = len(potentials)

    # Per-patient pieces are collected and stacked once at the end.  Each
    # list is seeded with an empty float array so the stacked result has the
    # same dtype promotion (and the same empty shape when there are no dev
    # patients) as concatenating onto np.zeros(...) step by step.
    y_parts = [np.zeros(0)]
    no_mrf_parts = [np.zeros(0)]
    mrf_parts = [np.zeros((0, n_pots))]
    patient_idxs_de = [0]

    print("Development users:")
    for de_idx, de_pat in enumerate(dev_pats):
        print("Development patient number %d" % (de_idx + 1))
        x, y, coord, dim = dp.load_patient(de_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)
        y_parts.append(y)
        patient_idxs_de.append(patient_idxs_de[-1] + len(y))

        # Stage 1: tumor / non-tumor, cleaned up by the post-processing.
        pred = model1.predict(x)
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True)
        tumor_idxs = pp_pred > 0

        # Stage 2 only makes sense when at least one voxel survived stage 1;
        # calling the classifier on zero rows would fail.  This mirrors the
        # guard used in predict_two_stage.
        has_tumor = np.any(tumor_idxs)
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            # Column k of the probabilities is written as label k + 1.
            pp_pred[tumor_idxs] = np.argmax(pred_probs2, axis=1) + 1
        pp_pred15 = np.array(pp_pred)  # snapshot before any MRF smoothing

        print("\nConfusion matrix (dev):")
        print(confusion_matrix(y, pp_pred15))
        dice_scores(y, pp_pred15, label='Dice scores (dev, no MRF):')
        no_mrf_parts.append(pp_pred15)

        if has_tumor:
            edges = dp.create_graph(coord[tumor_idxs, :])

        columns = []
        for pi, pot in enumerate(potentials):
            print(" Patient %d, potential %d." % (de_idx + 1, pi + 1))
            if has_tumor:
                # Every tumor voxel is overwritten, so reusing pp_pred across
                # candidates does not leak results between them.
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=pot) + 1
            print("\nConfusion matrix (MRF-%d):" % (pi + 1))
            print(confusion_matrix(y, pp_pred))
            # Copy: pp_pred is modified again for the next candidate.
            columns.append(np.array(pp_pred))
            dice_scores(y, pp_pred, label='Dice scores (pp):')

            if do_plot_predictions or de_idx < 5:
                # Plot the patient
                pif = os.path.join(
                    'plots', 'validation2',
                    'pat%d_slices_2S_MRF-%d.png' % (de_pat, pi + 1))
                pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred,
                                    fname=pif)

        mrf_parts.append(np.column_stack(columns))

    yde = np.concatenate(y_parts)
    predde_no_pp = np.concatenate(no_mrf_parts)
    predde = np.vstack(mrf_parts)

    dice_scores(yde, predde_no_pp, patient_idxs=patient_idxs_de,
                label='Overall dice scores (two-stage, no MRF):',
                fscores=fscores)

    best_potential = potentials[0]
    best_score = -1
    for i, pot in enumerate(potentials):
        print("\nOverall confusion matrix (%d):" % i)
        print(confusion_matrix(yde, predde[:, i]))
        ds = dice_scores(yde, predde[:, i], patient_idxs=patient_idxs_de,
                         label='Overall dice scores (two-stage, MRF-%d):' % i,
                         fscores=fscores)
        # Selection criterion: sum of the values returned by dice_scores.
        score = sum(ds)
        if score > best_score:
            best_score = score
            best_potential = pot

    print("Best potential (score=%f):" % (best_score))
    print(best_potential)
    return best_potential
def optimize_potential(dev_pats, model1, model2, stratified, fscores=None,
                       do_plot_predictions=False, resolution=1,
                       load_hog=False):
    """Select the MRF pairwise potential that scores best on dev patients.

    A family of candidate potentials is built by scaling one normalised
    quadratic similarity matrix with a fixed list of factors.  Every
    development patient is segmented with the two-stage pipeline (model1
    followed by post-processing, then model2 on the voxels kept as tumor),
    and the MRF smoothing is applied once per candidate.  The candidate whose
    summed overall dice scores are highest is returned.

    Input:
    dev_pats -- iterable of patient IDs used for the search.
    model1 -- fitted first-stage classifier; only ``predict`` is called.
    model2 -- fitted second-stage classifier; only ``predict_proba`` is
              called, on the rows selected as tumor.
    stratified -- not used by this function; kept so existing callers that
                  pass it positionally keep working.
    fscores -- forwarded to ``dice_scores`` for the overall scores.
    do_plot_predictions -- plot every patient; the first five patients are
                           plotted regardless of this flag.
    resolution, load_hog -- forwarded to ``dp.load_patient``.

    Returns the best potential (a 4x4 numpy array).  With no development
    patients the first candidate is still the starting value of the search.
    """
    factors = [0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1]

    # Quadratic potential: squared distance between the positions of the
    # labels in `order`, flipped and normalised so that identical labels get
    # 1.0 and the most distant pair gets 0.0.
    order = np.array([2, 1, 3, 4], dtype=float)
    pot_mat = (order[:, np.newaxis] - order[np.newaxis, :]) ** 2
    max_val = pot_mat.max()
    pot_mat = (max_val - pot_mat) / max_val
    potentials = [f * pot_mat for f in factors]
    n_pots = len(potentials)

    # Per-patient pieces are collected and stacked once at the end.  Each
    # list is seeded with an empty float array so the stacked result has the
    # same dtype promotion (and the same empty shape when there are no dev
    # patients) as concatenating onto np.zeros(...) step by step.
    y_parts = [np.zeros(0)]
    no_mrf_parts = [np.zeros(0)]
    mrf_parts = [np.zeros((0, n_pots))]
    patient_idxs_de = [0]

    print("Development users:")
    for de_idx, de_pat in enumerate(dev_pats):
        print("Development patient number %d" % (de_idx + 1))
        x, y, coord, dim = dp.load_patient(de_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)
        y_parts.append(y)
        patient_idxs_de.append(patient_idxs_de[-1] + len(y))

        # Stage 1: tumor / non-tumor, cleaned up by the post-processing.
        pred = model1.predict(x)
        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True)
        tumor_idxs = pp_pred > 0

        # Stage 2 only makes sense when at least one voxel survived stage 1;
        # calling the classifier on zero rows would fail.  This mirrors the
        # guard used in predict_two_stage.
        has_tumor = np.any(tumor_idxs)
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            # Column k of the probabilities is written as label k + 1.
            pp_pred[tumor_idxs] = np.argmax(pred_probs2, axis=1) + 1
        pp_pred15 = np.array(pp_pred)  # snapshot before any MRF smoothing

        print("\nConfusion matrix (dev):")
        print(confusion_matrix(y, pp_pred15))
        dice_scores(y, pp_pred15, label='Dice scores (dev, no MRF):')
        no_mrf_parts.append(pp_pred15)

        if has_tumor:
            edges = dp.create_graph(coord[tumor_idxs, :])

        columns = []
        for pi, pot in enumerate(potentials):
            print(" Patient %d, potential %d." % (de_idx + 1, pi + 1))
            if has_tumor:
                # Every tumor voxel is overwritten, so reusing pp_pred across
                # candidates does not leak results between them.
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=pot) + 1
            print("\nConfusion matrix (MRF-%d):" % (pi + 1))
            print(confusion_matrix(y, pp_pred))
            # Copy: pp_pred is modified again for the next candidate.
            columns.append(np.array(pp_pred))
            dice_scores(y, pp_pred, label='Dice scores (pp):')

            if do_plot_predictions or de_idx < 5:
                # Plot the patient
                pif = os.path.join(
                    'plots', 'validation2',
                    'pat%d_slices_2S_MRF-%d.png' % (de_pat, pi + 1))
                pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred,
                                    fname=pif)

        mrf_parts.append(np.column_stack(columns))

    yde = np.concatenate(y_parts)
    predde_no_pp = np.concatenate(no_mrf_parts)
    predde = np.vstack(mrf_parts)

    dice_scores(yde, predde_no_pp, patient_idxs=patient_idxs_de,
                label='Overall dice scores (two-stage, no MRF):',
                fscores=fscores)

    best_potential = potentials[0]
    best_score = -1
    for i, pot in enumerate(potentials):
        print("\nOverall confusion matrix (%d):" % i)
        print(confusion_matrix(yde, predde[:, i]))
        ds = dice_scores(yde, predde[:, i], patient_idxs=patient_idxs_de,
                         label='Overall dice scores (two-stage, MRF-%d):' % i,
                         fscores=fscores)
        # Selection criterion: sum of the values returned by dice_scores.
        score = sum(ds)
        if score > best_score:
            best_score = score
            best_potential = pot

    print("Best potential (score=%f):" % (best_score))
    print(best_potential)
    return best_potential
def predict_two_stage(train_pats, test_pats, fscores=None,
                      do_plot_predictions=False, stratified=False, n_trees=30,
                      dev_pats=None, use_mrf=True, resolution=1,
                      n_voxels=30000, mat_dir=None, fresh_models=False,
                      load_hog=False):
    """
    Predict tumor voxels for given test patients with a two-stage model.

    Stage one separates tumor from non-tumor voxels; stage two assigns one of
    the tumor labels (1..4) to the voxels that stage one (after
    post-processing) kept as tumor.  The result is then smoothed either with
    an MRF or with a closing post-processing step.

    Input:
    train_pats -- list of patient IDs used for training a model.
    test_pats -- list of patient IDs used for testing a model.
    fscores -- An opened output file to which we write the results
               (forwarded to dice_scores and the optimize_* helpers).
    do_plot_predictions -- plot the predictions of every test patient.
    stratified -- forwarded to dp.load_patients; when true, training samples
                  are re-weighted by (dataset class frequency / sample class
                  frequency).
    n_trees -- forwarded to train_RF_model.
    dev_pats -- patient IDs used to tune the MRF potential, the closing
                radius and the stage-one threshold.  None or empty means
                that fixed default values are used instead.
    use_mrf -- smooth the stage-two labels with dp.mrf; otherwise use
               dp.post_process on the tumor voxels.
    resolution, load_hog -- forwarded to the dp loaders.
    n_voxels -- forwarded to dp.load_patients for the training data.
    mat_dir -- if given, a 'pat<ID>.mat' path inside it is passed to
               pp.plot_predictions.
    fresh_models -- train new models even if matching model files exist.

    The model file names also depend on the module-level `seed` and on the
    number of train and test patients.  Nothing is returned; results are
    printed and passed to dice_scores.
    """
    if dev_pats is None:
        dev_pats = []

    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    fname_args = (seed, len(train_pats), len(test_pats), n_trees, n_voxels,
                  model_str)
    model1_fname = os.path.join(
        'models',
        'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)
    model2_fname = os.path.join(
        'models',
        'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)

    # Smallest tumor size accepted from stage one (fixed value).
    min_voxels = 3000

    # Load models if available
    if (not fresh_models and os.path.isfile(model1_fname)
            and os.path.isfile(model2_fname)):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
    else:
        xtr, ytr, coordtr, patient_idxs_tr, dims_tr = dp.load_patients(
            train_pats, stratified, resolution=resolution, n_voxels=n_voxels,
            load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1 > 0] = 1
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 1): %s" % (class_freqs * 100,))
            # Class frequencies in the sample (non-tumor vs. any tumor label)
            sample_counts = np.histogram(ytr, [0, 1, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr))
            for i in range(2):
                # Index with the binarised labels: using the original labels
                # here would leave tumor labels 2..4 at weight 1.0 instead
                # of the tumor-class weight.
                weights[ytr1 == i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr, ytr1, n_trees=n_trees,
                                sample_weight=weights, fname=model1_fname)

        print("Minimum number of voxels in a tumor: %d" % min_voxels)

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs, :])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 2): %s" % (class_freqs * 100,))
            # Class frequencies in the sample; label 0 falls outside the bins
            sample_counts = np.histogram(ytr, [1, 2, 3, 4, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2 == i + 1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2, ytr2, n_trees=n_trees,
                                sample_weight=weights, fname=model2_fname)

    print("\n----------------------------------\n")

    if len(dev_pats) > 0:
        best_potential = optimize_potential(
            dev_pats, model1, model2, stratified, fscores,
            do_plot_predictions, resolution=resolution, load_hog=load_hog)
        best_radius = optimize_closing(dev_pats, model1, stratified, fscores,
                                       resolution=resolution,
                                       load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        # Fixed fallbacks used when there is nothing to tune on.
        best_radius = 6
        best_th = 0.6
        best_potential = np.array(
            [[0.04, 0.03555556, 0.03555556, 0.02222222],
             [0.03555556, 0.04, 0.02222222, 0.],
             [0.03555556, 0.02222222, 0.04, 0.03555556],
             [0.02222222, 0., 0.03555556, 0.04]])

    method = 'MRF' if use_mrf else 'closing'

    # Per-patient pieces are stacked once after the loop; the empty float
    # seed keeps the dtype promotion and the empty-input shape of growing an
    # np.zeros(0) array by concatenation.
    y_parts = [np.zeros(0)]
    pred_parts = [np.zeros(0)]
    pred_no_pp_parts = [np.zeros(0)]
    patient_idxs_te = [0]

    print("Test users:")
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        # Stage 1: threshold the probability of column 1 (the tumor class).
        pred_probs = model1.predict_proba(x)
        pred = pred_probs[:, 1] >= best_th

        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        n_tumor = np.count_nonzero(pred)
        if n_tumor < min_voxels:
            print("Patient having too few voxels (%d < %d)"
                  % (n_tumor, min_voxels))
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:, 1])[-min_voxels:]
            pred[new_idxs] = 1

        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True,
                                  radius=best_radius)
        tumor_idxs = pp_pred > 0
        has_tumor = np.any(tumor_idxs)

        # Stage 2: label the voxels kept as tumor (column k -> label k + 1).
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2
        pp_pred15 = np.array(pp_pred)  # snapshot before the final smoothing

        print("\nConfusion matrix:")
        print(confusion_matrix(y, pp_pred15))
        dice_scores(y, pp_pred15, label='Dice scores:')

        if has_tumor:
            if use_mrf:
                # MRF post processing
                edges = dp.create_graph(coord[tumor_idxs, :])
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=best_potential) + 1
            else:
                # Closing post processing
                pp_pred[tumor_idxs] = dp.post_process(
                    coord[tumor_idxs, :], dim, pred2,
                    remove_components=False, radius=best_radius)

        print("\nConfusion matrix (pp):")
        print(confusion_matrix(y, pp_pred))

        y_parts.append(y)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))
        pred_parts.append(pp_pred)
        pred_no_pp_parts.append(pp_pred15)

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results', 'pat%d_slices_2S_%s.png'
                               % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred, fname=pif,
                                fmat=fmat)

    yte = np.concatenate(y_parts)
    predte = np.concatenate(pred_parts)
    predte_no_pp = np.concatenate(pred_no_pp_parts)

    print("\nOverall confusion matrix:")
    print(confusion_matrix(yte, predte))

    dice_scores(yte, predte_no_pp, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):',
                fscores=fscores)
    dice_scores(yte, predte, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):', fscores=fscores)
def predict_two_stage(train_pats, test_pats, fscores=None,
                      do_plot_predictions=False, stratified=False, n_trees=30,
                      dev_pats=None, use_mrf=True, resolution=1,
                      n_voxels=30000, mat_dir=None, fresh_models=False,
                      load_hog=False):
    """
    Predict tumor voxels for given test patients with a two-stage model.

    Stage one separates tumor from non-tumor voxels; stage two assigns one of
    the tumor labels (1..4) to the voxels that stage one (after
    post-processing) kept as tumor.  The result is then smoothed either with
    an MRF or with a closing post-processing step.

    Input:
    train_pats -- list of patient IDs used for training a model.
    test_pats -- list of patient IDs used for testing a model.
    fscores -- An opened output file to which we write the results
               (forwarded to dice_scores and the optimize_* helpers).
    do_plot_predictions -- plot the predictions of every test patient.
    stratified -- forwarded to dp.load_patients; when true, training samples
                  are re-weighted by (dataset class frequency / sample class
                  frequency).
    n_trees -- forwarded to train_RF_model.
    dev_pats -- patient IDs used to tune the MRF potential, the closing
                radius and the stage-one threshold.  None or empty means
                that fixed default values are used instead.
    use_mrf -- smooth the stage-two labels with dp.mrf; otherwise use
               dp.post_process on the tumor voxels.
    resolution, load_hog -- forwarded to the dp loaders.
    n_voxels -- forwarded to dp.load_patients for the training data.
    mat_dir -- if given, a 'pat<ID>.mat' path inside it is passed to
               pp.plot_predictions.
    fresh_models -- train new models even if matching model files exist.

    The model file names also depend on the module-level `seed` and on the
    number of train and test patients.  Nothing is returned; results are
    printed and passed to dice_scores.
    """
    if dev_pats is None:
        dev_pats = []

    model_str = ""
    if resolution != 1:
        model_str += '_res%d' % resolution
    if load_hog:
        model_str += '_hog'
    fname_args = (seed, len(train_pats), len(test_pats), n_trees, n_voxels,
                  model_str)
    model1_fname = os.path.join(
        'models',
        'model1_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)
    model2_fname = os.path.join(
        'models',
        'model2_seed%d_ntrp%d_ntep%d_ntrees%d_nvox%s%s.jl' % fname_args)

    # Smallest tumor size accepted from stage one (fixed value).
    min_voxels = 3000

    # Load models if available
    if (not fresh_models and os.path.isfile(model1_fname)
            and os.path.isfile(model2_fname)):
        model1 = joblib.load(model1_fname)
        model2 = joblib.load(model2_fname)
    else:
        xtr, ytr, coordtr, patient_idxs_tr, dims_tr = dp.load_patients(
            train_pats, stratified, resolution=resolution, n_voxels=n_voxels,
            load_hog=load_hog)

        # Make all tumor labels equal to 1 and train the first model
        ytr1 = np.array(ytr, copy=True)
        ytr1[ytr1 > 0] = 1
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = [dp.class_counts[0], sum(dp.class_counts[1:])]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 1): %s" % (class_freqs * 100,))
            # Class frequencies in the sample (non-tumor vs. any tumor label)
            sample_counts = np.histogram(ytr, [0, 1, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr))
            for i in range(2):
                # Index with the binarised labels: using the original labels
                # here would leave tumor labels 2..4 at weight 1.0 instead
                # of the tumor-class weight.
                weights[ytr1 == i] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model1 = train_RF_model(xtr, ytr1, n_trees=n_trees,
                                sample_weight=weights, fname=model1_fname)

        print("Minimum number of voxels in a tumor: %d" % min_voxels)

        # Train the second model to separate tumor classes
        ok_idxs = ytr > 0
        xtr2 = np.asarray(xtr[ok_idxs, :])
        ytr2 = np.asarray(ytr[ok_idxs])
        if stratified:
            # Class frequencies in the whole dataset
            class_counts = dp.class_counts[1:]
            class_freqs = np.asarray(class_counts) / float(sum(class_counts))
            print("Class frequencies (model 2): %s" % (class_freqs * 100,))
            # Class frequencies in the sample; label 0 falls outside the bins
            sample_counts = np.histogram(ytr, [1, 2, 3, 4, 5])[0]
            sample_freqs = sample_counts / float(sum(sample_counts))
            print("Sample frequencies: %s" % (sample_freqs * 100,))
            weights = np.ones(len(ytr2))
            for i in range(4):
                weights[ytr2 == i + 1] = class_freqs[i] / sample_freqs[i]
        else:
            weights = None
        model2 = train_RF_model(xtr2, ytr2, n_trees=n_trees,
                                sample_weight=weights, fname=model2_fname)

    print("\n----------------------------------\n")

    if len(dev_pats) > 0:
        best_potential = optimize_potential(
            dev_pats, model1, model2, stratified, fscores,
            do_plot_predictions, resolution=resolution, load_hog=load_hog)
        best_radius = optimize_closing(dev_pats, model1, stratified, fscores,
                                       resolution=resolution,
                                       load_hog=load_hog)
        best_th = optimize_threshold1(dev_pats, model1, stratified, fscores,
                                      resolution, load_hog, best_radius)
    else:
        # Fixed fallbacks used when there is nothing to tune on.
        best_radius = 6
        best_th = 0.6
        best_potential = np.array(
            [[0.04, 0.03555556, 0.03555556, 0.02222222],
             [0.03555556, 0.04, 0.02222222, 0.],
             [0.03555556, 0.02222222, 0.04, 0.03555556],
             [0.02222222, 0., 0.03555556, 0.04]])

    method = 'MRF' if use_mrf else 'closing'

    # Per-patient pieces are stacked once after the loop; the empty float
    # seed keeps the dtype promotion and the empty-input shape of growing an
    # np.zeros(0) array by concatenation.
    y_parts = [np.zeros(0)]
    pred_parts = [np.zeros(0)]
    pred_no_pp_parts = [np.zeros(0)]
    patient_idxs_te = [0]

    print("Test users:")
    for te_idx, te_pat in enumerate(test_pats):
        print("Test patient number %d" % (te_idx + 1))
        x, y, coord, dim = dp.load_patient(te_pat, n_voxels=None,
                                           resolution=resolution,
                                           load_hog=load_hog)

        # Stage 1: threshold the probability of column 1 (the tumor class).
        pred_probs = model1.predict_proba(x)
        pred = pred_probs[:, 1] >= best_th

        # If the predicted tumor is too small set the most probable tumor
        # voxels to one
        n_tumor = np.count_nonzero(pred)
        if n_tumor < min_voxels:
            print("Patient having too few voxels (%d < %d)"
                  % (n_tumor, min_voxels))
            pred = np.zeros(pred.shape)
            new_idxs = np.argsort(pred_probs[:, 1])[-min_voxels:]
            pred[new_idxs] = 1

        pp_pred = dp.post_process(coord, dim, pred, binary_closing=True,
                                  radius=best_radius)
        tumor_idxs = pp_pred > 0
        has_tumor = np.any(tumor_idxs)

        # Stage 2: label the voxels kept as tumor (column k -> label k + 1).
        if has_tumor:
            pred_probs2 = model2.predict_proba(x[tumor_idxs, :])
            pred2 = np.argmax(pred_probs2, axis=1) + 1
            pp_pred[tumor_idxs] = pred2
        pp_pred15 = np.array(pp_pred)  # snapshot before the final smoothing

        print("\nConfusion matrix:")
        print(confusion_matrix(y, pp_pred15))
        dice_scores(y, pp_pred15, label='Dice scores:')

        if has_tumor:
            if use_mrf:
                # MRF post processing
                edges = dp.create_graph(coord[tumor_idxs, :])
                pp_pred[tumor_idxs] = dp.mrf(pred_probs2, edges,
                                             potential=best_potential) + 1
            else:
                # Closing post processing
                pp_pred[tumor_idxs] = dp.post_process(
                    coord[tumor_idxs, :], dim, pred2,
                    remove_components=False, radius=best_radius)

        print("\nConfusion matrix (pp):")
        print(confusion_matrix(y, pp_pred))

        y_parts.append(y)
        patient_idxs_te.append(patient_idxs_te[-1] + len(y))
        pred_parts.append(pp_pred)
        pred_no_pp_parts.append(pp_pred15)

        dice_scores(y, pp_pred, label='Dice scores (pp):')

        if do_plot_predictions:
            # Plot the patient
            pif = os.path.join('results', 'pat%d_slices_2S_%s.png'
                               % (te_pat, method))
            if mat_dir is not None:
                fmat = os.path.join(mat_dir, 'pat%d.mat' % te_pat)
            else:
                fmat = None
            pp.plot_predictions(coord, dim, pp_pred15, y, pp_pred, fname=pif,
                                fmat=fmat)

    yte = np.concatenate(y_parts)
    predte = np.concatenate(pred_parts)
    predte_no_pp = np.concatenate(pred_no_pp_parts)

    print("\nOverall confusion matrix:")
    print(confusion_matrix(yte, predte))

    dice_scores(yte, predte_no_pp, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage, no pp):',
                fscores=fscores)
    dice_scores(yte, predte, patient_idxs=patient_idxs_te,
                label='Overall dice scores (two-stage):', fscores=fscores)