def report_md_breakdown(metadata):
    """Prints a breakdown of metadata info to logger

    Parameters
    ----------
    metadata : array_like
        List of dicts of the metadata
    """

    n_samples = len(metadata)  # Find number of samples in metadata

    # Find the adipose shell ID of each sample
    adi_ids = np.array([md['phant_id'].split('F')[0] for md in metadata])
    unique_adi_ids = np.unique(adi_ids)  # Find the unique adi IDs

    logger.info('\tAdipose Fractions:')
    for adi_id in unique_adi_ids:

        # Find fraction of samples with this adipose ID
        frac_here = np.sum(adi_ids == adi_id) / n_samples
        logger.info('\t\t%s:\t%.2f%%' % (adi_id, 100 * frac_here))

    # Get tumor presence and print to logger
    tum_presence = get_class_labels(metadata)
    logger.info('\tTumor Fraction:\t%.2f%%'
                % (100 * np.sum(tum_presence) / n_samples))

    # Get BI-RADS class of each sample
    birads_classes = np.array([md['birads'] for md in metadata])
    unique_birads = np.unique(birads_classes)  # Find the unique classes

    logger.info('\tBI-RADS Fractions:')
    for birads_class in unique_birads:

        # Find fraction of samples with this BI-RADS class
        frac_here = np.sum(birads_classes == birads_class) / n_samples
        logger.info('\t\tClass %d:\t%.2f%%' % (birads_class, 100 * frac_here))
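# A minimal usage sketch for report_md_breakdown (illustration only).
# It assumes each metadata dict carries a 'phant_id' string of the form
# '<adipose ID>F<fibroglandular ID>' (e.g. 'A2F1') and a numeric
# 'birads' key, as the parsing above implies; the dicts and the
# 'tum_rad' key below are hypothetical examples, not real samples.
if __name__ == '__main__':

    example_md = [
        {'phant_id': 'A2F1', 'birads': 1, 'tum_rad': 0.0},
        {'phant_id': 'A2F3', 'birads': 2, 'tum_rad': 1.5},
        {'phant_id': 'A3F1', 'birads': 3, 'tum_rad': 0.0},
    ]
    report_md_breakdown(example_md)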
# Get a boolean mask of the samples with the target adipose ID
tar_idxs = adi_ids == adi_id

# Use the samples with this adipose ID as the test set,
# all others assigned to the train set
test_data = g2_d[tar_idxs, :, :]
train_data = g2_d[~tar_idxs, :, :]
test_md = g2_md[tar_idxs]
train_md = g2_md[~tar_idxs]

# Perform data augmentation on the training set here
train_data, train_md = full_aug(train_data, train_md)

# Get class labels for train/test sets here
test_labels = get_class_labels(test_md)
train_labels = get_class_labels(train_md)
test_labels = to_categorical(test_labels)
train_labels = to_categorical(train_labels)

# Resize data for use with keras
test_data = resize_features_for_keras(test_data)
train_data = resize_features_for_keras(train_data)

# Init arrays for storing results for this test set
aucs_here = np.zeros([__N_RUNS, ])
accs_here = np.zeros([__N_RUNS, ])
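# For illustration only: a self-contained sketch of the boolean-mask,
# leave-one-phantom-out split used above, with toy arrays standing in
# for g2_d and adi_ids (all names below are hypothetical).
import numpy as np

toy_adi_ids = np.array(['A1', 'A1', 'A2', 'A3', 'A2'])
toy_data = np.arange(5 * 2 * 2).reshape(5, 2, 2)  # Stand-in for g2_d

for toy_id in np.unique(toy_adi_ids):
    mask = toy_adi_ids == toy_id  # Samples from the held-out shell
    test, train = toy_data[mask], toy_data[~mask]
    print(toy_id, test.shape, train.shape)  # e.g. A1 (2, 2, 2) (3, 2, 2)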
# Load the G1 test-set data and metadata
g1_d = load_pickle(
    os.path.join(__DATA_DIR, 'g1-train-test/test_fd.pickle'))
g1_md = load_pickle(
    os.path.join(__DATA_DIR, 'g1-train-test/test_md.pickle'))

# Correct the initial antenna position used in G1 scans
g1_d = correct_g1_ini_ant_ang(g1_d)

# Convert data to time domain, take magnitude, apply window
g1_d = np.abs(to_td(g1_d))
g2_d = np.abs(to_td(g2_d))

# Perform data augmentation on the G2 set
g2_d, g2_md = full_aug(g2_d, g2_md)

# Resize data for use with keras
g2_d = resize_features_for_keras(g2_d)
g1_d = resize_features_for_keras(g1_d)

# Get class labels and make categorical
g2_labels = to_categorical(get_class_labels(g2_md))
g1_labels = to_categorical(get_class_labels(g1_md))

n_runs = 20

# Init arrays for storing performance metrics
auc_scores = np.zeros([n_runs, ])
accs = np.zeros([n_runs, ])
sens = np.zeros([n_runs, ])
spec = np.zeros([n_runs, ])
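# Note: to_td() is a repo helper used above, not defined in this
# excerpt. For reference only, a minimal frequency-to-time-domain
# conversion could look like the sketch below (an assumption, not the
# repo's actual implementation); the choice of axis 1 as the frequency
# axis of the 3-D scan array is likewise hypothetical.
import numpy as np

def to_td_sketch(fd_data, freq_axis=1):
    """Convert frequency-domain data to the time domain via the IFFT."""
    return np.fft.ifft(fd_data, axis=freq_axis)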
# Load the G1 metadata
metadata = load_pickle(os.path.join(__DATA_DIR, 'g1_metadata.pickle'))

# Init var for the train-set tumor fraction
tr_frac = 0

# Re-shuffle until the train tumor fraction is approx 50%
while not (0.45 <= tr_frac <= 0.55):

    # Shuffle the data and metadata together
    [fd_data, metadata] = shuffle_arrs([fd_data, metadata])

    # Split data/metadata into train and test sets
    tr_data = fd_data[:125, :, :]
    tr_md = metadata[:125]
    te_data = fd_data[125:, :, :]
    te_md = metadata[125:]

    # Get the train class labels to determine the tumor fraction
    tr_labels = get_class_labels(tr_md)

    # Get tumor fraction in the training set
    tr_frac = np.sum(tr_labels) / len(tr_labels)

# Save train/test sets as .pickles
save_pickle(te_data, os.path.join(__OUT_DIR, 'test_fd.pickle'))
save_pickle(te_md, os.path.join(__OUT_DIR, 'test_md.pickle'))
save_pickle(tr_data, os.path.join(__OUT_DIR, 'train_fd.pickle'))
save_pickle(tr_md, os.path.join(__OUT_DIR, 'train_md.pickle'))
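# shuffle_arrs() is a repo helper; the property the loop above relies
# on is that all arrays are shuffled with one shared permutation, so
# fd_data and metadata stay aligned sample-for-sample. A minimal
# sketch of that idea (an assumption, not the helper's actual code):
import numpy as np

def shuffle_arrs_sketch(arrs, seed=None):
    """Shuffle equal-length arrays with a single shared permutation."""
    rng = np.random.default_rng(seed)
    perm = rng.permutation(len(arrs[0]))
    return [np.asarray(arr)[perm] for arr in arrs]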
# Use the G1 training set as the validation set
val_data = g1_tr_d
val_md = g1_tr_md

# Correct the initial antenna position used in G1 scans
val_data = correct_g1_ini_ant_ang(val_data)

# Preprocess data: convert to the time domain, take the magnitude,
# apply the time window, and augment the training dataset
val_data = np.abs(to_td(val_data))
val_data = resize_features_for_keras(val_data)
train_data = np.abs(to_td(train_data))
train_data, train_md = full_aug(train_data, train_md)
train_data = resize_features_for_keras(train_data)

# Get the validation and train set class labels and make categorical
val_labels = get_class_labels(val_md)
val_labels = to_categorical(val_labels)
train_labels = get_class_labels(train_md)
train_labels = to_categorical(train_labels)

# Create arrays for storing the AUC and loss on the train and
# validation sets for this regularization parameter after training
# with correct labels
train_set_aucs = np.zeros([__N_RUNS, __N_EPOCHS])
val_set_aucs = np.zeros([__N_RUNS, __N_EPOCHS])
train_set_loss = np.zeros([__N_RUNS, __N_EPOCHS])
val_set_loss = np.zeros([__N_RUNS, __N_EPOCHS])

for run_idx in range(__N_RUNS):  # For each run
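    # A hedged sketch of how the per-epoch arrays above might be
    # filled inside this loop, assuming a Keras model named `model`
    # (model construction and `import tensorflow as tf` are not part
    # of this excerpt):
    #
    #   model.compile(optimizer='adam', loss='categorical_crossentropy',
    #                 metrics=[tf.keras.metrics.AUC(name='auc')])
    #   history = model.fit(train_data, train_labels,
    #                       validation_data=(val_data, val_labels),
    #                       epochs=__N_EPOCHS, verbose=0)
    #   train_set_aucs[run_idx, :] = history.history['auc']
    #   val_set_aucs[run_idx, :] = history.history['val_auc']
    #   train_set_loss[run_idx, :] = history.history['loss']
    #   val_set_loss[run_idx, :] = history.history['val_loss']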