Example #1
def report_md_breakdown(metadata):
    """Prints a breakdown of metadata info to logger

    Parameters
    ----------
    metadata : list
        List of dicts containing the metadata of each sample
    """

    n_samples = len(metadata)  # Find number of samples in metadata

    # Find the adipose shell ID of each sample
    adi_ids = np.array([md['phant_id'].split('F')[0] for md in metadata])
    unique_adi_ids = np.unique(adi_ids)  # Find the unique adi IDs

    logger.info('\tAdipose Fractions:')
    for adi_id in unique_adi_ids:

        # Find fraction samples with this adipose ID
        frac_here = np.sum(adi_ids == adi_id) / n_samples
        logger.info('\t\t%s:\t%.2f%%' % (adi_id, 100 * frac_here))

    # Get tumor presence and log the tumor fraction
    tum_presence = get_class_labels(metadata)
    logger.info('\tTumor Fraction:\t%.2f%%'
                % (np.sum(tum_presence) * 100 / n_samples))

    # Get BI-RADS class of each sample
    birads_classes = np.array([md['birads'] for md in metadata])
    unique_birads = np.unique(birads_classes)

    logger.info('\tBI-RADS Fractions:')
    for birads_class in unique_birads:

        # Find fraction of samples with this BI-RADS class
        frac_here = np.sum(birads_classes == birads_class) / n_samples
        logger.info('\t\tClass %d:\t%.2f%%' % (birads_class, 100 * frac_here))
        # Get the indices of the samples with the target Adi ID
        tar_idxs = adi_ids == adi_id

        # Use the samples with this adipose ID as the test set,
        # all others assigned to train set
        test_data = g2_d[tar_idxs, :, :]
        train_data = g2_d[~tar_idxs, :, :]
        test_md = g2_md[tar_idxs]
        train_md = g2_md[~tar_idxs]

        # Perform data augmentation on the training set here
        train_data, train_md = full_aug(train_data, train_md)

        # Get class labels for train/test sets here
        test_labels = get_class_labels(test_md)
        train_labels = get_class_labels(train_md)
        test_labels = to_categorical(test_labels)
        train_labels = to_categorical(train_labels)

        # Resize data for use with keras
        test_data = resize_features_for_keras(test_data)
        train_data = resize_features_for_keras(train_data)

        # Init arrays for storing results for this test set
        aucs_here = np.zeros(__N_RUNS)
        accs_here = np.zeros(__N_RUNS)
    g1_d = load_pickle(
        os.path.join(__DATA_DIR, 'g1-train-test/test_fd.pickle'))
    g1_md = load_pickle(
        os.path.join(__DATA_DIR, 'g1-train-test/test_md.pickle'))

    # Correct the initial antenna position in the G1 scans, then
    # convert both datasets to the time domain and take the magnitude
    g1_d = correct_g1_ini_ant_ang(g1_d)
    g1_d = np.abs(to_td(g1_d))
    g2_d = np.abs(to_td(g2_d))

    # Perform data augmentation
    g2_d, g2_md = full_aug(g2_d, g2_md)

    g2_d = resize_features_for_keras(g2_d)
    g1_d = resize_features_for_keras(g1_d)
    g2_labels = to_categorical(get_class_labels(g2_md))
    g1_labels = to_categorical(get_class_labels(g1_md))

    n_runs = 20

    # Init arrays for storing performance metrics
    auc_scores = np.zeros(n_runs)
    accs = np.zeros(n_runs)
    sens = np.zeros(n_runs)
    spec = np.zeros(n_runs)
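
The four arrays above collect one score per run. As a point of reference, here is a minimal sketch of how AUC, accuracy, sensitivity, and specificity could be computed for a single run with scikit-learn; `binary_run_metrics`, `model`, `x_test`, and `y_test` are hypothetical names, not part of the original project, and `y_test` is assumed to hold plain 0/1 labels:

from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix

def binary_run_metrics(model, x_test, y_test):
    # Hypothetical helper: y_test holds 0/1 labels, and model.predict()
    # is assumed to return class probabilities with the positive
    # class in column 1
    probs = model.predict(x_test)[:, 1]
    preds = (probs >= 0.5).astype(int)

    auc = roc_auc_score(y_test, probs)
    acc = accuracy_score(y_test, preds)

    # Sensitivity = TP / (TP + FN), specificity = TN / (TN + FP)
    tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()
    return auc, acc, tp / (tp + fn), tn / (tn + fp)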
Example #4
    metadata = load_pickle(os.path.join(__DATA_DIR, 'g1_metadata.pickle'))

    # Init the train-set tumor fraction
    tr_frac = 0

    # Until the train tumor fraction is approx 50%
    while not (0.45 <= tr_frac <= 0.55):

        # Shuffle the arrays
        [fd_data, metadata] = shuffle_arrs([fd_data, metadata])

        # Split data/metadata into train and test sets
        tr_data = fd_data[:125, :, :]
        tr_md = metadata[:125]
        te_data = fd_data[125:, :, :]
        te_md = metadata[125:]

        # Get the train-set class labels and compute the tumor fraction
        tr_labels = get_class_labels(tr_md)
        tr_frac = np.sum(tr_labels) / len(tr_labels)

    # Save train/test sets as .pickles
    save_pickle(te_data, os.path.join(__OUT_DIR, 'test_fd.pickle'))
    save_pickle(te_md, os.path.join(__OUT_DIR, 'test_md.pickle'))
    save_pickle(tr_data, os.path.join(__OUT_DIR, 'train_fd.pickle'))
    save_pickle(tr_md, os.path.join(__OUT_DIR, 'train_md.pickle'))
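
Example #4 rejects random splits until the training tumor fraction lands near 50%. As a hedged alternative, scikit-learn's stratified splitting produces a balanced split in a single call; note that it preserves the tumor fraction of the full dataset rather than forcing 50%, so it matches the loop above only if the full set is itself roughly balanced:

from sklearn.model_selection import train_test_split

labels = get_class_labels(metadata)
tr_data, te_data, tr_md, te_md = train_test_split(
    fd_data, metadata, train_size=125, stratify=labels, shuffle=True)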
Example #5
    val_data = g1_tr_d
    val_md = g1_tr_md

    # Correct the initial antenna position used in G1 scans
    val_data = correct_g1_ini_ant_ang(val_data)

    # Preprocess: convert to the time domain, take the magnitude,
    # and augment the training dataset
    val_data = np.abs(to_td(val_data))
    val_data = resize_features_for_keras(val_data)
    train_data = np.abs(to_td(train_data))
    train_data, train_md = full_aug(train_data, train_md)
    train_data = resize_features_for_keras(train_data)

    # Get the validation and train set class labels and make categorical
    val_labels = get_class_labels(val_md)
    val_labels = to_categorical(val_labels)
    train_labels = get_class_labels(train_md)
    train_labels = to_categorical(train_labels)

    # Create arrays for storing the per-epoch AUC on the train and
    # validation sets for each run
    train_set_aucs = np.zeros([__N_RUNS, __N_EPOCHS])
    val_set_aucs = np.zeros([__N_RUNS, __N_EPOCHS])

    train_set_loss = np.zeros([__N_RUNS, __N_EPOCHS])
    val_set_loss = np.zeros([__N_RUNS, __N_EPOCHS])

    for run_idx in range(__N_RUNS):  # For each independent training run
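
The snippet ends as the per-run loop opens. For illustration only, here is a minimal sketch of how such a loop body could populate the per-epoch arrays, assuming a hypothetical `build_model()` factory that returns a model compiled with `metrics=[keras.metrics.AUC(name='auc')]`; this is not the original training code:

    for run_idx in range(__N_RUNS):
        model = build_model()  # Hypothetical compiled-model factory
        history = model.fit(train_data, train_labels,
                            validation_data=(val_data, val_labels),
                            epochs=__N_EPOCHS, verbose=0)

        # Keras records one value per epoch for the loss and each metric
        train_set_aucs[run_idx, :] = history.history['auc']
        val_set_aucs[run_idx, :] = history.history['val_auc']
        train_set_loss[run_idx, :] = history.history['loss']
        val_set_loss[run_idx, :] = history.history['val_loss']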