def main():
    """Preprocess training MRI data for visual inspection.

    For each patient: pick the SAX series whose slice locations lie near
    the middle of the stack, preprocess every frame, detect ROI circles,
    crop an 80x80 window around each series' circle, and save one sample
    crop per patient under examples/.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load avoids constructing arbitrary objects from the config file
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']

    mri_iter = MRIDataIterator(train_dir, train_labels)
    for patient_index, patient_slices in mri_iter.frames.items():
        # Map each series' slice location (mm, truncated to int) to its
        # index within patient_slices.
        slices_locations_to_names = {}
        for i, sax_set in enumerate(patient_slices):
            slices_locations_to_names[int(dicom.read_file(sax_set[0]).SliceLocation)] = i

        median_array = sorted(slices_locations_to_names.keys())
        values_closest_to_middle = []
        if len(median_array) > 1:
            middle_value = (median_array[-1] + median_array[0]) / 2
            # Keep slice locations within 25mm of the stack midpoint
            # (abs() replaces the original sqrt(x**2) roundabout).
            for val in median_array:
                if abs(val - middle_value) < 25:
                    values_closest_to_middle.append(val)
        else:
            values_closest_to_middle.append(median_array[0])

        values = []
        for proposed_median_value in values_closest_to_middle:
            median_index = slices_locations_to_names[proposed_median_value]
            sax_set = patient_slices[median_index]
            time_series = []
            for path in sax_set:
                f = dicom.read_file(path)
                # Normalize pixel values to [0, 1] before preprocessing.
                img = mri_iter.preproc(
                    f.pixel_array.astype(np.float32) / np.max(f.pixel_array),
                    64, f.PixelSpacing, True, False)
                time_series.append(img)
            values.append(time_series)
        data_array = np.array(values)
        rois, circles = calc_rois(data_array)

        new_set = []
        # BUG FIX: the original initialized the circle index to 0 but never
        # advanced it, so every series was cropped around circles[0].
        # (Also removed a leftover pdb.set_trace() debug breakpoint.)
        for i, sax_set in enumerate(data_array):
            center_point, radius = circles[i]
            new_time_series = []
            for img in sax_set:
                # Crop an 80x80 window around the detected ROI center.
                crop_img = img[center_point[0] - 40:center_point[0] + 40,
                               center_point[1] - 60:center_point[1] + 20]
                new_time_series.append(crop_img)
            new_set.append(new_time_series)

        new_data_array = np.array(new_set)
        im = Image.fromarray(new_data_array[0][0]).convert('RGB')
        im.save('examples/' + randword() + '.png')
class TestDataUtilities(unittest.TestCase):
    """Visual sanity checks for the MRI data utilities."""

    def setUp(self):
        # TODO: this is super inefficient and dumb, rewrite so I don't read in entire space of all sax images
        self.mriIter = MRIDataIterator(
            "/Users/Breakend/Documents/datasets/sciencebowl2015/train",
            "/Users/Breakend/Documents/datasets/sciencebowl2015/train.csv")

    def test_preprocessing(self):
        """Visual test: sample one random patient's first frame, preprocess
        it, and display original vs. processed side by side."""
        patient = randint(1, 599)
        dicom_image_path = self.mriIter.frames[patient][0][0]
        dicom_file = dicom.read_file(dicom_image_path)
        plt.figure(figsize=(10, 3.6))
        plt.subplot(131)
        plt.imshow(dicom_file.pixel_array)
        normalized = dicom_file.pixel_array.astype(np.float32) / np.max(dicom_file.pixel_array)
        processed = self.mriIter.preproc(normalized, 64, dicom_file.PixelSpacing)
        plt.subplot(132)
        plt.imshow(processed)
        plt.axis('off')
        plt.subplots_adjust(wspace=0, hspace=0., top=0.99, bottom=0.01,
                            left=0.05, right=0.99)
        plt.show()
def main(num_epochs=30):
    """Train the systolic and diastolic volume networks.

    Loads dataset paths from config.yml, resumes from saved checkpoints
    when present, then for each epoch runs a training pass (real plus
    augmented batches), a validation pass that accumulates a CRPS-style
    score, and re-checkpoints both networks.

    num_epochs: number of passes over the training data.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load avoids constructing arbitrary objects from the config file
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    batch_size = 5

    mriIter = MRIDataIterator(train_dir, train_labels)

    network, train_fn, val_fn = compose_functions("systole", batch_size)
    network_dia, train_fn_dia, val_fn_dia = compose_functions("diastole", batch_size)

    # Resume from checkpoints if they exist.
    if os.path.exists('model-sys.npz'):
        with np.load('model-sys.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network, param_values)
    if os.path.exists('model-dia.npz'):
        with np.load('model-dia.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network_dia, param_values)

    print("Starting training...")
    for epoch in range(num_epochs):
        # Full pass over the training data.
        train_err_sys = 0
        train_err_dia = 0
        train_batches = 0
        training_index = 1
        # Validation patients start immediately after the training range.
        validation_index = mriIter.last_training_index + 1

        start_time = time.time()
        while mriIter.has_more_training_data(training_index + batch_size):
            gc.collect()
            print("Training index %s" % training_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                training_index, batch_size, return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            train_batches += batch_size
            training_index += batch_size

        # Pad the epoch out to 500 samples with augmented data drawn from
        # the already-seen training range.
        augmented_training_index = training_index
        while augmented_training_index < 500:
            gc.collect()
            print("Augmented training index: %s" % augmented_training_index)
            inputs, systole, diastole, metadata = mriIter.get_augmented_data(
                augmented_training_index, training_index - batch_size,
                return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            augmented_training_index += batch_size

        # Full pass over the validation data.
        val_err_sys = 0
        val_acc_sys = 0
        val_err_dia = 0
        val_acc_dia = 0
        val_batches = 0
        while mriIter.has_more_data(validation_index):
            gc.collect()
            print("Validation index %s" % validation_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                validation_index, batch_size, return_gender_age=True)

            err, prediction = val_fn(inputs, systole, metadata)
            # BUG FIX: err is a per-batch loss; the original added it once
            # per sample, inflating it by batch_size.
            val_err_sys += err
            for y, prob_set in enumerate(prediction):
                # CRPS term: squared distance between the predicted CDF and
                # the Heaviside step at the true volume, over 600 bins.
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= systole[y])
                sq_dists = (prob_dist - heavy)**2
                val_acc_sys += (sum(sq_dists) / 600.)

            # BUG FIX: the diastolic network was being evaluated against the
            # systolic targets; pass diastole here.
            err, prediction = val_fn_dia(inputs, diastole, metadata)
            val_err_dia += err
            for y, prob_set in enumerate(prediction):
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= diastole[y])
                sq_dists = (prob_dist - heavy)**2
                val_acc_dia += (sum(sq_dists) / 600.)
            val_batches += batch_size
            validation_index += batch_size

        # Per-epoch report.
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("Validation Sum Sqrts Systolic: {}".format(val_acc_sys))
        print("Validation Sum Sqrts Diastolic: {}".format(val_acc_dia))
        print("Train Err Systole: {}".format(train_err_sys))
        print("Train Err Diastole: {}".format(train_err_dia))
        print("CRPS:\t\t{:.6f} %".format(
            (val_acc_sys + val_acc_dia) / (val_batches) * .5))

        # Checkpoint both networks after every epoch.
        np.savez('model-sys.npz', *lasagne.layers.get_all_param_values(network))
        np.savez('model-dia.npz', *lasagne.layers.get_all_param_values(network_dia))
 def setUp(self):
     """Create the shared MRIDataIterator fixture from hard-coded local paths.

     NOTE(review): this reads the entire SAX image tree up front — see the
     TODO below. Also note the 1-space indentation; presumably this fragment
     belongs to a test class defined elsewhere — confirm before running.
     """
     # TODO: this is super inefficient and dumb, rewrite so I don't read in entire space of all sax images
     self.mriIter = MRIDataIterator("/Users/Breakend/Documents/datasets/sciencebowl2015/train", "/Users/Breakend/Documents/datasets/sciencebowl2015/train.csv")
# Example #5
def compose_prediction_functions(scope):
    """Build a CNN and a compiled forward-pass function for the given scope.

    Returns (network, prediction_fn), where prediction_fn maps an
    (inputs, metadata) batch to the network's output.
    """
    input_var = T.tensor4(scope + 'inputs')
    metadata_var = T.matrix(scope + 'metadatainputs')
    network = build_cnn(input_var, 20, metadata_var)
    output = lasagne.layers.get_output(network)
    return network, theano.function([input_var, metadata_var], output)


# Load dataset paths from the project config. safe_load avoids executing
# arbitrary YAML tags from the file (yaml.load without a Loader is unsafe
# and deprecated).
with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    validation_dir = cfg['dataset_paths']['validation_data']
    sample_submission_path = cfg['dataset_paths']['sample_submission']

mriIter = MRIDataIterator(validation_dir)
systolic_network, systolic_prediction_fn = compose_prediction_functions('sys')
diastolic_network, diastolic_prediction_fn = compose_prediction_functions(
    'dia')


def _restore_params(network, model_path):
    """Load saved lasagne parameters into `network` if `model_path` exists."""
    if os.path.exists(model_path):
        with np.load(model_path) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network, param_values)


# Resume both networks from checkpoints when available.
_restore_params(systolic_network, 'model-sys.npz')
_restore_params(diastolic_network, 'model-dia.npz')

#TODO: Abstract data retrieval so it applies to validation (i.e. get bounds from number of folders in dataset path)
def main():
    """Preprocess training MRI data for visual inspection.

    For each patient: select SAX series near the middle of the slice
    stack, preprocess every frame, detect ROI circles, crop an 80x80
    window around each series' circle, and save one sample crop per
    patient under examples/.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load avoids constructing arbitrary objects from the config file
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']

    mri_iter = MRIDataIterator(train_dir, train_labels)
    for patient_index, patient_slices in mri_iter.frames.items():
        # slice location (mm, truncated to int) -> index within patient_slices
        slices_locations_to_names = {}
        for i, sax_set in enumerate(patient_slices):
            slices_locations_to_names[int(
                dicom.read_file(sax_set[0]).SliceLocation)] = i

        median_array = sorted(slices_locations_to_names.keys())
        values_closest_to_middle = []
        if len(median_array) > 1:
            middle_value = (median_array[-1] + median_array[0]) / 2
            # Keep slice locations within 25mm of the stack midpoint
            # (abs() replaces the original sqrt(x**2) roundabout).
            for val in median_array:
                if abs(val - middle_value) < 25:
                    values_closest_to_middle.append(val)
        else:
            values_closest_to_middle.append(median_array[0])

        values = []
        for proposed_median_value in values_closest_to_middle:
            median_index = slices_locations_to_names[proposed_median_value]
            sax_set = patient_slices[median_index]
            time_series = []
            for path in sax_set:
                f = dicom.read_file(path)
                # Normalize pixel values to [0, 1] before preprocessing.
                img = mri_iter.preproc(
                    f.pixel_array.astype(np.float32) / np.max(f.pixel_array),
                    64, f.PixelSpacing, True, False)
                time_series.append(img)
            values.append(time_series)
        data_array = np.array(values)
        rois, circles = calc_rois(data_array)

        new_set = []
        # BUG FIX: the original initialized the circle index to 0 but never
        # advanced it, so every series was cropped around circles[0].
        # (Also removed a leftover pdb.set_trace() debug breakpoint.)
        for i, sax_set in enumerate(data_array):
            center_point, radius = circles[i]
            new_time_series = []
            for img in sax_set:
                # Crop an 80x80 window around the detected ROI center.
                crop_img = img[center_point[0] - 40:center_point[0] + 40,
                               center_point[1] - 60:center_point[1] + 20]
                new_time_series.append(crop_img)
            new_set.append(new_time_series)

        new_data_array = np.array(new_set)
        im = Image.fromarray(new_data_array[0][0]).convert('RGB')
        im.save('examples/' + randword() + '.png')
def compose_prediction_functions(scope):
    """Construct a CNN plus its compiled prediction function.

    `scope` is prefixed onto the symbolic variable names so multiple
    networks can coexist. Returns the (network, prediction_fn) pair.
    """
    inputs = T.tensor4(scope + 'inputs')
    metadata = T.matrix(scope + 'metadatainputs')

    net = build_cnn(inputs, 20, metadata)
    predict = theano.function([inputs, metadata],
                              lasagne.layers.get_output(net))
    return net, predict

# Load dataset paths from the project config. safe_load avoids executing
# arbitrary YAML tags from the file (yaml.load without a Loader is unsafe
# and deprecated).
with open("config.yml", 'r') as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    validation_dir = cfg['dataset_paths']['validation_data']
    sample_submission_path = cfg['dataset_paths']['sample_submission']

mriIter = MRIDataIterator(validation_dir)
systolic_network, systolic_prediction_fn = compose_prediction_functions('sys')
diastolic_network, diastolic_prediction_fn = compose_prediction_functions('dia')


def _load_saved_params(network, model_path):
    """Load saved lasagne parameters into `network` if `model_path` exists."""
    if os.path.exists(model_path):
        with np.load(model_path) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network, param_values)


# Resume both networks from checkpoints when available.
_load_saved_params(systolic_network, 'model-sys.npz')
_load_saved_params(diastolic_network, 'model-dia.npz')

#TODO: Abstract data retrieval so it applies to validation (i.e. get bounds from number of folders in dataset path)
# First validation patient index (patients 1-500 are training data).
index = 501
def main(num_epochs=30):
    """Train the systolic and diastolic volume networks.

    Loads dataset paths from config.yml, resumes from saved checkpoints
    when present, then for each epoch runs a training pass (real plus
    augmented batches), a validation pass that accumulates a CRPS-style
    score, and re-checkpoints both networks.

    num_epochs: number of passes over the training data.
    """
    print("Creating data iterator...")
    with open("config.yml", 'r') as ymlfile:
        # safe_load avoids constructing arbitrary objects from the config file
        cfg = yaml.safe_load(ymlfile)
    train_dir = cfg['dataset_paths']['train_data']
    train_labels = cfg['dataset_paths']['train_labels']
    batch_size = 5

    mriIter = MRIDataIterator(train_dir, train_labels)

    network, train_fn, val_fn = compose_functions("systole", batch_size)
    network_dia, train_fn_dia, val_fn_dia = compose_functions(
        "diastole", batch_size)

    # Resume from checkpoints if they exist.
    if os.path.exists('model-sys.npz'):
        with np.load('model-sys.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network, param_values)
    if os.path.exists('model-dia.npz'):
        with np.load('model-dia.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(network_dia, param_values)

    print("Starting training...")
    for epoch in range(num_epochs):
        # Full pass over the training data.
        train_err_sys = 0
        train_err_dia = 0
        train_batches = 0
        training_index = 1
        # Validation patients start immediately after the training range.
        validation_index = mriIter.last_training_index + 1

        start_time = time.time()
        while mriIter.has_more_training_data(training_index + batch_size):
            gc.collect()
            print("Training index %s" % training_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                training_index, batch_size, return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            train_batches += batch_size
            training_index += batch_size

        # Pad the epoch out to 500 samples with augmented data drawn from
        # the already-seen training range.
        augmented_training_index = training_index
        while augmented_training_index < 500:
            gc.collect()
            print("Augmented training index: %s" % augmented_training_index)
            inputs, systole, diastole, metadata = mriIter.get_augmented_data(
                augmented_training_index,
                training_index - batch_size,
                return_gender_age=True)
            train_err_sys += train_fn(inputs, systole, metadata)
            train_err_dia += train_fn_dia(inputs, diastole, metadata)
            augmented_training_index += batch_size

        # Full pass over the validation data.
        val_err_sys = 0
        val_acc_sys = 0
        val_err_dia = 0
        val_acc_dia = 0
        val_batches = 0
        while mriIter.has_more_data(validation_index):
            gc.collect()
            print("Validation index %s" % validation_index)
            inputs, systole, diastole, metadata = mriIter.get_median_bucket_data(
                validation_index, batch_size, return_gender_age=True)

            err, prediction = val_fn(inputs, systole, metadata)
            # BUG FIX: err is a per-batch loss; the original added it once
            # per sample, inflating it by batch_size.
            val_err_sys += err
            for y, prob_set in enumerate(prediction):
                # CRPS term: squared distance between the predicted CDF and
                # the Heaviside step at the true volume, over 600 bins.
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= systole[y])
                sq_dists = (prob_dist - heavy)**2
                val_acc_sys += (sum(sq_dists) / 600.)

            # BUG FIX: the diastolic network was being evaluated against the
            # systolic targets; pass diastole here.
            err, prediction = val_fn_dia(inputs, diastole, metadata)
            val_err_dia += err
            for y, prob_set in enumerate(prediction):
                prob_dist = np.cumsum(prob_set)
                v = np.array(range(prediction.shape[1]))
                heavy = (v >= diastole[y])
                sq_dists = (prob_dist - heavy)**2
                val_acc_dia += (sum(sq_dists) / 600.)
            val_batches += batch_size
            validation_index += batch_size

        # Per-epoch report.
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs,
                                                   time.time() - start_time))
        print("Validation Sum Sqrts Systolic: {}".format(val_acc_sys))
        print("Validation Sum Sqrts Diastolic: {}".format(val_acc_dia))
        print("Train Err Systole: {}".format(train_err_sys))
        print("Train Err Diastole: {}".format(train_err_dia))
        print("CRPS:\t\t{:.6f} %".format(
            (val_acc_sys + val_acc_dia) / (val_batches) * .5))

        # Checkpoint both networks after every epoch.
        np.savez('model-sys.npz',
                 *lasagne.layers.get_all_param_values(network))
        np.savez('model-dia.npz',
                 *lasagne.layers.get_all_param_values(network_dia))