Example #1
def evaluate(object_folder):
    test_images_list = os.path.join(object_folder, 'test_images_list.csv')
    test_labels_list = os.path.join(object_folder, 'test_labels_list.csv')

    test_image_filenames = KScsv.read_csv(test_images_list)
    test_label_filenames = KScsv.read_csv(test_labels_list)

    all_prediction = list()
    all_label = list()
    f1score_per_image = list()
    all_score = list()
    for i_image, (image_file, label_file) in enumerate(
            zip(test_image_filenames, test_label_filenames)):

        tick = time.time()

        basename = os.path.basename(image_file[0])
        basename = os.path.splitext(basename)[0]
        image_file = os.path.join(object_folder, 'result', basename + '.mat')

        # Read in result and label
        mat_content = matlab.load(image_file)
        score = mat_content['mask']
        prediction = (score > 0.5).astype('float')

        label = KSimage.imread(label_file[0])
        label = label.astype('float') / 255.0
        label = (label > 0.5).astype('float')

        prediction = np.reshape(prediction, -1)
        label = np.reshape(label, -1)
        score = np.reshape(score, -1)

        all_prediction.append(prediction)
        all_label.append(label)
        all_score.append(score)

        f1score = metrics.f1_score(label, prediction, average='binary')
        f1score_per_image.append(f1score)

        duration = time.time() - tick
        print('evaluate %d / %d (%.2f sec)' %
              (i_image + 1, len(test_image_filenames), duration))

    # Concatenate the per-image vectors (images may differ in size, so
    # np.array over the ragged list would fail).
    all_label = np.concatenate(all_label)
    all_prediction = np.concatenate(all_prediction)
    all_score = np.concatenate(all_score)

    total_f1score = metrics.f1_score(all_label,
                                     all_prediction,
                                     average='binary')
    avg_f1score = np.mean(f1score_per_image)
    average_precision = metrics.average_precision_score(all_label,
                                                        all_score,
                                                        average='micro')

    return total_f1score, avg_f1score, average_precision, f1score_per_image
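
For reference, here is a minimal, self-contained sketch of the metric aggregation used above, assuming `metrics` is `sklearn.metrics`; the `label` and `score` arrays are dummy stand-ins for the flattened per-image outputs:

import numpy as np
from sklearn import metrics

# Dummy stand-ins for the flattened label and score vectors.
label = np.array([0., 1., 1., 0., 1.])
score = np.array([0.2, 0.8, 0.4, 0.1, 0.9])
prediction = (score > 0.5).astype('float')

f1 = metrics.f1_score(label, prediction, average='binary')
ap = metrics.average_precision_score(label, score)
print('F1 = %.3f, average precision = %.3f' % (f1, ap))
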
Example #2
def inputs2(object_folder, mode, flags, mat_contents):
    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')

    log_list = KScsv.read_csv(log_file_path)

    # e.g. key_list = ['HE', 'DAPI', 'weight']
    key_list = list(
        flags['dict_path'].keys())[:-1]  # all items except the last: 'group'

    # Collect, per key, the column of file names logged for that key.
    allimageslist = []
    for ind, key in enumerate(key_list):
        image_dict = collections.defaultdict(list)
        for row in log_list:
            image_dict[key].append(row[ind])
        allimageslist.append(image_dict[key])

    min_queue_examples = int(
        len(allimageslist[0]) *
        flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.slice_input_producer(allimageslist,
                                                   shuffle=True)
    # The original snippet referenced undefined image/label/weight and
    # mean/variance tensors here; read_data is assumed to return the decoded
    # tensors, with the normalization statistics taken from mat_contents
    # (the keys saved into network_stats.mat in Example #8).
    image, label, weight = read_data(filename_queue, flags)
    image, label, weight = process_image_and_label(
        image, label, weight,
        mat_contents['mean_image'],
        mat_contents['variance_image'], flags)

    # Generate a batch of images and labels by building up a queue of examples.
    image, label, weight = generate_batch(image,
                                          label,
                                          weight,
                                          min_queue_examples,
                                          int(flags['batch_size']),
                                          shuffle=False,
                                          flags=flags)

    return {'images': image, 'labels': label, 'weights': weight}
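
A self-contained sketch of the TF1 queue pattern these input functions rely on: tf.train.slice_input_producer dequeues one example at a time and tf.train.batch assembles mini-batches once the queue runners are started. The in-memory arrays are illustrative; the real pipeline slices lists of file names and decodes them in read_data:

import numpy as np
import tensorflow as tf  # TF1-style queue API

images = np.random.rand(6, 64, 64, 3).astype(np.float32)  # illustrative
labels = np.arange(6, dtype=np.int32)

# Each dequeue yields one (image, label) slice, shuffled across the epoch.
image_t, label_t = tf.train.slice_input_producer([images, labels],
                                                 shuffle=True)
image_batch, label_batch = tf.train.batch([image_t, label_t], batch_size=2)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    imgs, labs = sess.run([image_batch, label_batch])
    print(imgs.shape, labs)  # (2, 64, 64, 3) and two labels
    coord.request_stop()
    coord.join(threads)
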
Example #3
def main(argv):
    he_log_file = argv[0]
    he_dcis_segmentation_result_path = argv[1]
    igpu = argv[2]

    row_list = KScsv.read_csv(he_log_file)
    main_he_dcis_segmentation.main(1, 'test_model', flags_he_dcis_segmentation,
                                   row_list, he_dcis_segmentation_result_path,
                                   igpu)
Example #4
def main(argv):
    file_list = argv[0]
    result_path = argv[1]
    he_dcis_segmentation_result_path = argv[2]
    igpu = argv[3]

    row_list = KScsv.read_csv(file_list)
    main_probe_detection.main(1, 'test_model', flags_probe_detection_green,
                              row_list, result_path,
                              he_dcis_segmentation_result_path, igpu)
Example #5
def main(argv):
    file_list = argv[0]
    he_cell_segmentation_result_path = argv[1]
    he_dcis_segmentation_result_path = argv[2]
    igpu = argv[3]

    row_list = KScsv.read_csv(file_list)
    main_he_cell_segmentation.main(1, 'test_model', flags_he_cell_segmentation,
                                   row_list, he_cell_segmentation_result_path,
                                   he_dcis_segmentation_result_path, igpu)
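
Each of the main(argv) entry points above takes its arguments positionally, so a typical driver is simply (a sketch; the command line shown is illustrative):

import sys

if __name__ == '__main__':
    # e.g. python run_segmentation.py file_list.csv results/ dcis_results/ 0
    main(sys.argv[1:])
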
Example #6
def inputs(mean_image, variance_image, object_folder, mode, flags):
    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')

    log_list = KScsv.read_csv(log_file_path)

    image_dict = collections.defaultdict(list)
    label_dict = collections.defaultdict(list)

    for row in log_list:
        for i_class in range(flags['n_classes']):
            if int(row[2]) == i_class:
                image_dict[i_class].append(row[0])
                label_dict[i_class].append(int(row[2]))
    min_queue_examples = int(
        np.sum([len(image_dict[k]) for k in image_dict.keys()]) *
        flags['min_fraction_of_examples_in_queue'])

    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    combine_image_dict = list()
    combine_label_dict = list()
    for i_class in range(flags['n_classes']):
        filename_queue = tf.train.slice_input_producer(
            [image_dict[i_class], label_dict[i_class]], shuffle=True)
        image, label = read_data(filename_queue, flags)
        image = tf.cast(image, tf.float32)
        label = tf.cast(label, tf.float32)
        image, label = process_image_and_label(image, label, mean_image,
                                               variance_image, flags)

        # Generate a batch of images and labels by building up a queue of examples.
        image, label = generate_batch(image,
                                      label,
                                      min_queue_examples,
                                      int(flags['batch_size'] /
                                          flags['n_classes']),
                                      shuffle=False,
                                      flags=flags)
        combine_image_dict.append(image)
        combine_label_dict.append(label)

    # tf.concat takes (values, axis); the old (axis, values) order is pre-TF-1.0.
    out_image = tf.concat(combine_image_dict, 0)
    out_label = tf.concat(combine_label_dict, 0)

    return {'images': out_image, 'labels': out_label}
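
The per-class queues above make each batch class-balanced: every one of the n_classes queues contributes batch_size / n_classes examples, and tf.concat stacks them along the batch axis. A numpy sketch of that arithmetic, with illustrative flag values:

import numpy as np

batch_size, n_classes = 32, 4            # illustrative flag values
per_class = int(batch_size / n_classes)  # 8 examples drawn per class

# Stand-ins for the per-class mini-batches from generate_batch.
class_batches = [np.full((per_class, 1), c, dtype=np.float32)
                 for c in range(n_classes)]
batch = np.concatenate(class_batches, axis=0)  # tf.concat(..., 0) equivalent
print(batch.shape)  # (32, 1): every class equally represented
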
Example #7
def select_train_val_instances(nth_fold, method, flags):
    """
    select_train_val_instances is used to balance the class instances found in the training and validation sets

    param: nth_fold
    param: method
    return: void
    """
    # check if log files exist
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold)
                                               in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    train_log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    val_log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')

    if not os.path.isfile(train_log_file_path):
        raise ValueError('no ' + train_log_file_path)
    if not os.path.isfile(val_log_file_path):
        raise ValueError('no ' + val_log_file_path)

    # read csv
    train_log = KScsv.read_csv(train_log_file_path)
    val_log = KScsv.read_csv(val_log_file_path)

    # balance the instance counts
    if method == 'by_numbers':
        train_log = select_instances.by_numbers(train_log)
        val_log = select_instances.by_numbers(val_log)

        KScsv.write_csv(train_log, train_log_file_path)
        KScsv.write_csv(val_log, val_log_file_path)
    else:
        raise ValueError('no method ' + method + ' exists!')
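
select_instances.by_numbers is not shown in this example; one plausible sketch of such a balancer (entirely hypothetical, assuming each log row carries its class label in column 2 as in the classification logs above) would subsample every class down to the rarest one:

import collections
import random

def by_numbers(log, label_col=2, seed=0):
    # Hypothetical balancer: group rows by class label, then subsample
    # every class down to the size of the rarest class.
    by_class = collections.defaultdict(list)
    for row in log:
        by_class[row[label_col]].append(row)
    n_min = min(len(rows) for rows in by_class.values())
    rng = random.Random(seed)
    balanced = []
    for rows in by_class.values():
        balanced.extend(rng.sample(rows, n_min))
    rng.shuffle(balanced)
    return balanced
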
Example #8
def main(nth_fold, mode, flags, testdir):
    """
    main trains, tests, or executes the model on the provided
    data based on the specified preferences

    param: nth_fold
    param: mode
    param: experiment_folder
    param: image_ext
    param: test_model
    param: test_image_list
    return: saves segmentation results to appropriate file/directory
    """

    # check if cv or perm
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold)
                                               in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    # Train model
    if mode == 'train':
        checkpoint_folder = os.path.join(object_folder, 'checkpoint')
        network_stats_file_path = os.path.join(checkpoint_folder,
                                               'network_stats.mat')

        train_images_folder = os.path.join(object_folder, 'train', 'image')

        if not os.path.isfile(network_stats_file_path):
            list_images = glob.glob(
                os.path.join(train_images_folder, '*' + flags['image_ext']))
            print('calculating mean and variance image')
            mean_image, variance_image = utils.calculate_mean_variance_image(
                list_images)
            routine.create_dir(checkpoint_folder)
            matlab.save(network_stats_file_path, {
                'mean_image': mean_image,
                'variance_image': variance_image
            })

        tf_model_train.train(object_folder, flags)

    # Test model on validation set
    elif mode == 'test_model':
        checkpointlist = glob.glob(
            os.path.join(object_folder, 'checkpoint', 'model*meta'))
        checkpointlist = [
            file for file in checkpointlist if 'pretrain' not in file
        ]
        temp = []
        for filepath in checkpointlist:
            basename = os.path.basename(filepath)
            temp.append(int(float(basename.split('-')[-1].split('.')[0])))
        temp = np.sort(temp)

        model_path = os.path.join(
            object_folder, 'checkpoint',
            'model.ckpt-' + str(temp[flags['test_model']]))
        print('use epoch %d : model %s' % (flags['test_model'], 'model.ckpt-' +
                                           str(temp[flags['test_model']])))
        test_images_list = flags['test_image_list']
        filename_list = KScsv.read_csv(test_images_list)
        tf_model_test.test(object_folder, model_path, filename_list, flags)

    #Segment WSIs
    elif mode == 'test_WSI':
        checkpointlist = glob.glob(
            os.path.join(object_folder, 'checkpoint', 'model*meta'))
        checkpointlist = [
            file for file in checkpointlist if 'pretrain' not in file
        ]
        temp = []
        for filepath in checkpointlist:
            basename = os.path.basename(filepath)
            temp.append(int(float(basename.split('-')[-1].split('.')[0])))
        temp = np.sort(temp)

        model_path = os.path.join(
            object_folder, 'checkpoint',
            'model.ckpt-' + str(temp[flags['test_model']]))
        print('use epoch %d : model %s' % (flags['test_model'], 'model.ckpt-' +
                                           str(temp[flags['test_model']])))

        # Iterate over all WSI subdirectories in ascending order.
        paths = get_immediate_subdirectories(testdir)
        paths.sort()
        #paths = paths[100:] #TODO: Enable based on which batch this code is running
        print("TEST DIR: " + str(testdir))

        for path in paths:
            print(os.path.join(testdir, path))
            if not os.path.isdir(
                    os.path.join(testdir, path + 'epiStromalSeg')
            ):  # skip WSIs whose output directory already exists
                tf_model_test.testWSI(object_folder, model_path,
                                      os.path.join(testdir, path), flags)

                # TODO: uncomment to process only controls
                # imageCSV = open(os.path.join('/data', 'avellal14', 'WSI_patches', 'BBD_NCC_Covariate_Outcome_KK_JH_modifiedWithPaths.csv'), 'rb')
                # reader = csv.reader(imageCSV)
                # csvList = list(reader)
                # patientId = path[:path.index('_')]
                # caseControlList = next(subl for subl in csvList if patientId in subl)
                # TODO: uncomment to process only cases
                # if caseControlList[1] == '1':  # only test the WSI if the image is indeed a case (1)
                #     tf_model_test.testWSI(object_folder, model_path, os.path.join(testdir, path), flags)

    #Segment WSIs at patient level using data from CSV
    elif mode == 'test_Case_Control':
        checkpointlist = glob.glob(
            os.path.join(object_folder, 'checkpoint', 'model*meta'))
        checkpointlist = [
            file for file in checkpointlist if 'pretrain' not in file
        ]
        temp = []
        for filepath in checkpointlist:
            basename = os.path.basename(filepath)
            temp.append(int(float(basename.split('-')[-1].split('.')[0])))
        temp = np.sort(temp)

        model_path = os.path.join(
            object_folder, 'checkpoint',
            'model.ckpt-' + str(temp[flags['test_model']]))
        print('use epoch %d : model %s' % (flags['test_model'], 'model.ckpt-' +
                                           str(temp[flags['test_model']])))

        with open(
                os.path.join('/home', 'avellal14', 'data', 'Adithya_BBD_NHS',
                             'NHS_BBD_CODE',
                             'casesAndMatchedControls224.csv')) as csvFile:
            csvReader = csv.DictReader(csvFile)
            for row in csvReader:
                if (row['path'] == 'BBD_NCC_extractedat20x'
                        or row['path'] == 'BBD_NCC_extractedat20x_round2'):
                    testdir = os.path.join('/home', 'avellal14', 'data',
                                           'Adithya_BBD_NHS', row['path'])
                    paths = get_subdirectories_by_patient(testdir, row['id'])

                    for path in paths:
                        print('CURRENT WSI BEING SEGMENTED',
                              os.path.join(testdir, path))
                        if not os.path.isdir(
                                os.path.join(testdir, path + '_cellSeg')
                        ):  # skip WSIs whose output directory already exists
                            tf_model_test.testWSI(object_folder, model_path,
                                                  os.path.join(testdir, path),
                                                  flags)
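
The checkpoint-selection block repeated in each mode above sorts model files by the step suffix of 'model.ckpt-<step>.meta'; a standalone sketch of that parsing (file names are illustrative):

import os

checkpointlist = ['ckpt/model.ckpt-500.meta',   # illustrative file names
                  'ckpt/model.ckpt-1500.meta',
                  'ckpt/model.ckpt-1000.meta']

# Extract the integer step from each name and sort ascending.
steps = sorted(int(os.path.basename(f).split('-')[-1].split('.')[0])
               for f in checkpointlist)
test_model = -1  # illustrative flags['test_model']: -1 picks the last one
model_path = os.path.join('ckpt', 'model.ckpt-' + str(steps[test_model]))
print(model_path)  # ckpt/model.ckpt-1500
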
Example #9
def gen_train_val_data(nth_fold, flags):
    """
    gen_train_val_data generates training and validation data for training the network. It builds
    directories for train and test and extract patches according to the provided 'method', and it
    maintains a log file containing the contents of all the data splits

    param: nth_fold
    param method: sliding_window
    return: void
    """

    ########## check whether 'cv' or 'perm' exists and which one to use ##########
    list_dir = os.listdir(os.path.join(flags['experiment_folder']))
    if ('cv' + str(nth_fold) in list_dir) and ('perm' + str(nth_fold)
                                               in list_dir):
        raise ValueError('Dangerous! You have both cv and perm on the path.')
    elif 'cv' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'cv' + str(nth_fold))
    elif 'perm' + str(nth_fold) in list_dir:
        object_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(nth_fold))
    else:
        raise ValueError('No cv or perm folder!')

    ########## create train and val paths ##########
    path_dict = dict()
    path_dict['train_folder'] = os.path.join(object_folder, 'train')
    path_dict['val_folder'] = os.path.join(object_folder, 'val')
    create_dir(path_dict['train_folder'])
    create_dir(path_dict['val_folder'])

    print("Gets to the beginning of an if statement")
    ########## extract patches and put in a designated directory ##########
    if flags['gen_train_val_method'] == 'sliding_window':

        key_list = ['image', 'groundtruth', 'weight']

        for key in key_list:
            path_dict['train_' + key + '_folder'] = os.path.join(
                path_dict['train_folder'], key)
            create_dir(path_dict['train_' + key + '_folder'])
            path_dict['val_' + key + '_folder'] = os.path.join(
                path_dict['val_folder'], key)
            create_dir(path_dict['val_' + key + '_folder'])

        list_dict = dict()
        for key in key_list:
            list_dict['train_' + key + '_list'] = KScsv.read_csv(
                os.path.join(object_folder, 'train_' + key + '_list.csv'))
            list_dict['val_' + key + '_list'] = KScsv.read_csv(
                os.path.join(object_folder, 'val_' + key + '_list.csv'))

        ########## train ##########
        for key in ['train', 'val']:
            if not os.path.isfile(
                    os.path.join(path_dict[key + '_folder'],
                                 key + '_log.csv')):
                log_data = list()

                for i_image in range(len(list_dict[key + '_image_list'])):

                    tic = time.time()

                    path_image = list_dict[key + '_image_list'][i_image][0]
                    path_groundtruth = list_dict[
                        key + '_groundtruth_list'][i_image][0]
                    path_weight = list_dict[key + '_weight_list'][i_image][0]

                    #Resize image, groundtruth, and weight from 10x input size to 2.5x (level at which network operates)
                    image = KSimage.imread(path_image)
                    image = KSimage.imresize(image, 0.25)

                    groundtruth = KSimage.imread(path_groundtruth)
                    groundtruth = KSimage.imresize(groundtruth, 0.25)

                    weight = KSimage.imread(path_weight)
                    weight = KSimage.imresize(weight, 0.25)

                    #make sure that groundtruth images have depth = 1
                    if (len(groundtruth.shape) > 2
                            and groundtruth.shape[2] > 1):
                        groundtruth = groundtruth[:, :, 1]

                    # merge intra-stromal epithelium (label 3) into stroma (label 2)
                    groundtruth[groundtruth == 3] = 2
                    # fat was originally label 4 but is now relabeled 3
                    groundtruth[groundtruth == 4] = 3

                    dict_obj = {
                        'image': image,
                        'groundtruth': groundtruth,
                        'weight': weight
                    }

                    extractor = extract_patches.sliding_window(
                        dict_obj, flags['size_input_patch'],
                        flags['size_output_patch'], flags['stride'])

                    for j, (out_obj_dict, coord_dict) in enumerate(extractor):
                        images = out_obj_dict['image']
                        groundtruths = out_obj_dict['groundtruth']
                        weights = out_obj_dict['weight']
                        coord_images = coord_dict['image']

                        #############################################################

                        basename = os.path.basename(path_image)
                        basename = os.path.splitext(basename)[0]

                        image_name = os.path.join(
                            path_dict[key + '_image_folder'], basename +
                            '_idx' + str(j) + '_row' + str(coord_images[0]) +
                            '_col' + str(coord_images[1]) + flags['image_ext'])
                        label_name = os.path.join(
                            path_dict[key + '_groundtruth_folder'],
                            basename + '_idx' + str(j) + '_row' +
                            str(coord_images[0]) + '_col' +
                            str(coord_images[1]) + flags['groundtruth_ext'])
                        weight_name = os.path.join(
                            path_dict[key + '_weight_folder'],
                            basename + '_idx' + str(j) + '_row' +
                            str(coord_images[0]) + '_col' +
                            str(coord_images[1]) + flags['weight_ext'])

                        if not os.path.isfile(image_name):
                            KSimage.imwrite(images, image_name)

                        if not os.path.isfile(label_name):
                            KSimage.imwrite(groundtruths, label_name)

                        if not os.path.isfile(weight_name):
                            KSimage.imwrite(weights, weight_name)

                        log_data.append((image_name, label_name, weight_name))

                    print('finished processing image %d of %d : %.2f sec' %
                          (i_image + 1, len(list_dict[key + '_image_list']),
                           time.time() - tic))

                KScsv.write_csv(
                    log_data,
                    os.path.join(path_dict[key + '_folder'], key + '_log.csv'))

    ####################################################################################################################
    else:
        print('Only sliding-window training is supported; training terminated.')
        return
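
extract_patches.sliding_window is assumed to yield (patch dict, coordinate dict) pairs; a simplified, hypothetical single-array version of the same pattern:

import numpy as np

def sliding_window(image, size, stride):
    # Hypothetical, simplified extractor: yields each square patch with
    # its top-left (row, col) coordinate.
    rows, cols = image.shape[:2]
    for r in range(0, rows - size + 1, stride):
        for c in range(0, cols - size + 1, stride):
            yield image[r:r + size, c:c + size], (r, c)

image = np.arange(36).reshape(6, 6)
for patch, (r, c) in sliding_window(image, size=4, stride=2):
    print('patch at row %d, col %d, shape %s' % (r, c, patch.shape))
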
Example #10
def split_perm(obj_list, flags):
    """
    split_perm splits data using permutation with stratification based on group label

    param: images_list
    param: labels_list
    param: groups_list
    param: num
    param: test_percentage
    param: val_percentage
    return: void
    """
    num = flags['num_split']
    test_percentage = flags['test_percentage']
    val_percentage = flags['val_percentage']

    groups_label = list()
    for file in obj_list['group']:
        row = KScsv.read_csv(file)
        groups_label.append(row[0][0])
    groups_label = np.array(groups_label)

    for key in obj_list.keys():
        obj_list[key] = np.array(obj_list[key])

    if test_percentage != 0:
        skf = StratifiedShuffleSplit(n_splits=num,
                                     test_size=test_percentage / 100.0)
        for i_num, (train_idx, test_idx) in enumerate(
                skf.split(obj_list['image'], groups_label)):
            cv_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(i_num + 1))
            create_dir(cv_folder)

            test_obj_list_dict = dict()
            train_obj_list_dict = dict()
            for key in obj_list.keys():
                test_obj_list_dict[key] = obj_list[key][test_idx]
                train_obj_list_dict[key] = obj_list[key][train_idx]

            train_groups_label = groups_label[train_idx]

            sss = StratifiedShuffleSplit(n_splits=1,
                                         test_size=val_percentage / 100.0)
            for train_train_index, train_val_index in sss.split(
                    train_obj_list_dict['image'], train_groups_label):
                train_train_obj_list_dict = dict()
                train_val_obj_list_dict = dict()
                for key in train_obj_list_dict.keys():
                    train_train_obj_list_dict[key] = train_obj_list_dict[key][
                        train_train_index]
                    train_val_obj_list_dict[key] = train_obj_list_dict[key][
                        train_val_index]

            #################################################################
            # test
            for key in test_obj_list_dict.keys():
                filename = os.path.join(cv_folder, 'test_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item] for item in test_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            #################################################################
            # train
            dict_path = flags['dict_path']
            dict_ext = flags['dict_ext']

            obj_list_dict = dict()
            for key in dict_path.keys():
                obj_list_dict[key] = glob.glob(
                    os.path.join(dict_path[key], '*' + dict_ext[key]))

            temp_train_train_obj_list_dict = collections.defaultdict(list)

            for name in train_train_obj_list_dict['image']:
                basename = os.path.basename(name)
                basename = os.path.splitext(basename)[0]
                matching = sorted(
                    [s for s in obj_list_dict['image'] if basename in s])

                for m in matching:
                    basename = os.path.basename(m)
                    basename = os.path.splitext(basename)[0]

                    basename_dict = dict()
                    for key in train_train_obj_list_dict.keys():
                        basename_dict[key] = os.path.join(
                            dict_path[key], basename + dict_ext[key])

                    if all(basename_dict[k] in obj_list_dict[k]
                           for k in basename_dict.keys()):
                        for key in train_train_obj_list_dict.keys():
                            temp_train_train_obj_list_dict[key].append(
                                basename_dict[key])

            for key in train_train_obj_list_dict.keys():
                train_train_obj_list_dict[key] = np.array(
                    temp_train_train_obj_list_dict[key])

                filename = os.path.join(cv_folder,
                                        'train_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item]
                                for item in train_train_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            #################################################################
            # validation
            dict_path = flags['dict_path']
            dict_ext = flags['dict_ext']

            obj_list_dict = dict()
            for key in dict_path.keys():
                obj_list_dict[key] = glob.glob(
                    os.path.join(dict_path[key], '*' + dict_ext[key]))

            temp_train_val_obj_list_dict = collections.defaultdict(list)

            for name in train_val_obj_list_dict['image']:
                basename = os.path.basename(name)
                basename = os.path.splitext(basename)[0]
                matching = sorted(
                    [s for s in obj_list_dict['image'] if basename in s])

                for m in matching:
                    basename = os.path.basename(m)
                    basename = os.path.splitext(basename)[0]

                    basename_dict = dict()
                    for key in train_val_obj_list_dict.keys():
                        basename_dict[key] = os.path.join(
                            dict_path[key], basename + dict_ext[key])

                    if all(basename_dict[k] in obj_list_dict[k]
                           for k in basename_dict.keys()):
                        for key in train_val_obj_list_dict.keys():
                            temp_train_val_obj_list_dict[key].append(
                                basename_dict[key])

            for key in train_val_obj_list_dict.keys():
                train_val_obj_list_dict[key] = np.array(
                    temp_train_val_obj_list_dict[key])

                filename = os.path.join(cv_folder, 'val_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item]
                                for item in train_val_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

    else:
        for i_num in range(num):
            cv_folder = os.path.join(flags['experiment_folder'],
                                     'perm' + str(i_num + 1))
            create_dir(cv_folder)

            train_obj_list_dict = dict()
            for key in obj_list.keys():
                train_obj_list_dict[key] = obj_list[key]
            train_groups_label = groups_label

            sss = StratifiedShuffleSplit(n_splits=1,
                                         test_size=val_percentage / 100.0)
            for train_train_index, train_val_index in sss.split(
                    train_obj_list_dict['image'], train_groups_label):
                train_train_obj_list_dict = dict()
                train_val_obj_list_dict = dict()
                for key in train_obj_list_dict.keys():
                    train_train_obj_list_dict[key] = train_obj_list_dict[key][
                        train_train_index]
                    train_val_obj_list_dict[key] = train_obj_list_dict[key][
                        train_val_index]

            #################################################################
            # train
            dict_path = flags['dict_path']
            dict_ext = flags['dict_ext']

            obj_list_dict = dict()
            for key in dict_path.keys():
                obj_list_dict[key] = glob.glob(
                    os.path.join(dict_path[key], '*' + dict_ext[key]))

            temp_train_train_obj_list_dict = collections.defaultdict(list)

            for name in train_train_obj_list_dict['image']:
                basename = os.path.basename(name)
                basename = os.path.splitext(basename)[0]
                matching = sorted(
                    [s for s in obj_list_dict['image'] if basename in s])

                for m in matching:
                    basename = os.path.basename(m)
                    basename = os.path.splitext(basename)[0]

                    basename_dict = dict()
                    for key in train_train_obj_list_dict.keys():
                        basename_dict[key] = os.path.join(
                            dict_path[key], basename + dict_ext[key])

                    if all(basename_dict[k] in obj_list_dict[k]
                           for k in basename_dict.keys()):
                        for key in train_train_obj_list_dict.keys():
                            temp_train_train_obj_list_dict[key].append(
                                basename_dict[key])

            for key in train_train_obj_list_dict.keys():
                train_train_obj_list_dict[key] = np.array(
                    temp_train_train_obj_list_dict[key])

                filename = os.path.join(cv_folder,
                                        'train_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item]
                                for item in train_train_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)

            #################################################################
            # validation

            dict_path = flags['dict_path']
            dict_ext = flags['dict_ext']

            obj_list_dict = dict()
            for key in dict_path.keys():
                obj_list_dict[key] = glob.glob(
                    os.path.join(dict_path[key], '*' + dict_ext[key]))

            temp_train_val_obj_list_dict = collections.defaultdict(list)

            for name in train_val_obj_list_dict['image']:
                basename = os.path.basename(name)
                basename = os.path.splitext(basename)[0]
                matching = sorted(
                    [s for s in obj_list_dict['image'] if basename in s])

                for m in matching:
                    basename = os.path.basename(m)
                    basename = os.path.splitext(basename)[0]

                    basename_dict = dict()
                    for key in train_val_obj_list_dict.keys():
                        basename_dict[key] = os.path.join(
                            dict_path[key], basename + dict_ext[key])

                    if all(basename_dict[k] in obj_list_dict[k]
                           for k in basename_dict.keys()):
                        for key in train_val_obj_list_dict.keys():
                            temp_train_val_obj_list_dict[key].append(
                                basename_dict[key])

            for key in train_val_obj_list_dict.keys():
                train_val_obj_list_dict[key] = np.array(
                    temp_train_val_obj_list_dict[key])

                filename = os.path.join(cv_folder, 'val_' + key + '_list.csv')
                if not os.path.isfile(filename):
                    row_list = [[item]
                                for item in train_val_obj_list_dict[key]]
                    KScsv.write_csv(row_list, filename)
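
A minimal sketch of the stratified splitting machinery above on dummy data, assuming the sklearn.model_selection API:

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

files = np.array(['img%d.png' % i for i in range(10)])  # illustrative
groups = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])       # group labels

# A single 20% split, stratified so both groups keep their proportions.
sss = StratifiedShuffleSplit(n_splits=1, test_size=20 / 100.0)
for train_idx, test_idx in sss.split(files, groups):
    print('train:', files[train_idx])
    print('test :', files[test_idx])
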
Example #11
def split_cv(obj_list, flags):
    """
    split_cv splits data into train, validation, and test stratified by the group label

    param: images_list
    param: labels_list
    param: groups_list
    param: num
    param: val_percentage
    return: void
    """

    num = flags['num_split']
    val_percentage = flags['val_percentage']

    groups_label = list()
    for file in obj_list['group']:
        row = KScsv.read_csv(file)
        groups_label.append(row[0][0])
    groups_label = np.array(groups_label)

    for key in obj_list.keys():
        obj_list[key] = np.array(obj_list[key])

    skf = StratifiedKFold(n_splits=num)
    for i_num, (train_idx, test_idx) in enumerate(
            skf.split(obj_list['image'], groups_label)):
        cv_folder = os.path.join(flags['experiment_folder'],
                                 'cv' + str(i_num + 1))
        create_dir(cv_folder)

        test_obj_list_dict = dict()
        train_obj_list_dict = dict()
        for key in obj_list.keys():
            test_obj_list_dict[key] = obj_list[key][test_idx]
            train_obj_list_dict[key] = obj_list[key][train_idx]

        train_groups_label = groups_label[train_idx]

        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=val_percentage / 100.0)
        for train_train_index, train_val_index in sss.split(
                train_obj_list_dict['image'], train_groups_label):
            train_train_obj_list_dict = dict()
            train_val_obj_list_dict = dict()
            for key in train_obj_list_dict.keys():
                train_train_obj_list_dict[key] = train_obj_list_dict[key][
                    train_train_index]
                train_val_obj_list_dict[key] = train_obj_list_dict[key][
                    train_val_index]

        #################################################################
        # test
        for key in test_obj_list_dict.keys():
            filename = os.path.join(cv_folder, 'test_' + key + '_list.csv')
            if not os.path.isfile(filename):
                row_list = [[item] for item in test_obj_list_dict[key]]
                KScsv.write_csv(row_list, filename)

        #################################################################
        # train
        for key in train_train_obj_list_dict.keys():
            filename = os.path.join(cv_folder, 'train_' + key + '_list.csv')
            if not os.path.isfile(filename):
                row_list = [[item] for item in train_train_obj_list_dict[key]]
                KScsv.write_csv(row_list, filename)

        #################################################################
        # validation
        for key in train_val_obj_list_dict.keys():
            filename = os.path.join(cv_folder, 'val_' + key + '_list.csv')
            if not os.path.isfile(filename):
                row_list = [[item] for item in train_val_obj_list_dict[key]]
                KScsv.write_csv(row_list, filename)
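
split_cv uses StratifiedKFold for the outer folds instead; on the same dummy data (sklearn.model_selection API assumed):

import numpy as np
from sklearn.model_selection import StratifiedKFold

files = np.array(['img%d.png' % i for i in range(10)])  # illustrative
groups = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

skf = StratifiedKFold(n_splits=5)
for i_num, (train_idx, test_idx) in enumerate(skf.split(files, groups)):
    print('cv%d test:' % (i_num + 1), files[test_idx])
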
Example #12
def inputs(object_folder, mode, flags, mat_contents):
    # e.g. key_list = ['HE', 'DAPI', 'label']
    key_list = list(flags['dict_path'].keys())
    key_list.remove('group')

    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')

    log_list = KScsv.read_csv(log_file_path)

    #image_dict = collections.defaultdict(list)
    #label_dict = collections.defaultdict(list)
    #weight_dict = collections.defaultdict(list)

    slice_input_list = []

    for ind, key in enumerate(key_list):
        key_img_list = []

        # load the per-key mean and variance images
        mean_img = np.float32(mat_contents[key + '_mean'])
        var_img = np.float32(mat_contents[key + '_var'])

        if mean_img.ndim == 2:
            mean_img = np.expand_dims(mean_img, axis=2)
        if var_img.ndim == 2:
            var_img = np.expand_dims(var_img, axis=2)

        #mean_image = tf.constant(mean_img, name='mean_image')
        #var_image = tf.constant(var_img, name='var_image')

        for row in log_list:
            key_img_list.append(row[ind])

        slice_input_list.append(key_img_list)

    min_queue_examples = int(
        len(slice_input_list[0]) * flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read;
    # filename_queue is a list with one filename tensor per key.
    filename_queue = tf.train.slice_input_producer(slice_input_list,
                                                   shuffle=True)
    queue_dict, label_dict = read_data(filename_queue, flags)
    processed_dict = process_image_and_label(queue_dict, mat_contents, flags)

    # Generate a batch of images and labels by building up a queue of examples.
    batch_list, label = generate_batch(processed_dict,
                                       label_dict,
                                       min_queue_examples,
                                       int(flags['batch_size']),
                                       shuffle=False,
                                       flags=flags)

    #create final combined dict
    #combine_image_dict = collections.defaultdict(list)
    #combine_label_dict = collections.defaultdict(list)

    combine_image_dict = {}
    combine_label_dict = {}
    for ind, key in enumerate(key_list):
        #combine_image_dict[key].append(batch_list[ind])
        combine_image_dict[key] = batch_list[ind]
        #combine_image_dict[key] = tf.concat(combine_image_dict[key],0)

        combine_label_dict[key] = label

    return combine_image_dict, combine_label_dict
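
process_image_and_label is assumed to standardize each modality against its mean and variance images; a numpy sketch of that step, with hypothetical key names matching the mat_contents[key + '_mean'] convention above:

import numpy as np

# Hypothetical per-key statistics, as they would be loaded from a
# network_stats-style .mat file.
mat_contents = {'HE_mean': np.zeros((64, 64, 3), np.float32),
                'HE_var': np.ones((64, 64, 3), np.float32)}

image = np.random.rand(64, 64, 3).astype(np.float32)
mean_img = np.float32(mat_contents['HE_mean'])
var_img = np.float32(mat_contents['HE_var'])

# Per-pixel standardization: subtract the mean image, divide by std-dev.
normalized = (image - mean_img) / np.sqrt(var_img + 1e-8)
print(normalized.mean(), normalized.std())
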
Example #13
def inputs(mean_image, variance_image, object_folder, mode, flags):
    """
    inputs takes in either training or validation inputs, then performs aggressive data
    augmentation and normalization using process_image_and_label and places them into
    a mini_batch to be passed through the network

    param: mean_image
    param: variance_image
    param: object_folder
    param: mode
    param: min_fraction_of_examples_in_queue
    param: batch_size
    return: image dict, label dict, weight dict
    """

    if mode == 'train':
        log_file_path = os.path.join(object_folder, 'train', 'train_log.csv')
    else:
        log_file_path = os.path.join(object_folder, 'val', 'val_log.csv')

    log_list = KScsv.read_csv(log_file_path)

    image_dict = collections.defaultdict(list)
    label_dict = collections.defaultdict(list)
    weight_dict = collections.defaultdict(list)

    for row in log_list:
        image_dict['image'].append(row[0])
        label_dict['label'].append(row[1])
        weight_dict['weight'].append(row[2])

    min_queue_examples = int(
        len(image_dict['image']) * flags['min_fraction_of_examples_in_queue'])
    print('Filling queue with %d images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    # Create a queue that produces the filenames to read.
    combine_image_dict = list()
    combine_label_dict = list()
    combine_weight_dict = list()

    filename_queue = tf.train.slice_input_producer(
        [image_dict['image'], label_dict['label'], weight_dict['weight']],
        shuffle=True)
    image, label, weight = read_data(filename_queue, flags)
    image = tf.cast(image, tf.float32)
    label = tf.cast(label, tf.float32)
    weight = tf.cast(weight, tf.float32)
    image, label, weight = process_image_and_label(image, label, weight,
                                                   mean_image, variance_image,
                                                   flags)

    # Generate a batch of images and labels by building up a queue of examples.
    image, label, weight = generate_batch(image,
                                          label,
                                          weight,
                                          min_queue_examples,
                                          int(flags['batch_size']),
                                          shuffle=False,
                                          flags=flags)
    combine_image_dict.append(image)
    combine_label_dict.append(label)
    combine_weight_dict.append(weight)

    # tf.concat takes (values, axis); the old (axis, values) order is pre-TF-1.0.
    out_image = tf.concat(combine_image_dict, 0)
    out_label = tf.concat(combine_label_dict, 0)
    out_weight = tf.concat(combine_weight_dict, 0)

    return {'images': out_image, 'labels': out_label, 'weights': out_weight}
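
KScsv.read_csv and KScsv.write_csv are project helpers; a minimal, hypothetical equivalent that matches how the row lists are used throughout these examples (each row is a list of strings, e.g. [image_path, label_path, weight_path]):

import csv

def read_csv(path):
    # Hypothetical stand-in for KScsv.read_csv.
    with open(path, 'r') as f:
        return [row for row in csv.reader(f)]

def write_csv(row_list, path):
    # Hypothetical stand-in for KScsv.write_csv.
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(row_list)
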