Exemple #1
0
def copy_files_to_label_dirs(inp_path, out_path, labels_file, process = None):
    """Sort labelled images from ``inp_path`` into per-level directories.

    Sub-directories ``0`` .. ``4`` are created under ``out_path``. Every file
    in ``inp_path`` whose extension-less name appears in the first column of
    the labels CSV is written to ``out_path/<level>/<name>.jpeg``.

    Args:
        inp_path: directory that is listed for source files; each matched
            name is read back as ``<name>.jpeg``.
        out_path: destination root; handed to ``prep_out_path`` first.
        labels_file: path of a CSV whose first column holds the image names
            and which has a ``level`` column.
        process: optional callable that receives the image returned by
            ``cv2.imread`` and returns the image to write. When ``None`` the
            file is transferred with ``copy`` instead.
    """
    prep_out_path(out_path)

    for i in range(0, 5):
        os.makedirs(path.join(out_path, str(i)))

    labels = pd.read_csv(labels_file)
    # The name column is whatever the CSV calls its first column; use the
    # same key for building, merging and reading back so the function does
    # not depend on that column being literally called 'image'.
    name_col = labels.columns[0]
    existing_files = pd.DataFrame(
        [path.splitext(f)[0] for f in os.listdir(inp_path)],
        columns=[name_col])
    # Inner merge: keeps only the files that have a row in the labels CSV.
    existing_files = existing_files.merge(labels, on=name_col)

    def processAndCopy(inp_file, out_file):
        im = cv2.imread(inp_file)
        im = process(im)
        cv2.imwrite(out_file, im)

    # Identity test: `== None` would invoke a custom __eq__ on `process`.
    cp = copy if process is None else processAndCopy

    for f, l in zip(existing_files[name_col], existing_files['level']):
        file_name = path.join(out_path, str(l), f + ".jpeg")
        inp_file = path.join(inp_path, f + '.jpeg')
        cp(inp_file, file_name)

        # Single-argument call form prints the same text on Python 2 and 3.
        print("copied {0} to {1}".format(inp_file, file_name))
Exemple #2
0
def copy_files_to_label_dirs(inp_path, out_path, labels_file, process=None):
    """Distribute labelled ``.jpeg`` files into ``out_path/<level>/``.

    Creates the directories ``0`` .. ``4`` below ``out_path``, matches the
    extension-less names of the files in ``inp_path`` against the first
    column of the labels CSV, and writes each match to
    ``out_path/<level>/<name>.jpeg``.

    Args:
        inp_path: directory listed for input files; matched names are read
            as ``<name>.jpeg``.
        out_path: output root; passed to ``prep_out_path`` before use.
        labels_file: CSV path. The first column holds image names; a
            ``level`` column selects the destination directory.
        process: optional callable applied to the image loaded by
            ``cv2.imread``; its return value is written with
            ``cv2.imwrite``. If ``None``, ``copy`` is used instead.
    """
    prep_out_path(out_path)

    for i in range(0, 5):
        os.makedirs(path.join(out_path, str(i)))

    labels = pd.read_csv(labels_file)
    # Use the CSV's own first-column name throughout, so the lookup below
    # cannot disagree with the column the frame was built with.
    key = labels.columns[0]
    stems = [path.splitext(f)[0] for f in os.listdir(inp_path)]
    existing_files = pd.DataFrame(stems, columns=[key])
    # Default (inner) merge drops files without a label row.
    existing_files = existing_files.merge(labels, on=key)

    def processAndCopy(inp_file, out_file):
        im = cv2.imread(inp_file)
        im = process(im)
        cv2.imwrite(out_file, im)

    # `is None` avoids calling an overloaded __eq__ on the callable.
    if process is None:
        cp = copy
    else:
        cp = processAndCopy

    for f, l in zip(existing_files[key], existing_files['level']):
        file_name = path.join(out_path, str(l), f + ".jpeg")
        inp_file = path.join(inp_path, f + '.jpeg')
        cp(inp_file, file_name)

        # Parenthesised single argument: identical output on Python 2 and 3.
        print("copied {0} to {1}".format(inp_file, file_name))
Exemple #3
0
def copy_train_files(inp_path, train_path, labels_file):
    """Copy the files of ``inp_path`` that are listed in the labels CSV.

    A file is selected when its directory-entry name is equal to a value in
    the first column of the CSV.

    Args:
        inp_path: directory listed for candidate files.
        train_path: destination directory; passed to ``prep_out_path`` first.
        labels_file: path of the CSV whose first column holds the names.
    """
    prep_out_path(train_path)
    labels = pd.read_csv(labels_file)
    # Series.as_matrix() was removed in pandas 1.0; .values yields the same
    # array on old and new versions.
    files_names = set(labels[labels.columns[0]].values)
    all_files = set(os.listdir(inp_path))
    train_files = all_files.intersection(files_names)

    for f in train_files:
        copy(path.join(inp_path, f), path.join(train_path, f))
Exemple #4
0
def copy_test_files(inp_path, test_path, labels_file):
    """Copy the files of ``inp_path`` that are NOT listed in the labels CSV.

    A file is selected when its directory-entry name does not equal any
    value in the first column of the CSV.

    Args:
        inp_path: directory listed for candidate files.
        test_path: destination directory; passed to ``prep_out_path`` first.
        labels_file: path of the CSV whose first column holds the names.
    """
    prep_out_path(test_path)
    labels = pd.read_csv(labels_file)
    # Series.as_matrix() was removed in pandas 1.0; .values yields the same
    # array on old and new versions.
    file_names = set(labels[labels.columns[0]].values)
    all_files = set(os.listdir(inp_path))
    test_files = all_files.difference(file_names)

    for f in test_files:
        copy(path.join(inp_path, f), path.join(test_path, f))
def get_areal_features(root, features_path, masks_dir, n_bins = 100):
    """Write a CSV of region-size histograms for every file in ``root``.

    For each file two binary regions are obtained from
    ``get_predicted_region`` (``Labels.Drusen`` and ``Labels.Haemorage``),
    connected components are labelled with a 5x5 structuring element, and
    the component sizes, divided by the non-zero pixel count of the reader's
    mask, are histogrammed into ``n_bins`` bins over ``(0, 1e-3)``. Each
    output row is the two histograms followed by the file's row from the
    labels CSV.

    NOTE(review): ``labels_file`` and ``prefix`` are neither parameters nor
    locals; they are resolved from an enclosing/module scope at call time.
    Confirm they are defined where this function is used.

    Args:
        root: directory listed for input files.
        features_path: output directory; passed to ``prep_out_path`` and
            used as the parent of ``<prefix>.csv``.
        masks_dir: forwarded to ``ImageReader``.
        n_bins: number of histogram bins per region type.

    Raises:
        IndexError: if a file has no row in the labels CSV whose ``image``
            value equals the file's extension-less name.
    """
    prep_out_path(features_path)
    files = os.listdir(root)

    # list(...) is required on Python 3, where range() cannot be
    # concatenated with a list; on Python 2 it is a no-op copy.
    df = pd.DataFrame(columns = list(range(n_bins * 2)) + ['name', 'level'])
    names = pd.read_csv(labels_file)
    # Two spaces after the colon reproduce the output of the original
    # `print "...: ", value` statement.
    print("Starting extraction:  {0}".format(time_now_str()))

    for j, f in enumerate(files):
        label = names.loc[names['image'] == path.splitext(f)[0]]
        start = time.time()
        imr = ImageReader(root, f, masks_dir, gray_scale = True)

        drusen = get_predicted_region(imr.image, Labels.Drusen)
        blood = get_predicted_region(imr.image, Labels.Haemorage)

        Bc = np.ones((5, 5))
        # mh.label returns (labelled image, object count); the counts are
        # not needed here.
        labels_drusen, _ = mh.label(drusen, Bc)
        labels_blood, _ = mh.label(blood, Bc)

        area = float(cv2.countNonZero(imr.mask))

        # sizes excluding background
        sizes_drusen = mhl.labeled_size(labels_drusen)[1:] / area
        sizes_blood = mhl.labeled_size(labels_blood)[1:] / area

        hist_druzen, _ = np.histogram(sizes_drusen, n_bins, (0, 1e-3))
        hist_blood, _ = np.histogram(sizes_blood, n_bins, (0, 1e-3))

        # Row layout: drusen histogram, blood histogram, then the first
        # matching label row. Built in one step instead of growing an empty
        # `np.int` array (that alias was removed in NumPy 1.24).
        df.loc[j] = np.r_[hist_druzen, hist_blood, label.values[0]]
        print("Extracted: {0}, took {1:02.2f} sec ".format(f, time.time() - start))

    # write out the csv
    df.to_csv(path.join(features_path, prefix + ".csv"), index = False, header=True)
    print("Extracted:  {0} @ {1}".format(prefix, time_now_str()))
Exemple #6
0
def copy_files_to_label_dirs(inp_path, out_path, labels_file):
    """Copy the files listed in a labels CSV into one directory per label.

    The first CSV column holds file names (relative to ``inp_path``) and the
    second column holds the label; each file is copied to
    ``out_path/<label>/<file name>``. Files that cannot be copied are
    reported and skipped, and the list of skipped names is printed at the
    end.

    Args:
        inp_path: directory containing the source files.
        out_path: destination root; passed to ``prep_out_path`` first.
        labels_file: path of the two-column (name, label) CSV.
    """
    prep_out_path(out_path)

    labels = pd.read_csv(labels_file)
    name_col = labels.columns[0]
    splitter = labels.columns[1]

    # Series.as_matrix() was removed in pandas 1.0; .values is equivalent.
    # `label_dir` replaces a loop variable that shadowed the builtin `dir`.
    for label_dir in np.unique(labels[splitter].values):
        os.makedirs(path.join(out_path, label_dir))

    bad = []
    for f, l in zip(labels[name_col], labels[splitter]):
        file_name = path.join(out_path, l, f)
        inp_file = path.join(inp_path, f)
        try:
            shutil.copy(inp_file, file_name)
        except IOError:
            # Best effort: remember the failure and carry on with the rest.
            print("Cannot copy: {0}".format(f))
            bad.append(f)
            continue
        print("copied {0} to {1}".format(inp_file, file_name))

    print(bad)
Exemple #7
0
from kobra.tr_utils import prep_out_path, time_now_str
import os
from os import path
import mahotas as mh
import mahotas.labeled as mhl
import cv2
import time

# Hard-coded directories for this extraction run.
# NOTE(review): `masks` and `orig` are assigned but not referenced in the
# visible part of this script.
preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
output = '/kaggle/retina/train/sample/features'

# Sizes the per-prefix frame below: n_bins * 2 + 1 columns.
n_bins = 100

prep_out_path(output)

# One pass per sub-directory "0" .. "4" of `preprocessed`.
for i in range(0, 5):
    prefix = str(i)

    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))
    
    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    # NOTE(review): `pd` is used here but no pandas import is visible in the
    # import block above -- confirm it is imported.
    df = pd.DataFrame(columns = range(n_bins * 2 + 1))
    j = 0

    # NOTE(review): the loop body appears to be cut off after the timer
    # start in this excerpt.
    for f in files:
        start = time.time()
        
Exemple #8
0
        cv2.imwrite(out_im_name, toSave)

    return out_im_name

def resize_only(image_name):
    """Resize one image and store it under ``out_path``.

    The image at ``image_name`` is loaded with ``cv2.imread``, resized to
    the module-level ``size`` and written to ``out_path`` under the same
    file name (the last path component of ``image_name``).
    """
    source = cv2.imread(image_name)

    # basename() is the second element of path.split().
    target = path.join(out_path, path.basename(image_name))

    cv2.imwrite(target, cv2.resize(source, size))

def kmeans_only(image_name, K=10):
    """Run ``kmeans`` on one image and write the first result to disk.

    Args:
        image_name: path of the image to load with ``cv2.imread``.
        K: second argument forwarded to ``kmeans``.

    The destination path comes from ``get_output_name(image_name)``.
    """
    destination = get_output_name(image_name)
    pixels = cv2.imread(image_name)

    # kmeans yields three values; only the first one is saved.
    clustered, _, _ = kmeans(pixels, K)

    cv2.imwrite(destination, clustered)
        
# Driver: prepare the output directory, then map kmeans_only over every
# entry of image_paths through a load-balanced view and block until the
# mapped work has finished.
# NOTE(review): Client().load_balanced_view() looks like the
# IPython.parallel / ipyparallel API -- confirm against the imports, which
# are outside this excerpt.
prep_out_path(out_path)
dv = Client().load_balanced_view()
# Only the image path is supplied per call, so kmeans_only runs with its
# default K.
fs = dv.map(kmeans_only, np.array(image_paths))
print "Started: ", time_now_str()
fs.wait()
print "Finished: ", time_now_str()

from kobra.tr_utils import prep_out_path, time_now_str
import os
from os import path
import mahotas as mh
import mahotas.labeled as mhl
import cv2
import time

# Hard-coded directories for this extraction run.
# NOTE(review): `masks` and `orig` are assigned but not referenced in the
# visible part of this script.
preprocessed = '/kaggle/retina/train/labelled'
masks = '/kaggle/retina/train/masks'
orig = '/kaggle/retina/train/sample/split'
output = '/kaggle/retina/train/sample/features'

# Sizes the per-prefix frame built in the loop that follows
# (n_bins * 2 + 1 columns).
n_bins = 100

prep_out_path(output)

for i in range(0, 5):
    prefix = str(i)

    print "Starting extraction @ ", time_now_str()
    files = os.listdir(path.join(preprocessed, prefix))

    # intermediate output will be stored here
    # we will save all the files first then join them into one csv file
    df = pd.DataFrame(columns=range(n_bins * 2 + 1))
    j = 0

    for f in files:
        start = time.time()