# One image path per line of the already-open list file `f`. Surrounding
# whitespace is stripped and blank lines (for example a trailing empty line
# at the end of the list) are dropped so that an empty string is never
# treated as a file name further down.
files = [name for name in (line.strip() for line in f) if name]

# Accumulators for the samples and their labels (one entry per image).
data, labels = [], []

# Pre-processing stage, configured with a 7x7 bilateral kernel and a
# sigma_color of 9.
filter_kernel = (7, 7)
preprocessor = HedBilateralFilter()
preprocessor.set_param("bilateral_kernel", filter_kernel)
preprocessor.set_param("sigma_color", 9)

# Segmentation stage.
# NOTE(review): the meaning of the constructor argument 5 is not visible
# here -- confirm against SegmentStage.
segment = SegmentStage(5)

# Turn every listed image into one (label, feature vector) pair.
for img in files:
    # The label is the single character that sits right before the last
    # four characters of the path (i.e. before a 4-character extension).
    label = img[-5:-4]
    labels.append(float(label))

    # flags=1 requests a 3-channel colour image (cv2.IMREAD_COLOR).
    image = cv2.imread(img, flags=1)
    if image is None:
        # cv2.imread does not raise on a missing or undecodable file, it
        # returns None. Stop here with the offending path instead of
        # failing later with an unrelated error inside the pipeline.
        raise SystemExit("could not read image, filename: %s" % img)

    canny, gray = preprocessor.run(image)
    conts = segment.run(canny, gray, image)
    try:
        # Keep only the contour with the largest area.
        max_cont = max(conts, key=lambda x: x.area)
    except ValueError:
        # max() raises ValueError on an empty sequence, i.e. segmentation
        # produced no contour. Abort with a non-zero exit status and name
        # the file (SystemExit with a string prints it to stderr).
        raise SystemExit("no contours found, filename: %s" % img)

    # Cut the contour's region out of the second channel of the HED image.
    # NOTE(review): cv2.imread yields BGR channel order while the name
    # rgb2hed suggests RGB input -- confirm whether a conversion is needed.
    region = max_cont.get_region(cv2.split(rgb2hed(image))[1])

    # Normalise every region to 30x30 and flatten it into a 1-D vector so
    # all samples have the same length.
    features = cv2.resize(region, (30, 30)).flatten()
    data.append(features)

# Partition samples and labels into training and test subsets, with
# test_size=0.25 reserving a quarter of the data for testing.
# NOTE(review): presumably scikit-learn's train_test_split; no random seed
# is passed, so the partition may differ between runs -- confirm intended.
data_train, data_test, labels_train, labels_test = train_test_split(
    data,
    labels,
    test_size=0.25,
)