def __init__(self, img_paths, outfile_name=None):
    """Label each left/right fundus photo pair with the flips needed to align it.

    For every image one row ``[image, horizontal_flip, vertical_flip]`` is
    written to a CSV file, and ``self.labels`` maps the image name (without
    extension) to the result of ``FundusPhoto.has_notch()``.

    :type img_paths: list of strings
    :param img_paths: paths named ``<id>_left.<ext>`` and ``<id>_right.<ext>``,
        one pair per id. The directory and the extension are taken from the
        first path and applied to every image.
    :type outfile_name: string or None
    :param outfile_name: CSV file to create. When None, a name of the form
        ``data/align/fundus_photo_aligner_<8 hex chars>.csv`` is generated
        for this call.
    """
    import os.path  # local import: only needed for the path handling below

    if outfile_name is None:
        # Built per call on purpose: a default computed in the signature is
        # evaluated once at definition time, so every instance would share
        # (and overwrite) the same output file.
        outfile_name = "data/align/fundus_photo_aligner_%s.csv" % str(uuid.uuid4())[:8]

    # dirname/join keep bare filenames relative to the working directory
    # instead of turning them into "/name.ext".
    directory = os.path.dirname(img_paths[0])
    file_names = [os.path.basename(img_path) for img_path in img_paths]
    ext = file_names[0].split('.')[-1]
    photo_ids = set(file_name.split('_')[0] for file_name in file_names)
    # Every id must contribute exactly two images (left and right).
    assert((len(photo_ids) * 2) == len(file_names))
    pairs = [["%s_left" % photo_id, "%s_right" % photo_id] for photo_id in photo_ids]

    self.labels = {}
    # Percentage ruler printed above the progress bar.
    print(''.join([" %i%%|" % (j*10) for j in range(1, 10)] + [' 100%|']))
    pb = NyanBar(tasks=len(pairs))
    with open(outfile_name, 'w') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['image','horizontal_flip','vertical_flip'])
        for pair in pairs:
            pair_paths = [os.path.join(directory, "%s.%s" % (name, ext)) for name in pair]
            fundus_photos = [FundusPhoto(pair_path) for pair_path in pair_paths]
            for i, fundus_photo in enumerate(fundus_photos):
                fundus_photo.refine_center()
                direct = fundus_photo.has_notch()
                # No notch -> flip on both axes.
                lens_flip_vector = np.array([0,0] if direct else [1,1])
                # The first image of each pair (the "_left" one) gets one
                # extra flip in the second (vertical_flip) column.
                left_right_flip_vector = np.array([0,1] if i == 0 else [0,0])
                # Two flips on the same axis cancel out, hence modulo 2.
                final_flip_vector = (lens_flip_vector + left_right_flip_vector) % 2
                writer.writerow([pair[i]] + final_flip_vector.tolist())
                self.labels[pair[i]] = direct
            pb.task_done()
    pb.finish()
    print("Done! created: %s" % outfile_name)
def benchmark_put_with_many_filters():
    """Add batches of random keys through a BloomRouter set up with 16 filters.

    Runs ``testsize`` rounds; each round adds ``num_keys`` fresh UUID strings.
    Both names are read from outside this function.
    """
    hosts = ["bloom1", "bloom2", "bloom3", "bloom4"]
    # filter_count must be an argument of BloomRouter. It used to sit inside
    # the str.format() call, which silently ignores unused keyword arguments,
    # so the router never received it.
    client = BloomRouter(hosts, "g{}".format(random.randint(1, 100000)), filter_count=16)
    progress = NyanBar(tasks=testsize)
    for _ in range(testsize):
        progress.task_done()
        keys = [str(uuid.uuid4()) for _ in range(num_keys)]
        client.add(keys)
    progress.finish()
def put_then_get_with_one_filter():
    """Add batches of random keys through a single-filter BloomRouter and read them back.

    Runs ``testsize`` rounds of ``num_keys`` UUID strings each (both names are
    read from outside this function) and asserts that ``get`` returns a
    truthy result for every batch that was just added.
    """
    router = BloomRouter(
        ["bloom1", "bloom2", "bloom3", "bloom4"],
        "f{}".format(random.randint(1, 100000)),
        filter_count=1
    )
    bar = NyanBar(tasks=testsize)
    for _round in range(testsize):
        bar.task_done()
        batch = []
        for _ in range(num_keys):
            batch.append(str(uuid.uuid4()))
        router.add(batch)
        assert router.get(batch)
    bar.finish()
def package_data(directory, datasets, image_shape, outfile_path):
    """
    Outputs a *.npz file of image data in 3 sets

    :type directory: string
    :param directory: directory of image data; it is prepended verbatim to
                      each filename, so it must end with a path separator

    :type datasets: tuple of dictionaries, string -> Any
    :param datasets: dictionaries should be in the order training, validation,
                     test. The key is the image filename and the value is the
                     true label (stored as uint8)

    :type image_shape: tuple of ints
    :param image_shape: shape of one image; its product is the length of the
                        flattened row stored per image

    :type outfile_path: string
    :param outfile_path: file the arrays are written to
    """
    assert(len(datasets) == 3)
    xsize = numpy.prod(image_shape)
    x_datasets = [numpy.zeros((len(dataset), xsize), dtype=numpy.uint8) for dataset in datasets]
    # list(...) so this also works where dict.values() returns a view
    # instead of a list.
    y_datasets = [numpy.array(list(dataset.values()), dtype=numpy.uint8) for dataset in datasets]
    print("| " + ("⚐ ⚑ " * 19) + "-|")
    pb = NyanBar(tasks=sum(len(dataset) for dataset in datasets))
    for j, dataset in enumerate(datasets):
        # Iterating the dict yields keys in the same order values() used
        # above, so row i of x matches label i of y.
        for i, image_name in enumerate(dataset):
            pb.task_done()
            im = Image.open(directory + image_name)
            x_datasets[j][i, :] = numpy.array(im.getdata(), dtype=numpy.uint8).reshape(xsize)
    pb.finish()
    print('... saving data')
    # cPickle too slow (takes many minutes for tens of thousands of images over 100x100x3)
    saveme = [x_datasets[0], y_datasets[0],
              x_datasets[1], y_datasets[1],
              x_datasets[2], y_datasets[2]]
    # Close the output file deterministically; previously the handle was left
    # open and flushing depended on garbage collection.
    # Positional arrays are stored by numpy.savez as arr_0 .. arr_5.
    with open(outfile_path, 'wb') as outfile:
        numpy.savez(outfile, *saveme)
    print('done')
with open(label_csv, 'rU') as csvfile: reader = csv.reader(csvfile, dialect=csv.excel_tab, delimiter=',') next(reader, None) # skip header print ''.join([" %i%%|" % (j*10) for j in xrange(1,10)] + [' 100%|']) pb = NyanBar(tasks=(sum(1 for row in open(label_csv, 'rU'))-1)) for row in reader: imgname = row[0] true_label = 1 if (row[1] == 'tr' or row[1] == 'br' or row[1] == '1') else 0 title_prefix = "True Label: %s" % true_label predicted_label = predict_single_image(inpath + imgname, title_prefix=title_prefix, save_path=(outpath + imgname), true_label=true_label) true_labels.append(true_label) predicted_labels.append(predicted_label) pb.task_done() pb.finish() accuracy, precision = binary_accuracy_precision(true_labels, predicted_labels) print("Accuracy: %.3f. Precision: %.3f." % (accuracy, precision)) elif len(sys.argv) == 3: # batch of doubled images true_label_csv = sys.argv[1] inpath = sys.argv[2] true_labels = [] img_names = [] # for reference of label order img_paths = [] # read csv # we're going to assume true_label_csv has the double images next to each other # whether left/right comes first doesn't matter since their label is identical with open(true_label_csv, 'rU') as csvfile: reader = csv.reader(csvfile, dialect=csv.excel_tab, delimiter=',')