def make_dataset(category=None, dirlist=None, height=32, width=32, channel=3, extensions=None):
    """Convert one dataset split ("train"/"test"/"valid") to .npy sample files.

    For every class directory in `dirlist`, each image is resized to
    (width, height), converted to grayscale, flattened to a 1-D vector and
    saved next to the source image as a .npy file. A preview PNG of every
    resized image is also written to PACK_PATH/images/dataset/.
    Finally PACK_PATH/dataset/format.txt records, one value per line:
    class count, vector length, height, width, channel.

    Parameters:
        category: split name; selects PACK_PATH/dataset/<category>/.
        dirlist: class sub-directory names inside the split.
        height, width: target size for cv2.resize.
        channel: ignored on input — forced to 1 (grayscale) below.
        extensions: file extensions accepted by util.get_filelist.
    """
    print("\n** Make " + category)

    label_number = 0
    if not util.check_path(path=PACK_PATH + "/images/"):
        util.make_path(path=PACK_PATH + "/images/")
    util.refresh_directory(PACK_PATH + "/images/dataset/")

    # The dataset is intentionally converted to grayscale regardless of the
    # `channel` argument; the geometry variables are refreshed from the actual
    # image shape inside the loop.
    channel = 1
    for di in dirlist:
        tmp_path = PACK_PATH + "/dataset/" + category + "/" + di
        fi_list = util.get_filelist(directory=tmp_path, extensions=extensions)

        cnt = 0
        for fi in fi_list:
            _, tmp_file = util.get_dir_and_file_name(path=fi)
            cnt += 1

            image = cv2.imread(fi)
            resized_image = cv2.resize(image, (width, height))
            # Preview PNG named <label>_<counter>.png for visual inspection.
            cvf.save_image(path=PACK_PATH + "/images/dataset/",
                           filename=str(label_number) + "_" + str(cnt) + ".png",
                           image=resized_image)

            if channel == 1:
                resized_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2GRAY)
                height, width = resized_image.shape
            else:
                height, width, channel = resized_image.shape

            # Flatten to a 1-D feature vector for np.save / later np.load.
            resized_image = resized_image.reshape((height * width * channel))
            np.save(file=tmp_path + "/" + tmp_file, arr=resized_image)

        label_number += 1

        # management storage: drop any leftover working copy of this split.
        if os.path.exists(PACK_PATH + "/" + category):
            shutil.rmtree(PACK_PATH + "/" + category)

    # Record the dataset geometry so a loader can reconstruct the arrays.
    with open(PACK_PATH + "/dataset/format.txt", "w") as f:
        f.write(str(label_number))
        f.write("\n")
        f.write(str(height * width * channel))
        f.write("\n")
        f.write(str(height))
        f.write("\n")
        f.write(str(width))
        f.write("\n")
        f.write(str(channel))
def make(path=None, height=32, width=32, channel=3, extensions=None):
    """Build the full train/test/valid dataset from the raw images in `path`.

    Refreshes PACK_PATH/dataset and its three split directories, splits the
    class directories found under `path` via split_data, then converts each
    split to .npy files via make_dataset.

    Parameters:
        path: root directory containing one sub-directory per class.
        height, width, channel: forwarded to make_dataset.
        extensions: accepted image file extensions.
    """
    print("\n** Make dataset")

    cate_list = ["train", "test", "valid"]

    util.refresh_directory(PACK_PATH + "/dataset")
    for ca in cate_list:
        util.refresh_directory(PACK_PATH + "/dataset/" + ca)

    dirlist = util.get_dirlist(path=path)
    split_data(path=path, directories=dirlist, extensions=extensions)

    # NOTE(review): the original version also listed each class directory here
    # and discarded the result — that dead loop has been removed.
    print("I got the standard shape!")

    for ca in cate_list:
        make_dataset(category=ca, dirlist=dirlist,
                     height=height, width=width, channel=channel,
                     extensions=extensions)
def main():
    """Resize every image under the hard-coded source path to fit a 500x500
    canvas (aspect-ratio preserved, zero-padded) and save it as a PNG under
    PACK_PATH/images/."""
    extensions = ["BMP", "bmp", "PNG", "png", "JPG", "jpg", "JPEG", "jpeg"]

    util.refresh_directory(PACK_PATH+"/images")

    print("Enter the path")
    # usr_path = input(">> ")
    usr_path = "/media/yeonghyeon/Toshiba/lung/datasets/20171204"

    if not util.check_path(usr_path):
        print("Invalid path :"+usr_path)
        return

    for filepath in util.get_filelist(directory=usr_path, extensions=extensions):
        print(filepath)
        _, file_stem = util.get_dir_and_file_name(path=filepath)

        out_dir = PACK_PATH+"/images/"+str(file_stem)+"/"
        if not util.check_path(path=out_dir):
            util.make_path(path=out_dir)

        image = cvf.load_image(path=filepath)
        src_h, src_w = image.shape[0], image.shape[1]
        if src_h > src_w:
            # Portrait: clamp height to 500, scale width proportionally.
            resized = cvf.resizing(image=image, width=int(500*(src_w/src_h)), height=500)
        else:
            # Landscape or square: clamp width to 500, scale height proportionally.
            resized = cvf.resizing(image=image, width=500, height=int(500*(src_h/src_w)))

        # Pad the shorter side with zeros to reach an exact 500x500 canvas.
        zeropad = cvf.zero_padding(image=resized, height=500, width=500)

        print(image.shape)
        print(resized.shape)
        print(zeropad.shape)

        cvf.save_image(path=PACK_PATH+"/images/", filename=str(file_stem)+".png", image=zeropad)
def __init__(self, who_am_i, class_len, data_len, height, width, channel):
    """Hold the metadata of one dataset split and preload all of its samples.

    Parameters:
        who_am_i: split name; used as the directory name under
                  PACK_PATH/dataset/ (presumably "train"/"test"/"valid" —
                  confirm with caller).
        class_len: number of classes in the dataset.
        data_len: length of each flattened sample vector.
        height, width, channel: image geometry of the samples.
    """
    self._who_am_i = who_am_i
    self._class_len = class_len
    self._data_len = data_len
    self._height = height
    self._width = width
    self._channel = channel
    # Cursor starting at 0; its consumer is not visible in this chunk —
    # presumably sequential iteration over validation samples.
    self._valid_idx = 0
    # Total number of .npy sample files available for this split.
    self._amount = len(util.get_filelist(directory=PACK_PATH+"/dataset/"+str(self._who_am_i), extensions=["npy"]))
    # Load the entire split into memory up front.
    self.total_data, self.total_label = self.pre_load()
def main():
    """Run extract_segments on every image found under the hard-coded path."""
    extensions = ["BMP", "bmp", "PNG", "png", "JPG", "jpg", "JPEG", "jpeg"]

    util.refresh_directory(PACK_PATH + "/images")

    print("Enter the path")
    # usr_path = input(">> ")
    usr_path = "/home/yeonghyeon/Desktop/images/post_processing_1113"

    if not util.check_path(usr_path):
        print("Invalid path :" + usr_path)
        return

    for filepath in util.get_filelist(directory=usr_path, extensions=extensions):
        print(filepath)
        extract_segments(filename=filepath)
def split_data(path=None, directories=None, extensions=None):
    """Shuffle each class directory and copy its files into 80/10/10
    train/test/valid splits under PACK_PATH/dataset/.

    Parameters:
        path: root directory containing one sub-directory per class.
        directories: class sub-directory names inside `path`.
        extensions: file extensions accepted by util.get_filelist.
    """
    print("\n** Split whole datas")

    if not os.path.exists(path):
        print("Path not exists \"" + str(path) + "\"")
        return

    for di in directories:
        # Ensure the per-class destination directories exist
        # (makedirs with exist_ok replaces check-then-mkdir).
        for split in ("train", "test", "valid"):
            os.makedirs(PACK_PATH + "/dataset/" + split + "/" + di, exist_ok=True)

        fi_list = util.get_filelist(directory=path + "/" + di, extensions=extensions)
        random.shuffle(fi_list)

        # 80% train / 10% test / 10% valid (remainder goes to valid).
        tr_point = int(len(fi_list) * 0.8)
        te_point = int(len(fi_list) * 0.9)

        train = fi_list[:tr_point]
        test = fi_list[tr_point:te_point]
        valid = fi_list[te_point:]

        print("Class: " + str(di))
        print("Train:\t%d" % (len(train)))
        print("Test:\t%d" % (len(test)))
        print("Valid:\t%d" % (len(valid)))
        print()

        util.copy_file(origin=train, copy=PACK_PATH + "/dataset/train/" + di)
        util.copy_file(origin=test, copy=PACK_PATH + "/dataset/test/" + di)
        util.copy_file(origin=valid, copy=PACK_PATH + "/dataset/valid/" + di)

    print("Split the datas!")
def pre_load(self):
    """Load every .npy sample of this split into memory.

    Returns a (data, labels) pair of parallel lists: each class directory
    gets the integer label of its position in util.get_dirlist's ordering.
    """
    find_path = PACK_PATH+"/dataset/"+str(self._who_am_i)
    dirlist = util.get_dirlist(path=find_path, dataset_dir="dataset")

    total_data = []
    total_label = []
    # enumerate replaces the original manual label counter.
    for label, di in enumerate(dirlist):
        for fi in util.get_filelist(directory=find_path+"/"+di, extensions=["npy"]):
            total_label.append(label)
            total_data.append(np.load(file=fi))

    return total_data, total_label
# from http://www.janeriksolem.net/2009/06/histogram-equalization-with-python-and.html # get image histogram image_histogram, bins = np.histogram(image.flatten(), number_bins, normed=True) cdf = image_histogram.cumsum() # cumulative distribution function cdf = (number_bins-1) * cdf / cdf[-1] # normalize # use linear interpolation of cdf to find new pixel values image_equalized = np.interp(image.flatten(), bins[:-1], cdf) return image_equalized.reshape(image.shape), cdf print("\nEnter the path") usr_path = input(">> ") if(util.check_path(usr_path)): files = util.get_filelist(directory=usr_path, extensions=extensions) for fi in files: print("Convert: "+str(fi)) tmp_name = fi.split("/") tmp_file = tmp_name[len(tmp_name)-1].split(".")[0] dir_sp = fi.split("/") main_dir = "" for ds in dir_sp[:len(dir_sp)-1]: main_dir += ds main_dir += "/" dicom_data = dicom.read_file(fi) try: dicom_numpy = dicom_data.pixel_array
def extract_lung(usr_path, extensions=None, height=None, width=None, channel=None, sess=None, x_holder=None, training=None, prediction=None, saver=None):
    """Detect and classify lung-region candidates in every image under usr_path.

    Pipeline per file:
      1. preprocess: grayscale, resize to width 500, 20 passes of
         threshold/renormalize, adaptive threshold, masking — each stage is
         saved as a PNG under PACK_PATH/results/<file>/;
      2. extract candidate boxes from contours;
      3. restore the model checkpoint and classify each in-bounds box via
         sess.run(prediction, ...) (sess/x_holder/training/prediction/saver
         appear to be TensorFlow session objects — confirm with caller);
      4. draw overlay images and append a "FILENAME,DETECT,IOU" row to
         PACK_PATH/results/summary.csv.

    Prints "You must training first!" when no checkpoint exists.
    """
    if (not (util.check_path(path=PACK_PATH + "/results/"))):
        util.make_path(path=PACK_PATH + "/results/")

    # Summary CSV: header row, then one row per successfully processed file.
    summf = open(PACK_PATH + "/results/summary.csv", "w")
    summf.write("FILENAME")
    summf.write(",")
    summf.write("DETECT")
    summf.write(",")
    summf.write("IOU")
    summf.write("\n")

    files = util.get_filelist(directory=usr_path, extensions=extensions)
    files.sort()
    for filename in files:
        print(filename)
        if (util.check_file(filename=filename)):
            tmp_sub, tmp_file = util.get_dir_and_file_name(path=filename)

            # Per-file result directory for the intermediate/overlay PNGs.
            if (not (util.check_path(path=PACK_PATH + "/results/" + str(tmp_file) + "/"))):
                util.make_path(path=PACK_PATH + "/results/" + str(tmp_file) + "/")

            origin = cvf.load_image(path=filename)
            try:
                gray = cvf.rgb2gray(rgb=origin)
            except:  # if origin image is grayscale
                gray = origin
            resized = cvf.resizing(image=gray, width=500)
            cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre1_origin.png", image=resized)

            # Iteratively threshold at 0.3 * mean and renormalize the masked
            # image (20 fixed passes); the pre2/pre3 PNGs are overwritten on
            # every pass, so only the last pass survives on disk.
            mulmul = resized.copy()
            for i in range(20):
                ret, thresh = cv2.threshold(mulmul, np.average(mulmul) * 0.3, 255, cv2.THRESH_BINARY)
                cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre2_thresh.png", image=thresh)
                mulmul = cvf.normalizing(binary_img=resized * (thresh / 255))
                cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre3_normalize.png", image=mulmul)

            movavg = cvf.moving_avg_filter(binary_img=mulmul, k_size=10)
            adap = cvf.adaptiveThresholding(binary_img=movavg, neighbor=111, blur=False, blur_size=3)
            cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre4_adaptrhesh.png", image=255 - adap)

            # Keep only the pixels where the (inverted) adaptive threshold fired.
            masking = resized * ((255 - adap) / 255)
            cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre5_mask1.png", image=masking)

            movavg = cvf.moving_avg_filter(binary_img=masking, k_size=5)
            cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre6_mask2.png", image=movavg)

            ret, thresh = cv2.threshold(movavg, np.average(movavg) * 0.5, 255, cv2.THRESH_BINARY_INV)
            cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_pre7_thresh.png", image=thresh)

            # Candidate regions: contours -> padded boxes -> de-duplicated boxes.
            contours = cvf.contouring(binary_img=thresh)
            boxes_tmp = cvf.contour2box(contours=contours, padding=20)
            boxes = cvf.rid_repetition(boxes=boxes_tmp, binary_img=thresh)

            if (os.path.exists(PACK_PATH + "/checkpoint/checker.index")):
                saver.restore(sess, PACK_PATH + "/checkpoint/checker")

                # Class names, one per line.
                f = open(PACK_PATH + "/dataset/labels.txt", 'r')
                content = f.readlines()
                f.close()
                for idx in range(len(content)):
                    content[idx] = content[idx][:len(content[idx]) - 1]  # rid \n

                boxes_pred = []
                cnt = 0
                for b in boxes:
                    x, y, w, h = b
                    # Only classify boxes strictly inside the resized image.
                    if ((x > 0) and (y > 0)):
                        if ((x + w < resized.shape[1]) and (y + h < resized.shape[0])):
                            # Crop, keep the largest connected blob, and mask
                            # the grayscale crop with it before classification.
                            pad = cvf.zero_padding(image=thresh[y:y + h, x:x + w], height=500, width=500)
                            pad2 = cvf.remain_only_biggest(binary_img=pad)
                            pad_res = cvf.zero_padding(image=resized[y:y + h, x:x + w], height=500, width=500)
                            xdata = pad_res * (pad2 / 255)

                            prob = sess.run(prediction, feed_dict={x_holder: convert_image(image=xdata, height=height, width=width, channel=channel), training: False})
                            result = str(content[int(np.argmax(prob))])
                            acc = np.max(prob)
                            boxes_pred.append([x, y, w, h, result, acc])
                            # cvf.save_image(path=PACK_PATH+"/results/"+str(tmp_file)+"/", filename=str(tmp_file)+"_"+str(result)+"_"+str(int(round(acc, 2)*100))+"_"+str(cnt)+".png", image=xdata)
                            cnt += 1

                # Stable double sort: primarily by confidence, ties by label.
                boxes_pred = sorted(boxes_pred, key=lambda l: l[4], reverse=True)  # sort by result
                boxes_pred = sorted(boxes_pred, key=lambda l: l[5], reverse=True)  # sort by acc

                # Scale factor back to the original resolution; note the crop
                # saving/concatenation below is done at ratio=1 (resized space).
                ratio = origin.shape[0] / resized.shape[0]
                save_crops(image=resized, boxes=boxes_pred, ratio=1, file_name=tmp_file)
                concats = concatenate(image=resized, boxes=boxes_pred, ratio=1, file_name=tmp_file)

                # bbox is returned alongside the IoU — presumably the
                # ground-truth region for this file; confirm in
                # intersection_over_union.
                iou, bbox = intersection_over_union(filename=filename, boxes=concats, ratio=ratio)

                summf.write(str(filename))
                summf.write(",")
                summf.write(str(len(concats)))
                summf.write(",")
                summf.write(str(iou))
                summf.write("\n")

                origin_res1 = cvf.resizing(image=origin, width=500)
                origin_res2 = origin_res1.copy()
                origin_res3 = origin_res1.copy()

                # Overlay 1: raw per-box predictions.
                origin_res_lr = draw_boxes(image=origin_res1, boxes=boxes_pred, ratio=1, file_name=tmp_file)
                cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_origin_lr.png", image=origin_res_lr)

                # Overlay 2: predictions plus concatenated boxes (same canvas).
                origin_res_concat1 = draw_boxes(image=origin_res1, boxes=concats, ratio=1, file_name=tmp_file)
                cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_origin_lr_and_concat.png", image=origin_res_concat1)

                # Overlay 3: concatenated boxes only.
                origin_res_concat2 = draw_boxes(image=origin_res2, boxes=concats, ratio=1, file_name=tmp_file)
                cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_origin_concat.png", image=origin_res_concat2)

                if (len(bbox) > 0):
                    # Overlay 4/5: reference bbox alone, then with concat boxes.
                    origin_res_bbox = draw_boxes(image=origin_res3, boxes=bbox, ratio=1, file_name=tmp_file)
                    cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_origin_bbox.png", image=origin_res_bbox)

                    origin_res_concat3 = draw_boxes(image=origin_res3, boxes=concats, ratio=1, file_name=tmp_file)
                    cvf.save_image(path=PACK_PATH + "/results/" + str(tmp_file) + "/", filename=str(tmp_file) + "_origin_concat_bbox.png", image=origin_res_concat3)
            else:
                print("You must training first!")
        else:
            print("Invalid File: " + str(filename))
    summf.close()