def rename(_):
    """Interactively refactor the solution files and their tests.

    Asks for a literal ``yes`` (case-insensitive) before doing anything.
    On confirmation, lists the files under ``solutions`` and
    ``tests/solutions``, passes each directory and its file list to
    ``refactor`` and finally prints every entry ``refactor`` returned as
    skipped.

    :param _: ignored; accepted only so the function matches its caller's
        calling convention.
    :return: None
    """
    answer = input('Are you sure? (yes/no): ')
    if answer.lower() != 'yes':
        print('Aborted...')
        return

    solutions_dir = os.path.join('solutions')
    tests_dir = os.path.join('tests', 'solutions')

    print('Reading...')
    solutions = get_files(solutions_dir)
    print(' %d solutions' % len(solutions))
    tests = get_files(tests_dir)
    print(' %d tests' % len(tests))

    print('Refactoring...')
    targets = (
        (' solutions:', solutions_dir, solutions),
        (' tests:', tests_dir, tests),
    )
    skipped_groups = []
    for label, directory, names in targets:
        # The label stays on the same line as the trailing "DONE".
        print(label, end=' ')
        skipped_groups.append(refactor(directory, names))
        print('DONE')

    print('Skipped files...')
    for name in itertools.chain.from_iterable(skipped_groups):
        print(' %s' % name)
def split_train_val(root, train_percent=0.8):
    """Copy a class-per-folder image set into sibling ``train``/``val`` trees.

    Every entry of ``root`` is treated as a class folder. For each class the
    ``.jpg`` files reported by ``get_files`` are split at random:
    ``int(n * train_percent)`` files are copied to ``<root>/../train/<class>``
    and the remainder to ``<root>/../val/<class>``. Source files are copied,
    never moved.

    :param root: directory whose children are the class folders.
    :param train_percent: fraction of each class that goes to the train split.
    :return: tuple ``(train_dir, val_dir)``.
    """
    train_dir = osp.join(root, '..', 'train')
    val_dir = osp.join(root, '..', 'val')
    class_paths = [osp.join(root, c) for c in os.listdir(root)]

    # Fixed seed so repeated runs produce the same split.
    random.seed(123)

    tq = tqdm(class_paths, total=len(class_paths), desc='Spliting Data')
    for cls_path in tq:
        class_name = os.path.basename(cls_path)
        tq.set_postfix(class_name=class_name)

        train_new_dir = osp.join(train_dir, class_name)
        val_new_dir = osp.join(val_dir, class_name)
        for new_dir in (train_new_dir, val_new_dir):
            if not osp.isdir(new_dir):
                os.makedirs(new_dir)

        img_files = get_files(cls_path, extensions='.jpg')
        n_train = int(len(img_files) * train_percent)
        # A set keeps the membership test below O(1); the original list
        # lookup made the copy loop quadratic in the class size.
        train_class_files = set(random.sample(img_files, n_train))

        for img_file in img_files:
            if img_file in train_class_files:
                dst_dir = train_new_dir
            else:
                dst_dir = val_new_dir
            shutil.copy(img_file,
                        osp.join(dst_dir, os.path.basename(img_file)))

    return train_dir, val_dir
def prepare_patient_images(patient_id, intermediate_crop=0):
    """
    Build the prediction input folder for one patient.

    Creates the patient directory and its image directory, removes any
    ``*.png`` already in the image directory, writes an all-zero
    ``dummy_overlay.png`` of ``TARGET_SIZE`` x ``TARGET_SIZE``, writes a
    cropped copy of every preprocessed image whose name starts with the
    zero-padded patient id, and finally writes ``pred.lst``: one
    tab-separated ``<image name>, dummy_overlay.png`` row per image.

    :param patient_id: the patient id (left-padded with zeros to 4 chars).
    :param intermediate_crop: forwarded to ``utils.prepare_cropped_sax_image``.
    :return: nothing
    """
    overlay_name = "dummy_overlay.png"
    pattern = str(patient_id).rjust(4, '0') + "*.png"
    source_paths = utils.get_files(settings.BASE_PREPROCESSEDIMAGES_DIR,
                                   pattern)

    utils.create_dir_if_not_exists(utils.get_pred_patient_dir(patient_id))
    image_dir = utils.get_pred_patient_img_dir(patient_id)
    utils.create_dir_if_not_exists(image_dir)
    utils.delete_files(image_dir, "*.png")

    # Paths are built by plain concatenation, so image_dir is expected to
    # already end with a path separator.
    blank = numpy.zeros((settings.TARGET_SIZE, settings.TARGET_SIZE))
    cv2.imwrite(image_dir + overlay_name, blank)

    rows = []
    for source_path in source_paths:
        name = ntpath.basename(source_path)
        grayscale = cv2.imread(source_path, cv2.IMREAD_GRAYSCALE)
        cropped = utils.prepare_cropped_sax_image(
            grayscale,
            clahe=True,
            intermediate_crop=intermediate_crop,
            rotate=0)
        cv2.imwrite(image_dir + name, cropped)
        rows.append([name, overlay_name])

    with open(image_dir + "pred.lst", "w") as listing:
        csv.writer(listing, delimiter='\t').writerows(rows)
def test_returns_correct_result(self, tmpdir, files):
    """After ``refactor`` the directory holds s003, s002, s001 in that
    order, and each file's content equals its own name."""
    root = str(tmpdir)
    create_files(root, files)
    refactor(root, files)

    expected = ['s003', 's002', 's001']
    assert get_files(root) == expected

    for name in expected:
        with open(os.path.join(root, name)) as handle:
            content = handle.read()
        assert content == name
def get_wsl_cam(
        img_name,
        model='deepmil_multi',
        wsl_dir='/home/victor/PycharmProjects/'
                'active-learning-segmentation-baselines/wsl_cams'):
    """Load the stored ``.npy`` class-activation map for one image.

    The maps are looked up in ``<wsl_dir>/<model>/npy/``. The image is
    identified by ``img_name[0]`` with any ``.bmp`` removed; it is matched
    against the file names reported by ``get_files`` with ``.npy`` removed.

    :param img_name: indexable whose first element is the image file name
        (presumably a one-element batch of names; verify against caller).
    :param model: sub-folder of ``wsl_dir`` that holds the model's maps.
    :param wsl_dir: root folder of the stored maps. Defaults to the path
        that used to be hard-coded, so existing callers are unaffected.
    :return: the array returned by ``np.load`` for the matching file.
    :raises ValueError: if no stored map matches the image name.
    """
    cams_dir = join(wsl_dir, '{}/npy/'.format(model))
    # get_paths and get_files are relied on to list the folder in the same
    # order, so one index is valid for both lists.
    paths = get_paths(cams_dir, 'npy')
    file_names = [f.replace('.npy', '') for f in get_files(cams_dir, 'npy')]

    target = img_name[0].replace('.bmp', '')
    try:
        position = file_names.index(target)
    except ValueError:
        raise ValueError(
            'no CAM named {!r} found in {}'.format(target, cams_dir))
    return np.load(paths[position])
def find_last_model():
    """Return the path of the newest ``.h5`` file in ``cfgs.log_dir``.

    "Newest" means last in lexicographic order of the file names.

    :return: joined path of that file, or ``None`` when the directory
        contains no ``.h5`` file.
    """
    log_dir = cfgs.log_dir
    checkpoints = [name for name in get_files(log_dir)
                   if name.endswith('.h5')]
    if not checkpoints:
        return None
    # The lexicographic maximum is the last element of the sorted list.
    return os.path.join(log_dir, max(checkpoints))
def parse_test(data_path, w_path, doc_w_path=None, doc_token_span_w_path=None):
    """Convert raw test documents into BIO-style feature files.

    Token spans for every ``.txt`` file under ``data_path`` are obtained
    from ``get_real_token_span`` (or from a cached pickle) and flattened
    into parallel per-document / per-sentence lists of tokens, POS tags,
    orthographic features and file names, which are handed to
    ``write_bio_test``.

    :param data_path: directory holding the raw ``.txt`` documents.
    :param w_path: output path for the sentence-level file.
    :param doc_w_path: optional output path for a second file written with
        ``sentence_level=False``.
    :param doc_token_span_w_path: optional pickle cache for the token spans.
        If the file exists it is read; if it does not, the spans are
        computed and written there. When omitted the spans are computed and
        not cached (previously the default ``None`` was passed straight to
        ``read_pickle``).
    """
    cache_path = doc_token_span_w_path
    if cache_path and file_exists(cache_path):
        # NOTE(review): read_pickle presumably unpickles the file; only
        # point this at cache files produced by this project.
        documents_spans = read_pickle(cache_path)
    else:
        if cache_path:
            print('{} not found, computing doc-level-span information dictionary'.
                  format(cache_path))
        documents_spans = get_real_token_span(data_path)
        if cache_path:
            # keep a copy of token spans to avoid re-computing it during
            # training etc.
            write_pickle(documents_spans, cache_path)
            print('{} created'.format(cache_path))

    documents_tokens = []
    documents_pos = []
    documents_ortho = []
    documents_fname = []
    for txt_path in get_files(data_path, ext='txt'):
        f_name = get_filename(txt_path)
        sentences = documents_spans[f_name]
        documents_tokens.append(
            [[word['word'] for word in sentence] for sentence in sentences])
        documents_pos.append(
            [[word['pos'] for word in sentence] for sentence in sentences])
        documents_ortho.append(
            [[get_ortho_feature(word['word']) for word in sentence]
             for sentence in sentences])
        # One file-name entry per token keeps all four structures parallel.
        documents_fname.append(
            [[f_name for _ in sentence] for sentence in sentences])

    write_bio_test(w_path,
                   documents_tokens,
                   documents_pos,
                   documents_ortho,
                   documents_fname,
                   sentence_level=True)
    if doc_w_path:
        write_bio_test(doc_w_path,
                       documents_tokens,
                       documents_pos,
                       documents_ortho,
                       documents_fname,
                       sentence_level=False)
def main():
    """Combine the skeleton ``.txt`` files into one JSON file.

    Reads the section of the pipeline config keyed by this script's file
    name, loads every skeleton file from the configured input folder, keeps
    the first skeleton of each non-empty record when its label is one of the
    configured classes, dumps the kept skeletons as JSON to the configured
    output file and prints a per-label count table.
    """

    def _as_path(value):
        # Config entries are either a ready path or a list/tuple of parts.
        if isinstance(value, (list, tuple)):
            return os.path.join(*value)
        return value

    # Settings
    config = Config(config_file='../configs/train_action_recogn_pipeline.yaml')
    section = config[os.path.basename(__file__)]

    # IO folders
    source_folder = _as_path(section.input.skeletons_folder)
    target_file = _as_path(section.output.skeletons_txt)

    # Only the skeleton of the 0th person in each image is used.
    person_index = 0
    # Position of the label in a record such as
    # [1, 7, 54, "jump", "jump_03-02-12-34-01-795/00240.jpg"]
    label_index = 3
    known_classes = np.array(config.classes)

    # Get skeleton files
    skeleton_files = utils.get_files(source_folder, extensions='.txt')
    progress = tqdm(skeleton_loader(skeleton_files), total=len(skeleton_files))

    kept = []
    per_label = defaultdict(int)
    for people in progress:
        if not people:
            continue
        candidate = people[person_index]
        label = candidate[label_index]
        if label in known_classes:
            per_label[label] += 1
            kept.append(candidate)

    with open(target_file, 'w') as out:
        json.dump(kept, out)

    print(
        tabulate([list(per_label.values())], list(per_label.keys()), 'grid'))
    print(
        f'[INFO] Total numbers of combined skeletons: "{len(kept)}"')
data_loader = DataLoader(rgb_demo_dataset, batch_size=32, shuffle=True) pred_dir = os.path.join(args.demo_img_folder, "pred") transform_dir = os.path.join(args.demo_img_folder, "transform") create_directory(pred_dir) create_directory(transform_dir) for i, sample in enumerate(tqdm(data_loader)): image, target, names = sample['image'], sample['label'], sample['id'] imgs, imgs_np, masks, flow = inference(image, model) save_image(flow, os.path.join(pred_dir, "flow.png")) for i in range(len(imgs)): masks[i].save(os.path.join(pred_dir, names[i])) imgs[i].save(os.path.join(transform_dir, names[i])) # create the video # images = sorted(get_files(transform_dir)) images = sorted(get_files(transform_dir), key=lambda x: int(x.split(".")[0])) # images = sorted(get_files(transform_dir), key=lambda x: int(x.split(".")[0][14:])) print(images) fig = None with VideoWriter(pred_dir, name="test_imgs", fps=20) as video_writer: for img_name in images: img = np.array(Image.open(os.path.join(transform_dir, img_name))) pred = np.array(Image.open(os.path.join(pred_dir, img_name))) fig = vis_segmentation(img, pred, fig) data = fig2img(fig) video_writer.write(data) stream = None if args.demo_video_path is not None:
sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): logging.info(config.hdf5_creator_help_message) print(config.hdf5_creator_help_message) sys.exit() # Initialize main variables p_picks = [] s_picks = [] noise_picks = [] # Get P picks if config.p_picks_dir_per_event: files = utils.get_files(config.p_picks_path, 1, 1, r'\.P') else: files = utils.get_files(config.p_picks_path, 0, 0, r'\.P') for file_list in files: pick_list = [] skip = False index = -1 for file in file_list: index += 1 if index >= 3: break if config.slice_offset_start == 0 and config.slice_offset_end == 0: time_shift = 0 else:
def inference():
    """Run Mask R-CNN on every validation image and save visualisations.

    For each file returned by ``get_files(cfgs.rgb_val_dir)`` the following
    images are written to ``cfgs.val_result_dir`` (names are derived from
    the input name by replacing ``.png``):

    * ``*_rpn.png``  - the input with at most 100 RPN proposals drawn;
    * ``*_mask.png`` - the input with the kept instance masks and labels;
    * the input name itself - the input with labels and the outline of the
      minimum-area rectangle of each kept mask.

    Only detections whose class id is 1, 2 or 6 are kept. Images without
    any detection, or without a kept detection, get only the RPN image.
    Timing information is printed per image.
    """
    # summary and trained weights saved path
    model_dir = cfgs.log_dir
    # basic config
    rgb_val_dir = cfgs.rgb_val_dir
    inference_config = InferenceConfig()
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=model_dir)
    model_path = os.path.join(
        cfgs.log_dir,
        'multibox_roi_border20180809T0232/mask_rcnn_multibox_roi_border_0100.h5'
    )
    model.load_weights(model_path, by_name=True)

    save_dir = cfgs.val_result_dir
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # test
    files = get_files(rgb_val_dir)
    for file in files:
        print("---+" * 30)
        print('inference {}...'.format(file))
        image_path = os.path.join(rgb_val_dir, file)
        # img is the drawing canvas; original_image is what the model sees.
        img = cv2.imread(image_path)
        h, w = img.shape[0], img.shape[1]

        t0 = time.time()
        original_image = skimage.io.imread(image_path)
        results = model.detect([original_image], verbose=1)

        # Run RPN sub-graph
        rpn = model.run_graph(
            [original_image],
            [
                ("rpn_class", model.keras_model.get_layer("rpn_class").output),
                ("proposals", model.keras_model.get_layer("ROI").output)
            ])
        proposals = rpn['proposals']
        limits = min(100, len(proposals[0]))
        # Scaling by [h, w, h, w] turns the proposals into pixel
        # coordinates ordered (y1, x1, y2, x2).
        proposals = rpn['proposals'][0, :limits, :] * np.array([h, w, h, w])
        print('proposals shape:', proposals.shape)

        img_rpn = copy.deepcopy(img)
        colors_rpn = random_colors(limits)
        for proposal, color in zip(proposals, colors_rpn):
            scaled_color = tuple([v * 255 for v in color])
            # Unpack in (y, x, y, x) order to match the scaling above; the
            # previous x1, y1, x2, y2 unpacking drew transposed rectangles.
            y1, x1, y2, x2 = [int(val) for val in proposal]
            cv2.rectangle(img_rpn, (x1, y1), (x2, y2), scaled_color, 1)
        rpn_img_path = os.path.join(save_dir,
                                    file.replace(".png", "_rpn.png"))
        cv2.imwrite(rpn_img_path, img_rpn)

        # t1 is taken after the RPN image is written, so the printed
        # "detect time" also covers the RPN pass and its drawing.
        t1 = time.time()

        r = results[0]
        rois = r['rois']
        if len(rois) == 0:
            print('no any objects.')
            continue
        class_ids = r['class_ids']
        masks = r['masks']

        index = np.where((class_ids == 1.) | (class_ids == 2.) |
                         (class_ids == 6.))[0]
        if len(index) == 0:
            continue
        rois = rois[index, :]
        class_ids = class_ids[index]
        masks = masks[:, :, index]

        colors = random_colors(len(class_ids))
        img_mask = copy.deepcopy(img)
        for i in range(len(class_ids)):
            mask = masks[:, :, i]
            img_mask = apply_mask(img_mask, mask, colors[i])

            # np.where yields (rows, cols); swap to [x, y] point pairs.
            mask_val_index = np.where(mask == True)
            coords = list(
                map(lambda y, x: [x, y], mask_val_index[0],
                    mask_val_index[1]))
            try:
                box_list = get_min_area_rectangle(coords, mode=1)
            except Exception:
                # Best effort: report the mask's maximum and skip this
                # instance rather than aborting the whole image.
                print(np.max(mask))
                continue

            # Midpoint of the corners at positions 0 and 2 of the rectangle.
            label_anchor = (int(box_list[0][0] + box_list[2][0]) // 2,
                            int(box_list[0][1] + box_list[2][1]) // 2)
            label_text = label_name_dict[int(class_ids[i])]
            for canvas in (img, img_mask):
                cv2.putText(canvas,
                            label_text,
                            label_anchor,
                            fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                            fontScale=1,
                            color=(0, 0, 255),
                            thickness=1)

            # Outline the rectangle by joining consecutive corners.
            for ii in range(len(box_list)):
                x1 = int(box_list[ii][0])
                y1 = int(box_list[ii][1])
                x2 = int(box_list[(ii + 1) % 4][0])
                y2 = int(box_list[(ii + 1) % 4][1])
                cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 1)

        cv2.imwrite(os.path.join(save_dir, file.replace(".png", "_mask.png")),
                    img_mask)
        cv2.imwrite(os.path.join(save_dir, file), img)
        print('>>>> detect time:{}, post processing time:{}, total time:{}'.
              format(t1 - t0, time.time() - t1, time.time() - t0))
def predict_overlays_patient(patient_id, save_transparents=False):
    """
    Predict the left ventricle with neural network.

    Clears the patient's overlay folders, then runs ``LVSegmentation`` over
    the patient's prepared images in batches and writes one overlay image
    per source image, where pixels predicted as 1 become 255 and pixels
    predicted as 0 become 0. The batch size is the largest of 2, 3, 4, 5
    that divides the number of lines in ``pred.lst``, or 1 if none does.

    :param patient_id: patient id
    :param save_transparents: when true, also write a 4-channel overlay
        to the transparent overlay folder
    :return: nothing
    """
    segmenter = LVSegmentation()
    try:
        src_image_dir = utils.get_pred_patient_img_dir(patient_id)
        overlay_dir = utils.get_pred_patient_overlay_dir(patient_id)
        utils.delete_files(overlay_dir, "*.png")
        transparent_overlay_dir = \
            utils.get_pred_patient_transparent_overlay_dir(patient_id)
        utils.delete_files(transparent_overlay_dir, "*.png")

        # Count the listed images; the handle is closed deterministically.
        with open(src_image_dir + "pred.lst") as lst_file:
            num_lines = sum(1 for _ in lst_file)

        batch_size = 1
        for try_size in [2, 3, 4, 5]:
            if num_lines % try_size == 0:
                batch_size = try_size

        prefix = str(patient_id).rjust(4, '0')
        src_files = utils.get_files(src_image_dir, prefix + "*.png")

        for i in range(0, num_lines, batch_size):
            src_files_batch = src_files[i:i + batch_size]
            images = [cv2.imread(src_file, cv2.IMREAD_GRAYSCALE)
                      for src_file in src_files_batch]
            images = np.float32(np.array(images))

            # original_images is the same array object as images, so the
            # in-place normalisation below changes it too; every pixel with
            # prediction 0 or 1 is overwritten further down.
            original_images = images
            images -= np.mean(images, dtype=np.float32)
            images /= np.std(images, dtype=np.float32)
            images = np.reshape(images, (-1, 224, 224, 1))

            predictions = segmenter.predict(images)
            for j, prediction in enumerate(predictions):
                file_name = ntpath.basename(src_files_batch[j])
                image = original_images[j]
                image[prediction == 1] = 255
                image[prediction == 0] = 0
                cv2.imwrite(overlay_dir + file_name, image)

                if save_transparents:
                    channels = cv2.split(image)
                    empty = numpy.zeros(channels[0].shape,
                                        dtype=numpy.float32)
                    # Alpha is 75 where the overlay is set, 0 elsewhere.
                    alpha = channels[0].copy()
                    alpha[alpha == 255] = 75
                    channels = (channels[0], channels[0], empty, alpha)
                    transparent_overlay = cv2.merge(channels)
                    cv2.imwrite(transparent_overlay_dir + file_name,
                                transparent_overlay)
    finally:
        # Release the session even when a step above raises.
        segmenter.session.close()
def test_ignore_subdirectories(self, tmpdir, files):
    """``get_files`` returns only the created files even when the
    directory also contains sub-directories."""
    root = str(tmpdir)
    create_files(root, files)
    create_dirs(root, ['dir1', 'dir2'])

    listed = get_files(root)
    assert listed == files
def test_returns_correct_result(self, tmpdir, files):
    """``get_files`` returns exactly the files created in the directory,
    in the order given by the ``files`` fixture."""
    root = str(tmpdir)
    create_files(root, files)

    listed = get_files(root)
    assert listed == files
from frames.main import Main
from frames.login import Login
import wx
from utils.utils import get_storage_path, get_files, load_object
from client.dht.config import regions, category_names

# Application entry point: open the main window when a saved login exists
# in the storage folder, otherwise open the login window.
storage_path = get_storage_path()
app = wx.App()

has_saved_login = 'login.pkl' in get_files(storage_path)
if not has_saved_login:
    Login(None, -1, 'Connect', regions, category_names, storage_path)
else:
    login_data = load_object("%s/login.pkl" % storage_path)
    Main(None, -1, 'Raf Chef', login_data, category_names, storage_path)

# Blocks until the wx event loop ends.
app.MainLoop()
def get_real_token_span(directory):
    '''
    Tokenize every ``.txt`` file in a directory and locate each produced
    word in the raw text.

    The raw text is first split on single spaces to obtain "space tokens"
    with known character offsets. Each word returned by ``tokenize`` is then
    matched against the current space token: either it equals the token, or
    it is a piece of it, in which case its span is derived from the token's
    start or from the end of the previous piece.

    :param directory: path of raw text files
    :return: documents :: dictionary --> key: document name (as returned by
        ``get_filename``), value: list of sentences, each a list of
        ``{'word': XX, 'start': X, 'end': X, 'pos': XX}`` dictionaries
    :raises AssertionError: when an offset check fails, i.e. a recorded span
        does not reproduce the word from the raw text
    '''
    files = get_files(directory, ext='txt')
    documents = {}
    med_tagger = Med_Tagger()  # Starts a docker image in background
    file_counter = 0
    print('get_real_token_span::{} files to process'.format(len(files)))
    for file in files:
        # Progress marker: one dot every 100 files.
        file_counter += 1
        if file_counter % 100 == 0:
            print('.')
        with codecs.open(file, 'r', encoding='utf-8') as f:
            text = f.read()

        # Character offsets of the space-separated tokens of the raw text.
        tokens_space_offsets = []
        text_space_sp = text.split(' ')
        off_set = 0
        for token in text_space_sp:
            token_offset = {
                'token': token,
                'start': off_set,
                'end': off_set + len(token)
            }
            tokens_space_offsets.append(token_offset)
            off_set += len(token)
            off_set += 1  # the single space consumed by split(' ')

        # sanity check if captured token indexes are correct across all the tokens
        for t_offset in tokens_space_offsets:
            token = t_offset['token']
            off_set_st = t_offset['start']
            off_set_end = t_offset['end']
            assert token == text[off_set_st:off_set_end]

        # Index of the space token currently being consumed.
        tokens_space_offsets_ptr = 0
        # End offset of the last word piece taken from the current space
        # token; None while no piece of it has been consumed yet.
        last_inside_token_end_idx = None
        sentences_in_doc = []
        # NOTE(review): this second handle on the file is never closed
        # explicitly.
        for line in codecs.open(file, 'r', encoding='utf-8'):
            if line.strip():
                sentences_tokenized, sentences_pos = tokenize(line.strip(),
                                                              med_tagger,
                                                              return_pos=True)
                assert sentences_tokenized is not None and sentences_pos is not None
                for sentence_tokenized, sentence_pos in zip(
                        sentences_tokenized, sentences_pos):
                    words_in_sentence = []
                    # sentence_tokenized = web_tokenizer(sent)
                    for word, pos in zip(sentence_tokenized, sentence_pos):
                        token = tokens_space_offsets[tokens_space_offsets_ptr][
                            'token']
                        start = tokens_space_offsets[tokens_space_offsets_ptr][
                            'start']
                        end = tokens_space_offsets[tokens_space_offsets_ptr][
                            'end']
                        if word == token:
                            # The word is the whole space token.
                            tokens_space_offsets_ptr += 1
                            last_inside_token_end_idx = None
                        elif word in token:
                            # The word is only a piece of the space token.
                            if not last_inside_token_end_idx:
                                # remove \n from the start of text
                                # NOTE(review): the return value of this call
                                # is discarded - confirm whether it was meant
                                # to update start.
                                adjust_span(text, start, end, update_end=False)
                                end = start + len(word)
                            else:
                                # Continue right after the previous piece.
                                start = last_inside_token_end_idx
                                end = start + len(word)
                                # NOTE(review): the nesting of this call was
                                # reconstructed from collapsed source -
                                # confirm against the original file.
                                start, end = adjust_span(text, start, end)
                            start, end = adjust_span(text, start, end)
                            end_of_token_space = tokens_space_offsets[
                                tokens_space_offsets_ptr]['end']
                            # is it the end of current word, if yes increment the pointer
                            if end == end_of_token_space:
                                tokens_space_offsets_ptr += 1
                                last_inside_token_end_idx = None
                            else:
                                last_inside_token_end_idx = end
                                # part of ugly checks (pharmaco::->test)
                                # If only a '\x85' character remains in the
                                # space token, treat the token as consumed.
                                if text[end:end_of_token_space] == '\x85':
                                    tokens_space_offsets_ptr += 1
                                    last_inside_token_end_idx = None
                        # final sanity test
                        assert word == text[start: end], 'word={} != text={}\ntokens_space_offsets_ptr:{}' \
                                                         '\nsent_tokenized: {}\ndocument: {}'.\
                            format(word, text[start: end], tokens_space_offsets_ptr, sentence_tokenized,
                                   get_filename(file))
                        words_dictio = {
                            'word': word,
                            'start': start,
                            'end': end,
                            'pos': pos
                        }
                        words_in_sentence.append(words_dictio)
                    sentences_in_doc.append(words_in_sentence)
        f_name = get_filename(file)
        documents[f_name] = sentences_in_doc
    # clean-up
    del med_tagger
    return documents
def inference():
    """Evaluate the trained Mask R-CNN on the validation set.

    For every file returned by ``get_files(cfgs.rgb_val_dir)`` the ground
    truth mask is loaded, the model is run, ``compute_ap`` is called and a
    precision-recall plot is saved to ``cfgs.val_result_pr_dir``. Images
    with no detection are skipped, but their ground-truth instances still
    count towards recall. At the end the mean AP, the overall recall
    (``tps / num_gts``) and the overall precision (``tps / (tps + fps)``)
    are printed; a ratio whose denominator is zero is reported as ``nan``.
    """
    # summary and trained weights saved path
    model_dir = cfgs.log_dir
    # basic config
    rgb_val_dir = cfgs.rgb_val_dir
    mask_dir = cfgs.mask_dir
    yaml_dir = cfgs.yaml_dir
    inference_config = InferenceConfig()
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=model_dir)
    model_path = os.path.join(
        cfgs.log_dir,
        'multibox_roi20180808T0911/mask_rcnn_multibox_roi_0016.h5')
    model.load_weights(model_path, by_name=True)

    # The mask output folder is still created although mask plotting is
    # currently disabled.
    save_dir = cfgs.val_result_dir
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    save_PR_dir = cfgs.val_result_pr_dir
    if not os.path.exists(save_PR_dir):
        os.mkdir(save_PR_dir)

    # test
    files = get_files(rgb_val_dir)
    APs = []
    num_gts = 0
    tps = 0
    fps = 0
    for file in files:
        image_path = os.path.join(rgb_val_dir, file)
        mask_path = os.path.join(mask_dir, file)
        # NOTE(review): replaces every 'png' substring in the name, not
        # only the extension - confirm file names never contain it twice.
        yaml_path = os.path.join(yaml_dir, file.replace('png', 'yaml'))
        print("---+" * 30)
        print('inference {}...'.format(image_path))

        gt_mask, gt_class_id = load_mask(mask_path, yaml_path)
        num_gts += len(gt_class_id)
        gt_bbox = extract_bboxes(gt_mask)
        original_image = load_image(image_path)
        t1 = time.time()  # load gt

        results = model.detect([original_image], verbose=1)
        t2 = time.time()  # detect

        r = results[0]
        if len(r['class_ids']) == 0:
            continue

        # Draw precision-recall curve
        AP, precisions, recalls, _overlaps, tp, fp = compute_ap(
            gt_bbox, gt_class_id, gt_mask, r['rois'], r['class_ids'],
            r['scores'], r['masks'])
        tps += tp
        fps += fp
        APs.append(AP)
        plot_precision_recall(save_PR_dir, file, precisions, recalls, AP)
        t3 = time.time()  # plot PR

        # Mask plotting is disabled, so t4 - t3 is effectively zero; the
        # timestamp is kept so the printed report keeps its shape.
        t4 = time.time()  # plot mask
        print('>>>> detect time:%0.2f, plot PR time:%0.2f' %
              (t2 - t1, t3 - t2))
        print('>>>> plot mask time:%0.2f, total time:%0.2f' %
              (t4 - t3, time.time() - t1))

    # Guard the summary against an empty validation set or a run without
    # any detection, which used to end in a division by zero.
    undefined = float('nan')
    mean_ap = np.mean(APs) if APs else undefined
    recall = tps / num_gts if num_gts else undefined
    precision = tps / (tps + fps) if (tps + fps) else undefined
    print("mAP:{}".format(mean_ap))
    # tps / (tps + fps) is precision; it was previously labelled "mAP".
    print("recall:{}, precision:{}".format(recall, precision))
def parse(data_path,
          w_path,
          doc_token_span_w_path=None,
          ann_file_ext='ann',
          append_i_tag=True):
    """Convert annotated documents into a BIO-style training file.

    For every ``.txt`` file under ``data_path`` the annotation file with
    the same base name and extension ``ann_file_ext`` is parsed, every token
    of every sentence is tagged via ``is_token_an_entity`` and the parallel
    per-document / per-sentence lists of tokens, tags, POS tags,
    orthographic features, segment tags and file names are passed to
    ``write_bio``. The parent directory of ``w_path`` is created first.

    :param data_path: directory holding the ``.txt`` and annotation files.
    :param w_path: output path handed to ``write_bio``.
    :param doc_token_span_w_path: optional pickle cache for the token spans.
        If the file exists it is read; if it does not, the spans are
        computed and written there. When omitted the spans are computed and
        not cached (previously the default ``None`` was passed straight to
        ``file_exists`` and ``write_pickle``).
    :param ann_file_ext: extension of the annotation files.
    :param append_i_tag: when true, every tag other than ``'O'`` is
        prefixed with ``'I-'``.
    """
    create_directory(get_parent_directory(w_path))

    cache_path = doc_token_span_w_path
    if cache_path and file_exists(cache_path):
        # NOTE(review): read_pickle presumably unpickles the file; only
        # point this at cache files produced by this project.
        documents_spans = read_pickle(cache_path)
    else:
        if cache_path:
            print('{} not found, computing doc-level-span information dictionary'.
                  format(cache_path))
        documents_spans = get_real_token_span(data_path)
        if cache_path:
            # keep a copy of token spans to avoid re-computing it during
            # training etc.
            write_pickle(documents_spans, cache_path)
            print('{} created'.format(cache_path))

    txt_files = get_files(data_path, ext='txt')
    documents_tokens = []
    documents_tags = []
    documents_pos = []
    documents_ortho = []
    documents_segment = []
    documents_fname = []
    for txt_path in txt_files:
        document_tokens = []
        document_tags = []
        document_pos = []
        document_ortho = []
        document_segment = []
        document_fname = []

        att_path = join_path(
            data_path, '{}.{}'.format(get_filename(txt_path), ann_file_ext))
        entities_dict = parse_annotation_file(att_path)
        f_name = get_filename(txt_path)

        for sentence in documents_spans[f_name]:
            sentence_tokens = []
            sentence_tags = []
            sentence_pos = []
            sentence_ortho = []
            sentence_segment = []
            sentence_fname = []
            for word_dictio in sentence:
                _, tag = is_token_an_entity(word_dictio, entities_dict)
                if append_i_tag and tag != 'O':
                    tag = 'I-{}'.format(tag)
                # The segment tag only records whether the token belongs to
                # any entity at all.
                segment = 'O' if tag == 'O' else 'I-SEGMENT'

                sentence_tokens.append(word_dictio['word'])
                sentence_tags.append(tag)
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_segment.append(segment)
                sentence_fname.append(f_name)

            document_tokens.append(sentence_tokens)
            document_tags.append(sentence_tags)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_segment.append(sentence_segment)
            document_fname.append(sentence_fname)

        documents_tokens.append(document_tokens)
        documents_tags.append(document_tags)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_segment.append(document_segment)
        documents_fname.append(document_fname)

    write_bio(w_path, documents_tokens, documents_tags, documents_pos,
              documents_ortho, documents_segment, documents_fname)