def rename(_):
    proceed = input('Are you sure? (yes/no): ')
    if proceed.lower() != 'yes':
        print('Aborted...')
        return

    solution_path = os.path.join('solutions')
    test_path = os.path.join('tests', 'solutions')

    print('Reading...')
    solution_files = get_files(solution_path)
    print('   %d solutions' % len(solution_files))
    test_files = get_files(test_path)
    print('   %d tests' % len(test_files))

    print('Refactoring...')
    print('   solutions:', end=' ')
    skipped_solutions = refactor(solution_path, solution_files)
    print('DONE')

    print('   tests:', end=' ')
    skipped_tests = refactor(test_path, test_files)
    print('DONE')

    print('Skipped files...')
    for skipped_file in itertools.chain(skipped_solutions, skipped_tests):
        print('   %s' % skipped_file)
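Every example on this page leans on a project-specific get_files helper, and its signature varies from project to project (some take an extension filter, some a glob pattern, some extra flags). As a rough mental model only, here is a minimal sketch of the common "list file names in a directory" variant; the extensions parameter name and its default are assumptions, not any project's actual code:

import os

def get_files(directory, extensions=None):
    # List regular files (not subdirectories) in `directory`, sorted by name.
    names = sorted(f for f in os.listdir(directory)
                   if os.path.isfile(os.path.join(directory, f)))
    if extensions:
        # Accept a single extension string or a tuple/list of them.
        if isinstance(extensions, str):
            extensions = (extensions,)
        names = [f for f in names if f.endswith(tuple(extensions))]
    return names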
def split_train_val(root, train_percent=0.8):
    train_dir = osp.join(root, '..', 'train')
    val_dir = osp.join(root, '..', 'val')
    class_paths = [osp.join(root, c) for c in os.listdir(root)]

    mkdir_if_not_exist = lambda x: os.makedirs(x, exist_ok=True)
    random.seed(123)
    tq = tqdm(class_paths, total=len(class_paths), desc='Splitting Data')
    for cls_path in tq:
        class_name = os.path.basename(cls_path)
        tq.set_postfix(class_name=class_name)
        train_new_dir = osp.join(train_dir, class_name)
        val_new_dir = osp.join(val_dir, class_name)

        mkdir_if_not_exist(train_new_dir)
        mkdir_if_not_exist(val_new_dir)

        img_files = get_files(cls_path, extensions='.jpg')

        train_class_files = random.sample(img_files,
                                          int(len(img_files) * train_percent))

        for img_file in img_files:
            if img_file in train_class_files:
                shutil.copy(
                    img_file,
                    osp.join(train_new_dir, os.path.basename(img_file)))
            else:
                shutil.copy(img_file,
                            osp.join(val_new_dir, os.path.basename(img_file)))
        # tq.close()
    return train_dir, val_dir
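A hedged usage sketch for split_train_val above, assuming a root folder with one sub-folder of .jpg files per class (the data/all path is hypothetical):

train_dir, val_dir = split_train_val('data/all', train_percent=0.8)
# copies ~80% of each class's images to <root>/../train/<class>
# and the remainder to <root>/../val/<class>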
def prepare_patient_images(patient_id, intermediate_crop=0):
    """
    Prepare patient images. Create the patient folder. Crop the patient image if it's necessary 
    and save it in a new directory.
    :param patient_id: the patient id.
    :param intermediate_crop: optional parameter
    :return: nothing
    """

    file_lst = []
    prefix = str(patient_id).rjust(4, '0')
    src_files = utils.get_files(settings.BASE_PREPROCESSEDIMAGES_DIR,
                                prefix + "*.png")

    patient_dir = utils.get_pred_patient_dir(patient_id)
    utils.create_dir_if_not_exists(patient_dir)
    patient_img_dir = utils.get_pred_patient_img_dir(patient_id)
    utils.create_dir_if_not_exists(patient_img_dir)
    utils.delete_files(patient_img_dir, "*.png")

    dummy = numpy.zeros((settings.TARGET_SIZE, settings.TARGET_SIZE))
    cv2.imwrite(patient_img_dir + "dummy_overlay.png", dummy)

    for src_path in src_files:
        file_name = ntpath.basename(src_path)
        org_img = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        cropped_img = utils.prepare_cropped_sax_image(
            org_img, clahe=True, intermediate_crop=intermediate_crop, rotate=0)

        cv2.imwrite(patient_img_dir + file_name, cropped_img)
        file_lst.append([file_name, "dummy_overlay.png"])

    with open(patient_img_dir + "pred.lst", "w") as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerows(file_lst)
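The pred.lst written above is a tab-separated listing with one row per image. A minimal sketch of reading it back, mirroring the csv writer (the directory value is hypothetical):

import csv

patient_img_dir = '/data/pred/0001/img/'  # hypothetical
with open(patient_img_dir + 'pred.lst') as f:
    rows = list(csv.reader(f, delimiter='\t'))
# each row is [image_file, overlay_file], e.g. ['0001_slice.png', 'dummy_overlay.png']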
Example #4
def test_returns_correct_result(self, tmpdir, files):
    create_files(str(tmpdir), files)
    refactor(str(tmpdir), files)
    expected = ['s003', 's002', 's001']
    assert get_files(str(tmpdir)) == expected
    for filename in expected:
        with open(os.path.join(str(tmpdir), filename)) as file:
            assert file.read() == filename
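The tests on this page rely on create_files and create_dirs fixture helpers that are not shown. A plausible minimal sketch of the former, assuming each file is created containing its own name (an assumption, not the project's code):

import os

def create_files(directory, filenames):
    # Assumption: each test file contains its own (original) name.
    for name in filenames:
        with open(os.path.join(directory, name), 'w') as f:
            f.write(name)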
def get_wsl_cam(img_name, model='deepmil_multi'):
    wsl_dir = '/home/victor/PycharmProjects/active-learning-segmentation-baselines/wsl_cams'
    cams_dir = join(wsl_dir, '{}/npy/'.format(model))
    paths = get_paths(cams_dir, 'npy')
    file_names = get_files(cams_dir, 'npy')

    file_names = [f.replace('.npy', '') for f in file_names]
    wsl_cam_path = paths[file_names.index(img_name[0].replace('.bmp', ''))]

    return np.load(wsl_cam_path)
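A hedged usage sketch: img_name is indexed with [0] above, so it appears to arrive as a one-element sequence (e.g. a batch of file names); the file name below is hypothetical:

cam = get_wsl_cam(['case_017.bmp'], model='deepmil_multi')
print(cam.shape)  # CAM array loaded from the matching .npy file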
Example #6
def find_last_model():
    # find the latest model.
    log_dir = cfgs.log_dir
    files = get_files(log_dir)
    files = [file for file in files if file.endswith('.h5')]
    if len(files) == 0:
        return None
    files = sorted(files)
    file_path = os.path.join(log_dir, files[-1])
    return file_path
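sorted() here relies on checkpoint file names ordering lexicographically, which works when epoch numbers are zero-padded (e.g. ..._0016.h5 < ..._0100.h5). A sketch of a variant keyed on modification time instead, under the same directory-layout assumptions:

import os

def find_last_model_by_mtime(log_dir):
    # Pick the most recently written .h5 checkpoint, regardless of name.
    h5_files = [f for f in os.listdir(log_dir) if f.endswith('.h5')]
    if not h5_files:
        return None
    return max((os.path.join(log_dir, f) for f in h5_files),
               key=os.path.getmtime)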
Example #7
def parse_test(data_path, w_path, doc_w_path=None, doc_token_span_w_path=None):
    if doc_token_span_w_path and not file_exists(doc_token_span_w_path):
        print('{} not found, computing doc-level-span information dictionary'.
              format(doc_token_span_w_path))
        documents_spans = get_real_token_span(data_path)
        # keep a copy of the token spans to avoid re-computing them during training, etc.
        write_pickle(documents_spans, doc_token_span_w_path)
        print('{} created'.format(doc_token_span_w_path))
    elif doc_token_span_w_path:
        documents_spans = read_pickle(doc_token_span_w_path)
    else:
        # no cache path was given, so compute the spans directly
        documents_spans = get_real_token_span(data_path)
    txt_files = get_files(data_path, ext='txt')
    documents_tokens = []
    documents_pos = []
    documents_ortho = []
    documents_fname = []
    for txt_path in txt_files:
        document_tokens = []
        document_pos = []
        document_ortho = []
        document_fname = []
        f_name = get_filename(txt_path)
        sentences = documents_spans[f_name]
        for sentence in sentences:
            sentence_tokens = []
            sentence_pos = []
            sentence_ortho = []
            sentence_fname = []
            for word_dictio in sentence:
                sentence_tokens.append(word_dictio['word'])
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_fname.append(f_name)
            document_tokens.append(sentence_tokens)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_fname.append(sentence_fname)
        documents_tokens.append(document_tokens)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_fname.append(document_fname)
    write_bio_test(w_path,
                   documents_tokens,
                   documents_pos,
                   documents_ortho,
                   documents_fname,
                   sentence_level=True)
    if doc_w_path:
        write_bio_test(doc_w_path,
                       documents_tokens,
                       documents_pos,
                       documents_ortho,
                       documents_fname,
                       sentence_level=False)
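The parsers on this page all share one nesting convention: documents -> sentences -> tokens, with parallel lists for tokens, POS tags, orthographic features, and file names. A hypothetical instance (Spanish tokens chosen only because Med_Tagger further down is a Spanish medical tagger):

# documents_tokens[d][s][t] -> t-th token of sentence s in document d
documents_tokens = [
    [['El', 'paciente', 'recibió', 'ibuprofeno']],  # document 0, sentence 0
]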
Example #8
def main():
    # Settings
    cfg = Config(config_file='../configs/train_action_recogn_pipeline.yaml')
    cfg_state = cfg[os.path.basename(__file__)]

    ## IO folders
    get_path = lambda x: os.path.join(*x) if isinstance(x,
                                                        (list, tuple)) else x
    skeletons_folder = get_path(cfg_state.input.skeletons_folder)
    skeletons_txt = get_path(cfg_state.output.skeletons_txt)

    ## Config for training
    idx_person = 0  # Only use the skeleton of the 0th person in each image
    idx_label = 3  # [1, 7, 54, "jump", "jump_03-02-12-34-01-795/00240.jpg"]
    classes = np.array(cfg.classes)

    # Get skeleton files
    files = utils.get_files(skeletons_folder, extensions='.txt')
    data_loader = skeleton_loader(files)

    all_skeletons = []
    labels_cnt = defaultdict(int)
    tq = tqdm(data_loader, total=len(files))
    for skeletons in tq:
        if not skeletons:
            continue
        skeleton = skeletons[idx_person]
        label = skeleton[idx_label]
        if label not in classes:
            continue
        labels_cnt[label] += 1
        all_skeletons.append(skeleton)

    with open(skeletons_txt, 'w') as f:
        json.dump(all_skeletons, f)

    print(
        tabulate([list(labels_cnt.values())], list(labels_cnt.keys()), 'grid'))
    print(
        f'[INFO] Total numbers of combined skeletons: "{len(all_skeletons)}"')
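For reference, the tabulate call above renders the per-class counters as a single-row grid; a self-contained sketch with hypothetical counts:

from collections import defaultdict
from tabulate import tabulate

labels_cnt = defaultdict(int, {'jump': 240, 'walk': 300})  # hypothetical counts
print(tabulate([list(labels_cnt.values())], list(labels_cnt.keys()), 'grid'))
# prints something like:
# +--------+--------+
# |   jump |   walk |
# +========+========+
# |    240 |    300 |
# +--------+--------+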
Example #9
        data_loader = DataLoader(rgb_demo_dataset, batch_size=32, shuffle=True)
        pred_dir = os.path.join(args.demo_img_folder, "pred")
        transform_dir = os.path.join(args.demo_img_folder, "transform")
        create_directory(pred_dir)
        create_directory(transform_dir)
        for i, sample in enumerate(tqdm(data_loader)):
            image, target, names = sample['image'], sample['label'], sample['id']
            imgs, imgs_np, masks, flow = inference(image, model)
            save_image(flow, os.path.join(pred_dir, "flow.png"))
            for j in range(len(imgs)):
                masks[j].save(os.path.join(pred_dir, names[j]))
                imgs[j].save(os.path.join(transform_dir, names[j]))

        # create the video
        # images = sorted(get_files(transform_dir))
        images = sorted(get_files(transform_dir), key=lambda x: int(x.split(".")[0]))
        # images = sorted(get_files(transform_dir), key=lambda x: int(x.split(".")[0][14:]))
        print(images)
        fig = None
        with VideoWriter(pred_dir, name="test_imgs", fps=20) as video_writer:
            for img_name in images:
                img = np.array(Image.open(os.path.join(transform_dir, img_name)))
                pred = np.array(Image.open(os.path.join(pred_dir, img_name)))

                fig = vis_segmentation(img, pred, fig)
                data = fig2img(fig)
                video_writer.write(data)


    stream = None
    if args.demo_video_path is not None:
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            logging.info(config.hdf5_creator_help_message)
            print(config.hdf5_creator_help_message)
            sys.exit()

    # Initialize main variables
    p_picks = []
    s_picks = []
    noise_picks = []

    # Get P picks
    if config.p_picks_dir_per_event:
        files = utils.get_files(config.p_picks_path, 1, 1, r'\.P')
    else:
        files = utils.get_files(config.p_picks_path, 0, 0, r'\.P')

    for file_list in files:
        pick_list = []
        skip = False
        index = -1
        for file in file_list:
            index += 1
            if index >= 3:
                break

            if config.slice_offset_start == 0 and config.slice_offset_end == 0:
                time_shift = 0
            else:
Example #11
def inference():
    # directory where summaries and trained weights are saved
    MODEL_DIR = cfgs.log_dir
    class_names = list(label_name_dict.values())
    # basic config
    rgb_val_dir = cfgs.rgb_val_dir
    inference_config = InferenceConfig()
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=MODEL_DIR)
    model_path = os.path.join(
        cfgs.log_dir,
        'multibox_roi_border20180809T0232/mask_rcnn_multibox_roi_border_0100.h5'
    )
    model.load_weights(model_path, by_name=True)
    save_dir = cfgs.val_result_dir
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # test
    files = get_files(rgb_val_dir)
    for file in files:
        print("---+" * 30)
        print('inference {}...'.format(file))
        image_path = os.path.join(rgb_val_dir, file)
        img = cv2.imread(image_path)
        h, w = img.shape[0], img.shape[1]
        t0 = time.time()
        original_image = skimage.io.imread(image_path)

        results = model.detect([original_image], verbose=1)

        # Run RPN sub-graph
        # pillar = model.keras_model.get_layer("ROI").output  # node to start searching from
        rpn = model.run_graph(
            [original_image],
            [
                ("rpn_class", model.keras_model.get_layer("rpn_class").output),
                # ("pre_nms_anchors", model.ancestor(pillar, "ROI/pre_nms_anchors:0")),
                # ("refined_anchors", model.ancestor(pillar, "ROI/refined_anchors:0")),
                # ("refined_anchors_clipped", model.ancestor(pillar, "ROI/refined_anchors_clipped:0")),
                # ("post_nms_anchor_ix", model.ancestor(pillar, "ROI/rpn_non_max_suppression:0")),
                ("proposals", model.keras_model.get_layer("ROI").output)
            ])
        proposals = rpn['proposals']
        limits = min(100, len(proposals[0]))
        # limits = len(proposals[0])
        proposals = rpn['proposals'][0, :limits, :] * np.array([h, w, h, w])
        print('proposals shape:', proposals.shape)
        img_rpn = copy.deepcopy(img)
        colors_rpn = random_colors(limits)
        for i in range(len(proposals)):
            proposal = proposals[i]
            color = colors_rpn[i]
            color = tuple([v * 255 for v in color])
            x1, y1, x2, y2 = tuple([int(val) for val in proposal])
            cv2.rectangle(img_rpn, (x1, y1), (x2, y2), color, 1)
        rpn_img_path = os.path.join(save_dir, file.replace(".png", "_rpn.png"))
        cv2.imwrite(rpn_img_path, img_rpn)
        t1 = time.time()
        r = results[0]
        rois = r['rois']
        if len(rois) == 0:
            print('no objects detected.')
            continue
        class_ids = r['class_ids']
        masks = r['masks']
        #         index = np.where((class_ids==1.)|(class_ids==2.))[0]
        index = np.where((class_ids == 1.) | (class_ids == 2.)
                         | (class_ids == 6.))[0]
        if len(index) == 0:
            continue

        rois = rois[index, :]
        class_ids = class_ids[index]
        masks = masks[:, :, index]
        colors = random_colors(len(class_ids))
        img_mask = copy.deepcopy(img)
        for i in range(len(class_ids)):
            mask = masks[:, :, i]
            img_mask = apply_mask(img_mask, mask, colors[i])
            mask_val_index = np.where(mask == True)
            coords = list(
                map(lambda y, x: [x, y], mask_val_index[0], mask_val_index[1]))
            try:
                box_list = get_min_area_rectangle(coords, mode=1)
            except Exception:
                print(np.max(mask))
                continue
            left_bottom_coord = (int(box_list[0][0] + box_list[2][0]) // 2,
                                 int(box_list[0][1] + box_list[2][1]) // 2)
            cv2.putText(img,
                        label_name_dict[int(class_ids[i])],
                        left_bottom_coord,
                        fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        fontScale=1,
                        color=(0, 0, 255),
                        thickness=1)
            cv2.putText(img_mask,
                        label_name_dict[int(class_ids[i])],
                        left_bottom_coord,
                        fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL,
                        fontScale=1,
                        color=(0, 0, 255),
                        thickness=1)

            for ii in range(len(box_list)):
                x1 = int(box_list[ii][0])
                y1 = int(box_list[ii][1])
                x2 = int(box_list[(ii + 1) % 4][0])
                y2 = int(box_list[(ii + 1) % 4][1])
                cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 1)
        cv2.imwrite(os.path.join(save_dir, file.replace(".png", "_mask.png")),
                    img_mask)
        cv2.imwrite(os.path.join(save_dir, file.replace(".png", ".png")), img)
        print('>>>> detect time:{}, post processing time:{}, total time:{}'.
              format(t1 - t0,
                     time.time() - t1,
                     time.time() - t0))
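Both inference examples on this page depend on a random_colors helper. In Mask R-CNN codebases this is typically implemented by spacing hues evenly in HSV; a sketch under that assumption (not necessarily this project's exact code):

import colorsys
import random

def random_colors(n, bright=True):
    # Evenly spaced hues give visually distinct colors in [0, 1];
    # shuffle so adjacent instance indices don't get adjacent hues.
    brightness = 1.0 if bright else 0.7
    hsv = [(i / n, 1, brightness) for i in range(n)]
    colors = [colorsys.hsv_to_rgb(*c) for c in hsv]
    random.shuffle(colors)
    return colors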
def predict_overlays_patient(patient_id, save_transparents=False):
    """
    Predict the left ventricle with neural network.
    :param patient_id: patient id
    :param save_transparents: boolean value
    :return: nothing
    """

    segmenter = LVSegmentation()

    src_image_dir = utils.get_pred_patient_img_dir(patient_id)
    overlay_dir = utils.get_pred_patient_overlay_dir(patient_id)
    utils.delete_files(overlay_dir, "*.png")
    transparent_overlay_dir = utils.get_pred_patient_transparent_overlay_dir(
        patient_id)
    utils.delete_files(transparent_overlay_dir, "*.png")

    with open(src_image_dir + "pred.lst") as f:
        num_lines = sum(1 for _ in f)
    batch_size = 1

    for try_size in [2, 3, 4, 5]:
        if num_lines % try_size == 0:
            batch_size = try_size

    prefix = str(patient_id).rjust(4, '0')
    src_files = utils.get_files(src_image_dir, prefix + "*.png")

    for i in range(0, num_lines, batch_size):
        src_files_batch = src_files[i:i + batch_size]
        images = []

        for src_file in src_files_batch:
            image = cv2.imread(src_file, cv2.IMREAD_GRAYSCALE)
            images.append(image)

        images = np.array(images)
        images = np.float32(images)

        original_images = images

        images -= np.mean(images, dtype=np.float32)
        images /= np.std(images, dtype=np.float32)

        images = np.reshape(images, (-1, 224, 224, 1))

        predictions = segmenter.predict(images)

        for j in range(len(predictions)):
            file_name = ntpath.basename(src_files_batch[j])
            image = original_images[j]
            prediction = predictions[j]

            image[prediction == 1] = 255
            image[prediction == 0] = 0

            cv2.imwrite(overlay_dir + file_name, image)

            if save_transparents:
                channels = cv2.split(image)
                empty = numpy.zeros(channels[0].shape, dtype=numpy.float32)
                alpha = channels[0].copy()
                alpha[alpha == 255] = 75
                channels = (channels[0], channels[0], empty, alpha)

                transparent_overlay = cv2.merge(channels)
                cv2.imwrite(transparent_overlay_dir + file_name,
                            transparent_overlay)

    segmenter.session.close()
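The batch-size loop above keeps the largest value in 2..5 that evenly divides the number of images, falling back to 1. An equivalent one-liner, for clarity:

batch_size = max((s for s in (2, 3, 4, 5) if num_lines % s == 0), default=1)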
Example #13
def test_ignore_subdirectories(self, tmpdir, files):
    create_files(str(tmpdir), files)
    create_dirs(str(tmpdir), ['dir1', 'dir2'])
    assert get_files(str(tmpdir)) == files
Example #14
def test_returns_correct_result(self, tmpdir, files):
    create_files(str(tmpdir), files)
    assert get_files(str(tmpdir)) == files
Example #15
from frames.main import Main
from frames.login import Login
import wx
from utils.utils import get_storage_path, get_files, load_object
from client.dht.config import regions, category_names

storage_path = get_storage_path()

app = wx.App()

if 'login.pkl' in get_files(storage_path):
    login_data = load_object("%s/login.pkl" % storage_path)
    Main(None, -1, 'Raf Chef', login_data, category_names, storage_path)
else:
    Login(None, -1, 'Connect', regions, category_names, storage_path)

app.MainLoop()
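The login.pkl name suggests pickle-based persistence. A minimal sketch of what get_storage_path and load_object could look like under that assumption (the ~/.raf_chef location is hypothetical):

import os
import pickle

def get_storage_path():
    # Assumption: a per-user application data directory.
    path = os.path.join(os.path.expanduser('~'), '.raf_chef')
    os.makedirs(path, exist_ok=True)
    return path

def load_object(path):
    with open(path, 'rb') as f:
        return pickle.load(f)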
Example #16
def get_real_token_span(directory):
    '''
    :param directory: path of raw text files
    :return: documents :: dict --> key: document name,
             value: [[{'word': X, 'start': X, 'end': X, 'pos': X}]]
             (a list of sentences, each a list of token dicts)
    '''
    files = get_files(directory, ext='txt')
    documents = {}
    med_tagger = Med_Tagger()  # starts a Docker container in the background
    file_counter = 0
    print('get_real_token_span::{} files to process'.format(len(files)))
    for file in files:
        file_counter += 1
        if file_counter % 100 == 0:
            print('.', end='', flush=True)
        with codecs.open(file, 'r', encoding='utf-8') as f:
            text = f.read()
        tokens_space_offsets = []
        text_space_sp = text.split(' ')
        off_set = 0
        for token in text_space_sp:
            token_offset = {
                'token': token,
                'start': off_set,
                'end': off_set + len(token)
            }
            tokens_space_offsets.append(token_offset)
            off_set += len(token)
            off_set += 1
        # sanity-check that the captured token indices are correct across all tokens
        for t_offset in tokens_space_offsets:
            token = t_offset['token']
            off_set_st = t_offset['start']
            off_set_end = t_offset['end']
            assert token == text[off_set_st:off_set_end]
        tokens_space_offsets_ptr = 0
        last_inside_token_end_idx = None
        sentences_in_doc = []
        for line in codecs.open(file, 'r', encoding='utf-8'):
            if line.strip():
                sentences_tokenized, sentences_pos = tokenize(line.strip(),
                                                              med_tagger,
                                                              return_pos=True)
                assert sentences_tokenized is not None and sentences_pos is not None
                for sentence_tokenized, sentence_pos in zip(
                        sentences_tokenized, sentences_pos):
                    words_in_sentence = []
                    # sentence_tokenized = web_tokenizer(sent)
                    for word, pos in zip(sentence_tokenized, sentence_pos):
                        token = tokens_space_offsets[tokens_space_offsets_ptr][
                            'token']
                        start = tokens_space_offsets[tokens_space_offsets_ptr][
                            'start']
                        end = tokens_space_offsets[tokens_space_offsets_ptr][
                            'end']
                        if word == token:
                            tokens_space_offsets_ptr += 1
                            last_inside_token_end_idx = None
                        elif word in token:
                            if not last_inside_token_end_idx:
                                # remove \n from the start of the text
                                start, end = adjust_span(
                                    text, start, end, update_end=False)
                                end = start + len(word)
                            else:
                                start = last_inside_token_end_idx
                                end = start + len(word)
                                start, end = adjust_span(text, start, end)

                            start, end = adjust_span(text, start, end)
                            end_of_token_space = tokens_space_offsets[
                                tokens_space_offsets_ptr]['end']
                            # if this is the end of the current token, advance the pointer
                            if end == end_of_token_space:
                                tokens_space_offsets_ptr += 1
                                last_inside_token_end_idx = None
                            else:
                                last_inside_token_end_idx = end
                            # part of ugly checks (pharmaco::->test)
                            if text[end:end_of_token_space] == '\x85':
                                tokens_space_offsets_ptr += 1
                                last_inside_token_end_idx = None
                        # final sanity test
                        assert word == text[start: end], 'word={} != text={}\ntokens_space_offsets_ptr:{}' \
                                                         '\nsent_tokenized: {}\ndocument: {}'.\
                            format(word, text[start: end], tokens_space_offsets_ptr,
                                   sentence_tokenized, get_filename(file))
                        words_dictio = {
                            'word': word,
                            'start': start,
                            'end': end,
                            'pos': pos
                        }
                        words_in_sentence.append(words_dictio)

                    sentences_in_doc.append(words_in_sentence)

        f_name = get_filename(file)
        documents[f_name] = sentences_in_doc
    # clean-up
    del med_tagger
    return documents
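The whitespace-offset bookkeeping at the top of the function maintains the invariant token == text[start:end] for every space-separated token; a self-contained illustration of just that part:

text = 'El paciente recibió ibuprofeno'
off_set, spans = 0, []
for token in text.split(' '):
    spans.append({'token': token, 'start': off_set, 'end': off_set + len(token)})
    off_set += len(token) + 1  # +1 skips the separating space
assert all(t['token'] == text[t['start']:t['end']] for t in spans)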
Example #17
def inference():
    # directory where summaries and trained weights are saved
    MODEL_DIR = cfgs.log_dir
    class_names = list(label_name_dict.values())
    # basic config
    rgb_val_dir = cfgs.rgb_val_dir
    mask_dir = cfgs.mask_dir
    yaml_dir = cfgs.yaml_dir

    inference_config = InferenceConfig()
    model = modellib.MaskRCNN(mode="inference",
                              config=inference_config,
                              model_dir=MODEL_DIR)
    model_path = os.path.join(
        cfgs.log_dir,
        'multibox_roi20180808T0911/mask_rcnn_multibox_roi_0016.h5')
    model.load_weights(model_path, by_name=True)
    save_dir = cfgs.val_result_dir
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    save_PR_dir = cfgs.val_result_pr_dir
    if not os.path.exists(save_PR_dir):
        os.mkdir(save_PR_dir)
    # test
    files = get_files(rgb_val_dir)
    APs = []
    num_gts = 0
    tps = 0
    fps = 0
    for file in files:
        image_path = os.path.join(rgb_val_dir, file)
        mask_path = os.path.join(mask_dir, file)
        yaml_path = os.path.join(yaml_dir, file.replace('png', 'yaml'))
        print("---+" * 30)
        print('inference {}...'.format(image_path))
        gt_mask, gt_class_id = load_mask(mask_path, yaml_path)
        num_gts += len(gt_class_id)
        gt_bbox = extract_bboxes(gt_mask)
        original_image = load_image(image_path)
        t1 = time.time()  # load gt
        results = model.detect([original_image], verbose=1)
        t2 = time.time()  # detect
        r = results[0]
        if len(r['class_ids']) == 0:
            continue
        # Draw precision-recall curve
        AP, precisions, recalls, overlaps, tp, fp = compute_ap(
            gt_bbox, gt_class_id, gt_mask, r['rois'], r['class_ids'],
            r['scores'], r['masks'])
        tps += tp
        fps += fp
        APs.append(AP)
        plot_precision_recall(save_PR_dir, file, precisions, recalls, AP)
        t3 = time.time()  # plot PR
        # rois = r['rois']
        # if len(rois) == 0:
        #     print('no any objects.')
        #     continue
        # class_ids = r['class_ids']
        # masks = r['masks']
        # colors = random_colors(len(class_ids))
        # img_mask = original_image
        # for i in range(len(class_ids)):
        #     mask = masks[:, :, i]
        #     img_mask = apply_mask(img_mask, mask, colors[i])
        # cv2.imwrite(os.path.join(save_dir, image_name.replace(".png", "_mask.png")), img_mask)
        t4 = time.time()  # plot mask
        print('>>>> detect time:%0.2f, plot PR time:%0.2f' %
              (t2 - t1, t3 - t2))
        print('>>>> plot mask time:%0.2f, total time:%0.2f' %
              (t4 - t3, time.time() - t1))
    print("mAP:{}".format(np.mean(APs)))
    print("recall:{}, mAP:{}".format(tps / num_gts, tps / (tps + fps)))
Example #18
def parse(data_path,
          w_path,
          doc_token_span_w_path=None,
          ann_file_ext='ann',
          append_i_tag=True):
    create_directory(get_parent_directory(w_path))
    if doc_token_span_w_path and not file_exists(doc_token_span_w_path):
        print('{} not found, computing doc-level-span information dictionary'.
              format(doc_token_span_w_path))
        documents_spans = get_real_token_span(data_path)
        # keep a copy of the token spans to avoid re-computing them during training, etc.
        write_pickle(documents_spans, doc_token_span_w_path)
        print('{} created'.format(doc_token_span_w_path))
    elif doc_token_span_w_path:
        documents_spans = read_pickle(doc_token_span_w_path)
    else:
        # no cache path was given, so compute the spans directly
        documents_spans = get_real_token_span(data_path)
    txt_files = get_files(data_path, ext='txt')
    documents_tokens = []
    documents_tags = []
    documents_pos = []
    documents_ortho = []
    documents_segment = []
    documents_fname = []
    for txt_path in txt_files:
        document_tokens = []
        document_tags = []
        document_pos = []
        document_ortho = []
        document_segment = []
        document_fname = []
        att_path = join_path(
            data_path, '{}.{}'.format(get_filename(txt_path), ann_file_ext))
        entities_dict = parse_annotation_file(att_path)
        f_name = get_filename(txt_path)
        sentences = documents_spans[f_name]
        for sentence in sentences:
            sentence_tokens = []
            sentence_tags = []
            sentence_pos = []
            sentence_ortho = []
            sentence_segment = []
            sentence_fname = []
            for word_dictio in sentence:
                _, tag = is_token_an_entity(word_dictio, entities_dict)
                if append_i_tag:
                    if tag != 'O':
                        tag = 'I-{}'.format(tag)
                segment = 'O' if tag == 'O' else 'I-SEGMENT'
                sentence_tokens.append(word_dictio['word'])
                sentence_tags.append(tag)
                sentence_pos.append(word_dictio['pos'])
                sentence_ortho.append(get_ortho_feature(word_dictio['word']))
                sentence_segment.append(segment)
                sentence_fname.append(f_name)
            document_tokens.append(sentence_tokens)
            document_tags.append(sentence_tags)
            document_pos.append(sentence_pos)
            document_ortho.append(sentence_ortho)
            document_segment.append(sentence_segment)
            document_fname.append(sentence_fname)
        documents_tokens.append(document_tokens)
        documents_tags.append(document_tags)
        documents_pos.append(document_pos)
        documents_ortho.append(document_ortho)
        documents_segment.append(document_segment)
        documents_fname.append(document_fname)
    write_bio(w_path, documents_tokens, documents_tags, documents_pos,
              documents_ortho, documents_segment, documents_fname)
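get_ortho_feature is used by both parsers above; orthographic features conventionally map lowercase letters to x, uppercase to X, and digits to d. A sketch under that convention (an assumption about this helper, not the project's exact code):

def get_ortho_feature(word):
    # e.g. 'Ibuprofeno' -> 'Xxxxxxxxxx', 'B12' -> 'Xdd'
    return ''.join('X' if c.isupper() else
                   'x' if c.islower() else
                   'd' if c.isdigit() else c
                   for c in word)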