Code example #1
def move_to_corresponding_label_classification():
    dataset_dir = QA_TRAIN_TXT

    with open(dataset_dir) as f:
        lines = f.readlines()

    df = pd.DataFrame(columns=['image', 'modality', 'plane', 'organ'])
    for line in tqdm(lines):
        df = get_class_image_model(df, line)

    print(df)

    n_group = len(set(zip(df['plane'], df['organ'], df['modality'])))
    print(n_group)

    df['label'] = df.plane.map(str) + "_" + df.organ.map(str)

    for index, row in df.iterrows():
        image = row["image"]
        label = row["label"]
        label_dir = os.path.join(CLASSIFICATION_DIR, label)
        path_utils.make_dir(label_dir)
        in_path = os.path.join(PREPROCESSED_DIR, "train",
                               "{}.jpg".format(image))
        out_path = os.path.join(label_dir, "{}.jpg".format(image))
        shutil.copy(in_path, out_path)
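
These snippets lean on small project helpers (path_utils / paths_utils, io_utils) whose definitions are not shown. A minimal sketch of the three used most often, assuming their names describe their behavior; the actual implementations in vuhoangminh/vqa_medical may differ:

import os


def make_dir(path):
    # create the directory (and parents) if it does not exist yet
    os.makedirs(path, exist_ok=True)


def get_filename_without_extension(path):
    # "/a/b/synpic123.jpg" -> "synpic123"
    return os.path.splitext(os.path.basename(path))[0]


def read_file_to_list(path):
    # return the lines of a text file, newlines included (like readlines)
    with open(path) as f:
        return f.readlines()
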
Code example #2
def generate_images_with_black_patches(path, path_out):
    img_dirs = glob.glob(os.path.join(path, "*"))
    print_utils.print_list(img_dirs)
    paths_utils.make_dir(path_out)

    def patch_black(im_in, index_list):
        # np.array copies the PIL buffer, so the result is writable;
        # calling setflags(write=1) on an np.asarray view fails on recent NumPy
        I = np.array(im_in)
        x, y, dx, dy = index_list
        I[x:x + dx, y:y + dy, :] = 0  # zero out a dx-by-dy block at (x, y)
        return PIL.Image.fromarray(np.uint8(I))

    for path_img in img_dirs:
        im_in = Image.open(path_img)
        im_name = paths_utils.get_filename_without_extension(path_img)
        list_index_list = [
            [2 * 32, 2 * 32, 32, 32],
            [2 * 32, 5 * 32, 32, 32],
            [3 * 32, 3 * 32, 64, 64],
            [5 * 32, 2 * 32, 32, 32],
            [5 * 32, 5 * 32, 32, 32],
        ]
        for index_list in list_index_list:
            im_out = patch_black(im_in, index_list)
            im_name_out = "{}_b-{}-{}-{}-{}.jpg".format(
                im_name, str(index_list[0]), str(index_list[1]),
                str(index_list[2]), str(index_list[3]))
            im_name_out = os.path.join(path_out, im_name_out)
            im_out.save(im_name_out)
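
A self-contained check of the patching logic on a synthetic image (only NumPy and Pillow assumed; coordinates follow the [x, y, dx, dy] convention above, with x indexing rows):

import numpy as np
from PIL import Image

# white 256x256 RGB test image
im = Image.fromarray(np.full((256, 256, 3), 255, dtype=np.uint8))

arr = np.array(im)                     # np.array copies, so it is writable
arr[64:64 + 32, 64:64 + 32, :] = 0     # black 32x32 block at row 64, col 64
patched = Image.fromarray(arr)

assert np.array(patched)[64:96, 64:96].sum() == 0
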
Code example #3
def prepare_image_model(overwrite=False, P=0.7):
    for breast in LIST_BREAST_CLASS:
        folder_in = DATASETS_PHOTOS_DIR + breast
        img_dirs = glob.glob(os.path.join(folder_in, "*.tif"))

        num_train = int(round(len(img_dirs)*P))
        count_train = 0

        for index, path_in in enumerate(img_dirs):
            filename = path_utils.get_filename_without_extension(path_in)

            if count_train < num_train:
                folder_out = "{}train/{}".format(
                    PREPROCESSED_IMAGE_PHOTOS_DIR, breast)
                count_train += 1
            else:
                folder_out = "{}val/{}".format(
                    PREPROCESSED_IMAGE_PHOTOS_DIR, breast)

            path_utils.make_dir(folder_out)
            path_out = folder_out + "/{}.jpg".format(filename)

            if not os.path.exists(path_out) or overwrite:
                print(">> processing {}/{}".format(index+1, len(img_dirs)))
                normalize(path_in, path_out=path_out, is_debug=False,
                          is_save=True, is_resize=True)
            else:
                print("skip {}/{}".format(index+1, len(img_dirs)))
Code example #4
File: ensemble.py Project: vuhoangminh/vqa_medical
def main():
    dict_prob = {}
    df = pd.read_csv(PROCESSED_QA_PER_QUESTION_PATH)
    # for key, value in DICT_SCORE.items():
    #     prob, val_loader = compute_prob_one_model(model_name=key)
    #     dict_prob[key] = prob.detach()

    # for method in ["avg", "weighted"]:
    #     for ensem in ["best", "globalbilinear", "skipthoughts", "bert3072", "bert768", "all"]:
    #         sub_path = "{}ensemble/valid/{}_{}.json".format(
    #             SUB_DIR, ensem, method)
    #         sub = DICT_METHOD[ensem]
    #         acc, bleu = ensemble(dict_prob, val_loader,
    #                              sub, sub_path, method=method,
    #                              vqa_trainsplit="train")

    #         print(method, ensem, acc, bleu)

    for key, value in DICT_SCORE.items():
        prob, test_loader = compute_prob_one_model(
            model_name=key.replace("_train_", "_trainval_"),
            vqa_trainsplit="trainval")
        dict_prob[key] = prob.detach()
        # del prob, test_loader

    for method in ["avg", "weighted"]:
        for ensem in ["globalbilinear", "skipthoughts"]:
            sub_path = "{}ensemble/test/{}_{}.txt".format(
                SUB_DIR, ensem, method)
            path_utils.make_dir("{}ensemble/test".format(SUB_DIR))
            sub = DICT_METHOD[ensem]
            results = ensemble(dict_prob,
                               test_loader,
                               sub,
                               sub_path,
                               method=method,
                               vqa_trainsplit="trainval")

            with open(TEST_DIR, encoding='UTF-8') as f:
                lines = f.readlines()

            # write one "file_id|answer" line per test image
            with open(sub_path, 'w') as f:
                for count, line in enumerate(lines):
                    file_id = line.rstrip('\n')
                    row_info = get_info(df, file_id + ".jpg")
                    question_id = row_info["question_id"]  # looked up but not written
                    f.write('{}|{}\n'.format(
                        file_id, results[count]["answer"]))
Code example #5
def move_files(P=0.7):
    classification_dict_tool = {k: [] for k in list_tool}
    segmentation_dict_tool = {k: [] for k in ["train", "val"]}
    
    # read files
    for tool in classification_dict_tool.keys():
        path_write = "{}image_{}.txt".format(IMAGE_SET_DIR, tool)
        temp = io_utils.read_file_to_list(path_write)
        classification_dict_tool[tool] = [line.strip('\n') for line in temp]
        
    for dataset in segmentation_dict_tool.keys():
        path_write = "{}{}.txt".format(IMAGE_SET_DIR, dataset)
        temp = io_utils.read_file_to_list(path_write)
        segmentation_dict_tool[dataset] = [line.strip('\n') for line in temp]
    
    # move images
    for tool in classification_dict_tool.keys():
        filenames = classification_dict_tool[tool]

        num_train = int(round(len(filenames)*P))
        count_train = 0

        for filename in filenames:
            path_in = "{}{}.jpg".format(PREPROCESSED_IMAGE_DIR, filename)

            if count_train < num_train:
                dir_out = "{}train/{}/".format(CLASSIFICATION_IMAGE_DIR, tool)
                count_train += 1
            else:
                dir_out = "{}val/{}/".format(CLASSIFICATION_IMAGE_DIR, tool)

            path_utils.make_dir(dir_out)
            path_out = "{}{}.jpg".format(dir_out, filename)
            shutil.copyfile(path_in, path_out)

    for dataset in segmentation_dict_tool.keys():
        filenames = segmentation_dict_tool[dataset]
        dir_out = "{}raw/{}/".format(SEGMENTATION_IMAGE_DIR, dataset)
        path_utils.make_dir(dir_out)
        for filename in filenames:
            path_in = "{}{}.jpg".format(PREPROCESSED_IMAGE_DIR, filename)
            path_out = "{}{}.jpg".format(dir_out, filename)
            shutil.copyfile(path_in, path_out)
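
The read-a-txt-into-a-stripped-list block above recurs verbatim in code examples #8 and #16; a small helper would remove the duplication (a sketch, not part of the project):

def read_image_set(path):
    # read one image-set .txt file and strip trailing newlines
    with open(path) as f:
        return [line.rstrip("\n") for line in f]
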
Code example #6
def prepare_image_model(overwrite=False, is_debug=False):
    folder_in = DATASETS_PHOTOS_DIR
    folder_out = PREPROCESSED_IMAGE_DIR
    path_utils.make_dir(folder_out)
    img_dirs = glob.glob(os.path.join(folder_in, "*.jpg"))
    for index, path_in in enumerate(img_dirs):
        filename = path_utils.get_filename_without_extension(path_in)
        path_out = folder_out + "/{}.jpg".format(filename)
        if not os.path.exists(path_out) or overwrite:
            print(">> processing {}/{}".format(index+1, len(img_dirs)))
            normalize(path_in, path_out=path_out, is_debug=is_debug,
                      is_save=True, is_resize=True, is_normalize=False)
        else:
            print("skip {}/{}".format(index+1, len(img_dirs)))
Code example #7
def main(dataset="breast"):
    if dataset == "breast":
        dataset_dict = BREAST
    elif dataset == "idrid":
        dataset_dict = IDRID
    elif dataset == "tools":
        dataset_dict = TOOLS
    elif dataset == "vqa":
        dataset_dict = VQA

    src_dir = "temp"
    dst_dir = "temp/sup"
    paths_utils.make_dir(dst_dir)
    wrt_dir = "figures/sup"

    tex_lines = []  # renamed from "str", which shadowed the builtin
    for key, value in dataset_dict.items():
        img_name = key
        question_str = value.replace(' ', '-')
        noatt_name = "{}_noatt_question_{}.jpg".format(img_name, question_str)
        att_name = "{}_att_question_{}.jpg".format(img_name, question_str)
        cnn_name = "{}_cnn.jpg".format(img_name)
        in_name = "{}_in.jpg".format(img_name)
        occ_name = "{}_{}_w_{:0}_s_{:0}_color.jpg".format(
            img_name, value.replace(' ', '_'), 32, 2)

        tex_lines.append("Question: {} - Answer: {}\n\n".format(value, " "))

        for name in [in_name, cnn_name, att_name, noatt_name, occ_name]:
            path = find_in_dir(name, src_dir)
            try:
                shutil.copy(path, os.path.join(dst_dir, name))
                tex_lines.append(
                    "\\includegraphics[width=\\subfigsize]{figures/sup/" + name + "}\n")
            except (OSError, TypeError):
                # fall back to the input image if the figure is missing
                tex_lines.append(
                    "\\includegraphics[width=\\subfigsize]{figures/sup/" + in_name + "}\n")
        tex_lines.append("\n")

    # "a+" appends, so repeated runs accumulate in the same file
    with open("temp/myfile.txt", "a+") as f:
        f.writelines(tex_lines)
Code example #8
def move_files():
    # read files
    segmentation_dict_tool = {k: [] for k in ["train", "val"]}
    for dataset in segmentation_dict_tool.keys():
        path_write = "{}{}.txt".format(IMAGE_SET_DIR, dataset)
        temp = io_utils.read_file_to_list(path_write)
        segmentation_dict_tool[dataset] = [line.strip('\n') for line in temp]

    for dataset in segmentation_dict_tool.keys():
        filenames = segmentation_dict_tool[dataset]
        dir_out = "{}{}/".format(PREPROCESSED_IMAGE_WSI_RAW_DIR, dataset)
        path_utils.make_dir(dir_out)
        for filename in filenames:
            path_in = "{}{}.png".format(
                PREPROCESSED_IMAGE_WSI_PATCH_DIR, filename)
            path_out = "{}{}.png".format(dir_out, filename)
            shutil.copyfile(path_in, path_out)
Code example #9
def main(overwrite=False):
    if overwrite and os.path.exists(PREPROCESSED_IMAGE_DIR):
        path_utils.delete_dir(PREPROCESSED_IMAGE_DIR)
    if overwrite and os.path.exists(CLASSIFICATION_IMAGE_DIR):
        path_utils.delete_dir(CLASSIFICATION_IMAGE_DIR)
    if overwrite and os.path.exists(SEGMENTATION_IMAGE_DIR):
        path_utils.delete_dir(SEGMENTATION_IMAGE_DIR)

    path_utils.make_dir(PREPROCESSED_IMAGE_DIR)
    path_utils.make_dir(CLASSIFICATION_IMAGE_DIR)
    path_utils.make_dir(SEGMENTATION_IMAGE_DIR)
    path_utils.make_dir(IMAGE_SET_DIR)

    print_utils.print_section("image model")
    # prepare_image_model(overwrite=False, is_debug=False)
    print_utils.print_section("split images")
    # split_images_for_image_model_and_vqa()
    print_utils.print_section("move files")
    move_files()
Code example #10
def process_occlusion(path, dataset="breast"):
    # global args
    args = parser.parse_args()

    LIST_QUESTION_BREAST = [
        "how many classes are there",
        "is normal larger than benign",
    ]

    LIST_QUESTION_TOOLS = [
        "is grasper in 0_0_32_32 location",
        # "which tool has pointed tip on the left of the image",
        # "how many tools are there",
    ]

    LIST_QUESTION_IDRID = [
        "is there haemorrhages in the fundus",
        "is there soft exudates in the fundus",
    ]

    if dataset == "breast":
        list_question = LIST_QUESTION_BREAST
    elif dataset == "tools":
        list_question = LIST_QUESTION_TOOLS
    elif dataset == "idrid":
        list_question = LIST_QUESTION_IDRID

    img_dirs = glob.glob(os.path.join(path, "*"))

    args = update_args(
        args, vqa_model="minhmul_att_train_2048", dataset=dataset)

    for question_str in list_question:
        for path_img in img_dirs:
            print(
                "\n\n=========================================================================")
            print("{} - {}".format(question_str, path_img))
            ans_gt = get_answer(dataset, path_img, question_str)

            if ans_gt is None:
                continue
            else:
                input_size = 256
                step = 2
                windows_size = 32

                dst_dir = "temp/occlusion"
                paths_utils.make_dir(dst_dir)
                out_color_path = "{}/{}_{}_w_{:0}_s_{:0}_color.jpg".format(dst_dir,
                                                                           paths_utils.get_filename_without_extension(
                                                                               path_img),
                                                                           question_str.replace(
                                                                               ' ', '_'),
                                                                           windows_size,
                                                                           step
                                                                           )
                out_gray_path = "{}/{}_{}_w_{:0}_s_{:0}_gray.jpg".format(dst_dir,
                                                                         paths_utils.get_filename_without_extension(
                                                                             path_img),
                                                                         question_str.replace(
                                                                             ' ', '_'),
                                                                         windows_size,
                                                                         step
                                                                         )
                out_avg_path = "{}/{}_{}_w_{:0}_s_{:0}_avg.jpg".format(dst_dir,
                                                                       paths_utils.get_filename_without_extension(
                                                                           path_img),
                                                                       question_str.replace(
                                                                           ' ', '_'),
                                                                       windows_size,
                                                                       step
                                                                       )

                if not os.path.exists(out_color_path):

                    visual_PIL = Image.open(path_img)
                    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
                    visual_PIL = visual_PIL.resize(
                        (input_size, input_size), Image.LANCZOS)
                    # top-left corners of all sliding windows
                    # (np.int was removed in NumPy 1.24; plain int is equivalent)
                    indices = np.asarray(
                        np.mgrid[0:input_size - windows_size + 1:step,
                                 0:input_size - windows_size + 1:step].reshape(2, -1).T,
                        dtype=int)

                    cnn, model, trainset = initialize(args, dataset=dataset)

                    image_occlusion = np.zeros((input_size, input_size))
                    image_occlusion_times = np.zeros((input_size, input_size))

                    ans_without_black_patch = process_one_batch_of_occlusion(args,
                                                                             cnn,
                                                                             model,
                                                                             trainset,
                                                                             visual_PIL,
                                                                             question_str,
                                                                             box=None,
                                                                             dataset=dataset)
                    score_without_black_patch = ans_without_black_patch.get("val")[
                        ans_without_black_patch.get("ans").index(ans_gt)]

                    for i in range(indices.shape[0]):
                        print_utils.print_tqdm(i, indices.shape[0])
                        box = [indices[i][0], indices[i][0]+windows_size -
                               1, indices[i][1], indices[i][1]+windows_size-1]
                        # print(box)
                        ans = process_one_batch_of_occlusion(args,
                                                             cnn,
                                                             model,
                                                             trainset,
                                                             visual_PIL,
                                                             question_str,
                                                             box,
                                                             dataset=dataset,
                                                             is_print=False)
                        try:
                            score = ans.get("val")[
                                ans.get("ans").index(ans_gt)]
                        except (ValueError, AttributeError):
                            # ground-truth answer missing from the model output
                            score = 0

                        if score != 0:
                            score_occ = (
                                score_without_black_patch.item()-score.item())/score_without_black_patch.item()
                            image_occlusion[box[0]:box[1],
                                            box[2]:box[3]] += score_occ
                            image_occlusion_times[box[0]:box[1],
                                                  box[2]:box[3]] += 1

                    save_image(visual_PIL, image_occlusion, image_occlusion_times,
                               out_color_path, out_gray_path, out_avg_path,
                               is_show=True)
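
For each window the loop accumulates the relative score drop (s0 - s) / s0 of the ground-truth answer, where s0 is the score on the clean image and s the score with the window blacked out; dividing the accumulated map by the per-pixel hit count gives the average relevance that save_image presumably renders. A toy numeric sketch of that aggregation (independent of the project code):

import numpy as np

size, win, step = 8, 4, 2
heat = np.zeros((size, size))
hits = np.zeros((size, size))

s0 = 0.9                                     # score on the clean image
for x in range(0, size - win + 1, step):
    for y in range(0, size - win + 1, step):
        s = 0.8 if (x, y) == (2, 2) else s0  # pretend one window matters
        heat[x:x + win, y:y + win] += (s0 - s) / s0
        hits[x:x + win, y:y + win] += 1

avg_relevance = heat / np.maximum(hits, 1)   # average drop per pixel
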
Code example #11
def get_gradcam_from_image_model(path_img, cnn, dataset, finalconv_name="layer4"):

    cnn.eval()

    # hook the feature extractor
    features_blobs = []

    def hook_feature(module, input, output):
        features_blobs.append(output.data.cpu().numpy())

    cnn._modules.get(finalconv_name).register_forward_hook(hook_feature)

    # get the softmax weight
    params = list(cnn.parameters())
    weight_softmax = np.squeeze(params[-2].data.cpu().numpy())

    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize
    ])

    img_name = paths_utils.get_filename_without_extension(path_img)
    img_pil = Image.open(path_img)

    img_tensor = preprocess(img_pil)
    img_variable = Variable(img_tensor.unsqueeze(0))
    # "async" became a reserved word in Python 3.7; PyTorch renamed the
    # argument to non_blocking
    img_variable = img_variable.cuda(non_blocking=True)
    logit = cnn(img_variable)

    paths_utils.make_dir("temp/gradcam/{}/".format(dataset))
    in_path = "temp/gradcam/{}/{}_in.jpg".format(dataset, img_name)

    # img_pil.thumbnail((256, 256), Image.ANTIALIAS)
    img_pil = img_pil.resize((256, 256), resample=PIL.Image.NEAREST)
    img_pil.save(in_path)

    # class labels of the breast-cancer image model
    classes = {
        0: "Benign",
        1: "InSitu",
        2: "Invasive",
        3: "Normal"
    }

    h_x = F.softmax(logit, dim=1).data.squeeze()
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()

    # generate class activation mapping for the top1 prediction
    cam = get_gadcam_image(features_blobs[0], weight_softmax, [idx[0]])

    img = cv2.imread(in_path)

    result = show_cam_on_image(img, cam)

    out_path = "temp/gradcam/{}/{}_cnn.jpg".format(dataset,
                                                   img_name)

    cv2.imwrite(out_path, result)

    return result, out_path, features_blobs
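
get_gadcam_image is not shown; the standard CAM computation it presumably performs weights the final conv feature maps by the classifier weights of the target class, normalizes, and upsamples to the display size. A minimal sketch under that assumption:

import cv2
import numpy as np


def cam_from_features(feature_conv, weight_softmax, class_idx, size=(256, 256)):
    # feature_conv: (1, C, H, W) activations from the last conv layer
    _, nc, h, w = feature_conv.shape
    cam = weight_softmax[class_idx].dot(feature_conv.reshape(nc, h * w))
    cam = cam.reshape(h, w)
    cam = cam - cam.min()
    cam = cam / (cam.max() + 1e-12)           # scale to [0, 1]
    return cv2.resize(np.uint8(255 * cam), size)
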
Code example #12
def get_gradcam_from_vqa_model(visual_features,
                               question_features,
                               features_blobs_visual,
                               ans,
                               path_img,
                               cnn,
                               model,
                               question_str,
                               dataset,
                               vqa_model="minhmul_noatt_train_2048",
                               finalconv_name="linear_classif",
                               is_show_image=False):

    model.eval()

    # hook the feature extractor
    features_blobs = []

    def hook_feature(module, input, output):
        features_blobs.append(output.data.cpu().numpy())

    model._modules.get(finalconv_name).register_forward_hook(hook_feature)

    # get the classifier weights
    params = list(model.parameters())

    if "noatt" in vqa_model:
        classif_w_params = np.squeeze(params[10].data.cpu().numpy())
        classif_b_params = np.squeeze(params[11].data.cpu().numpy())
    else:
        classif_w_params = np.squeeze(params[26].data.cpu().numpy())
        # average the four 2048-wide chunks of the attention classifier weights
        # (parenthesized so the whole sum is divided by 4, not just the last chunk)
        classif_w_params = (classif_w_params[:, 0:2048]
                            + classif_w_params[:, 2048:2048 * 2]
                            + classif_w_params[:, 2048 * 2:2048 * 3]
                            + classif_w_params[:, 2048 * 3:2048 * 4]) / 4
        classif_b_params = np.squeeze(params[27].data.cpu().numpy())

    logit = model(visual_features, question_features)

    h_x = F.softmax(logit, dim=1).data.squeeze()
    probs, idx = h_x.sort(0, True)
    probs = probs.cpu().numpy()
    idx = idx.cpu().numpy()

    cam = get_gadcam_vqa(features_blobs_visual[0],
                         classif_w_params, classif_b_params, [idx[0]])

    # render the CAM and output
    # print('output CAM.jpg for the top1 prediction: %s' % ans["ans"][idx[0]])

    img_name = paths_utils.get_filename_without_extension(path_img)

    in_path = "temp/gradcam/{}/{}_in.jpg".format(dataset, img_name)

    img = cv2.imread(in_path)

    result = show_cam_on_image(img, cam)

    question_str = question_str.replace(' ', '-')

    paths_utils.make_dir("temp/gradcam/{}/".format(dataset))
    if "noatt" in vqa_model:
        out_path = "temp/gradcam/{}/{}_noatt_question_{}.jpg".format(dataset,
                                                                     img_name,
                                                                     question_str)
    else:
        out_path = "temp/gradcam/{}/{}_att_question_{}.jpg".format(dataset,
                                                                   img_name,
                                                                   question_str)

    cv2.imwrite(out_path, result)

    im_out = Image.open(out_path)

    if is_show_image:
        im_out.show()

    return logit
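
The four-chunk average above can be sanity-checked with a reshape, assuming the attention classifier's weight width is exactly 4 x 2048:

import numpy as np

w = np.random.rand(10, 4 * 2048)             # stand-in classifier weights
avg_loop = (w[:, 0:2048] + w[:, 2048:4096]
            + w[:, 4096:6144] + w[:, 6144:8192]) / 4
avg_reshape = w.reshape(10, 4, 2048).mean(axis=1)
assert np.allclose(avg_loop, avg_reshape)
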
Code example #13
QA_VALID_TXT = PROJECT_DIR + \
    "/data/raw/vqa_med/ImageClef-2019-VQA-Med-Validation/All_QA_Pairs_val.txt"
QA_TEST_TXT = PROJECT_DIR + \
    "/data/raw/vqa_med/VQAMed2019Test/VQAMed2019_Test_Questions.txt"
DATASETS_TRAIN_TXT = PROJECT_DIR + \
    "/data/raw/vqa_med/ImageClef-2019-VQA-Med-Training/All_QA_Pairs_train.txt"
RAW_DIR = PROJECT_DIR + "/data/vqa_med/raw/raw/"
PROCESSED_QA_PER_QUESTION_PATH = RAW_DIR + "med_qa_per_question.csv"
PROCESSED_QA_PER_QUESTION_AUGMENT_PATH = RAW_DIR + \
    "med_qa_per_question_augment.csv"

PREPROCESSED_DIR = PROJECT_DIR + \
    "/data/raw/vqa_med/preprocessed/raw"

IMAGEID_PATH = RAW_DIR + "image_id.csv"
path_utils.make_dir(RAW_DIR)


def generate_image_id(is_augment=False):
    if is_augment:
        img_train_paths = glob.glob(
            os.path.join(PREPROCESSED_DIR, "train_augment", "*.jpg"))
        img_val_paths = glob.glob(
            os.path.join(PREPROCESSED_DIR, "val_augment", "*.jpg"))
        img_test_paths = glob.glob(
            os.path.join(PREPROCESSED_DIR, "test_augment", "*.jpg"))
    else:
        img_train_paths = glob.glob(os.path.join(DATASETS_TRAIN_DIR, "*.jpg"))
        img_val_paths = glob.glob(os.path.join(DATASETS_VALID_DIR, "*.jpg"))
        img_test_paths = glob.glob(os.path.join(DATASETS_TEST_DIR, "*.jpg"))
Code example #14
# NOTE: imsave, imresize and toimage were deprecated in SciPy 1.0 and removed
# in 1.2/1.3, so these imports require an old SciPy (see the sketch below)
from scipy.misc import imsave, imresize
from scipy.misc import toimage
import os
OPENSLIDE_PATH = "C:/Users/minhm/Documents/GitHub/bin/openslide-win64-20171122/bin"
if os.path.exists(OPENSLIDE_PATH):
    os.environ['PATH'] = OPENSLIDE_PATH + ";" + os.environ['PATH']
import openslide

CURRENT_WORKING_DIR = os.path.realpath(__file__)
PROJECT_DIR = path_utils.get_project_dir(CURRENT_WORKING_DIR, "vqa_idrid")
DATASETS_PHOTOS_DIR = PROJECT_DIR + \
    "/data/raw/m2cai16-tool-locations/JPEGImages/"
PREPROCESSED_IMAGE_DIR = PROJECT_DIR + \
    "/data/raw/m2cai16-tool-locations/preprocessed/full/"

path_utils.make_dir(PREPROCESSED_IMAGE_DIR)


def normalize(path_in,
              path_out=None,
              is_debug=False,
              is_save=False,
              is_resize=True,
              is_normalize=True):
    im = Image.open(path_in)
    imarray = np.array(im)
    if is_normalize:
        im_norm = normalization_utils.normalize_staining(imarray)
    else:
        im_norm = imarray
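
Since imsave, imresize, and toimage are gone from current SciPy, the snippet above only runs on SciPy < 1.2. The usual drop-in replacements are imageio for saving and Pillow for resizing (a sketch; imageio and Pillow assumed installed):

import imageio.v2 as imageio
import numpy as np
from PIL import Image

arr = np.zeros((64, 64, 3), dtype=np.uint8)

imageio.imwrite("out.jpg", arr)                              # was: imsave("out.jpg", arr)
resized = np.array(Image.fromarray(arr).resize((128, 128)))  # was: imresize(arr, (128, 128))
im = Image.fromarray(arr)                                    # was: toimage(arr)
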
Code example #15
def process_occlusion(path, dataset="breast"):
    # global args
    args = parser.parse_args()

    LIST_QUESTION_BREAST = [
        "how many classes are there",
        "is there any benign class in the image",
        "is there any in situ class in the image",
        "is there any invasive class in the image",
        "what is the major class in the image",
        "what is the minor class in the image",
        "is benign in 64_64_32_32 location",
        "is invasive in 96_96_32_32 location",
    ]

    LIST_QUESTION_TOOLS = [
        "how many tools are there",
        "is scissors in 64_32_32_32 location",
        "is irrigator in 64_96_32_32 location",
        "is grasper in 64_96_32_32 location"
        "is bipolar in 64_96_32_32 location"
        "is hook in 64_96_32_32 location"
        "is clipper in 64_96_32_32 location"
        "is specimenbag in 64_96_32_32 location"
        "is there any grasper in the image",
        "is there any bipolar in the image",
        "is there any hook in the image",
        "is there any scissors in the image",
        "is there any clipper in the image",
        "is there any irrigator in the image",
        "is there any specimenbag in the image",
    ]

    LIST_QUESTION_IDRID = [
        "is there haemorrhages in the fundus",
        "is there microaneurysms in the fundus",
        "is there soft exudates in the fundus",
        "is there hard exudates in the fundus",
        "is hard exudates larger than soft exudates",
        "is haemorrhages smaller than microaneurysms",
        "is there haemorrhages in the region 32_32_16_16",
        "is there microaneurysms in the region 96_96_16_16",
    ]

    LIST_QUESTION_VQA2 = [
        "what color is the hydrant",
        "why are the men jumping to catch",
        "is the water still",
        "how many people are in the image"
    ]

    if dataset == "breast":
        list_question = LIST_QUESTION_BREAST
    elif dataset == "tools":
        list_question = LIST_QUESTION_TOOLS
    elif dataset == "idrid":
        list_question = LIST_QUESTION_IDRID
    elif dataset == "vqa2":
        list_question = LIST_QUESTION_VQA2

    img_dirs = glob.glob(os.path.join(path, "*"))

    # args = update_args(
    #     args, vqa_model="minhmul_att_train_2048", dataset=dataset)
    args = update_args(
        args, vqa_model="minhmul_att_train", dataset=dataset)

    shuffle(img_dirs)
    shuffle(list_question)
    # for (path_img, question_str) in zip(img_dirs, list_question):
    for path_img in img_dirs:
        for question_str in list_question:
            if dataset in ["vqa", "vqa2"]:
                if not ((question_str == "what color is the hydrant" and ("img1" in path_img or "img2" in path_img)) or
                        (question_str == "why are the men jumping to catch" and ("img3" in path_img or "img4" in path_img)) or
                        (question_str == "is the water still" and ("img5" in path_img or "img6" in path_img)) or
                        (question_str == "how many people are in the image" and ("img7" in path_img or "img8" in path_img))):
                    continue

            print(
                "\n\n=========================================================================")
            print("{} - {}".format(question_str, path_img))

            if dataset == "vqa2":
                ans_gt = "red"
            else:
                ans_gt = get_answer(dataset, path_img, question_str)

            if ans_gt is None:
                continue
            else:
                input_size = 256
                step = 2
                windows_size = 32

                dst_dir = "temp/occlusion"
                paths_utils.make_dir(dst_dir)
                out_color_path = "{}/{}_{}_w_{:0}_s_{:0}_color.jpg".format(dst_dir,
                                                                           paths_utils.get_filename_without_extension(
                                                                               path_img),
                                                                           question_str.replace(
                                                                               ' ', '_'),
                                                                           windows_size,
                                                                           step
                                                                           )
                out_gray_path = "{}/{}_{}_w_{:0}_s_{:0}_gray.jpg".format(dst_dir,
                                                                         paths_utils.get_filename_without_extension(
                                                                             path_img),
                                                                         question_str.replace(
                                                                             ' ', '_'),
                                                                         windows_size,
                                                                         step
                                                                         )
                out_avg_path = "{}/{}_{}_w_{:0}_s_{:0}_avg.jpg".format(dst_dir,
                                                                       paths_utils.get_filename_without_extension(
                                                                           path_img),
                                                                       question_str.replace(
                                                                           ' ', '_'),
                                                                       windows_size,
                                                                       step
                                                                       )

                if not os.path.exists(out_color_path):

                    visual_PIL = Image.open(path_img)
                    # top-left corners of all sliding windows
                    # (np.int was removed in NumPy 1.24; plain int is equivalent)
                    indices = np.asarray(
                        np.mgrid[0:input_size - windows_size + 1:step,
                                 0:input_size - windows_size + 1:step].reshape(2, -1).T,
                        dtype=int)

                    cnn, model, trainset = initialize(args, dataset=dataset)
                    # cnn, model, trainset = None, None, None

                    image_occlusion = np.zeros((input_size, input_size))
                    image_occlusion_times = np.zeros((input_size, input_size))

                    ans_without_black_patch = process_one_batch_of_occlusion(args,
                                                                             cnn,
                                                                             model,
                                                                             trainset,
                                                                             visual_PIL,
                                                                             question_str,
                                                                             list_box=None,
                                                                             dataset=dataset)

                    try:
                        score_without_black_patch = ans_without_black_patch[0].get("val")[
                            ans_without_black_patch[0].get("ans").index(ans_gt)]
                    except (ValueError, TypeError):
                        # ground-truth answer missing from the model output
                        score_without_black_patch = torch.tensor(0)

                    batch = 32
                    count = 0
                    list_box = []
                    for i in range(indices.shape[0]):
                        print_utils.print_tqdm(i, indices.shape[0])
                        list_box.append([indices[i][0], indices[i][0]+windows_size -
                                         1, indices[i][1], indices[i][1]+windows_size-1])
                        count += 1

                        # if count == batch or i == indices.shape[0] - 1:
                        if count == batch:
                            # print(count)
                            ans = process_one_batch_of_occlusion(args,
                                                                 cnn,
                                                                 model,
                                                                 trainset,
                                                                 visual_PIL,
                                                                 question_str,
                                                                 list_box,
                                                                 dataset=dataset,
                                                                 is_print=False)

                            # j, not i: the outer loop variable i is still in use
                            for j in range(len(list_box)):
                                try:
                                    score = ans[j].get("val")[
                                        ans[j].get("ans").index(ans_gt)]
                                except (ValueError, TypeError):
                                    score = 0
                                box = list_box[j]

                                if score != 0:
                                    try:
                                        score_occ = (score.item() - score_without_black_patch.item()) \
                                            / score_without_black_patch.item()
                                    except (ZeroDivisionError, AttributeError):
                                        # a clean-image score of zero would divide by zero
                                        score_occ = 0
                                    image_occlusion[box[0]:box[1],
                                                    box[2]:box[3]] += score_occ
                                    image_occlusion_times[box[0]:box[1],
                                                          box[2]:box[3]] += 1

                            count = 0
                            list_box = []

                    save_image(visual_PIL, image_occlusion, image_occlusion_times,
                               out_color_path, out_gray_path, out_avg_path,
                               is_show=False)
Code example #16
import pandas as pd
import glob
import os
import json


CURRENT_WORKING_DIR = os.path.realpath(__file__)
PROJECT_DIR = path_utils.get_project_dir(CURRENT_WORKING_DIR, "vqa_idrid")
IMAGE_SET_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/preprocessed/imagesets/"
RAW_DIR = PROJECT_DIR + "/data/vqa_breast/raw/raw/"
QA_PATH = RAW_DIR + "breast_qa_full.csv"
processed_qa_per_question_path = RAW_DIR + "breast_qa_per_question.csv"

INTERIM_DIR = PROJECT_DIR + "/data/vqa_breast/interim/"
path_utils.make_dir(INTERIM_DIR)
train_annotations_filename = INTERIM_DIR + 'train_questions_annotations.json'
val_annotations_filename = INTERIM_DIR + 'val_questions_annotations.json'


def read_train_val_split():
    segmentation_dict_tool = {k: [] for k in ["train", "val"]}
    
    # read files
    for dataset in segmentation_dict_tool.keys():
        path_write = "{}{}.txt".format(IMAGE_SET_DIR, dataset)
        temp = io_utils.read_file_to_list(path_write)
        segmentation_dict_tool[dataset] = [line.strip('\n') for line in temp]
Code example #17
def vqa_processed(params):

    #####################################################
    # Read input files
    #####################################################

    path_train = os.path.join(
        params['dir'], 'interim',
        params['trainsplit'] + '_questions_annotations.json')
    if params['trainsplit'] == 'train':
        path_val = os.path.join(params['dir'], 'interim',
                                'val_questions_annotations.json')
    path_test = os.path.join(params['dir'], 'interim', 'test_questions.json')
    path_testdev = os.path.join(params['dir'], 'interim',
                                'testdev_questions.json')

    # An example is a tuple (question, image, answer)
    # /!\ test and test-dev have no answer
    trainset = json.load(open(path_train, 'r'))
    if params['trainsplit'] == 'train':
        valset = json.load(open(path_val, 'r'))
    testset = json.load(open(path_test, 'r'))
    testdevset = json.load(open(path_testdev, 'r'))

    #####################################################
    # Preprocess examples (questions and answers)
    #####################################################

    top_answers = process_utils.get_top_answers(trainset, params['nans'])
    aid_to_ans = list(top_answers)
    ans_to_aid = {a: i for i, a in enumerate(top_answers)}
    # Remove examples if answer is not in top answers
    trainset = process_utils.remove_examples(trainset, ans_to_aid)

    # Add 'question_words' to the initial tuple
    trainset = process_utils.preprocess_questions(trainset, params['nlp'])
    if params['trainsplit'] == 'train':
        valset = process_utils.preprocess_questions(valset, params['nlp'])
    testset = process_utils.preprocess_questions(testset, params['nlp'])
    testdevset = process_utils.preprocess_questions(testdevset, params['nlp'])

    # Also process top_words which contains a UNK char
    trainset, top_words = process_utils.remove_long_tail_train(
        trainset, params['minwcount'])
    wid_to_word = {i + 1: w for i, w in enumerate(top_words)}
    word_to_wid = {w: i + 1 for i, w in enumerate(top_words)}

    if params['trainsplit'] == 'train':
        valset = process_utils.remove_long_tail_test(valset, word_to_wid)
    testset = process_utils.remove_long_tail_test(testset, word_to_wid)
    testdevset = process_utils.remove_long_tail_test(testdevset, word_to_wid)

    trainset = process_utils.encode_question(trainset, word_to_wid,
                                             params['maxlength'],
                                             params['pad'])
    if params['trainsplit'] == 'train':
        valset = process_utils.encode_question(valset, word_to_wid,
                                               params['maxlength'],
                                               params['pad'])
    testset = process_utils.encode_question(testset, word_to_wid,
                                            params['maxlength'], params['pad'])
    testdevset = process_utils.encode_question(testdevset, word_to_wid,
                                               params['maxlength'],
                                               params['pad'])

    trainset = process_utils.encode_answer(trainset, ans_to_aid)
    trainset = process_utils.encode_answers_occurence(trainset, ans_to_aid)
    if params['trainsplit'] == 'train':
        valset = process_utils.encode_answer(valset, ans_to_aid)
        valset = process_utils.encode_answers_occurence(valset, ans_to_aid)

    #####################################################
    # Write output files
    #####################################################

    # Paths to output files
    # Ex: data/vqa/processed/nans,3000_maxlength,15_..._trainsplit,train_testsplit,val/id_to_word.json
    subdirname = 'nans,' + str(params['nans'])
    for param in ['maxlength', 'minwcount', 'nlp', 'pad', 'trainsplit']:
        subdirname += '_' + param + ',' + str(params[param])
    # create the output dir portably instead of shelling out to `mkdir -p`
    dir_save = os.path.join(params['dir'], 'processed', subdirname)
    if not os.path.exists(dir_save):
        print('>> make dir', dir_save)
        path_utils.make_dir(dir_save)

    path_wid_to_word = os.path.join(dir_save, 'wid_to_word.pickle')
    path_word_to_wid = os.path.join(dir_save, 'word_to_wid.pickle')
    path_aid_to_ans = os.path.join(dir_save, 'aid_to_ans.pickle')
    path_ans_to_aid = os.path.join(dir_save, 'ans_to_aid.pickle')
    if params['trainsplit'] == 'train':
        path_trainset = os.path.join(dir_save, 'trainset.pickle')
        path_valset = os.path.join(dir_save, 'valset.pickle')
    elif params['trainsplit'] == 'trainval':
        path_trainset = os.path.join(dir_save, 'trainvalset.pickle')
    path_testset = os.path.join(dir_save, 'testset.pickle')
    path_testdevset = os.path.join(dir_save, 'testdevset.pickle')

    print('Write wid_to_word to', path_wid_to_word)
    with open(path_wid_to_word, 'wb') as handle:
        pickle.dump(wid_to_word, handle)

    print('Write word_to_wid to', path_word_to_wid)
    with open(path_word_to_wid, 'wb') as handle:
        pickle.dump(word_to_wid, handle)

    print('Write aid_to_ans to', path_aid_to_ans)
    with open(path_aid_to_ans, 'wb') as handle:
        pickle.dump(aid_to_ans, handle)

    print('Write ans_to_aid to', path_ans_to_aid)
    with open(path_ans_to_aid, 'wb') as handle:
        pickle.dump(ans_to_aid, handle)

    print('Write trainset to', path_trainset)
    with open(path_trainset, 'wb') as handle:
        pickle.dump(trainset, handle)

    if params['trainsplit'] == 'train':
        print('Write valset to', path_valset)
        with open(path_valset, 'wb') as handle:
            pickle.dump(valset, handle)

    print('Write testset to', path_testset)
    with open(path_testset, 'wb') as handle:
        pickle.dump(testset, handle)

    print('Write testdevset to', path_testdevset)
    with open(path_testdevset, 'wb') as handle:
        pickle.dump(testdevset, handle)
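
Loading the processed files back mirrors the dumps; a minimal sketch (the path variables are the ones built above):

import pickle


def load_pickle(path):
    # inverse of the pickle.dump calls above
    with open(path, 'rb') as handle:
        return pickle.load(handle)

# e.g. trainset = load_pickle(path_trainset)
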
Code example #18
DATASETS_PHOTOS_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/ICIAR2018_BACH_Challenge/Photos/"
DATASETS_WSI_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/ICIAR2018_BACH_Challenge/WSI/"
PREPROCESSED_IMAGE_PHOTOS_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/preprocessed/Photos/"
PREPROCESSED_IMAGE_WSI_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/preprocessed/WSI/"
IMAGE_SET_DIR = PROJECT_DIR + \
    "/data/raw/breast-cancer/preprocessed/imagesets/"
PREPROCESSED_IMAGE_WSI_RAW_DIR = PREPROCESSED_IMAGE_WSI_DIR + "raw/"
PREPROCESSED_IMAGE_WSI_PATCH_DIR = PREPROCESSED_IMAGE_WSI_DIR + "patch/"
PREPROCESSED_IMAGE_WSI_GT_DIR = PREPROCESSED_IMAGE_WSI_DIR + "patch_gt/"


path_utils.make_dir(PREPROCESSED_IMAGE_PHOTOS_DIR)
path_utils.make_dir(PREPROCESSED_IMAGE_WSI_DIR)
path_utils.make_dir(PREPROCESSED_IMAGE_WSI_PATCH_DIR)
path_utils.make_dir(PREPROCESSED_IMAGE_WSI_GT_DIR)
path_utils.make_dir(IMAGE_SET_DIR)
path_utils.make_dir(PREPROCESSED_IMAGE_WSI_RAW_DIR)


LIST_BREAST_CLASS = ["Benign", "InSitu", "Invasive", "Normal"]


def normalize(path_in, path_out=None, is_debug=False, is_save=False, is_resize=True):
    im = Image.open(path_in)
    imarray = np.array(im)
    im_norm = normalization_utils.normalize_staining(imarray)
    im_norm = Image.fromarray(im_norm)
Code example #19
import pandas as pd
import numpy as np
from pprint import pprint
import glob
import os
import json


CURRENT_WORKING_DIR = os.path.realpath(__file__)
PROJECT_DIR = path_utils.get_project_dir(CURRENT_WORKING_DIR, "vqa_idrid")
LOGS_DIR = os.path.join(PROJECT_DIR, "logs/med/trainval")
RAW_DIR = PROJECT_DIR + "/data/vqa_med/raw/raw/"
PROCESSED_QA_PER_QUESTION_PATH = RAW_DIR + "med_qa_per_question.csv"
TEST_DIR = PROJECT_DIR + \
    "/data/raw/vqa_med/VQAMed2019Test/VQAMed2019_Test_ImageList.txt"
SUB_DIR = PROJECT_DIR + "/data/vqa_med/submission/"
path_utils.make_dir(SUB_DIR)


# SUB_QC_MLB = [
#     "minhmul_att_trainval_imagenet_h200_g4_relu",
#     "minhmul_att_trainval_imagenet_h200_g4_relu_bert_uncased",
#     "minhmul_att_trainval_imagenet_h200_g4_relu_bert_cased",
#     "minhmul_att_trainval_imagenet_h200_g4_relu_bert_uncased_768",
#     "minhmul_att_trainval_imagenet_h200_g4_relu_bert_cased_768",
#     "minhmul_att_trainval_imagenet_h200_g8_relu",
#     "minhmul_att_trainval_imagenet_h400_g8_relu",
#     "minhmul_att_trainval_imagenet_h100_g8_relu",
#     "minhmul_att_trainval_imagenet_h100_g8_relu_bert_uncased",
#     "minhmul_att_trainval_imagenet_h100_g8_relu_bert_cased",
#     "minhmul_att_trainval_imagenet_h100_g8_relu_bert_uncased_768",
#     "minhmul_att_trainval_imagenet_h100_g8_relu_bert_cased_768",
Code example #20
def preprocess_dataset(dataset="train", is_show=False, is_overwrite=False, is_augment=False):
    if dataset == "train":
        dataset_dir = DATASETS_TRAIN_DIR
    elif dataset == "val":
        dataset_dir = DATASETS_VALID_DIR
    else:
        dataset_dir = DATASETS_TEST_DIR

    if is_augment:
        preprocessed_dir = os.path.join(
            PREPROCESSED_DIR, "raw", dataset + "_augment")
    else:
        preprocessed_dir = os.path.join(PREPROCESSED_DIR, "raw", dataset)

    if is_overwrite or not os.path.exists(preprocessed_dir):
        path_utils.make_dir(preprocessed_dir)
        img_paths = glob.glob(os.path.join(dataset_dir, "*.jpg"))

        for index in tqdm(range(len(img_paths))):
            img_preprocessed = preprocess.process(index, img_paths)
            img_preprocessed = cv2.resize(img_preprocessed, (256, 256))
            if is_show:
                cv2.imshow('Done', img_preprocessed)
                cv2.waitKey(0)
            out_path = os.path.join(
                preprocessed_dir, path_utils.get_filename(img_paths[index]))
            cv2.imwrite(out_path, img_preprocessed)
            out = img_preprocessed

            if is_augment:
                for augment in LIST_AUGMENT:
                    out_path = os.path.join(
                        preprocessed_dir, path_utils.get_filename_without_extension(img_paths[index]) + "_{}.jpg".format(augment))
                    img_preprocessed = Image.fromarray(out)
                    if augment == "fliplr":
                        img_preprocessed = img_preprocessed.transpose(
                            Image.FLIP_LEFT_RIGHT)
                    elif augment == "rot10":
                        img_preprocessed = img_preprocessed.rotate(10)
                    elif augment == "rot20":
                        img_preprocessed = img_preprocessed.rotate(20)
                    elif augment == "rot30":
                        img_preprocessed = img_preprocessed.rotate(30)
                    elif augment == "rot_10":
                        img_preprocessed = img_preprocessed.rotate(-10)
                    elif augment == "rot_20":
                        img_preprocessed = img_preprocessed.rotate(-20)
                    elif augment == "rot_30":
                        img_preprocessed = img_preprocessed.rotate(-30)
                    elif augment == "bright1":
                        img_preprocessed = ie.Contrast(
                            img_preprocessed).enhance(1)
                    elif augment == "bright_1":
                        img_preprocessed = ie.Contrast(
                            img_preprocessed).enhance(-1)
                    elif augment == "sharp3":
                        img_preprocessed = ie.Sharpness(
                            img_preprocessed).enhance(3)
                    elif augment == "sharp_3":
                        img_preprocessed = ie.Sharpness(
                            img_preprocessed).enhance(-3)
                    # elif augment == "contrast2":
                    #     img_preprocessed = ie.Contrast(img_preprocessed).enhance(2)
                    # elif augment == "contrast_2":
                    #     img_preprocessed = ie.Contrast(img_preprocessed).enhance(-2)
                    img_preprocessed.save(out_path)
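
LIST_AUGMENT itself is defined elsewhere in the project; reconstructed from the if/elif chain above it plausibly reads as follows (unverified against the repository):

LIST_AUGMENT = [
    "fliplr",
    "rot10", "rot20", "rot30",
    "rot_10", "rot_20", "rot_30",
    "bright1", "bright_1",
    "sharp3", "sharp_3",
]
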
Code example #21
def get_gradcam_from_vqa_model(visual_features,
                               question_features,
                               features_blobs_visual,
                               ans,
                               path_img,
                               cnn,
                               model,
                               question_str,
                               dataset,
                               vqa_model="minhmul_noatt_train_2048",
                               finalconv_name="linear_classif",
                               is_show_image=False,
                               is_att=True):

    if is_att:
        logit, list_v_record = model(visual_features, question_features)
        cam = get_gadcam_vqa_new(list_v_record)

    else:
        # grad_cam = gradcam_utils.GradCam(model=model,
        #                                  target_layer_names=["linear_v"], use_cuda=True)

        # target_index = None
        # mask = grad_cam(visual_features,
        #                 question_features,
        #                 target_index)

        # hook the feature extractor
        features_blobs = []

        def hook_feature(module, input, output):
            features_blobs.append(output.data.cpu().numpy())

        model._modules.get(finalconv_name).register_forward_hook(hook_feature)

        # model.fusion.linear_v.register_forward_hook(hook_feature)

        # model.fusion.linear_v.register_backward_hook(hook_feature)

        # get the classifier weights
        params = list(model.parameters())

        if "noatt" in vqa_model:
            classif_w_params = np.squeeze(params[10].data.cpu().numpy())
            classif_b_params = np.squeeze(params[11].data.cpu().numpy())

        logit = model(visual_features, question_features)
        h_x = F.softmax(logit, dim=1).data.squeeze()
        probs, idx = h_x.sort(0, True)
        probs = probs.cpu().numpy()
        idx = idx.cpu().numpy()

        cam = get_gadcam_vqa(features_blobs_visual[0], classif_w_params,
                             classif_b_params, [idx[0]])

    img_name = paths_utils.get_filename_without_extension(path_img)

    in_path = "temp/gradcam/{}/{}_in.jpg".format(dataset, img_name)

    img = cv2.imread(in_path)

    result = show_cam_on_image(img, cam)

    question_str = question_str.replace(' ', '-')

    paths_utils.make_dir("temp/gradcam/{}/".format(dataset))
    if "noatt" in vqa_model:
        out_path = "temp/gradcam/{}/{}_noatt_question_{}.jpg".format(
            dataset, img_name, question_str)
    else:
        out_path = "temp/gradcam/{}/{}_att_question_{}.jpg".format(
            dataset, img_name, question_str)

    cv2.imwrite(out_path, result)

    im_out = Image.open(out_path)

    if is_show_image:
        im_out.show()

    return logit