Beispiel #1
0
def build_bbox_tensors(infos, max_length):
    """Pack bounding-box records into fixed-size, zero-padded tensors.

    At most ``max_length`` entries of ``infos`` are used. Each entry is
    indexed as ``entry["bounding_box"]`` and must provide the keys
    ``top_left_x``, ``top_left_y``, ``width`` and ``height``.

    Returns a ``Sample`` carrying:
      * ``coordinates`` -- float tensor of shape ``(max_length, 4)`` whose
        rows are ``(x, y, x + width, y + height)``;
      * ``width`` / ``height`` -- float tensors of length ``max_length``;
      * ``bbox_types`` -- a list of ``max_length`` ``"xyxy"`` strings.

    Slots beyond the number of boxes actually used remain zero.
    """
    kept = min(max_length, len(infos))

    # Pre-allocate at full length so unused trailing slots act as zero padding.
    coords = torch.zeros((max_length, 4), dtype=torch.float)
    widths = torch.zeros(max_length, dtype=torch.float)
    heights = torch.zeros(max_length, dtype=torch.float)
    box_kinds = ["xyxy"] * max_length

    boxes = infos[:kept]
    result = Sample()

    for row, entry in enumerate(boxes):
        box = entry["bounding_box"]
        left, top = box["top_left_x"], box["top_left_y"]
        box_w, box_h = box["width"], box["height"]

        # Corner format: top-left followed by bottom-right.
        coords[row, 0] = left
        coords[row, 1] = top
        coords[row, 2] = left + box_w
        coords[row, 3] = top + box_h

        widths[row] = box_w
        heights[row] = box_h

    result.coordinates = coords
    result.width = widths
    result.height = heights
    result.bbox_types = box_kinds
    return result
Beispiel #2
0
def build_bbox_tensors(infos, max_length, feats, img_id, obj_bbox):
    """Build OCR box coordinates plus cached object/text relation edges.

    Args:
        infos: Sequence of OCR records; each is indexed as
            ``info["bounding_box"]`` and must provide ``width``, ``height``
            and either ``top_left_x``/``top_left_y`` or
            ``topLeftX``/``topLeftY``. Only the first ``max_length`` are used.
        max_length: Number of rows in the zero-padded coordinate tensor.
        feats: Not used by this function; kept for interface compatibility.
        img_id: Image identifier; concatenated into the cache file names
            (so it must be a ``str``).
        obj_bbox: Object boxes, passed through unchanged to the
            nearest-node and edge-feature helpers.
            NOTE(review): expected shape/format is defined by those helpers.

    Returns:
        A ``Sample`` with:
          * ``coordinates`` -- float tensor ``(max_length, 4)`` with rows
            ``(x, y, x + width, y + height)``, zero beyond the used boxes;
          * ``ocr_mask`` -- the *count* of OCR boxes used (an int, not a
            mask tensor);
          * ``edge_oo`` / ``edge_ot`` / ``edge_tt`` / ``edge_to`` and the
            matching ``edge_*feats`` -- relation data, loaded from the
            on-disk cache when available and otherwise computed with
            ``knn_k = 5`` and written to the cache.
    """
    # Number of OCR boxes actually kept.
    num_bbox = min(max_length, len(infos))
    # OCR box coordinates; rows past num_bbox stay zero.
    coord_tensor = torch.zeros((max_length, 4), dtype=torch.float)
    infos = infos[:num_bbox]
    sample = Sample()

    for idx, info in enumerate(infos):
        bbox = info["bounding_box"]
        # Annotations come in two key styles: snake_case and camelCase.
        if "top_left_x" in bbox:
            x = bbox["top_left_x"]
            y = bbox["top_left_y"]
        else:
            x = bbox["topLeftX"]
            y = bbox["topLeftY"]
        width = bbox["width"]
        height = bbox["height"]
        coord_tensor[idx, 0] = x
        coord_tensor[idx, 1] = y
        coord_tensor[idx, 2] = x + width
        coord_tensor[idx, 3] = y + height

    sample.coordinates = coord_tensor
    sample.ocr_mask = num_bbox

    image_path_org = './data/open_images/textvqa_gcy/'
    # Alternative dataset roots:
    # image_path_org = './data/open_images/GT_OBJ_FRCN/'
    # image_path_org = './data/open_images/visual_genome/'

    oo_edge_path = image_path_org + 'edge_oo/'
    ot_edge_path = image_path_org + 'edge_ot/'
    tt_edge_path = image_path_org + 'edge_tt/'
    to_edge_path = image_path_org + 'edge_to/'

    # The result is not used here; the call is retained in case search_file
    # has side effects or validates img_id. TODO confirm and remove.
    search_file(image_path_org, img_id)
    knn_k = 5

    try:
        # Fast path: all eight cache files exist and load cleanly.
        # NOTE(review): torch.load unpickles its input -- the cache
        # directory must only contain trusted files.
        sample.edge_oo = torch.load(oo_edge_path + img_id + '_oo.pdh')
        sample.edge_oofeats = torch.load(oo_edge_path + img_id + '_oofeats.pdh')

        sample.edge_ot = torch.load(ot_edge_path + img_id + '_ot.pdh')
        sample.edge_otfeats = torch.load(ot_edge_path + img_id + '_otfeats.pdh')

        sample.edge_tt = torch.load(tt_edge_path + img_id + '_tt.pdh')
        sample.edge_ttfeats = torch.load(tt_edge_path + img_id + '_ttfeats.pdh')

        sample.edge_to = torch.load(to_edge_path + img_id + '_to.pdh')
        sample.edge_tofeats = torch.load(to_edge_path + img_id + '_tofeats.pdh')
    except Exception:
        # Any load failure (missing or unreadable file) falls back to
        # recomputing every edge set and rewriting the cache. Catching
        # Exception rather than using a bare ``except`` lets
        # KeyboardInterrupt / SystemExit propagate instead of silently
        # triggering regeneration and overwriting cache files.

        # Object-object relation edges.
        oo_node_matrix = finde_k_nearest_node(obj_bbox, knn_k)
        sample.edge_oo = oo_node_matrix
        torch.save(oo_node_matrix, oo_edge_path + img_id + "_oo.pdh")

        obj_obj_feat_variable = gen_oo_edge_feature(obj_bbox,
                                                    oo_node_matrix,
                                                    knn_k=knn_k)
        torch.save(obj_obj_feat_variable, oo_edge_path + img_id + "_oofeats.pdh")
        sample.edge_oofeats = obj_obj_feat_variable

        # Object-text relation edges.
        ot_node_matrix = dc_finde_k_nearest_node(obj_bbox, coord_tensor, knn_k)
        sample.edge_ot = ot_node_matrix
        torch.save(ot_node_matrix, ot_edge_path + img_id + "_ot.pdh")

        obj_text_feat_variable = gen_ot_edge_feature(obj_bbox,
                                                     coord_tensor,
                                                     ot_node_matrix,
                                                     knn_k=knn_k)
        torch.save(obj_text_feat_variable, ot_edge_path + img_id + "_otfeats.pdh")
        sample.edge_otfeats = obj_text_feat_variable

        # Text-text relation edges.
        tt_node_matrix = finde_k_nearest_node(coord_tensor, knn_k)
        sample.edge_tt = tt_node_matrix
        torch.save(tt_node_matrix, tt_edge_path + img_id + "_tt.pdh")

        text_text_edge_feature = gen_tt_edge_feature(coord_tensor,
                                                     tt_node_matrix,
                                                     knn_k=knn_k)
        torch.save(text_text_edge_feature, tt_edge_path + img_id + "_ttfeats.pdh")
        sample.edge_ttfeats = text_text_edge_feature

        # Text-object relation edges.
        to_node_matrix = dc_finde_k_nearest_node(coord_tensor, obj_bbox, knn_k)
        sample.edge_to = to_node_matrix
        torch.save(to_node_matrix, to_edge_path + img_id + "_to.pdh")

        text_obj_feat_variable = gen_to_edge_feature(coord_tensor,
                                                     obj_bbox,
                                                     to_node_matrix,
                                                     knn_k=knn_k)
        torch.save(text_obj_feat_variable, to_edge_path + img_id + "_tofeats.pdh")
        sample.edge_tofeats = text_obj_feat_variable

    return sample