def __init__(self, json_file, type_dataset):
    self.type_dataset = type_dataset
    self.db_file = '/scratch/datasets/vrd/relations_' + type_dataset + '.data'
    with open(json_file, 'r') as file:
        contents = json.load(file)
    contents = convert_to_string(contents).items()
    # Flatten the per-image annotation lists into one (image_key, relation) pair per relation.
    self.data = [(key, rel) for key, value in contents for rel in value]
    print("number of relations:", len(self.data))
    self.list_relation = []
    if os.path.isfile(self.db_file):
        # A cached feature file exists: load it instead of recomputing.
        with open(self.db_file, 'rb') as f:
            self.list_relation = pkl.load(f)
    else:
        for i, (key, rel) in enumerate(self.data):
            print(i, len(self.data))
            subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
            object_box = rel['object']['bbox']
            # Union box: the smallest box enclosing both subject and object.
            minbbox = [
                min(subject_box[0], object_box[0]),
                max(subject_box[1], object_box[1]),
                min(subject_box[2], object_box[2]),
                max(subject_box[3], object_box[3])
            ]
            image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                           type_dataset + '_images/' + key)
            bboxes = [subject_box, object_box, minbbox]
            list_image = [
                image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
            ]
            # Binary masks, one per box, set to 1 inside the box region.
            list_binary_image = [
                np.zeros_like(image) for _ in range(len(bboxes))
            ]
            for binary_image, bbox in zip(list_binary_image, bboxes):
                binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1
            relation = [transform(x) for x in list_image] + \
                [spatial_transform(x)[0, :, :].view(1, 32, 32) for x in list_binary_image] + \
                [torch.LongTensor([rel['subject']['category'],
                                   rel['object']['category'],
                                   rel['predicate']])]
            self.list_relation.append(relation)
        with open(self.db_file, 'wb') as f:
            pkl.dump(self.list_relation, f, protocol=pkl.HIGHEST_PROTOCOL)
    print("complete_loading", type_dataset)
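# The preprocessing callables used above ('transform' and 'spatial_transform') are not
# defined in this excerpt. A minimal sketch, assuming 'transform' resizes the RGB crop and
# normalizes it, and 'spatial_transform' shrinks the binary mask to the 32x32 map consumed
# by spatial_transform(x)[0, :, :].view(1, 32, 32). The crop size and the normalization
# constants below are assumptions, not values taken from the source.
import torchvision.transforms as T

transform = T.Compose([
    T.ToPILImage(),            # crops arrive as HxWxC uint8 numpy arrays from imread
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

spatial_transform = T.Compose([
    T.ToPILImage(),            # the 0/1 mask is converted the same way
    T.Resize((32, 32)),
    T.ToTensor(),              # CxHxW tensor; channel 0 is reshaped to 1x32x32 above
])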
def __getitem__(self, index):
    """Return a (transformed) vrd_input and target sample from an integer index"""
    key, rel = self.data[index]
    subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
    object_box = rel['object']['bbox']
    minbbox = [
        min(subject_box[0], object_box[0]),
        max(subject_box[1], object_box[1]),
        min(subject_box[2], object_box[2]),
        max(subject_box[3], object_box[3])
    ]
    image = imread('/scratch/datasets/sg_dataset/sg_' + self.type_dataset +
                   '_images/' + key)
    bboxes = [subject_box, object_box, minbbox]
    list_image = [
        image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
    ]
    subject_visual_input, object_visual_input, union_visual_input = tuple(
        transform(x) for x in list_image)
    list_binary_image = [np.zeros_like(image) for _ in range(len(bboxes))]
    for binary_image, bbox in zip(list_binary_image, bboxes):
        binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1
    subject_spatial_input, object_spatial_input, union_spatial_input = \
        tuple(spatial_transform(x)[0, :, :].view(1, 32, 32)
              for x in list_binary_image)
    predicate_spatial_feature = torch.cat(
        [subject_spatial_input, object_spatial_input], 0)
    object_word_feature = torch.FloatTensor(
        index_to_emb_dict[rel['object']['category']])
    subject_word_feature = torch.FloatTensor(
        index_to_emb_dict[rel['subject']['category']])
    if use_model == 1:
        input_sample = union_visual_input, predicate_spatial_feature
        target_sample = (rel['subject']['category'],
                         rel['object']['category'], rel['predicate'])
    elif use_model == 2:
        input_sample = (
            torch.FloatTensor(to_categorical(rel['subject']['category'], object_size)),
            torch.FloatTensor(to_categorical(rel['object']['category'], object_size)),
            union_visual_input, predicate_spatial_feature)
        target_sample = rel['predicate']
    elif use_model == 3:
        input_sample = (subject_word_feature, object_word_feature,
                        union_visual_input, predicate_spatial_feature)
        target_sample = rel['predicate']
    return input_sample, target_sample
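# 'to_categorical' and 'object_size' are referenced above but not defined in this excerpt.
# A minimal one-hot sketch with the same call signature (Keras-style semantics assumed):
import numpy as np

def to_categorical(index, num_classes):
    """Return a one-hot float32 vector of length num_classes with a 1 at position index."""
    one_hot = np.zeros(num_classes, dtype=np.float32)
    one_hot[index] = 1.0
    return one_hot

# Example: to_categorical(2, 5) -> array([0., 0., 1., 0., 0.], dtype=float32)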
            removeMask[index_elem_to_elim] = 0
            # Move the masked-out matches from 'good' into 'removed'.
            removed.extend(
                [good[i] for i in range(len(good)) if not removeMask[i]])
            good = [good[i] for i in range(len(good)) if removeMask[i]]
        else:
            print("Not possible to find another homography")
            matchesMask = None
    else:
        # Project the four source corners through the homography M.
        polygon = Polygon([
            transform(M, pts[0]),
            transform(M, pts[1]),
            transform(M, pts[2]),
            transform(M, pts[3])
        ])
        print("POLYGON")
        for i in range(4):
            print("Vertex #" + str(i))
            print("src->(" + str(pts[i][0][0]) + "," + str(pts[i][0][1]) +
                  ") dst->" + str(transform(M, pts[i])) + "\n")
        print("---------------------------------------\n")
        for i in range(len(index_inliers)):
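# The 'transform(M, pt)' helper used in the homography snippet above is not shown in this
# excerpt. One plausible implementation, assuming each point is stored as [[x, y]]
# (matching the pts[i][0][0] / pts[i][0][1] indexing above):
import numpy as np
import cv2

def transform(M, pt):
    """Project a single [[x, y]] point through the 3x3 homography M."""
    src = np.array([[pt[0]]], dtype=np.float32)   # shape (1, 1, 2), as OpenCV expects
    dst = cv2.perspectiveTransform(src, M)
    return (float(dst[0][0][0]), float(dst[0][0][1]))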
def run_evaluate(model, json_file, type_dataset):
    output_file = 'data/temp_output_3.pkl'
    if os.path.isfile(output_file):
        # Reuse cached predictions if a previous evaluation run was saved.
        with open(output_file, 'rb') as f:
            list_relations, list_gt_relations = pickle.load(f)
    else:
        model.load(
            '/scratch/datasets/vrd/weights.07-train_loss:0.10-train_acc:0.52-val_loss:0.12-val_acc:0.51.pkl'
        )
        with open(json_file, 'r') as file:
            contents = json.load(file)
        contents = convert_to_string(contents).items()
        list_relations = []
        list_gt_relations = []
        for i, (key, value) in enumerate(contents):
            print(i, len(contents))
            list_relation = []
            list_gt_relation = []
            # Collect the set of annotated objects in this image.
            list_objects = []
            for rel in value:
                list_objects.append(
                    Object(rel['subject']['category'],
                           *tuple(rel['subject']['bbox'])))
                list_objects.append(
                    Object(rel['object']['category'],
                           *tuple(rel['object']['bbox'])))
                list_gt_relation.append(rel['predicate'])
            list_objects = list(set(list_objects))
            # Score every pair of distinct objects with the predicate classifier.
            for subject, object_ in combinations(list_objects, 2):
                subject_box = (subject.ymin, subject.ymax,
                               subject.xmin, subject.xmax)  # [ymin, ymax, xmin, xmax]
                object_box = (object_.ymin, object_.ymax,
                              object_.xmin, object_.xmax)
                minbbox = [
                    min(subject_box[0], object_box[0]),
                    max(subject_box[1], object_box[1]),
                    min(subject_box[2], object_box[2]),
                    max(subject_box[3], object_box[3])
                ]
                image = imread('/scratch/datasets/sg_dataset/sg_' +
                               type_dataset + '_images/' + key)
                bboxes = [subject_box, object_box, minbbox]
                list_image = [
                    image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
                ]
                list_binary_image = [
                    np.zeros_like(image) for _ in range(len(bboxes))
                ]
                for binary_image, bbox in zip(list_binary_image, bboxes):
                    binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1
                subject_visual_input, object_visual_input, union_visual_input = tuple(
                    transform(x) for x in list_image)
                subject_spatial_input, object_spatial_input, union_spatial_input = \
                    tuple(spatial_transform(x)[0, :, :].view(1, 32, 32)
                          for x in list_binary_image)
                predicate_spatial_feature = torch.cat(
                    [subject_spatial_input, object_spatial_input], 0)
                inputs = (torch.FloatTensor(
                              to_categorical(subject.category, object_size)),
                          torch.FloatTensor(
                              to_categorical(object_.category, object_size)),
                          union_visual_input, predicate_spatial_feature)
                # wrap them in Variable
                if isGPU:
                    inputs = [
                        Variable(x.cuda(), volatile=True) for x in inputs
                    ]
                else:
                    inputs = [Variable(x, volatile=True) for x in inputs]
                # forward
                outputs = model.net(inputs)
                if isGPU:
                    list_relation.append(outputs.data.cpu().numpy())
                else:
                    list_relation.append(outputs.data.numpy())
            list_relations.append(np.array(list_relation))
            list_gt_relations.append(list_gt_relation)
        with open(output_file, 'wb') as f:
            pickle.dump((list_relations, list_gt_relations), f,
                        pickle.HIGHEST_PROTOCOL)
    score = eval_recall(list_relations, list_gt_relations)
    return score
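# The 'Object' type built in run_evaluate is not defined in this excerpt. Because
# instances are deduplicated with set(), it must be hashable and compare by value; a
# namedtuple with the category and the [ymin, ymax, xmin, xmax] box fields, in the order
# they are passed above, would behave that way. A hypothetical sketch:
from collections import namedtuple

Object = namedtuple('Object', ['category', 'ymin', 'ymax', 'xmin', 'xmax'])

# Example: Object(3, 120, 310, 45, 200) == Object(3, 120, 310, 45, 200) is True, so
# repeated detections of the same object collapse when passed through set().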
def build_db(self):
    count_relation = self.num_records
    for i, (key, rel) in enumerate(self.data):
        # Skip relations already written to the store in a previous run.
        if i < self.num_records:
            continue
        print(i, len(self.data))
        subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
        object_box = rel['object']['bbox']
        minbbox = [
            min(subject_box[0], object_box[0]),
            max(subject_box[1], object_box[1]),
            min(subject_box[2], object_box[2]),
            max(subject_box[3], object_box[3])
        ]
        image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                       self.type_dataset + '_images/' + key)
        bboxes = [subject_box, object_box, minbbox]
        list_image = [
            image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
        ]
        list_binary_image = [
            np.zeros_like(image) for _ in range(len(bboxes))
        ]
        for binary_image, bbox in zip(list_binary_image, bboxes):
            binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1
        subject_visual_input, object_visual_input, union_visual_input = tuple(
            transform(x) for x in list_image)
        subject_spatial_input, object_spatial_input, union_spatial_input = \
            tuple(spatial_transform(x)[0, :, :].view(1, 32, 32)
                  for x in list_binary_image)
        predicate_spatial_feature = torch.cat(
            [subject_spatial_input, object_spatial_input], 0)
        # subject_word_feature = np.array(emb.emb(index_to_object_dict[rel['subject']['category']]), dtype=np.float32)
        # object_word_feature = np.array(emb.emb(index_to_object_dict[rel['object']['category']]), dtype=np.float32)
        relation = {
            'image_id': key,
            'subject_visual_feature':
                self.save_numpy_array(subject_visual_input.numpy()),
            # 'subject_word_feature': self.save_numpy_array(subject_word_feature),
            'object_visual_feature':
                self.save_numpy_array(object_visual_input.numpy()),
            # 'object_word_feature': self.save_numpy_array(object_word_feature),
            'predicate_visual_feature':
                self.save_numpy_array(union_visual_input.numpy()),
            'predicate_spatial_feature':
                self.save_numpy_array(predicate_spatial_feature.numpy()),
            'subject_id': rel['subject']['category'],
            'object_id': rel['object']['category'],
            'predicate_id': rel['predicate'],
        }
        self.db.hmset(count_relation, relation)
        count_relation += 1
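# 'save_numpy_array' is called above but not shown. Since Redis hash fields hold bytes,
# one common approach is to np.save the array into an in-memory buffer; a hypothetical
# sketch of that helper and its inverse (the names here are assumptions):
import io
import numpy as np

def save_numpy_array(arr):
    """Serialize an ndarray (dtype and shape included) to bytes for a Redis hash field."""
    buf = io.BytesIO()
    np.save(buf, arr)
    return buf.getvalue()

def load_numpy_array(raw):
    """Inverse of save_numpy_array: rebuild the ndarray from raw bytes."""
    return np.load(io.BytesIO(raw))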
def __init__(self, json_file, type_dataset):
    self.type_dataset = type_dataset
    self.db_file = '/scratch/datasets/vrd/relations_' + type_dataset + '.db'
    self.table_name = 'relations'
    self.batch_database_size = 1000
    with open(json_file, 'r') as file:
        contents = json.load(file)
    contents = convert_to_string(contents).items()
    data = [(key, rel) for key, value in contents for rel in value]
    print("number of relations:", len(data))
    create_sql = ('create table if not exists relations('
                  'id integer primary key,'
                  'image_id text,'
                  'subject_visual_feature array,'
                  'subject_word_feature array,'
                  'object_visual_feature array,'
                  'object_word_feature array,'
                  'predicate_visual_feature array,'
                  'predicate_spatial_feature array,'
                  'subject_id integer,'
                  'object_id integer,'
                  'predicate_id integer'
                  ')')
    self.db = SQLiteDatabase(self.db_file, self.table_name, create_sql)
    num_records = len(self.db)
    # Resume feature extraction from the first relation not yet in the database.
    if num_records < len(data):
        list_relation = []
        count_relation = num_records
        for i, (key, rel) in enumerate(data):
            if i < num_records:
                continue
            print(i, len(data))
            subject_box = rel['subject']['bbox']  # [ymin, ymax, xmin, xmax]
            object_box = rel['object']['bbox']
            minbbox = [
                min(subject_box[0], object_box[0]),
                max(subject_box[1], object_box[1]),
                min(subject_box[2], object_box[2]),
                max(subject_box[3], object_box[3])
            ]
            image = imread('/scratch/datasets/vrd/sg_dataset/sg_' +
                           type_dataset + '_images/' + key)
            bboxes = [subject_box, object_box, minbbox]
            list_image = [
                image[bbox[0]:bbox[1], bbox[2]:bbox[3]] for bbox in bboxes
            ]
            list_binary_image = [
                np.zeros_like(image) for _ in range(len(bboxes))
            ]
            for binary_image, bbox in zip(list_binary_image, bboxes):
                binary_image[bbox[0]:bbox[1], bbox[2]:bbox[3]] = 1
            subject_visual_input, object_visual_input, union_visual_input = tuple(
                transform(x) for x in list_image)
            subject_spatial_input, object_spatial_input, union_spatial_input = \
                tuple(spatial_transform(x)[0, :, :].view(1, 32, 32)
                      for x in list_binary_image)
            predicate_spatial_feature = torch.cat(
                [subject_spatial_input, object_spatial_input], 0)
            # subject_word_feature = np.array(emb.emb(index_to_object_dict[rel['subject']['category']]), dtype=np.float32)
            # object_word_feature = np.array(emb.emb(index_to_object_dict[rel['object']['category']]), dtype=np.float32)
            relation = {
                'id': count_relation,
                'image_id': key,
                'subject_visual_feature': subject_visual_input.numpy(),
                # 'subject_word_feature': subject_word_feature,
                'object_visual_feature': object_visual_input.numpy(),
                # 'object_word_feature': object_word_feature,
                'predicate_visual_feature': union_visual_input.numpy(),
                'predicate_spatial_feature': predicate_spatial_feature.numpy(),
                'subject_id': rel['subject']['category'],
                'object_id': rel['object']['category'],
                'predicate_id': rel['predicate'],
            }
            list_relation.append(relation)
            count_relation += 1
            # Flush to SQLite in batches to bound memory use.
            if len(list_relation) == self.batch_database_size:
                print("start inserting")
                self.db.insert_batch(list_relation)
                list_relation.clear()
        if list_relation:
            self.db.insert_batch(list_relation)
            list_relation.clear()
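# The SQLiteDatabase wrapper used above is not shown. For the 'array' column type declared
# in create_sql to round-trip numpy arrays, sqlite3 needs an adapter/converter pair
# registered and the connection opened with detect_types. A minimal sketch of that
# convention (an assumption about how SQLiteDatabase might be implemented, not its code):
import io
import sqlite3
import numpy as np

def adapt_array(arr):
    """Store an ndarray as a BLOB (np.save keeps dtype and shape)."""
    buf = io.BytesIO()
    np.save(buf, arr)
    return sqlite3.Binary(buf.getvalue())

def convert_array(raw):
    """Rebuild the ndarray from a BLOB written by adapt_array."""
    return np.load(io.BytesIO(raw))

sqlite3.register_adapter(np.ndarray, adapt_array)
sqlite3.register_converter('array', convert_array)

# Columns declared as 'array' are then converted automatically when the connection is
# opened with: sqlite3.connect(db_file, detect_types=sqlite3.PARSE_DECLTYPES)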