(args.W, args.H), interpolation=cv2.INTER_AREA) / 255.

        frame_objs = []
        frame_rels = []

        ids_cur_frame = []

        for j in range(n_objects):
            id = get_identifier(objects[j])
            if check_contain_id(id, ids_filter) and not check_contain_id(
                    id, ids_cur_frame):
                ids_cur_frame.append(id)

                material = objects[j]['material']
                shape = objects[j]['shape']
                attr = encode_attr(material, shape, bbox_size, args.attr_dim)
                mask_raw = decode(objects[j]['mask'])
                mask = cv2.resize(mask_raw, (W, H),
                                  interpolation=cv2.INTER_NEAREST)
                # cv2.imshow('mask', mask * 255)
                # cv2.waitKey(0)

                bbox, pos = convert_mask_to_bbox(mask_raw, H, W, bbox_size)

                pos_mean = torch.FloatTensor(np.array([H / 2., W / 2.]))
                pos_mean = pos_mean.unsqueeze(1).unsqueeze(1)
                pos_std = pos_mean

                pos = normalize(pos, pos_mean, pos_std)
                mask_crop = normalize(crop(mask, bbox, H, W), 0.5,
                                      1).unsqueeze(0)
Example #2
0
    def __getitem__(self, idx):
        """Assemble one sample from a window of ``n_his + 2`` frames.

        The window starts at ``idx_frame - n_his * frame_offset`` and ends at
        ``idx_frame + frame_offset`` (inclusive), stepping by ``frame_offset``.
        The last frame of the window is the prediction target.

        Args:
            idx: Index into ``self.valid_idx``; each entry holds
                ``(idx_video, idx_frame)``.

        Returns:
            A tuple ``(attr, x, rel, label_obj, label_rel)``:

            * ``attr`` -- ``encode_attr`` outputs of the objects in the first
              frame, viewed as ``(n_objects, attr_dim, bbox_size, bbox_size)``.
            * ``x`` -- per-object states of every frame except the last one,
              concatenated along dim 1.
            * ``rel`` -- result of ``prepare_relations(n_objects)`` with the
              relation attributes of all frames but the last appended.
            * ``label_obj`` -- state of the last frame, with its position
              channels (and size channels when ``box_only_flag`` is set)
              expressed as an offset from the previous frame.
            * ``label_rel`` -- ``(n_objects * n_objects, 1)`` tensor filled
              with ``-0.5``; set to ``1`` for pairs colliding at the last
              frame when ``edge_superv`` is enabled.

        Raises:
            Exception: re-raised from ``torch.cat`` when the per-object
                feature tensors cannot be stacked (the offending video/frame
                index is printed first).

        Note:
            ``x`` and ``label_obj`` are slices of the same underlying
            ``feats`` tensor; ``label_obj`` is modified in place.
        """
        args = self.args
        n_his = args.n_his
        frame_offset = args.frame_offset
        idx_video, idx_frame = self.valid_idx[idx][0], self.valid_idx[idx][1]
        first_frame = idx_frame - n_his * frame_offset
        frame_ids = range(first_frame, idx_frame + frame_offset + 1,
                          frame_offset)

        # Values that do not change from frame to frame are computed once.
        sub_id = idx_video // 1000
        full_img_dir = os.path.join(
            self.data_dir,
            'image_' + str(sub_id * 1000).zfill(5) + '-' + str(
                (sub_id + 1) * 1000).zfill(5))

        # Mean and std are identical, i.e. positions are mapped as
        # (p - c) / c with c the image centre.
        # NOTE(review): hoisting assumes normalize() does not mutate its
        # mean/std arguments -- confirm against its definition.
        pos_mean = torch.FloatTensor(np.array([self.H / 2., self.W / 2.]))
        pos_mean = pos_mean.unsqueeze(1).unsqueeze(1)
        pos_std = pos_mean
        box_mean = torch.FloatTensor(
            np.array([1 / 2., 1 / 2., 1 / 2., 1 / 2.]))
        box_mean = box_mean.unsqueeze(1).unsqueeze(1)
        box_std = box_mean

        needs_box = (args.box_only_flag or args.add_hw_state_flag
                     or args.add_xyhw_state_flag)

        objs = []
        attrs = []
        for i in frame_ids:
            frame = self.metadata[idx_video]['frames'][i]
            frame_filename = os.path.join(
                'video_' + str(idx_video).zfill(5),
                str(frame['frame_index'] + 1) + '.png')
            objects = frame['objects']
            n_objects = len(objects)

            img = self.loader(os.path.join(full_img_dir, frame_filename))
            # Reverse the channel order and make the array contiguous.
            img = np.array(img)[:, :, ::-1].copy()
            # np.float was removed in NumPy 1.24; np.float64 is the dtype it
            # used to alias.
            img = cv2.resize(img, (self.W, self.H),
                             interpolation=cv2.INTER_AREA).astype(
                                 np.float64) / 255.

            ### prepare object inputs
            object_inputs = []
            for obj in objects:
                material = obj['material']
                shape = obj['shape']

                # Attributes are collected from the first frame only.
                if i == first_frame:
                    attrs.append(
                        encode_attr(material, shape, self.bbox_size,
                                    args.attr_dim))

                mask_raw = decode(obj['mask'])
                mask = cv2.resize(mask_raw, (self.W, self.H),
                                  interpolation=cv2.INTER_NEAREST)
                bbox, pos = convert_mask_to_bbox(mask_raw, self.H, self.W,
                                                 self.bbox_size)
                pos = normalize(pos, pos_mean, pos_std)
                mask_crop = normalize(crop(mask, bbox, self.H, self.W), 0.5,
                                      1).unsqueeze(0)
                img_crop = normalize(crop(img, bbox, self.H, self.W), 0.5,
                                     0.5).permute(2, 0, 1)

                if needs_box:
                    _, ret, _, _ = decode_mask_to_box(
                        obj['mask'], [self.bbox_size, self.bbox_size],
                        self.H, self.W)
                    ret = normalize(ret, box_mean, box_std)
                    hw = ret[2:]
                    # add_hw_state_flag (without box_only_flag) keeps the
                    # mask-derived position; the other modes take the
                    # position from the decoded box.
                    if args.box_only_flag or not args.add_hw_state_flag:
                        pos = ret[:2]

                identifier = get_identifier(obj)

                if args.box_only_flag:
                    state = torch.cat([pos, hw], 0)
                elif args.add_hw_state_flag or args.add_xyhw_state_flag:
                    state = torch.cat([mask_crop, pos, img_crop, hw], 0)
                elif args.rm_mask_state_flag:
                    state = torch.cat([mask_crop * 0, pos, img_crop], 0)
                else:
                    state = torch.cat([mask_crop, pos, img_crop], 0)
                object_inputs.append((state.unsqueeze(0), identifier))

            objs.append(object_inputs)

        attr = torch.cat(attrs, 0).view(n_objects, args.attr_dim,
                                        self.bbox_size, self.bbox_size)

        # Start from the first frame's states and append, along dim 1, the
        # state of the matching object from each later frame.
        feats = [objs[0][k][0] for k in range(n_objects)]
        for t in range(1, len(objs)):
            for a in range(n_objects):
                for b in range(n_objects):
                    if check_same_identifier(objs[0][a][1], objs[t][b][1]):
                        feats[a] = torch.cat([feats[a], objs[t][b][0]], 1)

        try:
            feats = torch.cat(feats, 0)
        except Exception:
            # Report which sample is inconsistent, then fail here instead of
            # continuing with `feats` still being a list.
            print(idx_video, idx_frame)
            raise

        ### prepare relation attributes
        n_relations = n_objects * n_objects
        Ra = torch.FloatTensor(
            np.ones((n_relations, args.relation_dim *
                     (args.n_his + 2), self.bbox_size, self.bbox_size)) *
            -0.5)

        # change to relative position
        relation_dim = args.relation_dim
        state_dim = args.state_dim
        # In box-only mode the state starts with the position channels;
        # otherwise they follow the single mask channel.
        pos_ch = 0 if args.box_only_flag else 1
        for a in range(n_objects):
            for b in range(n_objects):
                pair = a * n_objects + b
                Ra[pair, 1::relation_dim] = (
                    feats[a, pos_ch::state_dim] -
                    feats[b, pos_ch::state_dim])  # x
                Ra[pair, 2::relation_dim] = (
                    feats[a, pos_ch + 1::state_dim] -
                    feats[b, pos_ch + 1::state_dim])  # y

        # add collision attr
        gt = self.metadata[idx_video]['ground_truth']
        gt_ids = gt['objects']
        gt_collisions = gt['collisions']

        label_rel = torch.FloatTensor(
            np.ones((n_objects * n_objects, 1)) * -0.5)

        if args.edge_superv:
            for i in frame_ids:
                # Position of frame i inside the window.
                step = (i - first_frame) // frame_offset

                for collision in gt_collisions:
                    if not 0 <= collision['frame'] - i < frame_offset:
                        continue

                    id_0 = collision['object'][0]
                    id_1 = collision['object'][1]
                    gt_x = gt_y = None
                    for gt_obj in gt_ids:
                        if id_0 == gt_obj['id']:
                            gt_x = gt_obj
                        if id_1 == gt_obj['id']:
                            gt_y = gt_obj
                    if gt_x is None or gt_y is None:
                        # Unknown object id: skip rather than reuse values
                        # left over from a previous iteration.
                        continue
                    id_x = get_identifier(gt_x)
                    id_y = get_identifier(gt_y)

                    row = col = None
                    for k in range(n_objects):
                        if check_same_identifier(objs[0][k][1], id_x):
                            row = k
                        if check_same_identifier(objs[0][k][1], id_y):
                            col = k
                    if row is None or col is None:
                        # A colliding object is absent from the first frame
                        # of the window; there is no relation slot for it.
                        continue

                    idx_rel_xy = row * n_objects + col
                    idx_rel_yx = col * n_objects + row

                    Ra[idx_rel_xy, step * relation_dim] = 0.5
                    Ra[idx_rel_yx, step * relation_dim] = 0.5

                    if i == idx_frame + frame_offset:
                        label_rel[idx_rel_xy] = 1
                        label_rel[idx_rel_yx] = 1

        x = feats[:, :-state_dim]
        label_obj = feats[:, -state_dim:]
        # Turn the target channels into offsets from the previous frame.
        delta_channels = (1, 2, 0, 3) if args.box_only_flag else (1, 2)
        for ch in delta_channels:
            label_obj[:, ch] -= feats[:, -2 * state_dim + ch]

        rel = prepare_relations(n_objects)
        # Drop the relation attributes that belong to the target frame.
        rel.append(Ra[:, :-relation_dim])

        return attr, x, rel, label_obj, label_rel
Example #3
0
    def __getitem__(self, idx):
        """Assemble one sample from a window of ``n_his + 2`` proposal frames.

        The window starts at ``idx_frame - n_his * frame_offset`` and ends at
        ``idx_frame + frame_offset`` (inclusive), stepping by ``frame_offset``.
        Objects are associated across frames by the tube id returned from
        ``utilsTube.get_tube_id_from_bbox``.

        Args:
            idx: Index into ``self.valid_idx``; each entry holds
                ``(idx_video, idx_frame)``.

        Returns:
            A tuple ``(attr, x, rel, label_obj, label_rel)``:

            * ``attr`` -- ``encode_attr`` outputs of the objects in the first
              frame, viewed as ``(n_objects, attr_dim, bbox_size, bbox_size)``.
            * ``x`` -- per-object states of every frame except the last one,
              concatenated along dim 1.
            * ``rel`` -- result of ``prepare_relations(n_objects)`` with the
              relation attributes of all frames but the last appended.
            * ``label_obj`` -- state of the last frame, with its first four
              channels expressed as an offset from the previous frame.
            * ``label_rel`` -- ``(n_objects * n_objects, 1)`` tensor filled
              with ``-0.5`` (this variant never sets collision labels).

        Raises:
            ValueError: if a proposal cannot be assigned to any tube
                (``get_tube_id_from_bbox`` returned ``-1``).
            Exception: re-raised from ``torch.cat`` when the per-object
                feature tensors cannot be stacked (the offending video/frame
                index is printed first).

        Note:
            ``x`` and ``label_obj`` are slices of the same underlying
            ``feats`` tensor; ``label_obj`` is modified in place.
        """
        args = self.args
        n_his = args.n_his
        frame_offset = args.frame_offset
        idx_video, idx_frame = self.valid_idx[idx][0], self.valid_idx[idx][1]
        first_frame = idx_frame - n_his * frame_offset

        # The image directory depends on the video only, not on the frame.
        vid = idx_video // 1000
        ann_full_dir = os.path.join(
            self.data_dir, 'image_%02d000-%02d000' % (vid, vid + 1))

        objs = []
        attrs = []
        for i in range(first_frame, idx_frame + frame_offset + 1,
                       frame_offset):

            frame = self.metadata[idx_video]['proposals']['frames'][i]
            frame_filename = os.path.join(
                'video_' + str(idx_video).zfill(5),
                str(frame['frame_index'] + 1) + '.png')

            objects = frame['objects']
            n_objects = len(objects)

            img = self.loader(os.path.join(ann_full_dir, frame_filename))
            # Reverse the channel order and make the array contiguous.
            img = np.array(img)[:, :, ::-1].copy()
            # np.float was removed in NumPy 1.24; np.float64 is the dtype it
            # used to alias.
            img = cv2.resize(img, (self.W, self.H),
                             interpolation=cv2.INTER_AREA).astype(
                                 np.float64) / 255.

            ### prepare object inputs
            object_inputs = []
            for obj in objects:
                material = obj['material']
                shape = obj['shape']

                # Attributes are collected from the first frame only.
                if i == first_frame:
                    attrs.append(
                        encode_attr(material, shape, self.bbox_size,
                                    args.attr_dim))

                bbox_xyxy, xyhw_exp, _, crop_box_v2 = decode_mask_to_box(
                    obj['mask'], [self.bbox_size, self.bbox_size],
                    self.H, self.W)
                img_crop = normalize(crop(img, crop_box_v2, self.H, self.W),
                                     0.5, 0.5).permute(2, 0, 1)
                tube_id = utilsTube.get_tube_id_from_bbox(
                    bbox_xyxy, frame['frame_index'],
                    self.metadata[idx_video]['tubes'])
                if tube_id == -1:
                    # Fail loudly instead of dropping into a debugger, which
                    # cannot be serviced from a data-loading worker.
                    raise ValueError(
                        'no tube matches a proposal in video %s, frame %s' %
                        (idx_video, frame['frame_index']))

                if args.box_only_flag:
                    xyhw_norm = (xyhw_exp - 0.5) / 0.5
                    state = torch.cat([xyhw_norm], 0)
                elif args.new_mode == 1:
                    xyhw_norm = (xyhw_exp - 0.5) / 0.5
                    state = torch.cat([xyhw_norm, img_crop], 0)
                else:
                    state = torch.cat([xyhw_exp, img_crop], 0)
                object_inputs.append((state.unsqueeze(0), tube_id))

            objs.append(object_inputs)

        attr = torch.cat(attrs, 0).view(n_objects, args.attr_dim,
                                        self.bbox_size, self.bbox_size)

        # Start from the first frame's states and append, along dim 1, the
        # state of the object with the same tube id from each later frame.
        feats = [objs[0][k][0] for k in range(n_objects)]
        for t in range(1, len(objs)):
            for a in range(n_objects):
                for b in range(n_objects):
                    if objs[0][a][1] == objs[t][b][1]:
                        feats[a] = torch.cat([feats[a], objs[t][b][0]], 1)

        try:
            feats = torch.cat(feats, 0)
        except Exception:
            # Report which sample is inconsistent, then fail here instead of
            # continuing with `feats` still being a list.
            print(idx_video, idx_frame)
            raise

        ### prepare relation attributes
        n_relations = n_objects * n_objects
        Ra = torch.FloatTensor(
            np.ones((n_relations, args.relation_dim *
                     (args.n_his + 2), self.bbox_size, self.bbox_size)) *
            -0.5)

        # change to relative position
        relation_dim = args.relation_dim
        state_dim = args.state_dim
        # Box-only / new_mode == 1 store relative x, y only; the default mode
        # additionally stores relative h, w.
        if args.box_only_flag or args.new_mode == 1:
            n_rel_channels = 2
        else:
            n_rel_channels = 4
        for a in range(n_objects):
            for b in range(n_objects):
                pair = a * n_objects + b
                for ch in range(n_rel_channels):
                    # Relation channel 0 is left at its -0.5 fill value.
                    Ra[pair, ch + 1::relation_dim] = (
                        feats[a, ch::state_dim] - feats[b, ch::state_dim])

        label_rel = torch.FloatTensor(
            np.ones((n_objects * n_objects, 1)) * -0.5)

        x = feats[:, :-state_dim]
        label_obj = feats[:, -state_dim:]
        # Turn the first four target channels into offsets from the previous
        # frame.
        for ch in range(4):
            label_obj[:, ch] -= feats[:, -2 * state_dim + ch]

        rel = prepare_relations(n_objects)
        # Drop the relation attributes that belong to the target frame.
        rel.append(Ra[:, :-relation_dim])

        return attr, x, rel, label_obj, label_rel