def process_frame(self, changed_object_id=None):
        """Cache the environment's latest event, pose and resized frames.

        Reads ``self.env.last_event`` and stores on the instance:

        * ``event``     -- the event itself,
        * ``pose``      -- result of ``game_util.get_pose`` for that event,
        * ``s_t_orig``  -- the event's unmodified ``frame``,
        * ``s_t``       -- ``frame`` resized to the configured screen size,
        * ``s_t_depth`` -- ``depth_frame`` resized to the same size.

        ``changed_object_id`` is accepted but not used by this implementation.
        """
        latest = self.env.last_event
        self.event = latest
        self.pose = game_util.get_pose(latest)

        # Keep the raw frame around before producing the resized copies.
        self.s_t_orig = latest.frame

        screen_shape = (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH)
        self.s_t = game_util.imresize(
            latest.frame, screen_shape, rescale=False)
        self.s_t_depth = game_util.imresize(
            latest.depth_frame, screen_shape, rescale=False)
# Code example #2
# 0
    def process_frame(self, run_object_detection=False,
                      update_observations=False):
        """Advance the frame counter, refresh the pose and, on request, the observations.

        By default only ``self.im_count`` is incremented and ``self.pose`` is
        recomputed from ``self.event``; this is exactly what the method did
        before, when an unconditional early ``return`` left the rest of the
        body unreachable.  Passing ``update_observations=True`` runs that
        previously dead pipeline: image refresh, optional depth, object
        masks, object-memory update and the debug drawing.

        Args:
            run_object_detection: When falsy and ``constants.OBJECT_DETECTION``
                is set, ``self.object_detector`` is run on the current frame;
                otherwise masks come from ``self.event.class_masks``.
                NOTE(review): the name reads inverted relative to this
                behaviour -- confirm against callers before renaming.
            update_observations: Opt-in switch for the full pipeline.  The
                default ``False`` keeps the historical "count and pose only"
                behaviour.

        Returns:
            None.  All results are stored on ``self``.
        """
        self.im_count += 1
        self.pose = game_util.get_pose(self.event)
        if not update_observations:
            return

        self._refresh_images()

        # Written with the grouping Python already applied to the original
        # expression: ``and`` binds tighter than ``or``, so
        # ``not run_object_detection`` only gates the END_TO_END_BASELINE term.
        # NOTE(review): confirm that grouping is the intended one.
        detection_enabled = (
            constants.GT_OBJECT_DETECTION
            or constants.OBJECT_DETECTION
            or (constants.END_TO_END_BASELINE
                and constants.USE_OBJECT_DETECTION_AS_INPUT
                and not run_object_detection))
        if not detection_enabled:
            return

        use_detector = constants.OBJECT_DETECTION and not run_object_detection
        if use_detector:
            boxes, scores, class_names, masks = self._detect_objects()
        else:
            # The ground-truth path yields no boxes; drawing rebuilds them.
            boxes = None
            scores, class_names, masks = self._ground_truth_objects()

        if constants.RENDER_DEPTH_IMAGE or constants.PREDICT_DEPTH:
            self._update_object_memory(masks, class_names)

        if constants.DRAWING:
            # Previously ``boxes`` was unbound here whenever the ground-truth
            # path ran without GT_OBJECT_DETECTION set, raising NameError.
            # Ground-truth boxes are now used whenever the detector was not run.
            self._draw_detections(
                boxes, scores, class_names,
                constants.GT_OBJECT_DETECTION or not use_detector)

    def _record_time(self, row, t_start, label):
        """Add the time elapsed since ``t_start`` to ``self.times[row]``; print the mean every 100 calls."""
        self.times[row, 0] += time.time() - t_start
        self.times[row, 1] += 1
        if self.times[row, 1] % 100 == 0:
            print(label % (self.times[row, 0] / self.times[row, 1]))

    def _refresh_images(self):
        """Store the raw frame, the resized frame and (if enabled) a depth image on ``self``."""
        screen_shape = (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH)
        self.s_t_orig = self.event.frame
        self.s_t = game_util.imresize(
            self.event.frame, screen_shape, rescale=False)

        if constants.DRAWING:
            # Start from a copy so later drawing never touches the raw frame.
            self.detection_image = self.s_t_orig.copy()

        if constants.PREDICT_DEPTH:
            print("in predict depth")
            t_start = time.time()
            self.s_t_depth = self.depth_estimator.get_depth(self.s_t)
            self._record_time(0, t_start, 'depth time  %.3f')
        elif constants.RENDER_DEPTH_IMAGE:
            self.s_t_depth = game_util.imresize(
                self.event.depth_frame, screen_shape, rescale=False)

    def _detect_objects(self):
        """Run ``self.object_detector`` and return ``(boxes, scores, class_names, masks)``.

        Only detections whose class is in ``constants.OBJECT_CLASS_TO_ID`` are
        kept.  ``masks`` holds one entry per kept detection; detections of the
        same class share a single per-class score mask clipped to at most 1.
        """
        t_start = time.time()
        boxes, scores, class_names = self.object_detector.detect(
            game_util.imresize(self.event.frame, (608, 608), rescale=False))
        self._record_time(1, t_start, 'detection time %.3f')

        mask_dict = {}
        used_inds = []
        for ii, (box, score, class_name) in enumerate(
                zip(boxes, scores, class_names)):
            if class_name not in constants.OBJECT_CLASS_TO_ID:
                continue
            if class_name not in mask_dict:
                mask_dict[class_name] = np.zeros(
                    (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH),
                    dtype=np.float32)
            # Accumulate the score over the (inclusive) box area.
            mask_dict[class_name][box[1]:box[3] + 1,
                                  box[0]:box[2] + 1] += score
            used_inds.append(ii)
        mask_dict = {k: np.minimum(v, 1) for k, v in mask_dict.items()}

        if used_inds:
            used_inds = np.array(used_inds)
            boxes = boxes[used_inds]
            scores = scores[used_inds]
            class_names = class_names[used_inds]
        else:
            boxes = np.zeros((0, 4))
            scores = np.zeros(0)
            class_names = np.zeros(0)
        masks = [mask_dict[class_name] for class_name in class_names]

        if constants.END_TO_END_BASELINE:
            # NOTE(review): this branch sizes the last axis with
            # len(constants.OBJECTS) and uses float32, while the ground-truth
            # branch uses constants.NUM_CLASSES and uint8 -- confirm intent.
            self.detection_mask_image = np.zeros(
                (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH,
                 len(constants.OBJECTS)),
                dtype=np.float32)
            for cls in constants.OBJECTS:
                if cls not in mask_dict:
                    continue
                self.detection_mask_image[
                    :, :, constants.OBJECT_CLASS_TO_ID[cls]] = mask_dict[cls]

        return boxes, scores, class_names, masks

    def _ground_truth_objects(self):
        """Return ``(scores, class_names, masks)`` taken from ``self.event.class_masks``."""
        scores = []
        class_names = []
        masks = []
        for k, v in self.event.class_masks.items():
            if k in constants.OBJECT_CLASS_TO_ID and len(v) > 0:
                scores.append(1)
                class_names.append(k)
                masks.append(v)

        if constants.END_TO_END_BASELINE:
            self.detection_mask_image = np.zeros(
                (constants.SCREEN_HEIGHT, constants.SCREEN_WIDTH,
                 constants.NUM_CLASSES),
                dtype=np.uint8)
            for cls in constants.OBJECTS:
                if cls not in self.event.class_detections2D:
                    continue
                for box in self.event.class_detections2D[cls]:
                    self.detection_mask_image[
                        box[1]:box[3] + 1, box[0]:box[2] + 1,
                        constants.OBJECT_CLASS_TO_ID[cls]] = 1

        return scores, class_names, masks

    def _update_object_memory(self, masks, class_names):
        """Blend per-object mask scores into ``self.graph.memory`` using the depth image.

        For each mask, cells hit by the object are averaged with (or set to)
        the mask score; cells seen outside the mask have any score strictly
        between 0 and 1 decayed by a factor of 0.8.  Pixels at or beyond
        ``constants.MAX_DEPTH`` are ignored.
        """
        xzy = game_util.depth_to_world_coordinates(
            self.s_t_depth, self.pose,
            self.camera_height / constants.AGENT_STEP_SIZE)
        max_depth_mask = self.s_t_depth >= constants.MAX_DEPTH
        origin = np.array(self.bounds)[[0, 1]]

        for mask, class_name in zip(masks, class_names):
            # Channel 0 of the memory is skipped; object channels start at 1.
            channel = constants.OBJECT_CLASS_TO_ID[class_name] + 1

            # --- cells covered by the object ---
            mask_locs = (mask > 0)
            depth_locs = np.logical_not(max_depth_mask[mask_locs])
            locations = xzy[mask_locs, :2][depth_locs]
            score = mask[mask_locs][depth_locs]

            locations = locations.reshape(-1, 2)
            locations = np.round(locations).astype(np.int32)
            locations -= origin
            locations[:, 0] = np.clip(locations[:, 0], 0,
                                      self.bounds[2] - 1)
            locations[:, 1] = np.clip(locations[:, 1], 0,
                                      self.bounds[3] - 1)
            locations, unique_inds = game_util.unique_rows(
                locations, return_index=True)
            score = score[unique_inds]

            # Fancy indexing returns a copy, so the result is written back below.
            curr_score = self.graph.memory[
                locations[:, 1], locations[:, 0], channel]
            avg_locs = np.logical_and(curr_score > 0, curr_score < 1)
            curr_score[avg_locs] = (curr_score[avg_locs] * .5
                                    + score[avg_locs] * .5)
            unseen = curr_score == 0
            curr_score[unseen] = score[unseen]
            self.graph.memory[
                locations[:, 1], locations[:, 0], channel] = curr_score

            # --- cells seen but not covered: decay uncertain scores ---
            background = np.logical_not(mask_locs)
            depth_locs = np.logical_not(max_depth_mask[background])
            locations = xzy[background, :2][depth_locs]
            locations = locations.reshape(-1, 2)
            locations = np.round(locations).astype(np.int32)
            locations[:, 0] = np.clip(
                locations[:, 0], self.bounds[0],
                self.bounds[0] + self.bounds[2] - 1)
            locations[:, 1] = np.clip(
                locations[:, 1], self.bounds[1],
                self.bounds[1] + self.bounds[3] - 1)
            locations = game_util.unique_rows(locations)
            locations -= origin

            curr_score = self.graph.memory[
                locations[:, 1], locations[:, 0], channel]
            replace_locs = np.logical_and(curr_score > 0, curr_score < 1)
            curr_score[replace_locs] = curr_score[replace_locs] * .8
            self.graph.memory[
                locations[:, 1], locations[:, 0], channel] = curr_score

    def _draw_detections(self, boxes, scores, class_names, use_ground_truth):
        """Render detections onto the current frame and store it in ``self.detection_image``.

        When ``use_ground_truth`` is truthy the passed-in detections are
        replaced by the boxes in ``self.event.class_detections2D``.
        """
        if use_ground_truth:
            boxes = []
            scores = []
            class_names = []
            for k, v in self.event.class_detections2D.items():
                if k in constants.OBJECT_CLASS_TO_ID and len(v) > 0:
                    boxes.extend(v)
                    scores.extend([1] * len(v))
                    class_names.extend([k] * len(v))
        boxes = np.array(boxes)
        scores = np.array(scores)
        self.detection_image = detector.visualize_detections(
            self.event.frame, boxes, class_names, scores)