class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate object proposal and instance detection/segmentation outputs using
    LVIS's metrics and evaluation API.
    """
    def __init__(self, dataset_name, cfg, distributed, output_dir=None):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:
                    "json_file": the path to the LVIS format annotation
            cfg (CfgNode): config instance
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump results.
        """
        from lvis import LVIS

        self._tasks = self._tasks_from_config(cfg)
        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)

        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)
        # Test set json files do not contain annotations (evaluation must be
        # performed using the LVIS evaluation server).
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

    def reset(self):
        self._predictions = []
        self._lvis_results = []

    def _tasks_from_config(self, cfg):
        """
        Returns:
            tuple[str]: tasks that can be evaluated under the given configuration.
        """
        tasks = ("bbox", )
        if cfg.MODEL.MASK_ON:
            tasks = tasks + ("segm", )
        return tasks

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            # TODO this is ugly
            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)

                if instances.has("pred_masks"):
                    # use RLE to encode the masks, because uncompressed masks are too
                    # large and take too much memory, since this evaluator stores
                    # outputs of the entire dataset
                    rles = [
                        mask_util.encode(np.array(mask[:, :, None],
                                                  order="F"))[0]
                        for mask in instances.pred_masks
                    ]
                    for rle in rles:
                        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
                        # json writer which always produces strings cannot serialize a bytestream
                        # unless you decode it. Thankfully, utf-8 works out (which is also what
                        # the pycocotools/_mask.pyx does).
                        rle["counts"] = rle["counts"].decode("utf-8")
                    instances.pred_masks_rle = rles
                    instances.remove("pred_masks")

                prediction["instances"] = instances_to_json(
                    instances, input["image_id"])
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(
                    self._cpu_device)
            self._predictions.append(prediction)

    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            self._predictions = comm.gather(self._predictions, dst=0)
            self._predictions = list(itertools.chain(*self._predictions))

            if not comm.is_main_process():
                return

        if len(self._predictions) == 0:
            self._logger.warning(
                "[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(self._predictions, f)

        self._results = OrderedDict()
        if "proposals" in self._predictions[0]:
            self._eval_box_proposals()
        if "instances" in self._predictions[0]:
            self._eval_predictions(set(self._tasks))
        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)

    def _eval_predictions(self, tasks):
        """
        Evaluate self._predictions on the given tasks.
        Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results in the LVIS format ...")
        self._lvis_results = list(
            itertools.chain(*[x["instances"] for x in self._predictions]))

        # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
        for result in self._lvis_results:
            result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        for task in sorted(tasks):
            res = _evaluate_predictions_on_lvis(
                self._lvis_api,
                self._lvis_results,
                task,
                class_names=self._metadata.get("thing_classes"),
            )
            self._results[task] = res

    def _eval_box_proposals(self):
        """
        Evaluate the box proposals in self._predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in self._predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(self._predictions,
                                                self._lvis_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
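A minimal usage sketch for the evaluator above, assuming a standard detectron2 setup; the dataset name, cfg, and model are placeholders, while build_detection_test_loader and inference_on_dataset are detectron2's stock helpers:

from detectron2.data import build_detection_test_loader
from detectron2.evaluation import inference_on_dataset

# cfg and model are assumed to come from the usual config/checkpoint flow.
evaluator = LVISEvaluator("lvis_v0.5_val", cfg, distributed=True, output_dir="./output")
val_loader = build_detection_test_loader(cfg, "lvis_v0.5_val")
# inference_on_dataset drives the reset() / process() / evaluate() loop.
results = inference_on_dataset(model, val_loader, evaluator)
print(results)  # e.g. {"bbox": {"AP": ...}, "segm": {"AP": ...}}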
Example #2
class LVISDataset(CocoDataset):

    CLASSES = (
        'acorn', 'aerosol_can', 'air_conditioner', 'airplane', 'alarm_clock',
        'alcohol', 'alligator', 'almond', 'ambulance', 'amplifier', 'anklet',
        'antenna', 'apple', 'apple_juice', 'applesauce', 'apricot', 'apron',
        'aquarium', 'armband', 'armchair', 'armoire', 'armor', 'artichoke',
        'trash_can', 'ashtray', 'asparagus', 'atomizer', 'avocado', 'award',
        'awning', 'ax', 'baby_buggy', 'basketball_backboard', 'backpack',
        'handbag', 'suitcase', 'bagel', 'bagpipe', 'baguet', 'bait', 'ball',
        'ballet_skirt', 'balloon', 'bamboo', 'banana', 'Band_Aid', 'bandage',
        'bandanna', 'banjo', 'banner', 'barbell', 'barge', 'barrel',
        'barrette', 'barrow', 'baseball_base', 'baseball', 'baseball_bat',
        'baseball_cap', 'baseball_glove', 'basket', 'basketball_hoop',
        'basketball', 'bass_horn', 'bat_(animal)', 'bath_mat', 'bath_towel',
        'bathrobe', 'bathtub', 'batter_(food)', 'battery', 'beachball', 'bead',
        'beaker', 'bean_curd', 'beanbag', 'beanie', 'bear', 'bed',
        'bedspread', 'cow', 'beef_(food)', 'beeper', 'beer_bottle', 'beer_can',
        'beetle', 'bell', 'bell_pepper', 'belt', 'belt_buckle', 'bench',
        'beret', 'bib', 'Bible', 'bicycle', 'visor', 'binder', 'binoculars',
        'bird', 'birdfeeder', 'birdbath', 'birdcage', 'birdhouse',
        'birthday_cake', 'birthday_card', 'biscuit_(bread)', 'pirate_flag',
        'black_sheep', 'blackboard', 'blanket', 'blazer', 'blender', 'blimp',
        'blinker', 'blueberry', 'boar', 'gameboard', 'boat', 'bobbin',
        'bobby_pin', 'boiled_egg', 'bolo_tie', 'deadbolt', 'bolt', 'bonnet',
        'book', 'book_bag', 'bookcase', 'booklet', 'bookmark',
        'boom_microphone', 'boot', 'bottle', 'bottle_opener', 'bouquet',
        'bow_(weapon)', 'bow_(decorative_ribbons)', 'bow-tie', 'bowl',
        'pipe_bowl', 'bowler_hat', 'bowling_ball', 'bowling_pin',
        'boxing_glove', 'suspenders', 'bracelet', 'brass_plaque', 'brassiere',
        'bread-bin', 'breechcloth', 'bridal_gown', 'briefcase',
        'bristle_brush', 'broccoli', 'broach', 'broom', 'brownie',
        'brussels_sprouts', 'bubble_gum', 'bucket', 'horse_buggy', 'bull',
        'bulldog', 'bulldozer', 'bullet_train', 'bulletin_board',
        'bulletproof_vest', 'bullhorn', 'corned_beef', 'bun', 'bunk_bed',
        'buoy', 'burrito', 'bus_(vehicle)', 'business_card', 'butcher_knife',
        'butter', 'butterfly', 'button', 'cab_(taxi)', 'cabana', 'cabin_car',
        'cabinet', 'locker', 'cake', 'calculator', 'calendar', 'calf',
        'camcorder', 'camel', 'camera', 'camera_lens', 'camper_(vehicle)',
        'can', 'can_opener', 'candelabrum', 'candle', 'candle_holder',
        'candy_bar', 'candy_cane', 'walking_cane', 'canister', 'cannon',
        'canoe', 'cantaloup', 'canteen', 'cap_(headwear)', 'bottle_cap',
        'cape', 'cappuccino', 'car_(automobile)', 'railcar_(part_of_a_train)',
        'elevator_car', 'car_battery', 'identity_card', 'card', 'cardigan',
        'cargo_ship', 'carnation', 'horse_carriage', 'carrot', 'tote_bag',
        'cart', 'carton', 'cash_register', 'casserole', 'cassette', 'cast',
        'cat', 'cauliflower', 'caviar', 'cayenne_(spice)', 'CD_player',
        'celery', 'cellular_telephone', 'chain_mail', 'chair', 'chaise_longue',
        'champagne', 'chandelier', 'chap', 'checkbook', 'checkerboard',
        'cherry', 'chessboard', 'chest_of_drawers_(furniture)',
        'chicken_(animal)', 'chicken_wire', 'chickpea', 'Chihuahua',
        'chili_(vegetable)', 'chime', 'chinaware', 'crisp_(potato_chip)',
        'poker_chip', 'chocolate_bar', 'chocolate_cake', 'chocolate_milk',
        'chocolate_mousse', 'choker', 'chopping_board', 'chopstick',
        'Christmas_tree', 'slide', 'cider', 'cigar_box', 'cigarette',
        'cigarette_case', 'cistern', 'clarinet', 'clasp', 'cleansing_agent',
        'clementine', 'clip', 'clipboard', 'clock', 'clock_tower',
        'clothes_hamper', 'clothespin', 'clutch_bag', 'coaster', 'coat',
        'coat_hanger', 'coatrack', 'cock', 'coconut', 'coffee_filter',
        'coffee_maker', 'coffee_table', 'coffeepot', 'coil', 'coin',
        'colander', 'coleslaw', 'coloring_material', 'combination_lock',
        'pacifier', 'comic_book', 'computer_keyboard', 'concrete_mixer',
        'cone', 'control', 'convertible_(automobile)', 'sofa_bed', 'cookie',
        'cookie_jar', 'cooking_utensil', 'cooler_(for_food)',
        'cork_(bottle_plug)', 'corkboard', 'corkscrew', 'edible_corn',
        'cornbread', 'cornet', 'cornice', 'cornmeal', 'corset',
        'romaine_lettuce', 'costume', 'cougar', 'coverall', 'cowbell',
        'cowboy_hat', 'crab_(animal)', 'cracker', 'crape', 'crate', 'crayon',
        'cream_pitcher', 'credit_card', 'crescent_roll', 'crib', 'crock_pot',
        'crossbar', 'crouton', 'crow', 'crown', 'crucifix', 'cruise_ship',
        'police_cruiser', 'crumb', 'crutch', 'cub_(animal)', 'cube',
        'cucumber', 'cufflink', 'cup', 'trophy_cup', 'cupcake', 'hair_curler',
        'curling_iron', 'curtain', 'cushion', 'custard', 'cutting_tool',
        'cylinder', 'cymbal', 'dachshund', 'dagger', 'dartboard',
        'date_(fruit)', 'deck_chair', 'deer', 'dental_floss', 'desk',
        'detergent', 'diaper', 'diary', 'die', 'dinghy', 'dining_table', 'tux',
        'dish', 'dish_antenna', 'dishrag', 'dishtowel', 'dishwasher',
        'dishwasher_detergent', 'diskette', 'dispenser', 'Dixie_cup', 'dog',
        'dog_collar', 'doll', 'dollar', 'dolphin', 'domestic_ass', 'eye_mask',
        'doorbell', 'doorknob', 'doormat', 'doughnut', 'dove', 'dragonfly',
        'drawer', 'underdrawers', 'dress', 'dress_hat', 'dress_suit',
        'dresser', 'drill', 'drinking_fountain', 'drone', 'dropper',
        'drum_(musical_instrument)', 'drumstick', 'duck', 'duckling',
        'duct_tape', 'duffel_bag', 'dumbbell', 'dumpster', 'dustpan',
        'Dutch_oven', 'eagle', 'earphone', 'earplug', 'earring', 'easel',
        'eclair', 'eel', 'egg', 'egg_roll', 'egg_yolk', 'eggbeater',
        'eggplant', 'electric_chair', 'refrigerator', 'elephant', 'elk',
        'envelope', 'eraser', 'escargot', 'eyepatch', 'falcon', 'fan',
        'faucet', 'fedora', 'ferret', 'Ferris_wheel', 'ferry', 'fig_(fruit)',
        'fighter_jet', 'figurine', 'file_cabinet', 'file_(tool)', 'fire_alarm',
        'fire_engine', 'fire_extinguisher', 'fire_hose', 'fireplace',
        'fireplug', 'fish', 'fish_(food)', 'fishbowl', 'fishing_boat',
        'fishing_rod', 'flag', 'flagpole', 'flamingo', 'flannel', 'flash',
        'flashlight', 'fleece', 'flip-flop_(sandal)', 'flipper_(footwear)',
        'flower_arrangement', 'flute_glass', 'foal', 'folding_chair',
        'food_processor', 'football_(American)', 'football_helmet',
        'footstool', 'fork', 'forklift', 'freight_car', 'French_toast',
        'freshener', 'frisbee', 'frog', 'fruit_juice', 'fruit_salad',
        'frying_pan', 'fudge', 'funnel', 'futon', 'gag', 'garbage',
        'garbage_truck', 'garden_hose', 'gargle', 'gargoyle', 'garlic',
        'gasmask', 'gazelle', 'gelatin', 'gemstone', 'giant_panda',
        'gift_wrap', 'ginger', 'giraffe', 'cincture',
        'glass_(drink_container)', 'globe', 'glove', 'goat', 'goggles',
        'goldfish', 'golf_club', 'golfcart', 'gondola_(boat)', 'goose',
        'gorilla', 'gourd', 'surgical_gown', 'grape', 'grasshopper', 'grater',
        'gravestone', 'gravy_boat', 'green_bean', 'green_onion', 'griddle',
        'grillroom', 'grinder_(tool)', 'grits', 'grizzly', 'grocery_bag',
        'guacamole', 'guitar', 'gull', 'gun', 'hair_spray', 'hairbrush',
        'hairnet', 'hairpin', 'ham', 'hamburger', 'hammer', 'hammock',
        'hamper', 'hamster', 'hair_dryer', 'hand_glass', 'hand_towel',
        'handcart', 'handcuff', 'handkerchief', 'handle', 'handsaw',
        'hardback_book', 'harmonium', 'hat', 'hatbox', 'hatch', 'veil',
        'headband', 'headboard', 'headlight', 'headscarf', 'headset',
        'headstall_(for_horses)', 'hearing_aid', 'heart', 'heater',
        'helicopter', 'helmet', 'heron', 'highchair', 'hinge', 'hippopotamus',
        'hockey_stick', 'hog', 'home_plate_(baseball)', 'honey', 'fume_hood',
        'hook', 'horse', 'hose', 'hot-air_balloon', 'hotplate', 'hot_sauce',
        'hourglass', 'houseboat', 'hummingbird', 'hummus', 'polar_bear',
        'icecream', 'popsicle', 'ice_maker', 'ice_pack', 'ice_skate',
        'ice_tea', 'igniter', 'incense', 'inhaler', 'iPod',
        'iron_(for_clothing)', 'ironing_board', 'jacket', 'jam', 'jean',
        'jeep', 'jelly_bean', 'jersey', 'jet_plane', 'jewelry', 'joystick',
        'jumpsuit', 'kayak', 'keg', 'kennel', 'kettle', 'key', 'keycard',
        'kilt', 'kimono', 'kitchen_sink', 'kitchen_table', 'kite', 'kitten',
        'kiwi_fruit', 'knee_pad', 'knife', 'knight_(chess_piece)',
        'knitting_needle', 'knob', 'knocker_(on_a_door)', 'koala', 'lab_coat',
        'ladder', 'ladle', 'ladybug', 'lamb_(animal)', 'lamb-chop', 'lamp',
        'lamppost', 'lampshade', 'lantern', 'lanyard', 'laptop_computer',
        'lasagna', 'latch', 'lawn_mower', 'leather', 'legging_(clothing)',
        'Lego', 'lemon', 'lemonade', 'lettuce', 'license_plate', 'life_buoy',
        'life_jacket', 'lightbulb', 'lightning_rod', 'lime', 'limousine',
        'linen_paper', 'lion', 'lip_balm', 'lipstick', 'liquor', 'lizard',
        'Loafer_(type_of_shoe)', 'log', 'lollipop', 'lotion',
        'speaker_(stero_equipment)', 'loveseat', 'machine_gun', 'magazine',
        'magnet', 'mail_slot', 'mailbox_(at_home)', 'mallet', 'mammoth',
        'mandarin_orange', 'manger', 'manhole', 'map', 'marker', 'martini',
        'mascot', 'mashed_potato', 'masher', 'mask', 'mast',
        'mat_(gym_equipment)', 'matchbox', 'mattress', 'measuring_cup',
        'measuring_stick', 'meatball', 'medicine', 'melon', 'microphone',
        'microscope', 'microwave_oven', 'milestone', 'milk', 'minivan',
        'mint_candy', 'mirror', 'mitten', 'mixer_(kitchen_tool)', 'money',
        'monitor_(computer_equipment) computer_monitor', 'monkey', 'motor',
        'motor_scooter', 'motor_vehicle', 'motorboat', 'motorcycle',
        'mound_(baseball)', 'mouse_(animal_rodent)',
        'mouse_(computer_equipment)', 'mousepad', 'muffin', 'mug', 'mushroom',
        'music_stool', 'musical_instrument', 'nailfile', 'nameplate', 'napkin',
        'neckerchief', 'necklace', 'necktie', 'needle', 'nest', 'newsstand',
        'nightshirt', 'nosebag_(for_animals)', 'noseband_(for_animals)',
        'notebook', 'notepad', 'nut', 'nutcracker', 'oar', 'octopus_(food)',
        'octopus_(animal)', 'oil_lamp', 'olive_oil', 'omelet', 'onion',
        'orange_(fruit)', 'orange_juice', 'oregano', 'ostrich', 'ottoman',
        'overalls_(clothing)', 'owl', 'packet', 'inkpad', 'pad', 'paddle',
        'padlock', 'paintbox', 'paintbrush', 'painting', 'pajamas', 'palette',
        'pan_(for_cooking)', 'pan_(metal_container)', 'pancake', 'pantyhose',
        'papaya', 'paperclip', 'paper_plate', 'paper_towel', 'paperback_book',
        'paperweight', 'parachute', 'parakeet', 'parasail_(sports)',
        'parchment', 'parka', 'parking_meter', 'parrot',
        'passenger_car_(part_of_a_train)', 'passenger_ship', 'passport',
        'pastry', 'patty_(food)', 'pea_(food)', 'peach', 'peanut_butter',
        'pear', 'peeler_(tool_for_fruit_and_vegetables)', 'pegboard',
        'pelican', 'pen', 'pencil', 'pencil_box', 'pencil_sharpener',
        'pendulum', 'penguin', 'pennant', 'penny_(coin)', 'pepper',
        'pepper_mill', 'perfume', 'persimmon', 'baby', 'pet', 'petfood',
        'pew_(church_bench)', 'phonebook', 'phonograph_record', 'piano',
        'pickle', 'pickup_truck', 'pie', 'pigeon', 'piggy_bank', 'pillow',
        'pin_(non_jewelry)', 'pineapple', 'pinecone', 'ping-pong_ball',
        'pinwheel', 'tobacco_pipe', 'pipe', 'pistol', 'pita_(bread)',
        'pitcher_(vessel_for_liquid)', 'pitchfork', 'pizza', 'place_mat',
        'plate', 'platter', 'playing_card', 'playpen', 'pliers',
        'plow_(farm_equipment)', 'pocket_watch', 'pocketknife',
        'poker_(fire_stirring_tool)', 'pole', 'police_van', 'polo_shirt',
        'poncho', 'pony', 'pool_table', 'pop_(soda)', 'portrait',
        'postbox_(public)', 'postcard', 'poster', 'pot', 'flowerpot', 'potato',
        'potholder', 'pottery', 'pouch', 'power_shovel', 'prawn', 'printer',
        'projectile_(weapon)', 'projector', 'propeller', 'prune', 'pudding',
        'puffer_(fish)', 'puffin', 'pug-dog', 'pumpkin', 'puncher', 'puppet',
        'puppy', 'quesadilla', 'quiche', 'quilt', 'rabbit', 'race_car',
        'racket', 'radar', 'radiator', 'radio_receiver', 'radish', 'raft',
        'rag_doll', 'raincoat', 'ram_(animal)', 'raspberry', 'rat',
        'razorblade', 'reamer_(juicer)', 'rearview_mirror', 'receipt',
        'recliner', 'record_player', 'red_cabbage', 'reflector',
        'remote_control', 'rhinoceros', 'rib_(food)', 'rifle', 'ring',
        'river_boat', 'road_map', 'robe', 'rocking_chair', 'roller_skate',
        'Rollerblade', 'rolling_pin', 'root_beer',
        'router_(computer_equipment)', 'rubber_band', 'runner_(carpet)',
        'plastic_bag', 'saddle_(on_an_animal)', 'saddle_blanket', 'saddlebag',
        'safety_pin', 'sail', 'salad', 'salad_plate', 'salami',
        'salmon_(fish)', 'salmon_(food)', 'salsa', 'saltshaker',
        'sandal_(type_of_shoe)', 'sandwich', 'satchel', 'saucepan', 'saucer',
        'sausage', 'sawhorse', 'saxophone', 'scale_(measuring_instrument)',
        'scarecrow', 'scarf', 'school_bus', 'scissors', 'scoreboard',
        'scrambled_eggs', 'scraper', 'scratcher', 'screwdriver',
        'scrubbing_brush', 'sculpture', 'seabird', 'seahorse', 'seaplane',
        'seashell', 'seedling', 'serving_dish', 'sewing_machine', 'shaker',
        'shampoo', 'shark', 'sharpener', 'Sharpie', 'shaver_(electric)',
        'shaving_cream', 'shawl', 'shears', 'sheep', 'shepherd_dog',
        'sherbert', 'shield', 'shirt', 'shoe', 'shopping_bag', 'shopping_cart',
        'short_pants', 'shot_glass', 'shoulder_bag', 'shovel', 'shower_head',
        'shower_curtain', 'shredder_(for_paper)', 'sieve', 'signboard', 'silo',
        'sink', 'skateboard', 'skewer', 'ski', 'ski_boot', 'ski_parka',
        'ski_pole', 'skirt', 'sled', 'sleeping_bag', 'sling_(bandage)',
        'slipper_(footwear)', 'smoothie', 'snake', 'snowboard', 'snowman',
        'snowmobile', 'soap', 'soccer_ball', 'sock', 'soda_fountain',
        'carbonated_water', 'sofa', 'softball', 'solar_array', 'sombrero',
        'soup', 'soup_bowl', 'soupspoon', 'sour_cream', 'soya_milk',
        'space_shuttle', 'sparkler_(fireworks)', 'spatula', 'spear',
        'spectacles', 'spice_rack', 'spider', 'sponge', 'spoon', 'sportswear',
        'spotlight', 'squirrel', 'stapler_(stapling_machine)', 'starfish',
        'statue_(sculpture)', 'steak_(food)', 'steak_knife',
        'steamer_(kitchen_appliance)', 'steering_wheel', 'stencil',
        'stepladder', 'step_stool', 'stereo_(sound_system)', 'stew', 'stirrer',
        'stirrup', 'stockings_(leg_wear)', 'stool', 'stop_sign', 'brake_light',
        'stove', 'strainer', 'strap', 'straw_(for_drinking)', 'strawberry',
        'street_sign', 'streetlight', 'string_cheese', 'stylus', 'subwoofer',
        'sugar_bowl', 'sugarcane_(plant)', 'suit_(clothing)', 'sunflower',
        'sunglasses', 'sunhat', 'sunscreen', 'surfboard', 'sushi', 'mop',
        'sweat_pants', 'sweatband', 'sweater', 'sweatshirt', 'sweet_potato',
        'swimsuit', 'sword', 'syringe', 'Tabasco_sauce', 'table-tennis_table',
        'table', 'table_lamp', 'tablecloth', 'tachometer', 'taco', 'tag',
        'taillight', 'tambourine', 'army_tank', 'tank_(storage_vessel)',
        'tank_top_(clothing)', 'tape_(sticky_cloth_or_paper)', 'tape_measure',
        'tapestry', 'tarp', 'tartan', 'tassel', 'tea_bag', 'teacup',
        'teakettle', 'teapot', 'teddy_bear', 'telephone', 'telephone_booth',
        'telephone_pole', 'telephoto_lens', 'television_camera',
        'television_set', 'tennis_ball', 'tennis_racket', 'tequila',
        'thermometer', 'thermos_bottle', 'thermostat', 'thimble', 'thread',
        'thumbtack', 'tiara', 'tiger', 'tights_(clothing)', 'timer', 'tinfoil',
        'tinsel', 'tissue_paper', 'toast_(food)', 'toaster', 'toaster_oven',
        'toilet', 'toilet_tissue', 'tomato', 'tongs', 'toolbox', 'toothbrush',
        'toothpaste', 'toothpick', 'cover', 'tortilla', 'tow_truck', 'towel',
        'towel_rack', 'toy', 'tractor_(farm_equipment)', 'traffic_light',
        'dirt_bike', 'trailer_truck', 'train_(railroad_vehicle)', 'trampoline',
        'tray', 'tree_house', 'trench_coat', 'triangle_(musical_instrument)',
        'tricycle', 'tripod', 'trousers', 'truck', 'truffle_(chocolate)',
        'trunk', 'vat', 'turban', 'turkey_(bird)', 'turkey_(food)', 'turnip',
        'turtle', 'turtleneck_(clothing)', 'typewriter', 'umbrella',
        'underwear', 'unicycle', 'urinal', 'urn', 'vacuum_cleaner', 'valve',
        'vase', 'vending_machine', 'vent', 'videotape', 'vinegar', 'violin',
        'vodka', 'volleyball', 'vulture', 'waffle', 'waffle_iron', 'wagon',
        'wagon_wheel', 'walking_stick', 'wall_clock', 'wall_socket', 'wallet',
        'walrus', 'wardrobe', 'wasabi', 'automatic_washer', 'watch',
        'water_bottle', 'water_cooler', 'water_faucet', 'water_filter',
        'water_heater', 'water_jug', 'water_gun', 'water_scooter', 'water_ski',
        'water_tower', 'watering_can', 'watermelon', 'weathervane', 'webcam',
        'wedding_cake', 'wedding_ring', 'wet_suit', 'wheel', 'wheelchair',
        'whipped_cream', 'whiskey', 'whistle', 'wick', 'wig', 'wind_chime',
        'windmill', 'window_box_(for_plants)', 'windshield_wiper', 'windsock',
        'wine_bottle', 'wine_bucket', 'wineglass', 'wing_chair',
        'blinder_(for_horses)', 'wok', 'wolf', 'wooden_spoon', 'wreath',
        'wrench', 'wristband', 'wristlet', 'yacht', 'yak', 'yogurt',
        'yoke_(animal_equipment)', 'zebra', 'zucchini')

    def load_annotations(self, ann_file):
        """Load annotation from lvis style annotation file.

        Args:
            ann_file (str): Path of annotation file.

        Returns:
            list[dict]: Annotation info from LVIS api.
        """

        try:
            from lvis import LVIS
        except ImportError:
            raise ImportError('Please follow config/lvis/README.md to '
                              'install open-mmlab forked lvis first.')
        self.coco = LVIS(ann_file)
        assert not self.custom_classes, 'LVIS custom classes is not supported'
        self.cat_ids = self.coco.get_cat_ids()
        self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
        self.img_ids = self.coco.get_img_ids()
        data_infos = []
        for i in self.img_ids:
            info = self.coco.load_imgs([i])[0]
            if info['file_name'].startswith('COCO'):
                # Convert from the COCO 2014 file naming convention of
                # COCO_[train/val/test]2014_000000000000.jpg to the 2017
                # naming convention of 000000000000.jpg
                # (LVIS v1 will fix this naming issue)
                info['filename'] = info['file_name'][-16:]
            else:
                info['filename'] = info['file_name']
            data_infos.append(info)
        return data_infos

    def fast_eval_recall(self, results, proposal_nums, iou_thrs, logger=None):
        gt_bboxes = []
        for i in range(len(self.img_ids)):
            ann_ids = self.coco.get_ann_ids(img_ids=[self.img_ids[i]])
            ann_info = self.coco.load_anns(ann_ids)
            if len(ann_info) == 0:
                gt_bboxes.append(np.zeros((0, 4)))
                continue
            bboxes = []
            for ann in ann_info:
                if ann.get('ignore', False):
                    continue
                x1, y1, w, h = ann['bbox']
                bboxes.append([x1, y1, x1 + w, y1 + h])
            bboxes = np.array(bboxes, dtype=np.float32)
            if bboxes.shape[0] == 0:
                bboxes = np.zeros((0, 4))
            gt_bboxes.append(bboxes)

        recalls = eval_recalls(gt_bboxes,
                               results,
                               proposal_nums,
                               iou_thrs,
                               logger=logger)
        ar = recalls.mean(axis=1)
        return ar

    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=(100, 300, 1000),
                 iou_thrs=np.arange(0.5, 0.96, 0.05)):
        """Evaluation in LVIS protocol.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated. Options are
                'bbox', 'segm', 'proposal', 'proposal_fast'.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            jsonfile_prefix (str | None): The prefix of output json files,
                including the file path and the filename prefix. If None, a
                temporary directory is used instead. Default: None.
            classwise (bool): Whether to evaluate the AP for each class.
            proposal_nums (Sequence[int]): Proposal number used for evaluating
                recalls, such as recall@100, recall@1000.
                Default: (100, 300, 1000).
            iou_thrs (Sequence[float]): IoU thresholds used for evaluating
                recalls. If set to a list, the average recall of all IoUs will
                also be computed. Default: np.arange(0.5, 0.96, 0.05).

        Returns:
            dict[str, float]: LVIS style metrics.
        """

        try:
            from lvis import LVISResults, LVISEval
        except ImportError:
            raise ImportError('Please follow config/lvis/README.md to '
                              'install open-mmlab forked lvis first.')
        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: {} != {}'.
            format(len(results), len(self)))

        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError('metric {} is not supported'.format(metric))

        if jsonfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
        else:
            tmp_dir = None
        result_files = self.results2json(results, jsonfile_prefix)

        eval_results = {}
        # get original api
        lvis_gt = self.coco
        for metric in metrics:
            msg = 'Evaluating {}...'.format(metric)
            if logger is None:
                msg = '\n' + msg
            print_log(msg, logger=logger)

            if metric == 'proposal_fast':
                ar = self.fast_eval_recall(results,
                                           proposal_nums,
                                           iou_thrs,
                                           logger='silent')
                log_msg = []
                for i, num in enumerate(proposal_nums):
                    eval_results['AR@{}'.format(num)] = ar[i]
                    log_msg.append('\nAR@{}\t{:.4f}'.format(num, ar[i]))
                log_msg = ''.join(log_msg)
                print_log(log_msg, logger=logger)
                continue

            if metric not in result_files:
                raise KeyError('{} is not in results'.format(metric))
            try:
                lvis_dt = LVISResults(lvis_gt, result_files[metric])
            except IndexError:
                print_log('The testing results of the whole dataset are empty.',
                          logger=logger,
                          level=logging.ERROR)
                break

            iou_type = 'bbox' if metric == 'proposal' else metric
            lvis_eval = LVISEval(lvis_gt, lvis_dt, iou_type)
            lvis_eval.params.imgIds = self.img_ids
            if metric == 'proposal':
                lvis_eval.params.useCats = 0
                lvis_eval.params.maxDets = list(proposal_nums)
                lvis_eval.evaluate()
                lvis_eval.accumulate()
                lvis_eval.summarize()
                for k, v in lvis_eval.get_results().items():
                    if k.startswith('AR'):
                        val = float('{:.3f}'.format(float(v)))
                        eval_results[k] = val
            else:
                lvis_eval.evaluate()
                lvis_eval.accumulate()
                lvis_eval.summarize()
                lvis_results = lvis_eval.get_results()
                if classwise:  # Compute per-category AP
                    # from https://github.com/facebookresearch/detectron2/
                    precisions = lvis_eval.eval['precision']
                    # precision: (iou, recall, cls, area range, max dets)
                    assert len(self.cat_ids) == precisions.shape[2]

                    results_per_category = []
                    for idx, catId in enumerate(self.cat_ids):
                        # area range index 0: all area ranges
                        # max dets index -1: typically 100 per image
                        nm = self.coco.load_cats(catId)[0]
                        precision = precisions[:, :, idx, 0, -1]
                        precision = precision[precision > -1]
                        if precision.size:
                            ap = np.mean(precision)
                        else:
                            ap = float('nan')
                        results_per_category.append(
                            (f'{nm["name"]}', f'{float(ap):0.3f}'))

                    num_columns = min(6, len(results_per_category) * 2)
                    results_flatten = list(
                        itertools.chain(*results_per_category))
                    headers = ['category', 'AP'] * (num_columns // 2)
                    results_2d = itertools.zip_longest(*[
                        results_flatten[i::num_columns]
                        for i in range(num_columns)
                    ])
                    table_data = [headers]
                    table_data += [result for result in results_2d]
                    table = AsciiTable(table_data)
                    print_log('\n' + table.table, logger=logger)

                for k, v in lvis_results.items():
                    if k.startswith('AP'):
                        key = '{}_{}'.format(metric, k)
                        val = float('{:.3f}'.format(float(v)))
                        eval_results[key] = val
                ap_summary = ' '.join([
                    '{}:{:.3f}'.format(k, float(v))
                    for k, v in lvis_results.items() if k.startswith('AP')
                ])
                eval_results['{}_mAP_copypaste'.format(metric)] = ap_summary
            lvis_eval.print_results()
        if tmp_dir is not None:
            tmp_dir.cleanup()
        return eval_results
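A usage sketch for evaluate() above, in the usual mmdetection test flow; dataset and results are placeholders produced elsewhere (e.g. by tools/test.py):

# dataset: an instance of the LVISDataset above;
# results: list of per-image outputs from single_gpu_test / multi_gpu_test.
metrics = dataset.evaluate(results, metric=['bbox', 'segm'], classwise=True)
# keys follow the '{metric}_{name}' pattern assembled above:
print(metrics['bbox_AP'], metrics['bbox_mAP_copypaste'])

Example #3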
class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate object proposal and instance detection/segmentation outputs using
    LVIS's metrics and evaluation API.
    """
    def __init__(self,
                 dataset_name,
                 meta,
                 cfg,
                 distributed,
                 output_dir=None,
                 dump=False):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:

                    "json_file": the path to the LVIS format annotation

            meta (SimpleNamespace): dataset metadata.
            cfg (CfgNode): cvpods Config instance.
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump results.
            dump (bool): If True, after the evaluation is completed, a Markdown file
                that records the model evaluation metrics and corresponding scores
                will be generated in the working directory.
        """
        from lvis import LVIS
        self._dump = dump
        self._tasks = self._tasks_from_config(cfg)
        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)

        self._metadata = meta
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)
        # Test set json files do not contain annotations (evaluation must be
        # performed using the LVIS evaluation server).
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

    def reset(self):
        self._predictions = []
        self._lvis_results = []
        self._dump_infos = []  # per task

    def _tasks_from_config(self, cfg):
        """
        Returns:
            tuple[str]: tasks that can be evaluated under the given configuration.
        """
        tasks = ("bbox", )
        if cfg.MODEL.MASK_ON:
            tasks = tasks + ("segm", )
        return tasks

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(
                    instances, input["image_id"])
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(
                    self._cpu_device)
            self._predictions.append(prediction)

    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            self._predictions = comm.gather(self._predictions, dst=0)
            self._predictions = list(itertools.chain(*self._predictions))

            if not comm.is_main_process():
                return

        if len(self._predictions) == 0:
            self._logger.warning(
                "[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(self._predictions, f)

        self._results = OrderedDict()
        if "proposals" in self._predictions[0]:
            self._eval_box_proposals()
        if "instances" in self._predictions[0]:
            self._eval_predictions(set(self._tasks))

        if self._dump:
            _dump_to_markdown(self._dump_infos)

        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)

    def _eval_predictions(self, tasks):
        """
        Evaluate self._predictions on the given tasks.
        Fill self._results with the metrics of the tasks.
        """
        self._logger.info("Preparing results in the LVIS format ...")
        self._lvis_results = list(
            itertools.chain(*[x["instances"] for x in self._predictions]))

        # LVIS evaluator can be used to evaluate results for COCO dataset categories.
        # In this case `_metadata` variable will have a field with COCO-specific category mapping.
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            reverse_id_mapping = {
                v: k
                for k, v in
                self._metadata.thing_dataset_id_to_contiguous_id.items()
            }
            for result in self._lvis_results:
                result["category_id"] = reverse_id_mapping[
                    result["category_id"]]
        else:
            # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
            for result in self._lvis_results:
                result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        for task in sorted(tasks):
            lvis_eval = (_evaluate_predictions_on_lvis(
                self._lvis_api, self._lvis_results, task)
                         if len(self._lvis_results) > 0 else None)
            res = self._derive_lvis_results(lvis_eval, task)
            self._results[task] = res

    def _eval_box_proposals(self):
        """
        Evaluate the box proposals in self._predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in self._predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(self._predictions,
                                                self._lvis_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res

    def _derive_lvis_results(self, lvis_eval, iou_type):
        """
        Derive the desired score numbers from summarized LVISEval.

        Args:
            lvis_eval (None or LVISEval): None represents no predictions from model.
            iou_type (str): specific evaluation task, optional values are: "bbox", "segm".

        Returns:
            a dict of {metric name: score}
        """
        metrics = {
            "bbox":
            ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
            "segm":
            ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"],
        }[iou_type]

        if lvis_eval is None:
            self._logger.warning("No predictions from the model!")
            return {metric: float("nan") for metric in metrics}

        # Pull the standard metrics from the LVIS results
        results = lvis_eval.get_results()
        results = {metric: float(results[metric] * 100) for metric in metrics}
        small_table = create_small_table(results)
        self._logger.info("Evaluation results for {}: \n".format(iou_type) +
                          small_table)

        if self._dump:
            dump_info_one_task = {
                "task": iou_type,
                "lvis_eval": lvis_eval,
                "tables": [small_table],
            }
            self._dump_infos.append(dump_info_one_task)
        return results
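Construction sketch for the cvpods-style variant above; the metadata object, cfg, and paths are placeholders (the docstring only requires meta to carry a json_file attribute):

from types import SimpleNamespace

meta = SimpleNamespace(json_file="datasets/lvis/lvis_v0.5_val.json")
evaluator = LVISEvaluator("lvis_v0.5_val", meta, cfg, distributed=False,
                          output_dir="./output", dump=True)
# After evaluate(), each task maps to {"AP": ..., "AP50": ..., "APr": ..., ...};
# dump=True additionally collects the per-task tables for a Markdown report.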
Example #4
class LVISEvalCustom(LVISEval):
    def __init__(self, lvis_gt, lvis_dt, iou_type="segm"):
        """Constructor for LVISEval.
        Args:
            lvis_gt (LVIS class instance, or str containing path of annotation file)
            lvis_dt (LVISResult class instance, or str containing path of result file,
            or list of dict)
            iou_type (str): segm or bbox evaluation
        """
        self.logger = logging.getLogger(__name__)

        if iou_type not in ["bbox", "segm"]:
            raise ValueError("iou_type: {} is not supported.".format(iou_type))

        if isinstance(lvis_gt, LVIS):
            self.lvis_gt = lvis_gt
        elif isinstance(lvis_gt, str):
            self.lvis_gt = LVIS(lvis_gt)
        else:
            raise TypeError("Unsupported type {} of lvis_gt.".format(lvis_gt))

        if isinstance(lvis_dt, LVISResults):
            self.lvis_dt = lvis_dt
        elif isinstance(lvis_dt, (str, list)):
            # set max_dets=-1 so that no detections are dropped by a per-image cap
            self.lvis_dt = LVISResults(self.lvis_gt, lvis_dt, max_dets=-1)
        else:
            raise TypeError("Unsupported type {} of lvis_dt.".format(lvis_dt))

        # per-image per-category evaluation results
        self.eval_imgs = defaultdict(list)
        self.eval = {}  # accumulated evaluation results
        self._gts = defaultdict(list)  # gt for evaluation
        self._dts = defaultdict(list)  # dt for evaluation
        self.params = ParamsCustom(iou_type=iou_type)  # parameters
        self.results = OrderedDict()
        self.ious = {}  # ious between all gts and dts

        self.params.img_ids = sorted(self.lvis_gt.get_img_ids())
        self.params.cat_ids = sorted(self.lvis_gt.get_cat_ids())

    def _prepare(self):
        """Prepare self._gts and self._dts for evaluation based on params."""
        cat_ids = self.params.cat_ids if self.params.cat_ids else None

        gts = self.lvis_gt.load_anns(
            self.lvis_gt.get_ann_ids(img_ids=self.params.img_ids, cat_ids=cat_ids)
        )
        dts = self.lvis_dt.load_anns(
            self.lvis_dt.get_ann_ids(img_ids=self.params.img_ids,
                                     cat_ids=None if self.params.use_proposal else cat_ids)
        )
        # convert ground truth and detections to RLE masks if iou_type == 'segm'
        if self.params.iou_type == "segm":
            self._to_mask(gts, self.lvis_gt)
            self._to_mask(dts, self.lvis_dt)

        # set ignore flag
        for gt in gts:
            if "ignore" not in gt:
                gt["ignore"] = 0

        for gt in gts:
            self._gts[gt["image_id"], gt["category_id"]].append(gt)

        # For federated dataset evaluation we will filter out all dt for an
        # image which belong to categories not present in the gt and not
        # present in the negative list for that image. In other words, the
        # detector is not penalized for categories for which we have no gt
        # information about presence or absence in an image.
        img_data = self.lvis_gt.load_imgs(ids=self.params.img_ids)
        # per-image map of categories not exhaustively annotated in the image
        img_nl = {d["id"]: d["not_exhaustive_category_ids"] for d in img_data}
        # per image list of categories present in image
        img_pl = defaultdict(set)
        for ann in gts:
            img_pl[ann["image_id"]].add(ann["category_id"])
        # per-image map of categories which have missing gt; for these
        # categories we don't penalize the detector for false positives.
        self.img_nel = {d["id"]: d["not_exhaustive_category_ids"] for d in img_data}

        for dt in dts:
            img_id, cat_id = dt["image_id"], dt["category_id"]
            if self.params.use_proposal:  # for proposal eval
                for cat_id in img_pl[img_id]:
                    dt['category_id'] = cat_id
                    self._dts[img_id, cat_id].append(dt)
                continue
            elif cat_id not in img_nl[img_id] and cat_id not in img_pl[img_id]:
                continue
            self._dts[img_id, cat_id].append(dt)

        self.freq_groups = self._prepare_freq_group()

    def _summarize(
        self, summary_type, iou_thr=None, area_rng="all", freq_group_idx=None
    ):
        aidx = [
            idx
            for idx, _area_rng in enumerate(self.params.area_rng_lbl)
            if _area_rng == area_rng
        ]

        if summary_type == 'ap':
            s = self.eval["precision"]
            if iou_thr is not None:
                tidx = np.where(iou_thr == self.params.iou_thrs)[0]
                s = s[tidx]
            if freq_group_idx is not None:
                s = s[:, :, self.freq_groups[freq_group_idx], aidx]
            else:
                s = s[:, :, :, aidx]
        else:
            s = self.eval["recall"]
            if iou_thr is not None:
                tidx = np.where(iou_thr == self.params.iou_thrs)[0]
                s = s[tidx]
            if freq_group_idx is not None:  # add freq_group for recall
                s = s[:, self.freq_groups[freq_group_idx], aidx]
            else:
                s = s[:, :, aidx]

        if len(s[s > -1]) == 0:
            mean_s = -1
        else:
            mean_s = np.mean(s[s > -1])
        return mean_s

    def summarize(self):
        """Compute and display summary metrics for evaluation results."""
        if not self.eval:
            raise RuntimeError("Please run accumulate() first.")

        max_dets = self.params.max_dets

        self.results["AP"]   = self._summarize('ap')
        self.results["AP50"] = self._summarize('ap', iou_thr=0.50)
        self.results["AP75"] = self._summarize('ap', iou_thr=0.75)
        self.results["APs"]  = self._summarize('ap', area_rng="small")
        self.results["APm"]  = self._summarize('ap', area_rng="medium")
        self.results["APl"]  = self._summarize('ap', area_rng="large")
        self.results["APr"]  = self._summarize('ap', freq_group_idx=0)
        self.results["APc"]  = self._summarize('ap', freq_group_idx=1)
        self.results["APf"]  = self._summarize('ap', freq_group_idx=2)

        key = "AR@{}".format(max_dets)
        self.results[key] = self._summarize('ar')

        for area_rng in ["small", "medium", "large"]:
            key = "AR{}@{}".format(area_rng[0], max_dets)
            self.results[key] = self._summarize('ar', area_rng=area_rng)
        # add freq_group for recall
        for idx, freq_group in enumerate(self.params.img_count_lbl):
            key = "AR{}@{}".format(freq_group[0], max_dets)
            self.results[key] = self._summarize('ar', freq_group_idx=idx)
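Running the customized evaluator above follows the stock LVISEval call order; the annotation and result file paths are placeholders:

lvis_eval = LVISEvalCustom("lvis_v0.5_val.json", "segm_results.json", iou_type="segm")
lvis_eval.evaluate()
lvis_eval.accumulate()
lvis_eval.summarize()
# summarize() also fills the rare/common/frequent buckets added above:
print(lvis_eval.results["APr"], lvis_eval.results["APc"], lvis_eval.results["APf"])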
Example #5
class IDFTransformer(nn.Module):  # class header inferred from the super() call below
    def __init__(self, annfile, dset_name, device='cuda', reduce='sum',
                 reduce_mini_batch=True):
        super(IDFTransformer, self).__init__()
        cwd = os.getenv('owd')
        self.reduce = reduce
        self.reduce_mini_batch = reduce_mini_batch
        self.loss = nn.BCELoss(reduction=reduce)
        annfile = os.path.join(cwd, annfile)
        idf_path = os.path.join(cwd, dset_name + "_files")
        self.device = device
        if not os.path.exists(idf_path):
            os.mkdir(idf_path)
        if not os.path.exists(os.path.join(idf_path, 'idf.csv')):
            df = pd.DataFrame()
            if dset_name == 'coco':
                coco = COCO(annfile)
                ims = []
                last_cat = coco.getCatIds()[-1]
                num_classes = last_cat + 1  # +1 for background
                self.num_classes = num_classes
                for imgid in coco.getImgIds():
                    anids = coco.getAnnIds(imgIds=imgid)
                    categories = []
                    for anid in anids:
                        categories.append(coco.loadAnns(int(anid))[0]['category_id'])
                    ims.append(categories)
            else:
                lvis = LVIS(annfile)
                ims = []
                last_cat = lvis.get_cat_ids()[-1]
                num_classes = last_cat + 1  # +1 for background
                self.num_classes = num_classes
                for imgid in lvis.get_img_ids():
                    anids = lvis.get_ann_ids(img_ids=[imgid])
                    categories = []
                    for annot in lvis.load_anns(anids):
                        categories.append(annot['category_id'])
                    ims.append(categories)

            final = 0
            k = 0
            print('calculating idf ...')
            for im in tqdm(ims):
                # per-image histogram of category ids, stacked as sparse rows
                cats = np.array(im, dtype=np.int64)  # np.int was removed in NumPy >= 1.24
                cats = np.bincount(cats, minlength=self.num_classes)
                cats = coo_matrix(np.array([cats]))
                if k == 0:
                    final = cats
                else:
                    final = vstack([cats, final])
                k = k + 1

            mask = final.sum(axis=0) > 0
            mask = mask.tolist()[0]
            final = final.tocsr()[:, mask]  # drop classes that never occur
            self.num_classes = final.shape[1]
            doc_freq = (final > 0).sum(axis=0)
            smooth = (np.log((final.shape[0] + 1) / (doc_freq + 1)) + 1).tolist()[0]
            raw = (np.log(final.shape[0] / doc_freq)).tolist()[0]
            prob = (np.log((final.shape[0] - doc_freq) / doc_freq)).tolist()[0]
            df['smooth'] = smooth
            df['raw'] = raw
            df['prob'] = prob
            self.idf_weights = {}
            self.idf_weights['smooth'] = torch.tensor(
                smooth, dtype=torch.float, device=self.device)
            self.idf_weights['raw'] = torch.tensor(
                raw, dtype=torch.float, device=self.device)
            self.idf_weights['prob'] = torch.tensor(
                prob, dtype=torch.float, device=self.device)
            df.to_csv(os.path.join(idf_path,'idf.csv'))

        else:
            df = pd.read_csv(os.path.join(idf_path, 'idf.csv'))
            self.idf_weights = {}
            self.idf_weights['smooth'] = torch.tensor(
                df['smooth'], dtype=torch.float, device=self.device)
            self.idf_weights['raw'] = torch.tensor(
                df['raw'], dtype=torch.float, device=self.device)
            self.idf_weights['prob'] = torch.tensor(
                df['prob'], dtype=torch.float, device=self.device)

            self.num_classes = self.idf_weights['smooth'].shape[0]
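The three IDF variants computed above can be sanity-checked with plain numpy on toy counts (no annotation file needed):

import numpy as np

N = 10                          # number of images ("documents")
doc_freq = np.array([1, 5, 9])  # number of images containing each class
smooth = np.log((N + 1) / (doc_freq + 1)) + 1  # log((N+1)/(df+1)) + 1
raw = np.log(N / doc_freq)                     # log(N/df)
prob = np.log((N - doc_freq) / doc_freq)       # log((N-df)/df); diverges as df -> N
print(smooth, raw, prob)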
Example #6
class MiniLvisDataSet(LvisDataSet):
    """Only contains the same 80 classes as COCO dataset"""
    # TODO: fix the lvis-coco classes
    LVIS_TO_COCO = {
        'computer_mouse': 'mouse',
        'cellphone': 'cell_phone',
        'hair_dryer': 'hair_drier',
        'sausage': 'hot_dog',  # not sure!!!
        'laptop_computer': 'laptop',
        'microwave_oven': 'microwave',
        'toaster_oven': 'oven',
        'flower_arrangement': 'potted_plant',  # not sure!!
        'remote_control': 'remote',
        'ski': 'skis',
        'baseball': 'sports_ball',  # too many types of balls
        'wineglass': 'wine_glass',
    }

    def load_annotations(self, ann_file):
        self.lvis = LVIS(ann_file)
        COCO_CLASSES = sorted(list(CocoDataset.CLASSES))
        self.synonyms_classes = [
            value['synonyms'] for value in self.lvis.cats.values()
        ]
        self.cat_ids = []
        self.CLASSES = []
        self.not_in_classes = list(COCO_CLASSES)  # copy, so removals below do not mutate COCO_CLASSES
        for cat_id in self.lvis.get_cat_ids():
            for name in self.lvis.cats[cat_id]['synonyms']:
                if name in self.LVIS_TO_COCO:
                    self.cat_ids.append(cat_id)
                    self.CLASSES.append(name)
                    self.not_in_classes.remove(self.LVIS_TO_COCO[name])
                    break
                elif '(' not in name and name in COCO_CLASSES:
                    self.cat_ids.append(cat_id)
                    self.CLASSES.append(name)
                    self.not_in_classes.remove(name)
                    break
                elif '_' in name:
                    new_name = name.split('_(')[0]
                    if new_name in COCO_CLASSES:
                        self.cat_ids.append(cat_id)
                        self.CLASSES.append(name)
                        self.not_in_classes.remove(new_name)
                        break
        data_dir = osp.dirname(ann_file)
        with open(osp.join(data_dir, 'synonyms_classes.json'), 'w') as f:
            f.write(json.dumps(self.synonyms_classes, indent=2))
        with open(osp.join(data_dir, 'not_in_classes.json'), 'w') as f:
            f.write(json.dumps(self.not_in_classes, indent=2))
        with open(osp.join(data_dir, 'coco_classes.json'), 'w') as f:
            f.write(json.dumps(COCO_CLASSES, indent=2))
        with open(osp.join(data_dir, 'lvis_coco_classes.json'), 'w') as f:
            f.write(json.dumps(self.CLASSES, indent=2))
        self.CLASSES = tuple(self.CLASSES)
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.CLASSES = CocoDataset.CLASSES
        self.img_ids = self.lvis.get_img_ids()
        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return self._parse_ann_info(ann_info, self.with_mask)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.lvis.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds
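
The three-way matching in load_annotations (alias table, exact synonym, base name before a "_(...)" qualifier) is easier to see in isolation. A standalone sketch on made-up names, not the real LVIS vocabulary:

# Hypothetical mini-vocabulary for illustration only.
LVIS_TO_COCO = {'computer_mouse': 'mouse'}
COCO_CLASSES = ['mouse', 'car', 'cell_phone']

def match_coco(synonyms):
    """Return the first synonym that maps onto a COCO class, or None."""
    for name in synonyms:
        if name in LVIS_TO_COCO:                      # explicit alias table
            return name
        if '(' not in name and name in COCO_CLASSES:  # exact match
            return name
        if '_(' in name:                              # strip the qualifier, e.g. car_(automobile)
            if name.split('_(')[0] in COCO_CLASSES:
                return name
    return None

assert match_coco(['computer_mouse']) == 'computer_mouse'
assert match_coco(['car_(automobile)']) == 'car_(automobile)'
assert match_coco(['zebra']) is None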
Example #7
class LvisDataset1(CustomDataset):
    def load_annotations(self, ann_file):
        self.lvis = LVIS(ann_file)
        self.full_cat_ids = self.lvis.get_cat_ids()
        self.full_cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.full_cat_ids)
        }

        self.CLASSES = tuple(
            [item['name'] for item in self.lvis.dataset['categories']])
        self.cat_ids = self.lvis.get_cat_ids()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }

        self.img_ids = self.lvis.get_img_ids()
        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            # info['filename'] = info['file_name'].split('_')[-1]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.data_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return self._parse_ann_info(self.data_infos[idx], ann_info)

    def get_ann_info_withoutparse(self, idx):
        img_id = self.data_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return ann_info

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.lvis.anns.values())
        for i, img_info in enumerate(self.data_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, img_info, ann_info):
        """Parse bbox annotation.

        Args:
            img_info (dict): Info of the image the annotations belong to.
            ann_info (list[dict]): Annotation info of an image.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        # Mask parsing is disabled in this variant; only boxes and labels are
        # kept (the masks entry in the returned ann dict is commented out).
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        # seg_map = img_info['filename'].replace('jpg', 'png')

        ann = dict(bboxes=gt_bboxes,
                   labels=gt_labels,
                   bboxes_ignore=gt_bboxes_ignore)
        # masks=gt_masks,
        # seg_map=seg_map)

        return ann
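
Every _parse_ann_info variant in this file converts LVIS's [x, y, w, h] boxes to corner form with an inclusive "- 1" on the far edge (the convention older mmdetection releases used). A tiny self-contained check of that conversion:

def xywh_to_xyxy(bbox):
    # [x1, y1, w, h] -> [x1, y1, x2, y2] with inclusive far corners,
    # matching bbox = [x1, y1, x1 + w - 1, y1 + h - 1] above.
    x1, y1, w, h = bbox
    return [x1, y1, x1 + w - 1, y1 + h - 1]

assert xywh_to_xyxy([10, 20, 30, 40]) == [10, 20, 39, 59]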
Example #8
class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate instance detection outputs using LVIS's metrics and evaluation API.
    """
    def __init__(self, dataset_name, cfg, distributed, output_dir=None):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:
                    "json_file": the path to the LVIS format annotation
            cfg (CfgNode): config instance
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump results.
        """
        from lvis import LVIS

        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")
        self._logger = logging.getLogger(__name__)

        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)
        # Test set json files do not contain annotations (evaluation must be
        # performed using the LVIS evaluation server).
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

    def reset(self):
        self._predictions = []
        self._lvis_results = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(
                    instances, input["image_id"])
            self._predictions.append(prediction)

    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            self._predictions = comm.gather(self._predictions, dst=0)
            self._predictions = list(itertools.chain(*self._predictions))

            if not comm.is_main_process():
                return

        if len(self._predictions) == 0:
            self._logger.warning(
                "[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(self._predictions, f)

        self._results = OrderedDict()
        if "instances" in self._predictions[0]:
            self._eval_predictions()
        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)

    def _eval_predictions(self):
        """
        Evaluate self._predictions on the instance detection task.
        Fill self._results with the metrics of the instance detection task.
        """
        self._logger.info("Preparing results in the LVIS format ...")
        self._lvis_results = list(
            itertools.chain(*[x["instances"] for x in self._predictions]))

        # unmap the category ids for LVIS
        if hasattr(self._metadata, "class_mapping"):
            # using reverse mapping
            reverse_id_mapping = {
                v: k
                for k, v in self._metadata.class_mapping.items()
            }
            for result in self._lvis_results:
                result["category_id"] = reverse_id_mapping[
                    result["category_id"]] + 1
        else:
            # from 0-indexed to 1-indexed
            for result in self._lvis_results:
                result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        res = _evaluate_predictions_on_lvis(
            self._lvis_api,
            self._lvis_results,
            "bbox",
            class_names=self._metadata.get("thing_classes"),
        )
        self._results["bbox"] = res
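
Evaluators of this shape are normally driven by detectron2's inference_on_dataset loop. A usage sketch, assuming a configured cfg, a trained model, and a registered "lvis_v1_val" dataset with LVIS-format metadata (all assumptions, not part of the snippet above):

from detectron2.data import build_detection_test_loader
from detectron2.evaluation import inference_on_dataset

evaluator = LVISEvaluator("lvis_v1_val", cfg, distributed=False,
                          output_dir="./eval")
val_loader = build_detection_test_loader(cfg, "lvis_v1_val")
# inference_on_dataset calls reset(), then process() per batch, then evaluate().
results = inference_on_dataset(model, val_loader, evaluator)
print(results["bbox"])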
Example #9
class LvisDataSet(CustomDataset):
    def __init__(self, samples_per_cls_file=None, **kwargs):
        self.samples_per_cls_file = samples_per_cls_file
        super(LvisDataSet, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        self.lvis = LVIS(ann_file)
        self.cat_ids = self.lvis.get_cat_ids()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.CLASSES = list(self.cat_ids)
        self.cat_instance_count = list(self.cat_ids)
        self.cat_image_count = list(self.cat_ids)
        img_count_lbl = ["r", "c", "f"]
        self.freq_groups = [[] for _ in img_count_lbl]
        self.cat_group_idxs = list(self.cat_ids)
        freq_group_count = {'f': 0, 'cf': 0, 'rcf': 0}
        self.cat_fake_idxs = {
            'f': [-1 for _ in self.cat_ids],
            'cf': [-1 for _ in self.cat_ids],
            'rcf': [-1 for _ in self.cat_ids]
        }
        self.freq_group_dict = {'rcf': (0, 1, 2), 'cf': (1, 2), 'f': (2, )}
        for value in self.lvis.cats.values():
            idx = value['id'] - 1
            self.CLASSES[idx] = value['name']
            self.cat_instance_count[idx] = value['instance_count']
            self.cat_image_count[idx] = value['image_count']
            group_idx = img_count_lbl.index(value["frequency"])
            self.freq_groups[group_idx].append(idx + 1)
            self.cat_group_idxs[idx] = group_idx
            if group_idx == 0:  # rare
                freq_group_count['rcf'] += 1
                self.cat_fake_idxs['rcf'][idx] = freq_group_count['rcf']
            elif group_idx == 1:  # common
                freq_group_count['rcf'] += 1
                freq_group_count['cf'] += 1
                self.cat_fake_idxs['rcf'][idx] = freq_group_count['rcf']
                self.cat_fake_idxs['cf'][idx] = freq_group_count['cf']
            elif group_idx == 2:  # frequent
                freq_group_count['rcf'] += 1
                freq_group_count['cf'] += 1
                freq_group_count['f'] += 1
                self.cat_fake_idxs['rcf'][idx] = freq_group_count['rcf']
                self.cat_fake_idxs['cf'][idx] = freq_group_count['cf']
                self.cat_fake_idxs['f'][idx] = freq_group_count['f']

        if self.samples_per_cls_file is not None:
            with open(self.samples_per_cls_file, 'w') as file:
                file.writelines(str(x) + '\n' for x in self.cat_instance_count)

        self.img_ids = self.lvis.get_img_ids()
        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx, freq_groups=('rcf', )):
        img_id = self.img_infos[idx]['id']
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return self._parse_ann_info(ann_info,
                                    self.with_mask,
                                    freq_groups=freq_groups)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.lvis.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, ann_info, with_mask=True, freq_groups=('rcf', )):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, mask_polys, poly_lens.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        assert isinstance(freq_groups, tuple)
        gt_valid_idxs = {name: [] for name in freq_groups}
        gt_count = 0
        # Two formats are provided.
        # 1. mask: a binary map of the same size of the image.
        # 2. polys: each mask consists of one or several polys, each poly is a
        # list of float.
        if with_mask:
            gt_masks = []
            gt_mask_polys = []
            gt_poly_lens = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue

            for name in freq_groups:
                if self.cat_group_idxs[ann['category_id'] -
                                       1] in self.freq_group_dict[name]:
                    gt_valid_idxs[name].append(gt_count)
            gt_count += 1

            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            gt_bboxes.append(bbox)
            gt_labels.append(self.cat2label[ann['category_id']])
            if with_mask:
                gt_masks.append(self.lvis.ann_to_mask(ann))
                mask_polys = [
                    p for p in ann['segmentation'] if len(p) >= 6
                ]  # valid polygons have >= 3 points (6 coordinates)
                poly_lens = [len(p) for p in mask_polys]
                gt_mask_polys.append(mask_polys)
                gt_poly_lens.extend(poly_lens)
        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        ann = dict(
            bboxes=gt_bboxes,
            labels=gt_labels,
            bboxes_ignore=gt_bboxes_ignore,
            gt_valid_idxs=gt_valid_idxs  # add gt_valid_idxs
        )

        if with_mask:
            ann['masks'] = gt_masks
            # poly format is not used in the current implementation
            ann['mask_polys'] = gt_mask_polys
            ann['poly_lens'] = gt_poly_lens
        return ann

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        # load image
        if 'COCO' in img_info['filename']:
            img = mmcv_custom.imread(
                osp.join(
                    self.img_prefix, img_info['filename']
                    [img_info['filename'].find('COCO_val2014_') +
                     len('COCO_val2014_'):]))
        else:
            img = mmcv_custom.imread(
                osp.join(self.img_prefix, img_info['filename']))
        # corruption
        if self.corruption is not None:
            img = corrupt(img,
                          severity=self.corruption_severity,
                          corruption_name=self.corruption)
        # load proposals if necessary
        if self.proposals is not None:
            proposals = self.proposals[idx][:self.num_max_proposals]
            # TODO: Handle empty proposals properly. Currently images with
            # no proposals are just ignored, but they can be used for
            # training in concept.
            if len(proposals) == 0:
                return None
            if not (proposals.shape[1] == 4 or proposals.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposals.shape))
            if proposals.shape[1] == 5:
                scores = proposals[:, 4, None]
                proposals = proposals[:, :4]
            else:
                scores = None

        ann = self.get_ann_info(idx, freq_groups=('rcf', 'cf', 'f'))
        gt_bboxes = ann['bboxes']
        gt_labels = ann['labels']
        gt_valid_idxs = ann['gt_valid_idxs']
        if self.with_crowd:
            gt_bboxes_ignore = ann['bboxes_ignore']

        # skip the image if there is no valid gt bbox
        if len(gt_bboxes) == 0 and self.skip_img_without_anno:
            warnings.warn('Skip the image "%s" that has no valid gt bbox' %
                          osp.join(self.img_prefix, img_info['filename']))
            return None

        # extra augmentation
        if self.extra_aug is not None:
            img, gt_bboxes, gt_labels = self.extra_aug(img, gt_bboxes,
                                                       gt_labels)

        # apply transforms
        flip = np.random.rand() < self.flip_ratio
        # randomly sample a scale
        img_scale = random_scale(self.img_scales, self.multiscale_mode)
        img, img_shape, pad_shape, scale_factor = self.img_transform(
            img, img_scale, flip, keep_ratio=self.resize_keep_ratio)
        img = img.copy()
        if self.with_seg:
            gt_seg = mmcv_custom.imread(osp.join(
                self.seg_prefix, img_info['filename'].replace('jpg', 'png')),
                                        flag='unchanged')
            gt_seg = self.seg_transform(gt_seg.squeeze(), img_scale, flip)
            gt_seg = mmcv.imrescale(gt_seg,
                                    self.seg_scale_factor,
                                    interpolation='nearest')
            gt_seg = gt_seg[None, ...]
        if self.proposals is not None:
            proposals = self.bbox_transform(proposals, img_shape, scale_factor,
                                            flip)
            proposals = np.hstack([proposals, scores
                                   ]) if scores is not None else proposals

        gt_bboxes = self.bbox_transform(gt_bboxes, img_shape, scale_factor,
                                        flip)

        if self.with_crowd:
            gt_bboxes_ignore = self.bbox_transform(gt_bboxes_ignore, img_shape,
                                                   scale_factor, flip)
        if self.with_mask:
            gt_masks = self.mask_transform(ann['masks'], pad_shape,
                                           scale_factor, flip)

        ori_shape = (img_info['height'], img_info['width'], 3)
        not_exhaustive_category_ids = img_info['not_exhaustive_category_ids']
        neg_category_ids = img_info['neg_category_ids']
        img_meta = dict(
            ori_shape=ori_shape,
            img_shape=img_shape,
            pad_shape=pad_shape,
            scale_factor=scale_factor,
            flip=flip,
            not_exhaustive_category_ids=not_exhaustive_category_ids,
            neg_category_ids=neg_category_ids,
            # cat_group_idxs=self.cat_group_idxs,
            cat_instance_count=self.cat_instance_count,
            freq_groups=self.freq_groups,
            cat_fake_idxs=self.cat_fake_idxs,
            freq_group_dict=self.freq_group_dict,
        )

        data = dict(
            img=DC(to_tensor(img), stack=True),
            img_meta=DC(img_meta, cpu_only=True),
            gt_bboxes=DC(to_tensor(gt_bboxes)),
            gt_valid_idxs=DC(gt_valid_idxs, cpu_only=True),
        )
        if self.proposals is not None:
            data['proposals'] = DC(to_tensor(proposals))
        if self.with_label:
            data['gt_labels'] = DC(to_tensor(gt_labels))
        if self.with_crowd:
            data['gt_bboxes_ignore'] = DC(to_tensor(gt_bboxes_ignore))
        if self.with_mask:
            data['gt_masks'] = DC(gt_masks, cpu_only=True)
        if self.with_seg:
            data['gt_semantic_seg'] = DC(to_tensor(gt_seg), stack=True)
        return data

    def prepare_test_img(self, idx):
        """Prepare an image for testing (multi-scale and flipping)"""
        img_info = self.img_infos[idx]
        # load image
        if 'COCO' in img_info['filename']:
            img = mmcv_custom.imread(
                osp.join(
                    self.img_prefix, img_info['filename']
                    [img_info['filename'].find('COCO_val2014_') +
                     len('COCO_val2014_'):]))
        else:
            img = mmcv_custom.imread(
                osp.join(self.img_prefix, img_info['filename']))
        # corruption
        if self.corruption is not None:
            img = corrupt(img,
                          severity=self.corruption_severity,
                          corruption_name=self.corruption)
        # load proposals if necessary
        if self.proposals is not None:
            proposal = self.proposals[idx][:self.num_max_proposals]
            if not (proposal.shape[1] == 4 or proposal.shape[1] == 5):
                raise AssertionError(
                    'proposals should have shapes (n, 4) or (n, 5), '
                    'but found {}'.format(proposal.shape))
        else:
            proposal = None

        def prepare_single(img, scale, flip, proposal=None):
            _img, img_shape, pad_shape, scale_factor = self.img_transform(
                img, scale, flip, keep_ratio=self.resize_keep_ratio)
            _img = to_tensor(_img)
            _img_meta = dict(ori_shape=(img_info['height'], img_info['width'],
                                        3),
                             img_shape=img_shape,
                             pad_shape=pad_shape,
                             scale_factor=scale_factor,
                             flip=flip)
            if proposal is not None:
                if proposal.shape[1] == 5:
                    score = proposal[:, 4, None]
                    proposal = proposal[:, :4]
                else:
                    score = None
                _proposal = self.bbox_transform(proposal, img_shape,
                                                scale_factor, flip)
                _proposal = np.hstack([_proposal, score
                                       ]) if score is not None else _proposal
                _proposal = to_tensor(_proposal)
            else:
                _proposal = None
            return _img, _img_meta, _proposal

        imgs = []
        img_metas = []
        proposals = []
        for scale in self.img_scales:
            _img, _img_meta, _proposal = prepare_single(
                img, scale, False, proposal)
            imgs.append(_img)
            img_metas.append(DC(_img_meta, cpu_only=True))
            proposals.append(_proposal)
            if self.flip_ratio > 0:
                _img, _img_meta, _proposal = prepare_single(
                    img, scale, True, proposal)
                imgs.append(_img)
                img_metas.append(DC(_img_meta, cpu_only=True))
                proposals.append(_proposal)
        data = dict(img=imgs, img_meta=img_metas)
        if self.proposals is not None:
            data['proposals'] = proposals
        return data
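
The cat_fake_idxs bookkeeping in load_annotations remaps each category to a 1-based index within a frequency-band subset ('rcf', 'cf', or 'f'), leaving -1 for categories outside the band. A standalone sketch with three made-up categories, one per band:

freqs = ['r', 'c', 'f']  # toy frequency label per category id (0, 1, 2)
img_count_lbl = ['r', 'c', 'f']
freq_group_dict = {'rcf': (0, 1, 2), 'cf': (1, 2), 'f': (2,)}

cat_fake_idxs = {name: [-1] * len(freqs) for name in freq_group_dict}
counts = {name: 0 for name in freq_group_dict}
for idx, freq in enumerate(freqs):
    group_idx = img_count_lbl.index(freq)
    for name, groups in freq_group_dict.items():
        if group_idx in groups:  # category belongs to this band subset
            counts[name] += 1
            cat_fake_idxs[name][idx] = counts[name]

# Rare classes stay invisible (-1) to the 'cf' and 'f' remappings:
assert cat_fake_idxs['rcf'] == [1, 2, 3]
assert cat_fake_idxs['cf'] == [-1, 1, 2]
assert cat_fake_idxs['f'] == [-1, -1, 1]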
Example #10
class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate object proposal and instance detection/segmentation outputs using
    LVIS's metrics and evaluation API.
    """
    def __init__(self,
                 dataset_name,
                 tasks=None,
                 distributed=True,
                 output_dir=None):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:
                "json_file": the path to the LVIS format annotation
            tasks (tuple[str]): tasks that can be evaluated under the given
                configuration. A task is one of "bbox", "segm".
                By default, will infer this automatically from predictions.
            distributed (bool): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump results.
        """
        from lvis import LVIS

        self._logger = logging.getLogger(__name__)

        if tasks is not None and isinstance(tasks, CfgNode):
            self._logger.warning(
                "COCO Evaluator instantiated using config, this is deprecated behavior."
                " Please pass in explicit arguments instead.")
            self._tasks = None  # Inferring it from predictions should be better
        else:
            self._tasks = tasks

        self._distributed = distributed
        self._output_dir = output_dir

        self._cpu_device = torch.device("cpu")

        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)
        # Test set json files do not contain annotations (evaluation must be
        # performed using the LVIS evaluation server).
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

    def reset(self):
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for input, output in zip(inputs, outputs):
            prediction = {"image_id": input["image_id"]}

            if "instances" in output:
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(
                    instances, input["image_id"])
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(
                    self._cpu_device)
            self._predictions.append(prediction)

    def evaluate(self):
        if self._distributed:
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain(*predictions))

            if not comm.is_main_process():
                return
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning(
                "[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir,
                                     "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        if "proposals" in predictions[0]:
            self._eval_box_proposals(predictions)
        if "instances" in predictions[0]:
            self._eval_predictions(predictions)
        # Copy so the caller can do whatever with results
        return copy.deepcopy(self._results)

    def _tasks_from_predictions(self, predictions):
        for pred in predictions:
            if "segmentation" in pred:
                return ("bbox", "segm")
        return ("bbox", )

    def _eval_predictions(self, predictions):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.

        Args:
            predictions (list[dict]): list of outputs from the model
        """
        self._logger.info("Preparing results in the LVIS format ...")
        lvis_results = list(
            itertools.chain(*[x["instances"] for x in predictions]))
        tasks = self._tasks or self._tasks_from_predictions(lvis_results)

        # LVIS evaluator can be used to evaluate results for COCO dataset categories.
        # In this case `_metadata` variable will have a field with COCO-specific category mapping.
        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            reverse_id_mapping = {
                v: k
                for k, v in
                self._metadata.thing_dataset_id_to_contiguous_id.items()
            }
            for result in lvis_results:
                result["category_id"] = reverse_id_mapping[
                    result["category_id"]]
        else:
            # unmap the category ids for LVIS (from 0-indexed to 1-indexed)
            for result in lvis_results:
                result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir,
                                     "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        for task in sorted(tasks):
            res = _evaluate_predictions_on_lvis(
                self._lvis_api,
                lvis_results,
                task,
                class_names=self._metadata.get("thing_classes"))
            self._results[task] = res

    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.
        """
        if self._output_dir:
            # Saving generated box proposals to file.
            # Predicted box_proposals are in XYXY_ABS mode.
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(
                    prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(
                    prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(
                    os.path.join(self._output_dir, "box_proposals.pkl"),
                    "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions,
                                                self._lvis_api,
                                                area=area,
                                                limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
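
The category-id unmapping in _eval_predictions deserves a small standalone example; the mapping dict below is hypothetical, but its shape matches detectron2's thing_dataset_id_to_contiguous_id metadata:

# Model outputs use contiguous 0-based ids; the LVIS/COCO json uses its own ids.
thing_dataset_id_to_contiguous_id = {1: 0, 3: 1, 7: 2}  # made-up metadata
reverse_id_mapping = {v: k for k, v in thing_dataset_id_to_contiguous_id.items()}

results = [{'category_id': 0}, {'category_id': 2}]
for result in results:
    result['category_id'] = reverse_id_mapping[result['category_id']]
assert [r['category_id'] for r in results] == [1, 7]

# Without such metadata, plain LVIS ids are simply shifted from 0- to 1-based:
assert [cid + 1 for cid in (0, 1, 2)] == [1, 2, 3]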
Example #11
class LvisDataset(CustomDataset):

    # LVIS v0.5 has 1230 categories; use their string ids as placeholder names.
    CLASSES = tuple(str(i) for i in range(1, 1231))

    def load_annotations(self, ann_file):
        self.lvis = LVIS(ann_file)
        self.cat_ids = self.lvis.get_cat_ids()
        self.cat2label = {
            cat_id: i + 1
            for i, cat_id in enumerate(self.cat_ids)
        }
        self.img_ids = self.lvis.get_img_ids()
        img_infos = []
        for i in self.img_ids:
            info = self.lvis.load_imgs([i])[0]
            info['filename'] = info['file_name']
            img_infos.append(info)
        return img_infos

    def get_ann_info(self, idx):
        img_id = self.img_infos[idx]['id']
        # The LVIS API takes img_ids, not COCO-style imgIds.
        ann_ids = self.lvis.get_ann_ids(img_ids=[img_id])
        ann_info = self.lvis.load_anns(ann_ids)
        return self._parse_ann_info(self.img_infos[idx], ann_info)

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without ground truths."""
        valid_inds = []
        ids_with_ann = set(_['image_id'] for _ in self.lvis.anns.values())
        for i, img_info in enumerate(self.img_infos):
            if self.img_ids[i] not in ids_with_ann:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
        return valid_inds

    def _parse_ann_info(self, img_info, ann_info):
        """Parse bbox and mask annotation.

        Args:
            ann_info (list[dict]): Annotation info of an image.
            with_mask (bool): Whether to parse mask annotations.

        Returns:
            dict: A dict containing the following keys: bboxes, bboxes_ignore,
                labels, masks, seg_map. "masks" are raw annotations and not
                decoded into binary masks.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        gt_masks_ann = []

        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                gt_masks_ann.append(ann['segmentation'])

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        seg_map = img_info['filename'].replace('jpg', 'png')

        ann = dict(bboxes=gt_bboxes,
                   labels=gt_labels,
                   bboxes_ignore=gt_bboxes_ignore,
                   masks=gt_masks_ann,
                   seg_map=seg_map)

        return ann
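
As a closing note, every dataset variant above relies on the same empty-annotation fallback so downstream transforms always see arrays of fixed rank and dtype. A quick self-contained check of that path:

import numpy as np

gt_bboxes, gt_labels = [], []
if gt_bboxes:
    gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
    gt_labels = np.array(gt_labels, dtype=np.int64)
else:
    gt_bboxes = np.zeros((0, 4), dtype=np.float32)
    gt_labels = np.array([], dtype=np.int64)

# Even with no ground truth, shapes and dtypes are stable.
assert gt_bboxes.shape == (0, 4) and gt_labels.shape == (0,)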