Example #1
    def _download_and_extract(self, key, url, download_folder):
        file_type = key.split("_")[0]
        os.makedirs(download_folder, exist_ok=True)
        local_filename = url.split("/")[-1]
        extraction_folder = os.path.join(download_folder, local_filename.split(".")[0])
        local_filename = os.path.join(download_folder, local_filename)

        if os.path.exists(local_filename) or (
            os.path.exists(extraction_folder)
            and len(os.listdir(extraction_folder)) != 0
        ):
            logger.info(
                f"{self.dataset_proper_name} {file_type} already present. "
                + "Skipping download."
            )
            return extraction_folder

        logger.info(f"Downloading the {self.dataset_proper_name} {file_type} now.")
        download(url, download_folder, url.split("/")[-1])

        logger.info(
            f"Extracting the {self.dataset_proper_name} {file_type} now. "
            + "This may take time"
        )
        decompress(download_folder, url.split("/")[-1])

        return extraction_folder
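The guard in this helper makes the download idempotent: if the archive is already on disk, or the extraction folder exists and is non-empty, both the download and the extraction are skipped. Below is a minimal standard-library sketch of the same pattern; the mmf `download`/`decompress` helpers are replaced with `urllib` and `zipfile`, so everything here is an illustrative assumption rather than the mmf API:

import os
import urllib.request
import zipfile

def download_and_extract_once(url, download_folder):
    # Archive name and extraction folder are derived from the URL,
    # mirroring the naming scheme in the example above.
    os.makedirs(download_folder, exist_ok=True)
    archive_name = url.split("/")[-1]
    archive_path = os.path.join(download_folder, archive_name)
    extraction_folder = os.path.join(download_folder, archive_name.split(".")[0])

    # Skip all work if the archive is present or the folder has contents.
    if os.path.exists(archive_path) or (
        os.path.exists(extraction_folder)
        and len(os.listdir(extraction_folder)) != 0
    ):
        return extraction_folder

    urllib.request.urlretrieve(url, archive_path)
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        zip_ref.extractall(extraction_folder)
    return extraction_folder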
Example #2
    def _try_downloading_necessities(self):
        if self.args.model_file is None:
            print("Downloading model and configuration")
            self.args.model_file = self.MODEL_URL.split("/")[-1]
            self.args.config_file = self.CONFIG_URL.split("/")[-1]
            download(self.MODEL_URL, ".", self.args.model_file)
            download(self.CONFIG_URL, ".", self.args.config_file)
Example #3
    def _download_and_extract(self, key, url, download_folder):
        file_type = key.split("_")[0]
        os.makedirs(download_folder, exist_ok=True)
        local_filename = url.split("/")[-1]
        extraction_folder = os.path.join(download_folder,
                                         local_filename.split(".")[0])
        local_filename = os.path.join(download_folder, local_filename)

        if (os.path.exists(local_filename)
                or (os.path.exists(extraction_folder)
                    and len(os.listdir(extraction_folder)) != 0)):
            self.writer.write(
                "{} {} already present. Skipping download.".format(
                    self.dataset_proper_name, file_type))
            return extraction_folder

        self.writer.write("Downloading the {} {} now.".format(
            self.dataset_proper_name, file_type))
        download(url, download_folder, url.split("/")[-1])

        self.writer.write(
            "Extracting the {} {} now. This may take time".format(
                self.dataset_proper_name, file_type))
        decompress(download_folder, url.split("/")[-1])

        return extraction_folder
Example #4
    def build(self, config, dataset_type):
        download_folder = os.path.join(get_mmf_root(), config.data_dir,
                                       config.data_folder)

        file_name = CLEVR_DOWNLOAD_URL.split("/")[-1]
        local_filename = os.path.join(download_folder, file_name)

        extraction_folder = os.path.join(download_folder,
                                         ".".join(file_name.split(".")[:-1]))
        self.data_folder = extraction_folder

        # If the zip file is already present, or the extraction folder
        # already contains files, we don't continue the download process
        if os.path.exists(local_filename):
            self.writer.write(
                "CLEVR dataset is already present. Skipping download.")
            return

        if (os.path.exists(extraction_folder)
                and len(os.listdir(extraction_folder)) != 0):
            return

        self.writer.write("Downloading the CLEVR dataset now")
        download(CLEVR_DOWNLOAD_URL, download_folder,
                 CLEVR_DOWNLOAD_URL.split("/")[-1])

        self.writer.write("Downloaded. Extracting now. This can take time.")
        with zipfile.ZipFile(local_filename, "r") as zip_ref:
            zip_ref.extractall(download_folder)
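One detail worth noticing: this builder strips only the final extension when naming the extraction folder, unlike Example #1, which keeps everything before the first dot. The difference matters for versioned archive names such as CLEVR's:

# Versioned archive names keep their dotted version component here.
file_name = "CLEVR_v1.0.zip"
print(".".join(file_name.split(".")[:-1]))  # CLEVR_v1.0 (this example)
print(file_name.split(".")[0])              # CLEVR_v1   (Example #1's scheme)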
Example #5
    def classify(
        self,
        image: ImageType,
        text: str,
        image_tensor=None,
        zero_image=False,
        zero_text=False,
    ):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image
            image_tensor: optional precomputed image tensor; when provided,
                the raw softmax scores are returned instead of a dict
            zero_image: zero out the image features when classifying
            zero_text: zero out the text features when classifying

        Returns:
            {"label": 0, "confidence": 0.56}
        """
        sample = Sample()

        if image_tensor is not None:
            sample.image = image_tensor
        else:
            if isinstance(image, str):
                if image.startswith("http"):
                    temp_file = tempfile.NamedTemporaryFile()
                    download(image, *os.path.split(temp_file.name), disable_tqdm=True)
                    image = tv_helpers.default_loader(temp_file.name)
                    temp_file.close()
                else:
                    image = tv_helpers.default_loader(image)

            image = self.processor_dict["image_processor"](image)
            sample.image = image

        text = self.processor_dict["text_processor"]({"text": text})

        sample.text = text["text"]
        if "input_ids" in text:
            sample.update(text)

        sample_list = SampleList([sample])
        device = next(self.model.parameters()).device
        sample_list = sample_list.to(device)
        output = self.model(sample_list, zero_image=zero_image, zero_text=zero_text)
        scores = nn.functional.softmax(output["scores"], dim=1)

        if image_tensor is not None:
            return scores

        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}
Example #6
    def _try_downloading_necessities(self, model_name):
        if self.args.model_file is None and model_name is not None:
            model_url = self.MODEL_URL[model_name]
            config_url = self.CONFIG_URL[model_name]
            self.args.model_file = model_url.split("/")[-1]
            self.args.config_file = config_url.split("/")[-1]
            if os.path.exists(self.args.model_file) and os.path.exists(
                self.args.config_file
            ):
                print(f"model and config files exist in directory: {os.getcwd()}")
                return
            print("Downloading model and configuration")
            download(model_url, ".", self.args.model_file)
            download(config_url, ".", self.args.config_file)
Example #7
    def imageToTensor(self, image):
        """
        Transform the input image into tensor form.
        This function "take out" one of the sub-step in super().classify()
        """
        if isinstance(image, str):
            if image.startswith("http"):
                temp_file = tempfile.NamedTemporaryFile()
                download(image,
                         *os.path.split(temp_file.name),
                         disable_tqdm=True)
                image = tv_helpers.default_loader(temp_file.name)
                temp_file.close()
            else:
                image = tv_helpers.default_loader(image)

        return self.processor_dict["image_processor"](image)
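All of these snippets pass `*os.path.split(temp_file.name)` to `download`, which splits an absolute path into the (folder, file name) pair that `download` expects as separate arguments. A tiny self-contained demonstration of that idiom:

import os
import tempfile

temp_file = tempfile.NamedTemporaryFile()
folder, name = os.path.split(temp_file.name)
print(folder, name)  # e.g. "/tmp" and "tmpab12cd34"
# download(url, folder, name) is equivalent to
# download(url, *os.path.split(temp_file.name))
temp_file.close()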
Example #8
    def classify(self, image: ImageType, text: str):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

            >>> from mmf.models.mmbt import MMBT
            >>> model = MMBT.from_pretrained("mmbt.hateful_memes.images")
            >>> model.classify("some_url", "some_text")
            {"label": 0, "confidence": 0.56}

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image

        Returns:
            {"label": 0, "confidence": 0.56}
        """
        if isinstance(image, str):
            if image.startswith("http"):
                temp_file = tempfile.NamedTemporaryFile()
                download(image,
                         *os.path.split(temp_file.name),
                         disable_tqdm=True)
                image = tv_helpers.default_loader(temp_file.name)
                temp_file.close()
            else:
                image = tv_helpers.default_loader(image)

        text = self.processor_dict["text_processor"]({"text": text})
        image = self.processor_dict["image_processor"](image)

        sample = Sample()
        sample.text = text["text"]
        if "input_ids" in text:
            sample.update(text)

        sample.image = image
        sample_list = SampleList([sample])
        device = next(self.model.parameters()).device
        sample_list = sample_list.to(device)

        output = self.model(sample_list)
        scores = nn.functional.softmax(output["scores"], dim=1)
        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}
Example #9
    def __init__(self, config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)
        self._probability = 0
        self.segment_reverse = getattr(config, "segment_reverse", False)
        self.sync_seg_reverse = getattr(config, "sync_seg_reverse", False)
        self.sync_seg_shuffle = getattr(config, "sync_seg_shuffle", False)
        self.filter_vocab = getattr(config, "filter_vocab", "none")
        registry.register("ln_caption_processor", self)

        # attention guidance stop word / filter list
        # (requires the NLTK stopwords corpus; run nltk.download("stopwords")
        # once if it is missing)
        from nltk.corpus import stopwords
        self.stop_words = set(stopwords.words("english"))
        for w in ["!", ",", ".", "?", "-s", "-ly", "</s>", "s"]:
            self.stop_words.add(w)
        COCO_CATE = [
            "person",
            "bicycle",
            "car",
            "motorcycle",
            "airplane",
            "bus",
            "train",
            "truck",
            "boat",
            "traffic light",
            "fire hydrant",
            "stop sign",
            "parking meter",
            "bench",
            "bird",
            "cat",
            "dog",
            "horse",
            "sheep",
            "cow",
            "elephant",
            "bear",
            "zebra",
            "giraffe",
            "backpack",
            "umbrella",
            "handbag",
            "tie",
            "suitcase",
            "frisbee",
            "skis",
            "snowboard",
            "sports ball",
            "kite",
            "baseball bat",
            "baseball glove",
            "skateboard",
            "surfboard",
            "tennis racket",
            "bottle",
            "wine glass",
            "cup",
            "fork",
            "knife",
            "spoon",
            "bowl",
            "banana",
            "apple",
            "sandwich",
            "orange",
            "broccoli",
            "carrot",
            "hot dog",
            "pizza",
            "donut",
            "cake",
            "chair",
            "couch",
            "potted plant",
            "bed",
            "dining table",
            "toilet",
            "tv",
            "laptop",
            "mouse",
            "remote",
            "keyboard",
            "cell phone",
            "microwave",
            "oven",
            "toaster",
            "sink",
            "refrigerator",
            "book",
            "clock",
            "vase",
            "scissors",
            "teddy bear",
            "hair drier",
            "toothbrush",
        ]
        self.coco_vocab = set()
        for w in COCO_CATE:
            piece = self.tokenize(w)
            for p in piece:
                self.coco_vocab.add(p)

        import os
        import random

        from mmf.utils.download import download

        path = "/tmp/"
        filename = "VG_category{}.txt".format(random.randint(0, 10000))
        filepath = os.path.join(path, filename)
        url = "http://visualgenome.org/static/data/dataset/object_alias.txt"
        if download(url, path, filename, redownload=False):
            cate = []
            with open(filepath) as fin:
                for line in fin:
                    cate += line.strip().split(",")
            self.vg_vocab = set()
            for w in cate:
                piece = self.tokenize(w)
                for p in piece:
                    self.vg_vocab.add(p)
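How `filter_vocab`, the stop-word set, and the COCO/VG vocabularies are combined happens elsewhere in the processor; a minimal sketch of the kind of filtering they support (this helper is an assumption for illustration, not code from the processor):

def filter_tokens(tokens, vocab, stop_words):
    # Keep tokens that are in the chosen vocabulary and are not stop words.
    return [t for t in tokens if t in vocab and t not in stop_words]

print(filter_tokens(["a", "dog", "on", "a", "skateboard"],
                    vocab={"dog", "skateboard"},
                    stop_words={"a", "on"}))
# ['dog', 'skateboard']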
Example #10
    def classify(self,
                 image: ImageType,
                 text: str,
                 image_tensor=None,
                 zero_image=False,
                 zero_text=False):
        """Classifies a given image and text in it into Hateful/Non-Hateful.
        Image can be a url or a local path or you can directly pass a PIL.Image.Image
        object. Text needs to be a sentence containing all text in the image.

        Args:
            image (ImageType): Image to be classified
            text (str): Text in the image
            image_tensor: optional precomputed image tensor; when provided,
                the raw softmax scores are returned instead of a dict
            zero_image: zero out the image features when classifying
            zero_text: zero out the text features when classifying

        Returns:
            {"label": 0, "confidence": 0.56}
        """

        if image_tensor is not None:
            image_tensor = torch.unsqueeze(image_tensor, 0)
            im_feature_0, im_info_0 = torchRay_feat_extract(image_tensor)
        else:
            if isinstance(image, str):
                if image.startswith("http"):
                    temp_file = tempfile.NamedTemporaryFile()
                    download(image,
                             *os.path.split(temp_file.name),
                             disable_tqdm=True)
                    image = tv_helpers.default_loader(temp_file.name)
                    temp_file.close()
                else:
                    image = tv_helpers.default_loader(image)
            _, _, im_feature_0, im_info_0 = self.feature_extractor.extract_features(
                image_dir=image, save_single=False)

        text = self.processor_dict["text_processor"]({"text": text})
        sample = Sample()
        sample.text = text["text"]
        if "input_ids" in text:
            sample.update(text)

        # re-format the sample list
        sample_im_info = Sample()

        # process the bounding boxes for vilbert
        if self.model_name == "vilbert":
            bbox = np.array(im_info_0["bbox"])
            image_w = im_info_0["image_width"]
            image_h = im_info_0["image_height"]
            new_bbox = np.zeros((bbox.shape[0], 5), dtype=bbox.dtype)

            new_bbox[:, 0] = bbox[:, 0] / image_w
            new_bbox[:, 1] = bbox[:, 1] / image_h
            new_bbox[:, 2] = bbox[:, 2] / image_w
            new_bbox[:, 3] = bbox[:, 3] / image_h
            new_bbox[:, 4] = ((bbox[:, 2] - bbox[:, 0]) *
                              (bbox[:, 3] - bbox[:, 1]) / (image_w * image_h))

            sample_im_info.bbox = torch.from_numpy(new_bbox)
        else:
            sample_im_info.bbox = torch.from_numpy(np.array(im_info_0["bbox"]))

        sample_im_info.num_boxes = torch.from_numpy(
            np.array(im_info_0["num_boxes"]))
        sample_im_info.objects = torch.from_numpy(
            np.array(im_info_0["objects"]))
        sample_im_info.image_width = torch.from_numpy(
            np.array(im_info_0["image_width"]))
        sample_im_info.image_height = torch.from_numpy(
            np.array(im_info_0["image_height"]))
        sample_im_info.cls_prob = torch.from_numpy(
            np.array(im_info_0["cls_prob"]))
        sample_list_info = SampleList([sample_im_info])

        sample.image_feature_0 = im_feature_0
        sample.dataset_name = "hateful_memes"

        sample_list = SampleList([sample])
        sample_list.image_info_0 = sample_list_info
        device = next(self.model.parameters()).device
        sample_list = sample_list.to(device)

        output = self.model(sample_list)
        scores = nn.functional.softmax(output["scores"], dim=1)

        if image_tensor is not None:
            return scores
        confidence, label = torch.max(scores, dim=1)

        return {"label": label.item(), "confidence": confidence.item()}