Example #1
0
 def __init__(
     self,
     data_root: str,
     csv: str,
     split: str,
     tokenizer: SentencePieceBPETokenizer,
     image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
     padded_length: int = 256,
     max_caption_length: int = 50,
     use_single_caption: bool = False,
     percentage: float = 100.0,
 ):
     """Read the annotation CSV for this dataset and set up transforms."""
     self.data_root = data_root
     self.padded_length = padded_length

     # The annotation file is pipe-separated.
     info_df = pd.read_csv(os.path.join(data_root, csv), delimiter="|")
     # One entry per row: (row index, video name, single-caption list).
     self.video_list = [
         (row_idx, row['name'], [row['orth']])
         for row_idx, row in info_df.iterrows()
     ]

     self.image_transform = image_transform
     # Caption pipeline: normalize, tokenize, then truncate to a fixed length.
     self.caption_transform = alb.Compose([
         T.NormalizeCaption(),
         T.TokenizeCaption(tokenizer),
         T.TruncateCaptionTokens(max_caption_length),
     ])
     self.use_single_caption = use_single_caption
     # NOTE(review): padding index reuses the "<unk>" token id — confirm intended.
     self.padding_idx = tokenizer.token_to_id("<unk>")
Example #2
0
    def __init__(
        self,
        data_root: str,
        split: str,
        tokenizer: SentencePieceBPETokenizer,
        image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
        mask_proportion: float = 0.15,
        mask_probability: float = 0.80,
        replace_probability: float = 0.10,
        max_caption_length: int = 30,
        use_single_caption: bool = False,
        percentage: float = 100.0,
    ):
        """Open the serialized LMDB for this split and configure transforms
        plus the word-masking parameters."""
        serialized_path = os.path.join(data_root, 'virtex',
                                       f"serialized_{split}.lmdb")
        self.reader = LmdbReader(serialized_path, percentage=percentage)

        self.use_single_caption = use_single_caption
        self.image_transform = image_transform
        # Caption pipeline: normalize, tokenize, then truncate to a fixed length.
        self.caption_transform = alb.Compose([
            T.NormalizeCaption(),
            T.TokenizeCaption(tokenizer),
            T.TruncateCaptionTokens(max_caption_length),
        ])
        # NOTE(review): padding index reuses the "<unk>" token id — confirm intended.
        self.padding_idx = tokenizer.token_to_id("<unk>")

        # Cache tokenizer-derived values and probabilities used for word masking.
        self._vocab_size = tokenizer.get_vocab_size()
        self._mask_index = tokenizer.token_to_id("[MASK]")
        self._mask_proportion = mask_proportion
        self._mask_prob = mask_probability
        self._repl_prob = replace_probability
Example #3
0
    def __init__(
        self,
        data_root: str,
        split: str,
        tokenizer: SentencePieceBPETokenizer,
        image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
        max_caption_length: int = 30,
    ):
        """Open the split's serialized LMDB and set up image/caption transforms."""
        self.reader = LmdbReader(
            os.path.join(data_root, f"serialized_{split}.lmdb")
        )

        self.image_transform = image_transform
        # Caption pipeline: normalize, tokenize, then truncate to a fixed length.
        self.caption_transform = alb.Compose([
            T.NormalizeCaption(),
            T.TokenizeCaption(tokenizer),
            T.TruncateCaptionTokens(max_caption_length),
        ])
        # NOTE(review): padding index reuses the "<unk>" token id — confirm intended.
        self.padding_idx = tokenizer.token_to_id("<unk>")
Example #4
0
    def __init__(self,
                 data_root: str,
                 split: str,
                 tokenizer: SentencePieceBPETokenizer,
                 image_transform: Callable = T.DEFAULT_IMAGE_TRANSFORM,
                 max_caption_length: int = 77,
                 percentage: float = 100.0,
                 all_captions: bool = False,
                 include_image: bool = True):
        """Store dataset configuration and build the caption transform."""
        super().__init__()
        self.data_root = data_root
        self.split = split
        self.percentage = percentage

        self.tokenizer = tokenizer
        self.image_transform = image_transform
        # Caption pipeline: normalize, tokenize, then truncate to a fixed length.
        self.caption_transform = alb.Compose([
            T.NormalizeCaption(),
            T.TokenizeCaption(tokenizer),
            T.TruncateCaptionTokens(max_caption_length),
        ])
        # This tokenizer exposes a dedicated pad id attribute directly.
        self.padding_idx = tokenizer.pad_id

        self._all_captions = all_captions
        self._include_image = include_image