Example #1
 def val_data_loader(val_file):
     val_dataset = MemeDataset(filepath=val_file,
                               text_only=True,
                               text_padding=tokenizer_func)
     return data.DataLoader(val_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'],
                            collate_fn=val_dataset.get_collate_fn())
Example #2
 def test_data_loader(test_file):
     test_dataset = MemeDataset(filepath=test_file,
                                text_only=True,
                                text_padding=tokenizer_func,
                                return_ids=True)
     return data.DataLoader(test_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'],
                            collate_fn=test_dataset.get_collate_fn())
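
Example #2 differs from #1 only in `return_ids=True`, presumably so each prediction can be matched back to its meme id when writing out results. A hypothetical evaluation loop under that assumption (the batch key `"ids"` and the `model(batch)` call are guesses, not the project's actual API):

    import torch

    def collect_predictions(model, test_loader):
        # No shuffle/sampler on the test loader, so iteration order is deterministic.
        model.eval()
        rows = []
        with torch.no_grad():
            for batch in test_loader:
                ids = batch["ids"]                               # assumed key from return_ids=True
                probs = torch.sigmoid(model(batch)).squeeze(-1)  # assumed forward signature
                rows.extend(zip(ids, probs.tolist()))
        return rows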
Example #3
 def val_data_loader(val_file):
     val_dataset = MemeDataset(
         filepath=val_file,
         feature_dir=config['feature_path'],
         preload_images=False,
         debug=True,
         text_padding=tokenizer_func,
         confidence_threshold=config['object_conf_thresh'])
     return data.DataLoader(val_dataset,
                            batch_size=config['batch_size'],
                            num_workers=config['num_workers'],
                            collate_fn=val_dataset.get_collate_fn())
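
Example #3 switches to the image-feature pipeline: precomputed region features are loaded from `feature_dir` and pruned by `confidence_threshold`. The dataset internals aren't shown, but thresholding detector output typically looks like this sketch (the region count and 2048-dim features are assumptions in the usual UNITER style):

    import torch

    # Hypothetical pruning step inside a dataset like MemeDataset:
    region_feats = torch.randn(36, 2048)   # e.g. 36 detected regions, 2048-d features
    region_conf = torch.rand(36)           # detector confidence per region
    keep = region_conf >= 0.5              # stands in for config['object_conf_thresh']
    region_feats = region_feats[keep]      # only confident regions reach the model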
Example #4
 def train_data_loader(train_file):
     if config['debug']:
         train_file = os.path.join(config["data_path"], "dev_seen.jsonl")
     train_dataset = MemeDataset(filepath=train_file,
                                 text_only=True,
                                 text_padding=tokenizer_func)
     return data.DataLoader(
         train_dataset,
         batch_size=config['batch_size'],
         num_workers=config['num_workers'],
         collate_fn=train_dataset.get_collate_fn(),
         # shuffle is mutually exclusive with sampler; the sampler shuffles anyway
         pin_memory=True,
         sampler=ConfounderSampler(
             train_dataset, repeat_factor=config["confounder_repeat"]))
Example #5
 def train_data_loader(train_file):
     train_dataset = MemeDataset(
         filepath=train_file,
         feature_dir=config['feature_path'],
         preload_images=False,
         debug=True,
         text_padding=tokenizer_func,
         confidence_threshold=config['object_conf_thresh'])
     return data.DataLoader(
         train_dataset,
         batch_size=config['batch_size'],
         num_workers=config['num_workers'],
         collate_fn=train_dataset.get_collate_fn(),
         # shuffle is mutually exclusive with sampler; the sampler shuffles anyway
         pin_memory=True,
         sampler=ConfounderSampler(
             train_dataset, repeat_factor=config["confounder_repeat"]))
Example #6
    val_dataset = MemeDataset(filepath=os.path.join(config['data_path'],
                                                    'dev_seen.jsonl'),
                              feature_dir=config['feature_path'],
                              text_padding=tokenizer_func,
                              filter_text=config["filter_text"])
    test_dataset = MemeDataset(filepath=os.path.join(config['data_path'],
                                                     'test_seen.jsonl'),
                               feature_dir=config['feature_path'],
                               text_padding=tokenizer_func,
                               filter_text=config["filter_text"])

    config['train_loader'] = data.DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        num_workers=config['num_workers'],
        collate_fn=train_dataset.get_collate_fn(),
        shuffle=True,
        pin_memory=True)
    config['val_loader'] = data.DataLoader(
        val_dataset,
        batch_size=config['batch_size'],
        num_workers=config['num_workers'],
        collate_fn=val_dataset.get_collate_fn())
    config['test_loader'] = data.DataLoader(
        test_dataset,
        batch_size=config['batch_size'],
        num_workers=config['num_workers'],
        collate_fn=test_dataset.get_collate_fn())

    try:
        trainer = TrainerUniter(config)