Example #1
    def __init__(self):
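        # Datasets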
        self.train_tuple = get_tuple(args.train,
                                     bs=args.batch_size,
                                     shuffle=True,
                                     drop_last=True)
        if args.valid != "":
            valid_bsize = 2048 if args.multiGPU else 512
            self.valid_tuple = get_tuple(args.valid,
                                         bs=valid_bsize,
                                         shuffle=False,
                                         drop_last=False,
                                         skip_semantics=True)
        else:
            self.valid_tuple = None

        self.model = GQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Losses and optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
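            # BertAdam warms the learning rate up over the first 10% of the
            # t_total steps (warmup=0.1) and, with its default warmup_linear
            # schedule, decays it linearly afterwards.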
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
            if args.task_nsp_qfpm or args.task_mlm_qfpm:
                self.task_optim = BertAdam(list(self.model.parameters()),
                                           lr=args.lr,
                                           warmup=0.1,
                                           t_total=t_total)
        else:
            self.optim = args.optimizer(list(self.model.parameters()), args.lr)

        self.output = args.output
        os.makedirs(self.output, exist_ok=True)
Example #2
    def __init__(self, folder="/", load=True):
        # Datasets
        if load:
            self.train_tuple = get_data_tuple(args.train,
                                              bs=args.batch_size,
                                              shuffle=True,
                                              drop_last=True,
                                              folder=folder)
            if args.valid != "":
                self.valid_tuple = get_data_tuple(args.valid,
                                                  bs=128,
                                                  shuffle=False,
                                                  drop_last=False,
                                                  folder=folder,
                                                  nops=args.nops)
            else:
                self.valid_tuple = None

        # Model
        # self.model = VQAModel(self.train_tuple.dataset.num_answers)
        is_cp = "vqacpv2" in folder
        if not is_cp:
            self.model = VQAModel(3129)
        else:
            self.model = VQAModel(16039)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if load:
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("BertAdam Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr,
                                      warmup=0.1,
                                      t_total=t_total)
            else:
                self.optim = args.optimizer(self.model.parameters(), args.lr)
            # Output Directory
            self.output = args.output
            os.makedirs(self.output, exist_ok=True)
Example #3
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(args.train,
                                          bs=args.batch_size,
                                          shuffle=True,
                                          drop_last=True)
        if args.bert_type == 'ft':
            bs_infer = 256
        else:
            bs_infer = 1024
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid,
                                              bs=bs_infer,
                                              shuffle=False,
                                              drop_last=False)
        else:
            self.valid_tuple = None

        print("args.lr is {0}".format(args.lr))

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            # if type(args.lr) == type("sdfg"):
            #     args.lr = float(args.lr)

            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total,
                                  schedule=args.lr_schedule,
                                  args=args)

        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)
Example #4
    def train(self, train_tuple: DataTuple, eval_tuple: DataTuple):
        train_ld = train_tuple.loader

        # Optimizer
        from lxrt.optimization import BertAdam

        batch_per_epoch = len(train_ld)
        t_total = int(batch_per_epoch * args.epochs)
        warmup_ratio = 0.05
        warmup_iters = int(t_total * warmup_ratio)
        print("Batch per epoch: %d" % batch_per_epoch)
        print("Total Iters: %d" % t_total)
        print("Warm up Iters: %d" % warmup_iters)
        optim = BertAdam(
            self.model.parameters(), lr=args.lr, warmup=warmup_ratio, t_total=t_total
        )

        # Train
        best_eval_loss = 9595.0
        for epoch in range(args.epochs):
            # Train
            self.model.train()
            total_loss = 0.0
            total_losses = 0.0
            uid2ans = {}
            for batch in tqdm(train_ld, total=len(train_ld)):
                loss, losses, logit = self.train_batch(optim, batch)
                total_loss += loss
                total_losses += losses
                if args.task_qa:
                    score, label = logit.max(1)
                    for datum, l in zip(batch, label.cpu().numpy()):
                        uid = datum.uid
                        ans = train_tuple.dataset.answer_table.id2ans(l)
                        uid2ans[uid] = ans

            print(epoch, total_loss, batch_per_epoch)
            print(
                "The training loss for Epoch %d is %0.4f"
                % (epoch, total_loss / batch_per_epoch)
            )
            losses_str = str(total_losses)
            # for name, loss in zip(LOSSES_NAME, total_losses):
            #    losses_str += "%s: %0.4f " % (name, loss / batch_per_epoch)
            print(losses_str)
            if args.task_qa:
                train_tuple.evaluator.evaluate(uid2ans, pprint=True)

            # Eval
            avg_eval_loss = self.evaluate_epoch(eval_tuple, iters=-1)

            # Save
            if avg_eval_loss < best_eval_loss:
                best_eval_loss = avg_eval_loss
                self.save("BEST_EVAL_LOSS")
            self.save("Epoch%02d" % (epoch + 1))
    def setup_optimizers(self):
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(list(self.model.parameters()), args.lr)
Example #6
    def __init__(self):
        # Datasets
        self.train_tuple = get_data_tuple(
            args.train, bs=args.batch_size, shuffle=True, drop_last=True
        )
        if args.valid != "":
            self.valid_tuple = get_data_tuple(
                args.valid, bs=1024,
                shuffle=False, drop_last=False
            )
        else:
            self.valid_tuple = None
        
        # Model
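        # Keep a siamese copy of the model; given the high momentum value,
        # it is presumably updated as an exponential moving average of the
        # online model's weights.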
        self.momentum = 0.99995
        self.model = VQAModel(self.train_tuple.dataset.num_answers)
        self.siam_model = copy.deepcopy(self.model)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
            self.siam_model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)
            load_lxmert_qa(args.load_lxmert_qa, self.siam_model,
                           label2ans=self.train_tuple.dataset.label2ans)
        
        # GPU options
        self.model = self.model.cuda()
        self.siam_model = self.siam_model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()
            self.siam_model.lxrt_encoder.multi_gpu()


        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)
        
        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)
    def __init__(self, folder="/", load=True):
        # Datasets
        if load:
            self.train_tuple = get_data_tuple(
                args.train, bs=args.batch_size, shuffle=True, drop_last=True, folder=folder
            )
            if args.valid != "":
                self.valid_tuple = get_data_tuple(
                    args.valid, bs=128,
                    shuffle=False, drop_last=False, folder=folder, nops=args.nops
                )
            else:
                self.valid_tuple = None
        
            get_bias(self.train_tuple.dataset, self.valid_tuple.dataset)
        # Model
        # self.model = VQAModel(self.train_tuple.dataset.num_answers)
        label2ans = json.load(open("/data/datasets/vqa_mutant/data/vqa/mutant_l2a/mutant_label2ans.json"))
        self.model = VQAModel(len(label2ans))

        self.debias = LearnedMixin(w=0.36, hid_dim=self.model.lxrt_encoder.dim)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa, self.model,
                           label2ans=self.train_tuple.dataset.label2ans)
        
        # GPU options
        self.model = self.model.cuda()
        self.debias = self.debias.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if load:
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("BertAdam Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr,
                                      warmup=0.1,
                                      t_total=t_total)
            else:
                self.optim = args.optimizer(self.model.parameters(), args.lr)
            # Output Directory
            self.output = args.output
            os.makedirs(self.output, exist_ok=True)
Example #8
    def __init__(self):

        if args.train_json != "-1":
            self.train_tuple = get_tuple(
                args.train_json, bs=args.batch_size, shuffle=True, drop_last=True
            )
        else:
            self.train_tuple = None
        
        if args.valid_json != "-1":
            valid_bsize = 2048 if args.multiGPU else 512
            self.valid_tuple = get_tuple(
                args.valid_json, bs=valid_bsize,
                shuffle=False, drop_last=False
            )
        else:
            self.valid_tuple = None

        n_answers = len(json.load(open(args.ans2label)))

        self.model = ClassifierModel(n_answers, model_type=args.model_type)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Losses and optimizer, only if training
        if args.train_json != "-1":
            self.bce_loss = nn.BCEWithLogitsLoss()
            self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr,
                                      warmup=0.1,
                                      t_total=t_total)
            else:
                self.optim = args.optimizer(list(self.model.parameters()), args.lr)

        self.output = args.output_dir
        self.best_name = None
        
        os.makedirs(self.output, exist_ok=True)
Example #9
    def prepare_model(self):
        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)
Example #10
    def __init__(self):
        self.train_tuple = get_tuple(args.train,
                                     bs=args.batch_size,
                                     shuffle=True,
                                     drop_last=True)
        if args.valid != "":
            valid_bsize = 2048 if args.multiGPU else 512
            self.valid_tuple = get_tuple(args.valid,
                                         bs=valid_bsize,
                                         shuffle=False,
                                         drop_last=False)
        else:
            self.valid_tuple = None

        self.model = NLVR2Model()

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)

        # GPU options
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()
        self.model = self.model.cuda()

        # Losses and optimizer
        self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
        if "bert" in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam

            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(list(self.model.parameters()), args.lr)

        self.output = args.output
        os.makedirs(self.output, exist_ok=True)
Example #11
    def __init__(self, folder="/", load=True):
        # Datasets
        if load:
            self.train_tuple = get_data_tuple(args.train,
                                              bs=args.batch_size,
                                              shuffle=True,
                                              drop_last=True,
                                              folder=folder)
            if args.valid != "":
                self.valid_tuple = get_data_tuple(args.valid,
                                                  bs=512,
                                                  shuffle=False,
                                                  drop_last=False,
                                                  folder=folder)
            else:
                self.valid_tuple = None

        # Model
        # self.model = VQAModel(self.train_tuple.dataset.num_answers)
        self.model = VQAModel(len(self.train_tuple.dataset.label2ans),
                              fn_type=args.fn_type)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Load IndexList of Answer to Type Map
        self.indexlist = json.load(
            open(
                "/data/datasets/vqa_mutant/data/vqa/mutant_l2a/mutant_merge_indexlist.json"
            ))

        print("Length of Masks", len(self.indexlist), flush=True)

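        # Build binary masks over the answer vocabulary, one per answer-type
        # index (0-3) in the loaded index list.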
        indextensor = torch.cuda.LongTensor(self.indexlist)
        self.mask0 = torch.eq(indextensor, 0).float()
        self.mask1 = torch.eq(indextensor, 1).float()
        self.mask2 = torch.eq(indextensor, 2).float()
        self.mask3 = torch.eq(indextensor, 3).float()

        self.mask_cache = {}

        # Loss and Optimizer

        # Explicit dim avoids PyTorch's deprecated implicit-dim behaviour
        # (assumes [batch, num_answers] shaped inputs).
        self.logsoftmax = nn.LogSoftmax(dim=-1)
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=-1)

        self.bceloss = nn.BCELoss()
        self.nllloss = nn.NLLLoss()

        self.bce_loss = nn.BCEWithLogitsLoss()
        self.ce_loss = nn.CrossEntropyLoss()

        if load:
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("BertAdam Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr,
                                      warmup=0.1,
                                      t_total=t_total)
            else:
                self.optim = args.optimizer(self.model.parameters(), args.lr)
            # Output Directory
            self.output = args.output
            os.makedirs(self.output, exist_ok=True)
Example #12
    def train(self, train_tuple: DataTuple, eval_tuple: DataTuple):
        train_ld = train_tuple.loader

        # Optimizer
        from lxrt.optimization import BertAdam
        batch_per_epoch = len(train_ld)
        t_total = int(batch_per_epoch * args.epochs)
        warmup_ratio = 0.05
        warmup_iters = int(t_total * warmup_ratio)
        print("Batch per epoch: %d" % batch_per_epoch)
        print("Total Iters: %d" % t_total)
        print("Warm up Iters: %d" % warmup_iters)
        optim = BertAdam(self.model.parameters(),
                         lr=args.lr,
                         warmup=warmup_ratio,
                         t_total=t_total)
        start_epoch = 0

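        # Optional mixed-precision training via NVIDIA Apex; opt_level 'O1'
        # casts selected ops to fp16 while keeping master weights in fp32.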
        if args.fp16:
            try:
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )
            self.model, optim = amp.initialize(self.model,
                                               optim,
                                               opt_level='O1')

        # GPU Options
        if args.multiGPU:
            self.model = nn.DataParallel(self.model)

        if args.start_from > 0 and args.pretraining_index == 0:
            start_path = os.path.join(
                args.output,
                "Epoch%s_LXRT.pth" % format(int(args.start_from), '02'))
            print('start training from {0}'.format(start_path))
            state = torch.load(start_path)
            self.model.load_state_dict(state['state_dict'])
            optim.load_state_dict(state['optimizer'])  # Optimizer.load_state_dict takes no strict argument
            start_epoch = args.start_from
            del state
            torch.cuda.empty_cache()
        elif args.start_from > 0 and args.pretraining_index == 1:
            start_path = os.path.join(
                args.output,
                "Epoch%s_LXRT.pth" % format(int(args.start_from), '02'))
            print('start training from {0}'.format(start_path))
            state = torch.load(start_path)
            self.model.load_state_dict(state['state_dict'], strict=False)
            del state
            torch.cuda.empty_cache()

        # Train
        best_eval_loss = 9595.
        for epoch in range(start_epoch, args.epochs):
            # Train
            self.model.train()
            total_loss = 0.
            total_losses = 0.
            uid2ans = {}
            for batch in tqdm(train_ld, total=len(train_ld)):
                loss, losses, logit = self.train_batch(optim, batch)
                total_loss += loss
                total_losses += losses

                if args.task_qa:
                    score, label = logit.max(1)
                    for datum, l in zip(batch, label.cpu().numpy()):
                        uid = datum.uid
                        ans = train_tuple.dataset.answer_table.id2ans(l)
                        uid2ans[uid] = ans

            print("The training loss for Epoch %d is %0.4f" %
                  (epoch, total_loss / batch_per_epoch))
            log_str = "\nThe training loss for Epoch %d is %0.4f" % (
                epoch, total_loss / batch_per_epoch)
            losses_str = "\nThe losses are "
            log_str += "\nThe losses are "
            for name, loss in zip(LOSSES_NAME, total_losses):
                losses_str += "%s: %0.4f " % (name, loss / batch_per_epoch)
                log_str += "\n %s: %0.4f " % (name, loss / batch_per_epoch)
            print(losses_str)
            with open(self.output + "/log.log", 'a') as f:
                f.write(log_str)
                f.flush()
            if args.task_qa:
                train_tuple.evaluator.evaluate(uid2ans, pprint=True)

            # Eval
            avg_eval_loss = self.evaluate_epoch(eval_tuple, iters=-1)

            state = {
                'state_dict': self.model.state_dict(),
                'optimizer': optim.state_dict(),
            }

            # Save
            if avg_eval_loss < best_eval_loss:
                best_eval_loss = avg_eval_loss
                self.save("BEST_EVAL_LOSS", state)
            if args.pretraining_index == 0:
                self.save("Epoch%02d" % (epoch + 1), state)
            elif args.pretraining_index == 1:
                self.save("Epoch%02d" % (epoch + 1 + args.start_from), state)
    def __init__(self):
        self.train_tuple = get_tuple(args.train,
                                     bs=args.batch_size,
                                     shuffle=True,
                                     drop_last=True)
        if args.valid != "":
            valid_bsize = 2048 if args.multiGPU else 512
            self.valid_tuple = get_tuple(args.valid,
                                         bs=valid_bsize,
                                         shuffle=False,
                                         drop_last=False)
        else:
            self.valid_tuple = None

        self.model = GQAModel(self.train_tuple.dataset.num_answers)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            self.new_ans_label = load_lxmert_qa(
                args.load_lxmert_qa,
                self.model,
                label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Losses and optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
        # self.KL_loss = nn.KLDivLoss(reduction='none')
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(list(self.model.parameters()), args.lr)

        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

        # Tensorboard
        self.boards_dir = os.path.join('boards', self.output)
        if not os.path.exists(self.boards_dir):
            os.makedirs(self.boards_dir)
        self.writerTbrd = SummaryWriter(self.boards_dir)

        # get Glove projection for all answers
        if args.answer_loss == 'glove':
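            # Embed each answer as the mean of its words' GloVe vectors;
            # answers with out-of-vocabulary words are flagged invalid and
            # padded with zero vectors.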
            path_glove = './data/GloVe/GloVeDict.pkl'
            with open(path_glove, 'rb') as f:
                glove_dic = pickle.load(f)
            glove_dim = glove_dic['the'].shape[-1]
            print("Loading Glove%d answer's vector" % glove_dim)
            self.labelans2glove = []
            self.valid_ans_embed = [1] * len(
                self.train_tuple.dataset.label2ans)
            for label, ans in enumerate(self.train_tuple.dataset.label2ans):
                ans = ans.split(' ')
                glove_ans = []
                for w in ans:
                    #print(w)
                    try:
                        glove_ans.append(glove_dic[w])
                    except KeyError:
                        #print('Full ans: %s' % ans)
                        #input(' ')
                        self.valid_ans_embed[label] = 0
                        glove_ans.append(np.zeros(glove_dim))
                #print(glove_ans)
                glove_ans = torch.tensor(glove_ans).mean(-2)
                self.labelans2glove.append(glove_ans)
            #print(self.labelans2glove)
            print(
                'Ratio of valid ans embedding: %f' %
                (float(sum(self.valid_ans_embed)) / len(self.valid_ans_embed)))
            self.labelans2glove = torch.stack(
                self.labelans2glove).float().cuda()
            self.cosineSim = torch.nn.CosineSimilarity(dim=1, eps=1e-08)
    def finetune(self, train_tuple, eval_tuple):
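        # Two-stage finetuning: stage 1 trains only the answer head
        # (logit_fc) at lr; stage 2 unfreezes all parameters and trains at
        # lr / 10 for the remaining epochs.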
        # log
        output_1 = os.path.join(self.output, 'finetune_1')
        os.makedirs(output_1, exist_ok=True)
        output_2 = os.path.join(self.output, 'finetune_2')
        os.makedirs(output_2, exist_ok=True)

        # Tensorboard
        boards_dir_1 = os.path.join(self.boards_dir, 'finetune_1')
        if not os.path.exists(boards_dir_1):
            os.makedirs(boards_dir_1)
        boards_dir_2 = os.path.join(self.boards_dir, 'finetune_2')
        if not os.path.exists(boards_dir_2):
            os.makedirs(boards_dir_2)

        # Params
        lr_1 = args.lr
        lr_2 = args.lr / 10
        epochs_1 = 4  #int(args.epochs / 3)
        epochs_2 = args.epochs - epochs_1

        # Step 0: evaluate pretraining
        if self.valid_tuple is not None:  # Do Validation
            valid_score = self.evaluate(eval_tuple)
            print("Before finetune: Valid %0.2f\n" % (valid_score * 100.))

        # Step 0.1: finetune new ans only
        # new_ans_params = []
        # for name, p in self.model.named_parameters():
        #     if "logit_fc.3" in name:
        #         for idx in range(p.size(0)):
        #             if idx in self.new_ans_label:
        #                 new_ans_params.append({'params': p[idx]})

        # args.epochs = epochs_0
        # from lxrt.optimization import BertAdam
        # self.optim = BertAdam(new_ans_params,
        #                       lr=lr_1,
        #                       warmup=0.0,
        #                       t_total=-1)
        # print('### Start finetuning new ans...')
        # self.train(train_tuple, eval_tuple)

        # First step, only updates answer head

        #self.optim = torch.optim.Adamax(list(self.model.parameters()), lr_1)
        #self.optim = torch.optim.SGD(list(self.model.parameters()), lr_1)
        args.epochs = epochs_1
        batch_per_epoch = len(self.train_tuple.loader)
        t_total = int(batch_per_epoch * epochs_1)
        print("Total Iters: %d" % t_total)
        from lxrt.optimization import BertAdam
        self.optim = BertAdam(
            list(self.model.parameters()),
            lr=lr_1,
            warmup=0.0,  #!0.034
            t_total=-1)
        # loaded_optim = torch.load("%s_LXRT.pth" % args.load_lxmert_qa)['optimizer']
        # self.optim.load_state_dict(loaded_optim)
        # for group in loaded_optim.param_groups:
        #     for p in group['params']:
        #         if p in loaded_optim['state']:
        #             self.optim.state[p] = loaded_optim.state[p]

        self.writerTbrd = SummaryWriter(boards_dir_1)
        self.output = output_1

        for name, p in self.model.named_parameters():
            if "logit_fc" in name:
                p.requires_grad = True
            else:
                p.requires_grad = False

        print('### Start finetuning step 1...')
        self.train(train_tuple, eval_tuple)

        # Second step, finetune all
        for name, p in self.model.named_parameters():
            p.requires_grad = True

        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * epochs_2)
            print("Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=lr_2,
                                  warmup=0.1,
                                  t_total=t_total,
                                  lr_min=1e-7)
        else:
            self.optim = args.optimizer(list(self.model.parameters()), lr_2)
        args.epochs = epochs_2
        self.writerTbrd = SummaryWriter(boards_dir_2)
        self.output = output_2

        print('### Start finetuning step 2...')
        self.train(train_tuple, eval_tuple)
    def __init__(self, attention=False):
        # Datasets
        print("Fetching data")
        self.train_tuple = get_data_tuple(args.train,
                                          bs=args.batch_size,
                                          shuffle=True,
                                          drop_last=True,
                                          dataset_name="test")
        print("Got data")
        print("fetching val data")
        if args.valid != "":
            self.valid_tuple = get_data_tuple(args.valid,
                                              bs=args.batch_size,
                                              shuffle=False,
                                              drop_last=False,
                                              dataset_name="test")
            print("got data")
        else:
            self.valid_tuple = None
        print("Got data")

        # Model
        print("Making model")
        self.model = VQAModel(self.train_tuple.dataset.num_answers, attention)
        print("Ready model")
        # Print model info:
        print("Num of answers:")
        print(self.train_tuple.dataset.num_answers)
        # print("Model info:")
        # print(self.model)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if 'bert' in args.optim:
            batch_per_epoch = len(self.train_tuple.loader)
            t_total = int(batch_per_epoch * args.epochs)
            print("BertAdam Total Iters: %d" % t_total)
            from lxrt.optimization import BertAdam
            self.optim = BertAdam(list(self.model.parameters()),
                                  lr=args.lr,
                                  warmup=0.1,
                                  t_total=t_total)
        else:
            self.optim = args.optimizer(self.model.parameters(), args.lr)

        # Output Directory
        self.output = args.output
        os.makedirs(self.output, exist_ok=True)
Example #16
    def train(self):  #, eval_tuple: DataTuple):
        # train_ld = train_tuple.loader

        # Optimizer
        from lxrt.optimization import BertAdam
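        # Batches per epoch via ceiling division over the full dataset.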
        batch_per_epoch = len(train_dset) // args.batch_size + bool(
            len(train_dset) % args.batch_size)
        t_total = int(batch_per_epoch * args.epochs)
        warmup_ratio = 0.05
        warmup_iters = int(t_total * warmup_ratio)
        print("Batch per epoch: %d" % batch_per_epoch)
        print("Total Iters: %d" % t_total)
        print("Warm up Iters: %d" % warmup_iters)
        optim = BertAdam(self.model.parameters(),
                         lr=args.lr,
                         warmup=warmup_ratio,
                         t_total=t_total)

        # Train
        best_eval_loss = 9595.
        for epoch in range(args.epochs):
            # Train
            self.model.train()
            total_loss = 0.
            total_losses = 0.
            uid2ans = {}
            # for training subset (shuffle order across epochs)
            for i in tqdm(range(NUM_MINISETS), total=NUM_MINISETS):
                # create dataloader train_ld for this subset
                dtuple = get_tuple(args.train + '_%d' % i,
                                   args.batch_size,
                                   shuffle=True,
                                   drop_last=True)
                dl = dtuple.loader
                for batch in tqdm(dl, total=len(dl)):
                    loss, losses, logit = self.train_batch(
                        optim, batch, dtuple.torchdset)
                    total_loss += loss
                    total_losses += losses

                    if args.task_qa:
                        score, label = logit.max(1)
                        for datum, l in zip(batch, label.cpu().numpy()):
                            uid = datum.uid
                            ans = dtuple.dataset.answer_table.id2ans(l)
                            uid2ans[uid] = ans

            print("The training loss for Epoch %d is %0.4f" %
                  (epoch, total_loss / batch_per_epoch))
            losses_str = "The losses are "
            for name, loss in zip(LOSSES_NAME, total_losses):
                losses_str += "%s: %0.4f " % (name, loss / batch_per_epoch)
            print(losses_str)
            if args.task_qa:
                dtuple.evaluator.evaluate(uid2ans, pprint=True)

            # Eval
            dtuple = get_tuple(args.valid,
                               valid_batch_size,
                               shuffle=False,
                               drop_last=False)  #, topk=5000)
            avg_eval_loss = self.evaluate_epoch(dtuple, iters=-1)

            # Save
            if avg_eval_loss < best_eval_loss:
                best_eval_loss = avg_eval_loss
                self.save("BEST_EVAL_LOSS")
            self.save("Epoch%02d" % (epoch + 1))
Example #17
    def __init__(self, folder="/", load=True):
        # Datasets
        if load:
            self.train_tuple = get_data_tuple(args.train,
                                              bs=args.batch_size,
                                              shuffle=True,
                                              drop_last=True,
                                              folder=folder)
            if args.valid != "":
                self.valid_tuple = get_data_tuple(args.valid,
                                                  bs=128,
                                                  shuffle=False,
                                                  drop_last=False,
                                                  folder=folder,
                                                  nops=args.nops)
            else:
                self.valid_tuple = None

        # Model
        self.model = VQAModel(self.train_tuple.dataset.num_answers)
        #         is_cp=False
        #         if "vqacpv2" in folder:
        #             is_cp=True
        #         if not is_cp:
        #             self.model = VQAModel(3129)
        #         else:
        #             self.model = VQAModel(16039)

        # Load pre-trained weights
        if args.load_lxmert is not None:
            self.model.lxrt_encoder.load(args.load_lxmert)
        if args.load_lxmert_qa is not None:
            load_lxmert_qa(args.load_lxmert_qa,
                           self.model,
                           label2ans=self.train_tuple.dataset.label2ans)

        # GPU options
        self.model = self.model.cuda()
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

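        # Pre-computed answer embeddings, shifted by a small epsilon to avoid
        # zero vectors, then L2-normalized along the embedding dimension.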
        ans_embed = np.load(
            "/data/datasets/vqa_mutant/data/vqa/mutant_l2a/answer_embs.npy"
        ) + 1e-8
        ans_embed = torch.tensor(ans_embed).cuda()
        self.ans_embed = torch.nn.functional.normalize(ans_embed, dim=1)
        self.embed_cache = {}

        # Loss and Optimizer
        self.bce_loss = nn.BCEWithLogitsLoss()
        if load:
            if 'bert' in args.optim:
                batch_per_epoch = len(self.train_tuple.loader)
                t_total = int(batch_per_epoch * args.epochs)
                print("BertAdam Total Iters: %d" % t_total)
                from lxrt.optimization import BertAdam
                self.optim = BertAdam(list(self.model.parameters()),
                                      lr=args.lr,
                                      warmup=0.1,
                                      t_total=t_total)
            else:
                self.optim = args.optimizer(self.model.parameters(), args.lr)
            # Output Directory
            self.output = args.output
            os.makedirs(self.output, exist_ok=True)

        self.cos = nn.CosineSimilarity()
Example #18
    def train(self, train_tuple: DataTuple, eval_tuple: DataTuple):
        train_ld = train_tuple.loader

        # Optimizer
        from lxrt.optimization import BertAdam
        batch_per_epoch = len(train_ld)
        t_total = int(batch_per_epoch * args.epochs)
        warmup_ratio = args.get("warmup_ratio", 0.05)

        print("Total Iters: %d" % t_total)
        if args.get("t_total", None):
            t_total = args.t_total
            print("!! Changing to specified t_toal in args: {}".format(t_total))
        self.t_total = t_total
        warmup_iters = int(t_total * warmup_ratio)

        print("Batch per epoch: %d" % batch_per_epoch)
        print("Warm up Iters: %d" % warmup_iters)
        self.optim = BertAdam(self.model.parameters(), lr=args.lr, warmup=warmup_ratio, t_total=t_total)

        if args.load is not None:
            self.load(args.load, t_total=t_total)

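        # Gradient accumulation presumably happens inside train_batch; here
        # the accumulation factor only rescales the save-step cadence below.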
        gradient_accumulation_steps = args.get("gradient_accumulation_steps", 1)
        # Train
        best_eval_loss = 9595.
        report_every = args.get("report_every", 100)

        custom_train_meter = TrainingMeter()
        
        for epoch in range(args.epochs):
            # Train
            self.model.train()
            total_loss = 0.
            total_losses = 0.
            uid2ans = {}

            for batch_id, batch in enumerate(tqdm(train_ld, total=len(train_ld))):
                if args.get("skip_training", False):
                    break

                loss, losses, logit, losses_dict = self.train_batch(self.optim, batch)
                total_loss += loss
                try:
                    total_losses += losses
                except Exception:  # losses may not always be accumulable
                    pass

                if args.task_qa and batch[0].sent is not None:
                    assert(0) # Not used in our experiment

                    score, label = logit.max(1)
                    for datum, l in zip(batch, label.cpu().numpy()):
                        uid = datum.uid
                        ans = train_tuple.dataset.answer_table.id2ans(l)
                        uid2ans[uid] = ans
                
                for key, value in losses_dict.items():
                    losses_dict[key] = value.mean().item()  # make the losses scalar
                
                if "Masked LM" in losses_dict and losses_dict["Masked LM"] == 0:
                    del losses_dict["Masked LM"]

                custom_train_meter.update(losses_dict)

                if batch_id % report_every == 0 and batch_id > 0:
                    print("Folder: {} \n Epoch {} Iter: {}/{}".format(args.output, epoch, batch_id, len(train_ld)))
                    #print(pd.DataFrame(train_results[-report_every:]).mean())
                    custom_train_meter.report()
                    custom_train_meter.clean()
                    print()
                
                if args.get("save_step", -1) != -1 and self.global_step != 0 and (self.global_step // gradient_accumulation_steps) % args.save_step == 0:
                    self.save("Step{}".format(self.global_step))
                self.global_step += 1
            
            print("The training loss for Epoch %d is %0.4f" % (epoch, total_loss / batch_per_epoch))

            if args.task_qa:
                train_tuple.evaluator.evaluate(uid2ans, pprint=True)

            # Eval
            avg_eval_loss = self.evaluate_epoch(eval_tuple, iters=-1)

            if args.get("eval_on_train", False):
                print("On train set")
                self.evaluate_epoch(train_tuple, iters=-1)


            if avg_eval_loss < best_eval_loss:
                best_eval_loss = avg_eval_loss
                self.save("BEST_EVAL_LOSS")
            self.save("Epoch%02d" % (epoch+1))