Example #1: PhoBERT-based sequence classifier (CLS-token pooling)
import torch
import torch.nn as nn
from transformers import RobertaConfig, RobertaModel


class CLS_Net(nn.Module):
    def __init__(self, numOfLabels, device='cpu', pretrain=True):
        super().__init__()
        config = RobertaConfig.from_pretrained("vinai/phobert-base")
        if pretrain:
            self.roberta = RobertaModel.from_pretrained("vinai/phobert-base")
        else:
            self.roberta = RobertaModel(config)

        self.fc1 = nn.Linear(768, 128)
        self.fc2 = nn.Linear(128, numOfLabels)
        self.device = device

    def forward(self, x):
        # non_blocking only has an effect for pinned-memory CPU -> CUDA copies
        x = x.to(self.device, non_blocking=(self.device == "cuda"))

        # train()/eval() propagate from the parent module, so the encoder only
        # needs to skip gradient tracking during evaluation; indexing [0] gives
        # the last hidden state for both tuple and ModelOutput returns
        if self.training:
            enc = self.roberta(x)[0]
        else:
            with torch.no_grad():
                enc = self.roberta(x)[0]

        # use the CLS token embedding as the sequence representation
        enc = enc[:, 0, :]
        # note: no nonlinearity between fc1 and fc2, so together they form a
        # single affine map
        hidden = self.fc1(enc)
        logits = self.fc2(hidden)
        return logits
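
A minimal smoke test for CLS_Net (a sketch, not from the original project: the label count, sample sentence, and AutoTokenizer usage are illustrative, and the word segmentation PhoBERT expects for Vietnamese input is skipped here):

# Hypothetical usage sketch for CLS_Net; values are illustrative.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
net = CLS_Net(numOfLabels=3, device="cpu", pretrain=True)
net.eval()

ids = tokenizer("Xin chào thế giới", return_tensors="pt")["input_ids"]
logits = net(ids)                  # shape: (1, 3)
print(logits.argmax(dim=-1))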
Example #2: PhoBERT-based sentence encoder (300-dimensional embedding)
# imports as in Example #1

class Net(nn.Module):
    def __init__(self, device='cpu', pretrain=True):
        super().__init__()
        config = RobertaConfig.from_pretrained("vinai/phobert-base")
        if pretrain:
            self.roberta = RobertaModel.from_pretrained("vinai/phobert-base")
        else:
            self.roberta = RobertaModel(config)

        self.fc = nn.Linear(768, 300)
        self.device = device
       

    def forward(self, x):
        # non_blocking only has an effect for pinned-memory CPU -> CUDA copies
        x = x.to(self.device, non_blocking=(self.device == "cuda"))

        # indexing [0] gives the last hidden state for both tuple and
        # ModelOutput returns of the encoder
        if self.training:
            enc = self.roberta(x)[0]
        else:
            with torch.no_grad():
                enc = self.roberta(x)[0]

        # use the CLS token embedding as the sequence representation
        enc = enc[:, 0, :]

        result = self.fc(enc)
        return result
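
Since Net maps a sentence to a single 300-dimensional vector, a natural check is the cosine similarity between two embeddings. A sketch under the same assumptions as the previous example:

# Hypothetical usage sketch: compare two sentence embeddings from Net.
import torch.nn.functional as F
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
net = Net(device="cpu", pretrain=True)
net.eval()

def embed(sentence):
    ids = tokenizer(sentence, return_tensors="pt")["input_ids"]
    return net(ids)                # shape: (1, 300)

sim = F.cosine_similarity(embed("Hôm nay trời đẹp"), embed("Thời tiết hôm nay tốt"))
print(sim.item())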
Example #3: training/evaluation driver with gradient accumulation
import argparse
import os
import re

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as datautils
import tqdm
from torch.optim import AdamW  # the transformers AdamW would also work here
from transformers import RobertaConfig, RobertaModel, RobertaTokenizer

# DisasterTweetsClassificationDataset, NaiveTokenizer,
# NaiveLSTMBaselineClassifier and RoBERTaClassifierHead are project-local
# modules and must be imported from this repository.


def main():
    os.makedirs("checkpoints", exist_ok=True)

    parser = argparse.ArgumentParser()
    def str2bool(v):
        if v.lower() in ('yes', 'true', 't', 'y', '1'):
            return True
        elif v.lower() in ('no', 'false', 'f', 'n', '0'):
            return False
        else:
            raise argparse.ArgumentTypeError('Unsupported value encountered.')
    parser.add_argument(
        "--eval_mode",
        default=False,
        type=str2bool,
        required=False,
        help="Test or train the model",
    )
    parser.add_argument(
        "--baseline",
        default=False,
        type=str2bool,
        required=False,
        help="use the baseline or the transformers model",
    )
    parser.add_argument(
        "--load_weights",
        default=True,
        type=str2bool,
        required=False,
        help="Load the pretrained weights or randomly initialize the model",
    )
    parser.add_argument(
        "--iter_per",
        default=4,
        type=int,
        required=False,
        help="cumulative gradient iteration cycle",
    )

    args = parser.parse_args()
    directory_identifier = args.__str__().replace(" ", "") \
        .replace("iter_per=1,", "") \
        .replace("iter_per=2,", "") \
        .replace("iter_per=4,", "") \
        .replace("iter_per=8,", "") \
        .replace("iter_per=16,", "") \
        .replace("iter_per=32,", "") \
        .replace("iter_per=64,", "") \
        .replace("iter_per=128,", "") \
        .replace("eval_mode=True", "eval_mode=False")


    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    suffix = "roberta"
    if args.baseline:
        suffix = "naive"
        tokenizer = NaiveTokenizer()
    try:
        dataset, test_dataset, tokenizer = torch.load("checkpoints/dataset-%s.pyc" % suffix)
        dev_dataset, _, _ = torch.load("checkpoints/dataset-%s.pyc" % suffix)
    except FileNotFoundError:
        dataset = DisasterTweetsClassificationDataset(tokenizer, "data/train.csv", "train")
        test_dataset = DisasterTweetsClassificationDataset(tokenizer, "data/test.csv", "test")
        torch.save((dataset, test_dataset, tokenizer), "checkpoints/dataset-%s.pyc" % suffix)
        # reload so dev_dataset is an independent copy; the .eval() call below
        # must not flip the training copy into evaluation mode
        dev_dataset, _, _ = torch.load("checkpoints/dataset-%s.pyc" % suffix)
    dev_dataset.eval()

    if args.baseline:
        encoder = nn.LSTM(256, 256, 1, batch_first=True)
        model = NaiveLSTMBaselineClassifier()
    else:
        if args.load_weights:
            encoder = RobertaModel.from_pretrained("roberta-base")
            model = RoBERTaClassifierHead(encoder.config)
        else:
            config = RobertaConfig.from_pretrained("roberta-base")
            encoder = RobertaModel(config=config)
            model = RoBERTaClassifierHead(config)
    encoder.cuda()
    model.cuda()
    dataloader = datautils.DataLoader(
        dataset, batch_size=64 // args.iter_per, shuffle=True,
        num_workers=16, drop_last=False, pin_memory=True
    )
    dev_dataloader = datautils.DataLoader(
        dev_dataset, batch_size=64 // args.iter_per, shuffle=True,
        num_workers=16, drop_last=False, pin_memory=True
    )
    test_dataloader = datautils.DataLoader(
        test_dataset, batch_size=64 // args.iter_per, shuffle=False,
        num_workers=16, drop_last=False, pin_memory=True
    )

    if args.eval_mode:
        correct = 0
        total = 0
        encoder_, model_ = torch.load("checkpoints/%s" % directory_identifier)
        encoder.load_state_dict(encoder_)
        model.load_state_dict(model_)
        encoder.eval()
        model.eval()
        with torch.no_grad():
            for ids, mask, label in dev_dataloader:
                ids, mask, label = ids.cuda(), mask.cuda(), label.cuda()
                prediction = model(encoder, ids, mask).argmax(dim=-1)
                correct += (prediction == label).to(torch.long).sum().item()
                total += mask.shape[0]
        print("dev acc:", correct / total)

        # briefly fine-tune on the dev split at a tiny learning rate before
        # producing the final submission
        opt = AdamW(lr=1e-6, weight_decay=0.05, params=list(encoder.parameters()) + list(model.parameters()))
        encoder.train()
        model.train()
        iter_num = 0
        LOSS = []
        for _ in range(5):
            iterator = tqdm.tqdm(dev_dataloader)
            for ids, mask, label in iterator:
                ids, mask, label = ids.cuda(), mask.cuda(), label.cuda()
                log_prediction = model(encoder, ids, mask)
                # negative log-likelihood of the gold label (the head emits log-probs)
                loss = -log_prediction[torch.arange(ids.size(0)).cuda(), label].mean()
                if iter_num % args.iter_per == 0:
                    opt.zero_grad()
                (loss / args.iter_per).backward()
                LOSS.append(loss.item())
                if len(LOSS) > 10:
                    iterator.write("loss=%f" % np.mean(LOSS))
                    LOSS = []
                if iter_num % args.iter_per == args.iter_per - 1:
                    opt.step()
                iter_num += 1

        # reset the counters so the second accuracy is measured from scratch
        # rather than accumulated on top of the first pass
        correct = 0
        total = 0
        encoder.eval()
        model.eval()
        with torch.no_grad():
            for ids, mask, label in dev_dataloader:
                ids, mask, label = ids.cuda(), mask.cuda(), label.cuda()
                prediction = model(encoder, ids, mask).argmax(dim=-1)
                correct += (prediction == label).to(torch.long).sum().item()
                total += mask.shape[0]
        print("dev acc rectified:", correct / total)

        with torch.no_grad():
            with open("submission.csv", "w") as fout:
                print("id,target", file=fout)
                for row_id, ids, mask in test_dataloader:
                    ids, mask = ids.cuda(), mask.cuda()
                    prediction = model(encoder, ids, mask).argmax(dim=-1)
                    for i in range(row_id.size(0)):
                        print("%d,%d" % (row_id[i], prediction[i]), file=fout)
        return
    if args.baseline:
        lr = 5e-4
    elif args.load_weights:
        lr = 1e-6
    else:
        lr = 5e-6
    opt = AdamW(lr=lr, weight_decay=0.10 if args.baseline else 0.05,
                params=list(encoder.parameters()) + list(model.parameters()))
    # truncate both log files once, then append to them epoch by epoch
    open("checkpoints/log-%s.txt" % directory_identifier, "w").close()
    open("checkpoints/evallog-%s.txt" % directory_identifier, "w").close()
    iter_num = 0
    for epoch_idx in range(5 if args.baseline else 10):
        flog = open("checkpoints/log-%s.txt" % directory_identifier, "a")
        flogeval = open("checkpoints/evallog-%s.txt" % directory_identifier, "a")
        LOSS = []
        encoder.train()
        model.train()
        iterator = tqdm.tqdm(dataloader)
        for ids, mask, label in iterator:
            ids, mask, label = ids.cuda(), mask.cuda(), label.cuda()
            log_prediction = model(encoder, ids, mask)
            # negative log-likelihood of the gold label (the head emits log-probs)
            loss = -log_prediction[torch.arange(ids.size(0)).cuda(), label].mean()
            # gradient accumulation: zero at the start of each cycle and scale
            # the loss so the accumulated gradient matches a batch of 64
            if iter_num % args.iter_per == 0:
                opt.zero_grad()
            (loss / args.iter_per).backward()
            LOSS.append(loss.item())
            if len(LOSS) > 10:
                iterator.write("loss=%f" % np.mean(LOSS))
                print("%f" % np.mean(LOSS), file=flog)
                LOSS = []
            if iter_num % args.iter_per == args.iter_per - 1:
                opt.step()
            iter_num += 1
        EVALLOSS = []
        encoder.eval()
        model.eval()
        iterator = tqdm.tqdm(dev_dataloader)
        with torch.no_grad():
            for ids, mask, label in iterator:
                ids, mask, label = ids.cuda(), mask.cuda(), label.cuda()
                log_prediction = model(encoder, ids, mask)
                loss = -log_prediction[torch.arange(ids.size(0)).cuda(), label].mean()
                EVALLOSS.append(loss.item())
        iterator.write("evalloss-%d=%f" % (epoch_idx, np.mean(EVALLOSS)))
        print("%f" % np.mean(EVALLOSS), file=flogeval)

        flog.close()
        flogeval.close()
        torch.save((encoder.state_dict(), model.state_dict()), "checkpoints/%s" % directory_identifier)
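
The --iter_per flag above implements gradient accumulation: iter_per micro-batches of size 64 // iter_per each contribute a scaled gradient before a single optimizer step, emulating a batch of 64 in less GPU memory. A stripped-down sketch of that pattern (toy model and data, not the project's):

# Minimal gradient-accumulation sketch; model, data, and sizes are illustrative.
import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Linear(10, 2)
opt = torch.optim.AdamW(model.parameters(), lr=1e-3)
iter_per = 4                        # micro-batches per optimizer step

for iter_num in range(100):
    x = torch.randn(16, 10)         # micro-batch (64 // iter_per = 16)
    y = torch.randint(0, 2, (16,))
    if iter_num % iter_per == 0:    # start of an accumulation cycle
        opt.zero_grad()
    loss = F.cross_entropy(model(x), y)
    (loss / iter_per).backward()    # scale so gradients average over 64 samples
    if iter_num % iter_per == iter_per - 1:
        opt.step()                  # one update per iter_per micro-batches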