Esempio n. 1
0
    # Cross-validated fine-tuning on the laptop review data: one model is
    # trained per fold, each starting from the same pretrained weights.
    # Tokenizer is loaded from the checkpoint path given in model_config.
    tokenizer = BertTokenizer.from_pretrained(model_config['path'],
                                              do_lower_case=True)
    # Iterated below as a sequence of (train_loader, val_loader) pairs;
    # presumably one pair per fold (FOLDS) -- confirm in get_data_loaders_cv.
    cv_loaders = get_data_loaders_cv(
        rv_path='../data/TRAIN/Train_laptop_reviews.csv',
        lb_path='../data/TRAIN/Train_laptop_labels.csv',
        tokenizer=tokenizer,
        batch_size=args.bs,
        type='laptop',
        folds=FOLDS)

    # Per-fold bookkeeping lists with one slot per fold, initialised to 0.1
    # and 0 respectively. They are not updated within the lines shown here.
    BEST_THRESHS = [0.1] * FOLDS
    BEST_F1 = [0] * FOLDS
    for cv_idx, (train_loader, val_loader) in enumerate(cv_loaders):
        # A fresh model is built for every fold and then overwritten with the
        # state dict stored at '../models/pretrained_<model name>'.
        model = OpinioNet.from_pretrained(model_config['path'],
                                          version=model_config['version'],
                                          focal=model_config['focal'])
        model.load_state_dict(
            torch.load('../models/pretrained_' + model_config['name']))
        model.cuda()
        optimizer = Adam(model.parameters(), lr=model_config['lr'])
        # total_epoch is expressed as a multiple of the number of training
        # batches; presumably the scheduler is stepped once per batch inside
        # train_epoch, which would make this a 10-epoch warmup -- TODO confirm.
        scheduler = GradualWarmupScheduler(optimizer,
                                           total_epoch=10 * len(train_loader))
        # Best-so-far validation metrics for this fold.
        best_val_f1 = 0
        best_val_loss = float('inf')

        for e in range(EP):

            # e is zero-based, so the first epoch is reported as 0.
            print('Epoch [%d/%d] train:' % (e, EP))
            train_loss, train_f1, train_pr, train_rc = train_epoch(
                model, train_loader, optimizer, scheduler, type='laptop')
Esempio n. 2
0

# Script entry point: fine-tune OpinioNet on the review data with a single
# train/validation split and evaluate after every epoch.
if __name__ == '__main__':
    EP = 100  # number of passes made by the epoch loop below
    SAVING_DIR = '../models/'
    # NOTE(review): the checkpoint location is a hard-coded absolute path on
    # one machine; it has to be adjusted to run anywhere else.
    tokenizer = BertTokenizer.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch',
        do_lower_case=True)
    # val_split=0.15 is presumably the fraction of samples held out for
    # validation -- confirm in get_data_loaders.
    train_loader, val_loader = get_data_loaders(
        rv_path='../data/TRAIN/Train_reviews.csv',
        lb_path='../data/TRAIN/Train_labels.csv',
        tokenizer=tokenizer,
        batch_size=12,
        val_split=0.15)

    # The model is loaded from the same checkpoint directory as the tokenizer
    # and moved to the GPU before the optimizer is created.
    model = OpinioNet.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch')
    model.cuda()
    optimizer = Adam(model.parameters(), lr=5e-6)
    # Whether total_epoch=2 counts epochs or optimizer steps depends on how
    # train_epoch advances the scheduler -- TODO confirm.
    scheduler = GradualWarmupScheduler(optimizer, total_epoch=2)
    # Best-so-far validation metrics; not updated within the lines shown here.
    best_val_f1 = 0
    best_val_loss = float('inf')
    for e in range(EP):

        # e is zero-based, so the first epoch is reported as 0.
        print('Epoch [%d/%d] train:' % (e, EP))
        train_loss, train_f1, train_pr, train_rc = train_epoch(
            model, train_loader, optimizer, scheduler)
        print("loss %.5f, f1 %.5f, pr %.5f, rc %.5f" %
              (train_loss, train_f1, train_pr, train_rc))

        print('Epoch [%d/%d] eval:' % (e, EP))
        val_loss, val_f1, val_pr, val_rc = eval_epoch(model, val_loader)
Esempio n. 3
0
	return total_loss, total_f1, total_pr, total_rc


# Script entry point: round-2 training that feeds train_epoch with the makeup
# loader, the laptop loader and a corpus loader, then evaluates per epoch.
if __name__ == '__main__':
	EP = 100  # number of passes made by the epoch loop below
	SAVING_DIR = '../models/'
	# NOTE(review): the checkpoint location is a hard-coded absolute path on
	# one machine; it has to be adjusted to run anywhere else.
	tokenizer = BertTokenizer.from_pretrained('/home/zydq/.torch/models/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12',
											  do_lower_case=True)
	# Disabled alternative: load the tokenizer from the '.tf' checkpoint
	# directory instead of the '.torch' one.
	# tokenizer = BertTokenizer.from_pretrained('/home/zydq/.tf/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12',
	# 										  do_lower_case=True)

	# Five loaders are returned: train/val for makeup, train/val for laptop,
	# and one corpus loader (presumably unlabeled text for the LM loss that
	# train_epoch reports -- confirm in get_data_loaders_round2).
	makeup_train_loader, makeup_val_loader, laptop_train_loader, laptop_val_loader, corpus_loader = \
	get_data_loaders_round2(tokenizer, batch_size=12)


	model = OpinioNet.from_pretrained('/home/zydq/.torch/models/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12')
	# Disabled alternative: load the weights from the '.tf' checkpoint
	# directory with from_tf=True.
	# model = OpinioNet.from_pretrained('/home/zydq/.tf/bert/chinese_roberta_wwm_ext_L-12_H-768_A-12', from_tf=True)
	model.cuda()
	optimizer = Adam(model.parameters(), lr=6e-6)
	# total_epoch is twice the length of the longer of the makeup-train and
	# corpus loaders; presumably the scheduler is stepped once per batch, which
	# would make this a two-epoch warmup -- TODO confirm in train_epoch.
	scheduler = GradualWarmupScheduler(optimizer, total_epoch=2*max(len(makeup_train_loader), len(corpus_loader)))
	# Best-so-far validation metrics; not updated within the lines shown here.
	best_val_f1 = 0
	best_val_loss = float('inf')
	for e in range(EP):

		# e is zero-based, so the first epoch is reported as 0.
		print('Epoch [%d/%d] train:' % (e, EP))
		# Unlike the evaluation call, training also returns a separate
		# language-model loss as its second value.
		train_loss, train_lm_loss, train_f1, train_pr, train_rc = train_epoch(model, makeup_train_loader, laptop_train_loader, corpus_loader, optimizer, scheduler)
		print("loss %.5f, lm loss %.5f f1 %.5f, pr %.5f, rc %.5f" % (train_loss, train_lm_loss, train_f1, train_pr, train_rc))

		print('Epoch [%d/%d] makeup eval:' % (e, EP))
		val_loss, val_f1, val_pr, val_rc = eval_epoch(model, makeup_val_loader, type='makeup')
		print("makeup_val: loss %.5f, f1 %.5f, pr %.5f, rc %.5f" % (val_loss, val_f1, val_pr, val_rc))
Esempio n. 4
0
    ]
    # Ensemble inference: run every checkpoint named in MODELS over the test
    # reviews, combine the outputs and write a timestamped submission CSV.
    # NOTE(review): the checkpoint location is a hard-coded absolute path on
    # one machine; it has to be adjusted to run anywhere else.
    tokenizer = BertTokenizer.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch',
        do_lower_case=True)
    # None is passed where the training scripts pass a label file, so the
    # dataset is presumably built without labels -- confirm in ReviewDataset.
    test_dataset = ReviewDataset('../data/TEST/Test_reviews.csv', None,
                                 tokenizer)
    # shuffle=False keeps the loader order aligned with test_dataset.samples,
    # which is read again below to build the submission.
    test_loader = DataLoader(test_dataset,
                             12,
                             collate_fn=test_dataset.batchify,
                             shuffle=False,
                             num_workers=5)

    # Running accumulation of eval_epoch outputs; starts as None and is
    # threaded through accum_result once per model.
    ret = None
    for name in MODELS:
        model_path = osp.join(SAVING_DIR, name)
        # The architecture is instantiated from the base checkpoint first and
        # then overwritten with the fine-tuned state dict at model_path.
        model = OpinioNet.from_pretrained(
            '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch')
        model.load_state_dict(torch.load(model_path))
        model.cuda()
        ret = accum_result(ret, eval_epoch(model, test_loader))
        # Drop the reference before the next model is loaded.
        del model
    # Normalise the accumulated output by the number of models, then filter
    # it with the THRESH constant (semantics live in OpinioNet.nms_filter).
    ret = average_result(ret, len(MODELS))
    ret = OpinioNet.nms_filter(ret, THRESH)
    # First field of the first element of every sample; presumably the raw
    # review text needed by gen_submit -- confirm against ReviewDataset.
    raw = [s[0][0] for s in test_dataset.samples]
    result = gen_submit(ret, raw)
    # Local import: time is only needed for the timestamp in the file name.
    import time

    # The output name embeds the current Unix time rounded to whole seconds.
    result.to_csv('../submit/ensemble-' + str(round(time.time())) + '.csv',
                  header=False,
                  index=False)
    # Sanity output: number of distinct ids and total number of rows.
    print(len(result['id'].unique()), result.shape[0])
Esempio n. 5
0
        # Evaluate one checkpoint on the laptop reviews given on the command
        # line and fold its output into the running result `ret`.
        # NOTE(review): this fragment appears to be the body of a per-model
        # loop whose header is outside the lines shown here.
        tokenizer = BertTokenizer.from_pretrained(model_config['path'],
                                                  do_lower_case=True)
        test_dataset = ReviewDataset(args.rv, args.lb, tokenizer, 'laptop')
        # shuffle=False keeps the loader order aligned with
        # test_dataset.samples, which is read just below.
        test_loader = DataLoader(test_dataset,
                                 args.bs,
                                 collate_fn=test_dataset.batchify,
                                 shuffle=False,
                                 num_workers=5)

        # raw and lb are filled only while they are still empty, so later
        # passes reuse the values captured the first time.
        if not raw:
            raw = [s[0][0] for s in test_dataset.samples]
        # Labels are collected only when a label file was supplied (args.lb).
        if not lb and args.lb:
            lb = [s[0][1] for s in test_dataset.samples]

        model = OpinioNet.from_pretrained(model_config['path'],
                                          version=model_config['version'],
                                          focal=model_config['focal'])
        print(weight_name)
        # Overwrite the freshly built model with the stored state dict.
        model.load_state_dict(torch.load('../models/' + weight_name))
        model.cuda()
        ret = accum_result(ret, eval_epoch(model, test_loader, thresh))
        # Drop the reference before the next model is loaded.
        del model
    # Normalise the accumulated output by the number of models that
    # contributed, then filter it.
    ret = average_result(ret, num_model)
    # NOTE(review): 0.28 is a hard-coded filter threshold; its origin is not
    # visible here -- consider naming it as a constant.
    ret = OpinioNet.nms_filter(ret, 0.28)

    if args.lb:

        def f1_score(P, G, S):
            pr = S / P
            rc = S / G
            f1 = 2 * pr * rc / (pr + rc)
Esempio n. 6
0
                             type='laptop',
                             folds=FOLDS))
        for model_name, model_config in PRETRAINED_MODELS.items()
    ])

    # Per-fold ensemble evaluation: for each fold, every configured model
    # that has a saved checkpoint is run on that fold's validation loader and
    # its output is accumulated into `cvret`.
    # PRED is not appended to within the lines shown here.
    PRED = []
    for cv_idx in range(FOLDS):
        # Number of models that actually contributed to this fold, and their
        # accumulated validation output (None until the first contribution).
        cv_model_num = 0
        cvret = None
        for model_name, model_config in PRETRAINED_MODELS.items():
            tokenizer = tokenizers[model_name]
            # Only the validation half of the fold's loader pair is used.
            _, val_loader = cv_loaders[model_name][cv_idx]

            try:
                model = OpinioNet.from_pretrained(
                    model_config['path'],
                    version=model_config['version'],
                    focal=model_config['focal'])
                # Checkpoints are stored as '../models/<name>_cv<fold index>'.
                weight_name = model_config['name'] + '_cv' + str(cv_idx)
                weight = torch.load('../models/' + weight_name)
            except FileNotFoundError:
                # No checkpoint for this model/fold combination: skip it so
                # the ensemble is built from whatever weights are available.
                continue
            print(weight_name)
            model.load_state_dict(weight)
            model.cuda()
            try:
                thresh = thresh_dict[weight_name]['thresh']
            except (KeyError, TypeError):
                # No stored threshold for this checkpoint (missing key or an
                # entry that cannot be indexed): fall back to 0.5. The
                # handler is deliberately narrow so that KeyboardInterrupt,
                # SystemExit and unrelated errors are no longer swallowed.
                thresh = 0.5
            cvret = accum_result(cvret, eval_epoch(model, val_loader, thresh))
            cv_model_num += 1
            # Drop the reference before the next model is loaded.
            del model