Esempio n. 1
0
        # Populate `raw` only while it is still empty: element [0][0] of every
        # dataset sample (presumably the raw review text - TODO confirm).
        if not raw:
            raw = [s[0][0] for s in test_dataset.samples]
        # Same lazy fill for `lb` from element [0][1] of every sample, but only
        # when args.lb is set (presumably the gold labels - TODO confirm).
        if not lb and args.lb:
            lb = [s[0][1] for s in test_dataset.samples]

        # Build the network from this model's config entry, then replace its
        # parameters with the checkpoint stored under ../models/.
        model = OpinioNet.from_pretrained(model_config['path'],
                                          version=model_config['version'],
                                          focal=model_config['focal'])
        print(weight_name)
        model.load_state_dict(torch.load('../models/' + weight_name))
        # Move the model to the GPU before evaluation.
        model.cuda()
        # Fold this model's evaluation output into the running accumulator
        # `ret` (accum_result is defined elsewhere; presumably an element-wise
        # sum - verify against its definition).
        ret = accum_result(ret, eval_epoch(model, test_loader, thresh))
        # Drop the reference to this model before the next one is constructed.
        del model
    # Combine the accumulated results over `num_model` models, then run
    # OpinioNet.nms_filter on them with a hard-coded threshold of 0.28.
    ret = average_result(ret, num_model)
    ret = OpinioNet.nms_filter(ret, 0.28)

    if args.lb:

        def f1_score(P, G, S):
            """Return ``(f1, precision, recall)`` computed from aggregate counts.

            Args:
                P: total count used as the precision denominator
                    (``precision = S / P``).
                G: total count used as the recall denominator
                    (``recall = S / G``).
                S: count shared by both, the numerator of both ratios.

            Returns:
                A ``(f1, pr, rc)`` tuple. Any ratio whose denominator is zero
                is reported as ``0.0`` instead of raising
                ``ZeroDivisionError``, so an evaluation that yields no
                predictions, no ground truth, or no matches still produces a
                score.
            """
            pr = S / P if P else 0.0
            rc = S / G if G else 0.0
            # pr + rc is zero exactly when there are no matches at all.
            f1 = 2 * pr * rc / (pr + rc) if (pr + rc) else 0.0
            return f1, pr, rc

        def evaluate_sample(gt, pred):
            gt = set(gt)
            pred = set(pred)
            p = len(pred)
            g = len(gt)
            s = len(gt.intersection(pred))
Esempio n. 2
0
    # Load the BERT tokenizer from a hard-coded local directory, with
    # lower-casing enabled.
    tokenizer = BertTokenizer.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch',
        do_lower_case=True)
    # Test split: only the reviews CSV is supplied; the second argument is None
    # (presumably the labels file, absent for the test set - TODO confirm).
    test_dataset = ReviewDataset('../data/TEST/Test_reviews.csv', None,
                                 tokenizer)
    # Batches of 12, collated by the dataset's own batchify; shuffle is off, so
    # batches follow the order of test_dataset.samples.
    test_loader = DataLoader(test_dataset,
                             12,
                             collate_fn=test_dataset.batchify,
                             shuffle=False,
                             num_workers=5)

    # Evaluate every checkpoint named in MODELS and accumulate the outputs in
    # `ret` (starts as None; accum_result is defined elsewhere).
    ret = None
    for name in MODELS:
        model_path = osp.join(SAVING_DIR, name)
        # Build the network from the same local BERT directory, then replace
        # its parameters with the saved checkpoint.
        model = OpinioNet.from_pretrained(
            '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch')
        model.load_state_dict(torch.load(model_path))
        model.cuda()
        ret = accum_result(ret, eval_epoch(model, test_loader))
        # Drop the reference to this model before the next one is constructed.
        del model
    # Combine the accumulated results over the number of models, then run
    # OpinioNet.nms_filter on them with the THRESH constant.
    ret = average_result(ret, len(MODELS))
    ret = OpinioNet.nms_filter(ret, THRESH)
    # Element [0][0] of every sample (presumably the raw review text - TODO
    # confirm) is passed to gen_submit together with the filtered results.
    raw = [s[0][0] for s in test_dataset.samples]
    result = gen_submit(ret, raw)
    import time

    # Write the submission without header or index; the file name carries the
    # current Unix time rounded to whole seconds.
    result.to_csv('../submit/ensemble-' + str(round(time.time())) + '.csv',
                  header=False,
                  index=False)
    # Report the number of distinct values in the 'id' column and the number of
    # rows written.
    print(len(result['id'].unique()), result.shape[0])
Esempio n. 3
0
    #
    # 	P += p
    # 	G += g
    # 	S += s
    # f1, pr, rc = f1_score(P, G, S)
    # print("f1 %.5f, pr %.5f, rc %.5f, th %.5f" % (f1, pr, rc, 0.3))

    # Candidate thresholds: np.arange from 0.1 towards 0.9 in steps of 0.025
    # (stop value nominally excluded; float rounding can affect the last item).
    threshs = list(np.arange(0.1, 0.9, 0.025))
    # Best scores seen so far and the threshold that produced them.
    best_f1, best_pr, best_rc = 0, 0, 0
    best_thresh = 0.1
    P, G, S = 0, 0, 0
    # NOTE(review): PRED_COPY is not assigned in the visible code above this
    # line; it must already be bound by earlier code - confirm.
    BEST_PRED = PRED_COPY
    for th in threshs:
        # Counts are reset for every threshold.
        P, G, S = 0, 0, 0
        # Filter a fresh deep copy each time so PRED itself is never handed to
        # nms_filter (presumably because it mutates its input - TODO confirm).
        PRED_COPY = copy.deepcopy(PRED)
        PRED_COPY = OpinioNet.nms_filter(PRED_COPY, th)
        for b in range(len(PRED_COPY)):
            gt = LB[b]
            # Only element [0] of each filtered prediction is compared with
            # the ground truth for sample b.
            pred = [x[0] for x in PRED_COPY[b]]
            p, g, s = evaluate_sample(gt, pred)

            P += p
            G += g
            S += s
        # NOTE(review): if f1_score divides by P or G unguarded, a threshold
        # that leaves no predictions would raise ZeroDivisionError - confirm.
        f1, pr, rc = f1_score(P, G, S)
        # Strict '>' keeps the earliest threshold when several tie on f1.
        if f1 > best_f1:
            best_f1, best_pr, best_rc = f1, pr, rc
            best_thresh = th
            BEST_PRED = copy.deepcopy(PRED_COPY)

    print("f1 %.5f, pr %.5f, rc %.5f, th %.5f" %