Example #1
0
def subAimFunc(args):
    """Multi-objective fitness for one individual of the model-selection GA.

    Args:
        args: pair ``(i, Vars)`` where ``Vars[i, :]`` is a row of 0/1 genes
            selecting which entries of the global ``models_predictions``
            participate in the ensemble.

    Returns:
        ``[f1, f2]`` — ``f1`` is the mean of the SQuAD ``best_exact`` and
        ``best_f1`` scores of the ensemble (0.0 for an empty ensemble) and
        ``f2`` is a small parsimony penalty proportional to the number of
        selected models.
    """
    assert models_predictions
    i = args[0]
    Vars = args[1]
    selection = Vars[i, :]
    # Parsimony pressure: prefer ensembles with fewer member models.
    f2 = sum(selection) * 1e-5

    # Keep only the models whose selection gene equals 1.
    indv_models_predictions = {
        name: preds
        for (name, preds), flag in zip(models_predictions.items(), selection)
        if flag == 1
    }
    if not indv_models_predictions:
        # Empty ensemble: worst quality score, penalty still reported.
        return [0., f2]

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        # Take each member's top n-best candidate and keep the text with
        # the highest probability.  NOTE(review): ties between equal
        # probabilities may resolve differently than the original
        # sort-then-reverse chain; probabilities are floats, so exact ties
        # are not expected.
        best = max(
            (preds['eval_all_nbest'][qid][0]
             for preds in indv_models_predictions.values()),
            key=lambda cand: cand['probability'])
        ensemble_preds[qid] = best['text']
        # Null-answer odds: mean of the members' odds for this question.
        ensemble_odds[qid] = np.mean([
            preds['squad_null_odds'][qid]
            for preds in indv_models_predictions.values()
        ])
    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    f1 = (eval_r['best_exact'] + eval_r['best_f1']) / 2
    return [f1, f2]
Example #2
0
def ensemble_fitness(indv):
    """Scalar fitness of a GA individual whose real-valued genes select models.

    A model is considered selected when its gene in ``indv.solution`` is
    ``>= 0.5``.  Fitness rewards ensemble quality (``best_exact`` +
    ``best_f1`` from the SQuAD evaluator) and mildly penalises ensemble
    size.

    Args:
        indv: individual with a ``solution`` sequence of floats, one per
            entry of the global ``models_predictions``.

    Returns:
        float fitness; ``-len(selected)`` when no model is selected.
    """
    assert models_predictions

    # Keep only the models whose gene passes the 0.5 selection threshold.
    indv_models_predictions = {
        name: preds
        for (name, preds), w in zip(models_predictions.items(), indv.solution)
        if w >= 0.5
    }

    # Binarised solution; its sum is the number of selected models.
    cof = [float(w >= 0.5) for w in indv.solution]
    if not indv_models_predictions:
        return -sum(cof)

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        # Best top-1 candidate across members by probability.
        # NOTE(review): float-probability ties may break differently than
        # the original sort-then-reverse chain; exact ties are unexpected.
        best = max(
            (preds['eval_all_nbest'][qid][0]
             for preds in indv_models_predictions.values()),
            key=lambda cand: cand['probability'])
        ensemble_preds[qid] = best['text']
        ensemble_odds[qid] = np.mean([
            preds['squad_null_odds'][qid]
            for preds in indv_models_predictions.values()
        ])
    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    # Quality minus a small size penalty.
    fitness = eval_r['best_exact'] + eval_r['best_f1'] - sum(cof) * 0.001
    return fitness
Example #3
0
def subAimFunc(args):
    """Multi-objective fitness with per-architecture confidence weights.

    The chromosome row ``Vars[i, :]`` is split into two parts: the first
    ``len(models_predictions)`` genes are 0/1 model-selection flags; the
    remaining genes encode per-architecture weights in tenths (gene / 10).

    Args:
        args: pair ``(i, Vars)``.

    Returns:
        ``[f1, f2]`` — ``f1`` is the mean of SQuAD ``best_exact`` and
        ``best_f1`` for the weighted ensemble (0.0 if nothing selected),
        ``f2`` a small penalty proportional to ensemble size.
    """
    assert models_predictions
    i = args[0]
    Vars = args[1]
    n_models = len(models_predictions)
    # First n_models genes: 0/1 selection flags for each model.
    model_selections = Vars[i, :n_models].tolist()
    # Remaining genes: architecture weights, decoded from tenths.
    model_cofs = [x / 10 for x in Vars[i, n_models:]]

    def get_model_cof(model_name):
        # Weight applied to a member's probabilities and null odds.
        # NOTE(review): the index mapping (xlnet -> cofs[1],
        # albert -> cofs[0]) mirrors the gene layout — confirm against the
        # chromosome encoding.
        if "xlnet" in model_name:
            return model_cofs[1]
        elif "albert" in model_name:
            return model_cofs[0]
        else:
            return 1

    # Parsimony pressure: prefer smaller ensembles.
    f2 = sum(model_selections) * 1e-5

    # Keep only the models whose selection flag equals 1.
    indv_models_predictions = {
        name: preds
        for (name, preds), flag in zip(models_predictions.items(),
                                       model_selections)
        if flag == 1
    }
    if not indv_models_predictions:
        # Empty ensemble: worst quality score, penalty still reported.
        return [0., f2]

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        # Pick the top-1 candidate whose weighted probability is highest.
        # NOTE(review): float ties may break differently than the original
        # sort-then-reverse chain; exact ties are not expected.
        best_text, _ = max(
            ((preds['eval_all_nbest'][qid][0]['text'],
              get_model_cof(name) *
              preds['eval_all_nbest'][qid][0]['probability'])
             for name, preds in indv_models_predictions.items()),
            key=lambda pair: pair[1])
        ensemble_preds[qid] = best_text
        # Weighted mean of the members' null-answer odds.
        ensemble_odds[qid] = np.mean([
            get_model_cof(name) * preds['squad_null_odds'][qid]
            for name, preds in indv_models_predictions.items()
        ])
    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    f1 = (eval_r['best_exact'] + eval_r['best_f1']) / 2
    return [f1, f2]
Example #4
0
    def vote_with_post_processing():
        """Bag the per-model n-best lists by averaging probabilities per
        answer text (with question-type post-processing and down-weighting
        of 'lr_epoch_results' models), then run the SQuAD 2.0 evaluation
        and print the result.

        Reads ``all_nbest``, ``all_odds``, ``models``, ``qid_answers`` and
        ``qid_questions`` from the enclosing scope.
        """
        bagging_preds = collections.OrderedDict()
        bagging_odds = collections.OrderedDict()

        def post_process(question, candi, weight=1):
            """Adjust one candidate answer's probability in place.

            NOTE(review): ``th`` is 0, so the question-type boosts below
            are currently no-ops; only the ``weight`` scaling takes
            effect.
            NOTE(review): mutates ``candi`` (an entry of ``all_nbest``) in
            place — invoking this twice on the same candidate compounds
            the weighting.
            """
            question = question.lower()
            if not candi['text']:
                return candi
            first_token = candi['text'].split()[0]
            th = 0.
            if "when" in question:
                if first_token in [
                        'before', 'after', 'about', 'around', 'from', 'during'
                ]:
                    candi['probability'] += th
            elif "where" in question:
                if first_token in [
                        'in', 'at', 'on', 'behind', 'from', 'through',
                        'between', 'throughout'
                ]:
                    candi['probability'] += th
            elif "whose" in question:
                if "'s" in candi['text']:
                    candi['probability'] += th
            elif "which" in question:
                if first_token == "the":
                    candi['probability'] += th
            candi['probability'] *= weight
            return candi

        # Down-weighting factor for models tagged 'lr_epoch_results'.
        cof = 0.2

        for qid in qid_answers:
            question = qid_questions[qid]
            # Flatten every model's n-best list for this question, applying
            # the per-model weight through post_process.
            post_process_candidates = []
            for nbest, model in zip(all_nbest, models):
                weight = cof if 'lr_epoch_results' in model else 1.
                for cand in nbest[qid]:
                    post_process_candidates.append(
                        post_process(question, cand, weight))
            # Group probabilities by answer text.
            preds_probs = collections.defaultdict(list)
            for pred in post_process_candidates:
                preds_probs[pred['text']].append(pred['probability'])
            # Average each text's probabilities exactly once.  (The
            # original looped over all candidates again, redundantly
            # re-averaging already-averaged scalars.)
            for text in preds_probs:
                preds_probs[text] = float(np.mean(preds_probs[text]))
            # Highest mean probability wins.  NOTE(review): float ties may
            # break differently than the original sort-then-reverse chain;
            # exact ties are not expected.
            bagging_preds[qid] = max(preds_probs.items(),
                                     key=lambda kv: kv[1])[0]
            bagging_odds[qid] = np.mean([
                odds[qid] * cof if 'lr_epoch_results' in model else odds[qid]
                for odds, model in zip(all_odds, models)
            ])

        r = main2(
            json.load(open('dev-v2.0.json', 'r', encoding='utf-8'))['data'],
            bagging_preds, bagging_odds)
        print(f"{models}, {r}")