def subAimFunc(args):
    """Multi-objective fitness for one candidate row of a binary model-selection GA.

    args: [i, Vars] where Vars is a 2-D array of 0/1 selection rows and i
    indexes the row to evaluate.

    Returns [f1, f2]:
        f1 -- mean of best_exact and best_f1 from the SQuAD-v2 evaluator
              (0.0 when no model is selected),
        f2 -- tiny penalty proportional to the number of selected models.

    Relies on module globals: models_predictions, qid_answers, dev, main2.
    """
    assert models_predictions  # must be populated before evaluation
    i = args[0]
    Vars = args[1]

    # Second objective: prefer ensembles with fewer members.
    f2 = sum(Vars[i, :]) * 1e-5

    # Keep only the models whose selection bit is 1.
    # (The original zip(*(a, b)) star-unpacking was a no-op; the dead
    # .enumerate() steps whose index was never used are removed.)
    indv_models_predictions = {
        name: preds
        for (name, preds), selected in zip(models_predictions.items(), Vars[i, :])
        if selected == 1
    }

    # Empty ensemble: nothing to evaluate.
    if not indv_models_predictions:
        f1 = 0.
        return [f1, f2]

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        # Top-1 candidate of each selected model; keep the text with the
        # highest probability.  Ties resolve as the original sort/reverse
        # did: the last-listed model wins (hence sorted(...)[-1]).
        top_candidates = [
            preds['eval_all_nbest'][qid][0]
            for preds in indv_models_predictions.values()
        ]
        ranked = sorted(
            ((c['text'], c['probability']) for c in top_candidates),
            key=lambda tp: tp[1])
        ensemble_preds[qid] = ranked[-1][0]
        # Null-answer odds: plain mean over the selected models.
        ensemble_odds[qid] = np.mean([
            preds['squad_null_odds'][qid]
            for preds in indv_models_predictions.values()
        ])

    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    f1 = (eval_r['best_exact'] + eval_r['best_f1']) / 2
    return [f1, f2]
def ensemble_fitness(indv):
    """Scalar GA fitness for an individual with real-valued selection genes.

    A model is included in the ensemble when its gene is >= 0.5.  Fitness is
    best_exact + best_f1 minus a 0.001 penalty per selected model; an empty
    ensemble scores only the (negated) selection count.

    Relies on module globals: models_predictions, qid_answers, dev, main2.
    """
    assert models_predictions  # must be populated before evaluation

    # Threshold the real-valued genes into a model subset and a 0/1 mask.
    # (The original zip(*(a, b)) star-unpacking was a no-op; the dead
    # .enumerate() steps whose index was never used are removed.)
    indv_models_predictions = {
        name: preds
        for (name, preds), gene in zip(models_predictions.items(), indv.solution)
        if gene >= 0.5
    }
    cof = [float(gene >= 0.5) for gene in indv.solution]

    if not indv_models_predictions:
        # No model selected: only the (zero) penalty term remains.
        return -sum(cof)

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        # Top-1 candidate of each selected model; highest probability wins.
        # Ties resolve as the original sort/reverse did: last-listed model.
        top_candidates = [
            preds['eval_all_nbest'][qid][0]
            for preds in indv_models_predictions.values()
        ]
        ranked = sorted(
            ((c['text'], c['probability']) for c in top_candidates),
            key=lambda tp: tp[1])
        ensemble_preds[qid] = ranked[-1][0]
        ensemble_odds[qid] = np.mean([
            preds['squad_null_odds'][qid]
            for preds in indv_models_predictions.values()
        ])

    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    # Small complexity penalty: 0.001 per selected model.
    fitness = eval_r['best_exact'] + eval_r['best_f1'] - sum(cof) * 0.001
    return fitness
def subAimFunc(args):
    """Multi-objective fitness for a candidate encoding a binary selection
    mask plus per-family weight genes.

    args: [i, Vars].  Row i of Vars holds len(models_predictions) selection
    bits followed by weight genes (scaled by 1/10 into coefficients).

    Returns [f1, f2]: f1 is the mean of best_exact/best_f1 of the weighted
    ensemble (0.0 when nothing is selected); f2 penalises ensemble size.

    Relies on module globals: models_predictions, qid_answers, dev, main2.
    """
    assert models_predictions
    i, Vars = args[0], args[1]
    n_models = len(models_predictions)
    model_selections = Vars[i, :n_models].tolist()
    # Trailing genes, scaled down, act as per-family weighting coefficients.
    model_cofs = [gene / 10 for gene in Vars[i, n_models:]]

    def get_model_cof(model_name):
        # NOTE(review): cof index per family is hard-coded (xlnet -> 1,
        # albert -> 0) — confirm against the gene layout used by the caller.
        if "xlnet" in model_name:
            return model_cofs[1]
        if "albert" in model_name:
            return model_cofs[0]
        return 1

    # Second objective: prefer ensembles with fewer members.
    f2 = sum(model_selections) * 1e-5

    selected = {}
    for (name, preds), bit in zip(models_predictions.items(), model_selections):
        if bit == 1:
            selected[name] = preds

    if not selected:
        return [0., f2]

    ensemble_preds = collections.OrderedDict()
    ensemble_odds = collections.OrderedDict()
    for qid in qid_answers.keys():
        scored = []
        weighted_odds = []
        for name, preds in selected.items():
            weight = get_model_cof(name)
            best = preds['eval_all_nbest'][qid][0]
            scored.append((best['text'], weight * best['probability']))
            weighted_odds.append(weight * preds['squad_null_odds'][qid])
        # Ascending sort then take the last entry == the original
        # sort/reverse/[0]; on ties the last-listed model wins, as before.
        scored.sort(key=lambda text_score: text_score[1])
        ensemble_preds[qid] = scored[-1][0]
        ensemble_odds[qid] = np.mean(weighted_odds)

    eval_r = main2(dev['data'], ensemble_preds, ensemble_odds)
    f1 = (eval_r['best_exact'] + eval_r['best_f1']) / 2
    return [f1, f2]
def vote_with_post_processing():
    """Bag the loaded models' nbest predictions into final SQuAD-v2 answers.

    For every question, each model's candidate answers are re-scored by
    question-type heuristics and a per-model weight, candidates sharing the
    same text are merged by mean probability, and the highest-scoring text
    wins.  Null odds are averaged with the same per-model weighting.  The
    result is scored with main2 against dev-v2.0.json and printed.

    Relies on module globals: qid_answers, qid_questions, all_nbest,
    all_odds, models, main2.
    """
    bagging_preds = collections.OrderedDict()
    bagging_odds = collections.OrderedDict()

    def post_process(question, candi, weight=1):
        # NOTE(review): mutates candi in place, so the shared nbest dicts
        # are permanently rescaled by `weight` — fine for one pass, but a
        # second call would compound the weighting.
        question = question.lower()
        if not candi['text']:
            return candi
        first_token = candi['text'].split()[0]
        # th == 0. currently disables the question-type boosts below;
        # presumably a tuning knob to be raised when the heuristics help —
        # confirm before relying on them.
        th = 0.
        if "when" in question:
            if first_token in [
                    'before', 'after', 'about', 'around', 'from', 'during'
            ]:
                candi['probability'] += th
        elif "where" in question:
            if first_token in [
                    'in', 'at', 'on', 'behind', 'from', 'through', 'between',
                    'throughout'
            ]:
                candi['probability'] += th
        elif "whose" in question:
            if "'s" in candi['text']:
                candi['probability'] += th
        elif "which" in question:
            if first_token == "the":
                candi['probability'] += th
        candi['probability'] *= weight
        return candi

    # Down-weight models trained via 'lr_epoch_results' checkpoints.
    cof = 0.2
    for qid in qid_answers:
        question = qid_questions[qid]
        # Flatten every model's post-processed nbest list for this question.
        post_process_candidates = [
            post_process(question, candidate,
                         cof if 'lr_epoch_results' in model else 1.)
            for nbest, model in zip(all_nbest, models)
            for candidate in nbest[qid]
        ]
        # Merge candidates with identical text via mean probability —
        # one pass over unique texts instead of the original per-candidate
        # recomputation (which also overwrote list values with floats
        # mid-iteration).
        preds_probs = collections.defaultdict(list)
        for pred in post_process_candidates:
            preds_probs[pred['text']].append(pred['probability'])
        merged = {
            text: float(np.mean(probs))
            for text, probs in preds_probs.items()
        }
        # Ascending sort, take the last item: highest mean probability;
        # ties resolve in favour of the latest-seen text, as before.
        bagging_preds[qid] = sorted(merged.items(), key=lambda tp: tp[1])[-1][0]
        bagging_odds[qid] = np.mean([
            odds[qid] * cof if 'lr_epoch_results' in model else odds[qid]
            for odds, model in zip(all_odds, models)
        ])

    # Close the dataset file deterministically (the original leaked the handle).
    with open('dev-v2.0.json', 'r', encoding='utf-8') as f:
        dev_data = json.load(f)['data']
    r = main2(dev_data, bagging_preds, bagging_odds)
    print(f"{models}, {r}")