def answers_generator(filename_gen, truth, epoch_from_filename=False, use_shortname=True):
    """Lazily yield ``(name, answers)`` pairs, one per answers file.

    Each filename drawn from ``filename_gen`` is parsed with
    ``read_answers_file``.  When ``use_shortname`` is true the yielded name is
    ``get_shortname(filename, epoch_from_filename)``; otherwise it is the
    filename exactly as supplied.

    ``truth`` is accepted for interface compatibility but is not used here.
    """
    for path in filename_gen:
        parsed = read_answers_file(path)
        label = get_shortname(path, epoch_from_filename) if use_shortname else path
        yield (label, parsed)


# NOTE(review): the collapsed source does not show which scope this
# assignment belongs to; it is kept as a module-level statement -- confirm.
results = defaultdict(list)
def _select_singles(scores, sample_size, include):
    """Choose the individual answer sets that ensembles are built from.

    ``scores`` is iterated as ``(c, fn, shortname)`` triples.  Entries whose
    shortname satisfies ``include`` are always selected (and recorded as
    "essential"); the remaining slots, up to ``sample_size`` distinct
    shortnames in total, are filled by walking ``scores`` in order.

    A shortname is selected at most once (its first occurrence in ``scores``
    wins), so a member picked by ``include`` is neither listed twice nor
    counted twice against ``sample_size``.

    Returns ``(single_lines, singles, essentials)`` where ``single_lines`` is
    a list of ``(c[0], shortname)``, ``singles`` maps shortname to the result
    of ``read_answers_file(fn)``, and ``essentials`` is the set of shortnames
    selected through ``include``.
    """
    singles = {}
    single_lines = []
    essentials = set()

    # `always` is the "no filter" default; only run the forced-inclusion
    # pass when the caller supplied a real predicate.
    if include is not always:
        for c, fn, shortname in scores:
            if shortname in singles or not include(shortname):
                continue
            singles[shortname] = read_answers_file(fn)
            single_lines.append((c[0], shortname))
            essentials.add(shortname)

    # Top up with not-yet-selected entries.  Forced inclusions may already
    # exceed sample_size (or sample_size may be <= 0), in which case nothing
    # more is added.
    for c, fn, shortname in scores:
        if len(singles) >= sample_size:
            break
        if shortname in singles:
            continue
        singles[shortname] = read_answers_file(fn)
        single_lines.append((c[0], shortname))

    return single_lines, singles, essentials


def _average_answers(member_answers, ensemble_size):
    """Sum the per-key values of each mapping and divide by ``ensemble_size``.

    A key missing from some members contributes nothing for those members but
    is still divided by the full ``ensemble_size``.
    """
    totals = {}
    for answers in member_answers:
        for key, value in answers.items():
            totals[key] = totals.get(key, 0.0) + value
    return {key: total / ensemble_size for key, total in totals.items()}


def generate_ensembles(filename_gen, ensemble_size, truth, sample_size=14,
                       replace=False, randomise=False,
                       epoch_from_filename=False, cat1_centre=None,
                       cat1_radius=0, include=always, exclude=never,
                       iterations=1):
    """Build and score every ensemble of ``ensemble_size`` sampled answer sets.

    The answers files named by ``filename_gen`` are scored via
    ``get_sorted_scores``.  Then, ``iterations`` times:

    1. if ``randomise`` is true, the score list is shuffled in place;
    2. up to ``sample_size`` distinct members are selected (see
       ``_select_singles``); members matching ``include`` are always taken;
    3. every combination of ``ensemble_size`` members is averaged key by key
       and evaluated -- with ``search_for_centre`` when ``cat1_centre`` is
       None, otherwise with ``evaluate_fixed_cat1``.  With ``replace`` true,
       combinations with replacement are used.  If any member was forced in
       by ``include``, combinations containing none of them are skipped.

    Returns a list with one ``(single_lines, singles, ensembles)`` tuple per
    iteration.  ``ensembles`` is a list of ``(centre[0], names)`` tuples
    sorted ascending, where ``names`` is the tuple of member shortnames.
    """
    # Full filenames are needed so the files can be re-read below, hence
    # use_shortname=False.
    answers_gen = answers_generator(filename_gen, truth, epoch_from_filename, False)
    scores = get_sorted_scores(answers_gen, truth, cat1_centre, cat1_radius,
                               exclude, epoch_from_filename=epoch_from_filename)

    if replace:
        combos = itertools.combinations_with_replacement
    else:
        combos = itertools.combinations

    results = []
    for _ in range(iterations):
        if randomise:
            random.shuffle(scores)

        single_lines, singles, essentials = _select_singles(
            scores, sample_size, include)

        ensembles = []
        for names in combos(list(singles), ensemble_size):
            # When members were forced in, only ensembles containing at
            # least one of them are of interest.
            if essentials and essentials.isdisjoint(names):
                continue
            ensemble = _average_answers((singles[n] for n in names),
                                        ensemble_size)
            if cat1_centre is None:
                centre = search_for_centre(ensemble, truth)
            else:
                centre = evaluate_fixed_cat1(ensemble, truth, cat1_centre,
                                             cat1_radius)
            ensembles.append((centre[0], names))

        ensembles.sort()
        results.append((single_lines, singles, ensembles))
    return results