Example #1
0
def answers_generator(filename_gen, truth, epoch_from_filename=False,
                      use_shortname=True):
    """Yield a ``(name, answers)`` pair for every file in *filename_gen*.

    Args:
        filename_gen: iterable of filenames; each one is loaded with
            ``read_answers_file``.
        truth: not used by this function; kept so that existing callers
            passing it positionally continue to work.
        epoch_from_filename: forwarded to ``get_shortname`` when
            *use_shortname* is true.
        use_shortname: when true, the yielded name is
            ``get_shortname(filename, epoch_from_filename)``; otherwise it
            is the filename unchanged.

    Yields:
        ``(name, answers)`` tuples, in the order of *filename_gen*.
    """
    for fn in filename_gen:
        # Load first, so a read failure surfaces before any name mangling.
        answers = read_answers_file(fn)
        name = get_shortname(fn, epoch_from_filename) if use_shortname else fn
        yield (name, answers)
Example #2
0
def answers_generator(filename_gen,
                      truth,
                      epoch_from_filename=False,
                      use_shortname=True):
    """Load each answers file and yield it alongside its name.

    Args:
        filename_gen: iterable of filenames, each read via
            ``read_answers_file``.
        truth: unused here; retained for backward compatibility with
            callers that supply it.
        epoch_from_filename: passed through to ``get_shortname`` (only
            consulted when *use_shortname* is true).
        use_shortname: if true, yield ``get_shortname(filename,
            epoch_from_filename)`` as the name instead of the raw filename.

    Yields:
        ``(name, answers)`` tuples, one per input filename, in input order.
    """
    for fn in filename_gen:
        # The file is read before the name is shortened, so read errors
        # are raised first.
        answers = read_answers_file(fn)
        if use_shortname:
            yield (get_shortname(fn, epoch_from_filename), answers)
        else:
            yield (fn, answers)
Example #3
0
def generate_ensembles(filename_gen,
                       ensemble_size,
                       truth,
                       sample_size=14,
                       replace=False,
                       randomise=False,
                       epoch_from_filename=False,
                       cat1_centre=None,
                       cat1_radius=0,
                       include=always,
                       exclude=never,
                       iterations=1):
    """Evaluate every combination of selected answer files as an ensemble.

    The files are scored once with ``get_sorted_scores``.  Each iteration
    then picks a set of "singles", averages the answers of every
    *ensemble_size*-way combination of them, and evaluates that average.

    Args:
        filename_gen: iterable of answer filenames.
        ensemble_size: number of members per combination; also the divisor
            used when averaging the members' answers.
        truth: forwarded to the scoring and evaluation helpers.
        sample_size: number of entries taken from the head of the score
            list, reduced by one for every file matched by *include*.
        replace: if true, combinations are drawn with replacement.
        randomise: if true, the score list is shuffled in place at the
            start of every iteration.
        epoch_from_filename: forwarded to ``answers_generator`` and
            ``get_sorted_scores``.
        cat1_centre: when ``None`` each ensemble is evaluated with
            ``search_for_centre``; otherwise with ``evaluate_fixed_cat1``
            using this centre and *cat1_radius*.
        cat1_radius: forwarded to ``get_sorted_scores`` and
            ``evaluate_fixed_cat1``.
        include: predicate on a shortname.  Unless it is the ``always``
            sentinel, every matching file is selected regardless of its
            position, and combinations containing none of them are skipped.
        exclude: forwarded to ``get_sorted_scores``.
        iterations: how many times the selection/evaluation is repeated.

    Returns:
        A list with one ``(single_lines, singles, ensembles)`` tuple per
        iteration, where ``single_lines`` is a list of
        ``(score[0], shortname)`` pairs, ``singles`` maps shortname to the
        loaded answers, and ``ensembles`` is a sorted list of
        ``(centre[0], names)`` pairs.
    """
    answers_gen = answers_generator(filename_gen, truth, epoch_from_filename,
                                    False)

    scores = get_sorted_scores(answers_gen,
                               truth,
                               cat1_centre,
                               cat1_radius,
                               exclude,
                               epoch_from_filename=epoch_from_filename)

    # The combination strategy is the same for every iteration.
    if replace:
        combos = itertools.combinations_with_replacement
    else:
        combos = itertools.combinations

    results = []
    for _ in range(iterations):
        if randomise:
            random.shuffle(scores)

        singles = {}
        essentials = set()
        single_lines = []
        cutoff = sample_size
        if include is not always:
            for c, fn, shortname in scores:
                if include(shortname):
                    singles[shortname] = read_answers_file(fn)
                    single_lines.append((c[0], shortname))
                    essentials.add(shortname)
                    cutoff -= 1

        # A negative cutoff would slice from the wrong end, so clamp it.
        for c, fn, shortname in scores[:max(cutoff, 0)]:
            if shortname in essentials:
                # Already loaded and listed by the include pass; reading it
                # again would only duplicate its line in single_lines.
                # NOTE(review): the freed slot is not backfilled from
                # further down the list -- confirm that is intended.
                continue
            singles[shortname] = read_answers_file(fn)
            single_lines.append((c[0], shortname))

        ensembles = []
        for names in combos(singles, ensemble_size):
            # When files were force-included, only keep combinations that
            # contain at least one of them.
            if essentials and essentials.isdisjoint(names):
                continue

            totals = {}
            for n in names:
                for k, v in singles[n].items():
                    totals[k] = totals.get(k, 0.0) + v
            ensemble = {k: v / ensemble_size for k, v in totals.items()}

            if cat1_centre is None:
                centre = search_for_centre(ensemble, truth)
            else:
                centre = evaluate_fixed_cat1(ensemble, truth, cat1_centre,
                                             cat1_radius)

            ensembles.append((centre[0], names))

        ensembles.sort()
        results.append((single_lines, singles, ensembles))

    return results
Example #4
0
def generate_ensembles(filename_gen, ensemble_size, truth,
                       sample_size=14, replace=False, randomise=False,
                       epoch_from_filename=False,
                       cat1_centre=None, cat1_radius=0,
                       include=always, exclude=never, iterations=1):
    """Build and evaluate averaged ensembles of answer files.

    Scores are obtained once from ``get_sorted_scores``.  On every
    iteration a set of "singles" is chosen from them, and each
    combination of *ensemble_size* singles is averaged key by key and
    evaluated.

    Args:
        filename_gen: iterable of answer filenames.
        ensemble_size: members per combination, and the divisor applied
            to the summed answers.
        truth: handed to the scoring and evaluation helpers.
        sample_size: how many entries to take from the front of the score
            list; each file matched by *include* uses up one of them.
        replace: draw combinations with replacement when true.
        randomise: shuffle the score list in place before each iteration's
            selection.
        epoch_from_filename: handed to ``answers_generator`` and
            ``get_sorted_scores``.
        cat1_centre: ``None`` selects ``search_for_centre`` for
            evaluation; any other value selects ``evaluate_fixed_cat1``
            with this centre and *cat1_radius*.
        cat1_radius: handed to ``get_sorted_scores`` and
            ``evaluate_fixed_cat1``.
        include: shortname predicate.  If it is not the ``always``
            sentinel, matching files are always selected and a combination
            must contain at least one of them to be evaluated.
        exclude: handed to ``get_sorted_scores``.
        iterations: number of selection/evaluation rounds.

    Returns:
        One ``(single_lines, singles, ensembles)`` tuple per iteration, in
        a list.  ``single_lines`` holds ``(score[0], shortname)`` pairs,
        ``singles`` maps shortname to loaded answers, and ``ensembles`` is
        a sorted list of ``(centre[0], names)`` pairs.
    """
    answers_gen = answers_generator(filename_gen, truth,
                                    epoch_from_filename, False)

    scores = get_sorted_scores(answers_gen, truth, cat1_centre, cat1_radius,
                               exclude, epoch_from_filename=epoch_from_filename)

    # Chosen once: the flag cannot change between iterations.
    if replace:
        combos = itertools.combinations_with_replacement
    else:
        combos = itertools.combinations

    results = []
    for _ in range(iterations):
        if randomise:
            random.shuffle(scores)

        cutoff = sample_size
        singles = {}
        essentials = set()
        single_lines = []
        if include is not always:
            for c, fn, shortname in scores:
                if include(shortname):
                    singles[shortname] = read_answers_file(fn)
                    cutoff -= 1
                    single_lines.append((c[0], shortname))
                    essentials.add(shortname)

        # Clamp so that a negative value does not slice from the tail.
        for c, fn, shortname in scores[:max(cutoff, 0)]:
            if shortname in essentials:
                # The include pass has already read and listed this file;
                # skip it to avoid a second read and a duplicate line.
                # NOTE(review): its slot is not refilled from lower-ranked
                # entries -- confirm that is the desired behaviour.
                continue
            singles[shortname] = read_answers_file(fn)
            single_lines.append((c[0], shortname))

        ensembles = []
        for names in combos(singles, ensemble_size):
            # With forced inclusions, drop combinations that use none.
            if essentials and essentials.isdisjoint(names):
                continue

            summed = {}
            for n in names:
                for k, v in singles[n].items():
                    summed[k] = summed.get(k, 0.0) + v
            ensemble = {k: v / ensemble_size for k, v in summed.items()}

            if cat1_centre is None:
                centre = search_for_centre(ensemble, truth)
            else:
                centre = evaluate_fixed_cat1(ensemble, truth, cat1_centre,
                                             cat1_radius)

            ensembles.append((centre[0], names))

        ensembles.sort()
        results.append((single_lines, singles, ensembles))

    return results