Exemple #1
0
def minrho_run():
    """Yield result rows for the min-rho cleaner evaluation.

    For each n in NUM_DROP: replace n subjects with random noise, clean by
    removing the n most deviant subjects, and yield every row produced by
    run_experiment, advancing a stderr progress bar per row.
    """
    pb = ProgressBar(len(NUM_DROP))
    sys.stderr.write("Beginning minrho eval.\n")
    pb.errput()

    for n in NUM_DROP:
        # Bind n as a default argument: a plain closure would late-bind and
        # see the loop's final n if run_experiment defers calling it.
        this_rand = lambda d, n=n: replace_subjects(d, n)
        cleaner = lambda d, n=n: remove_most_deviant_subjects(d, n)
        params = {'n': n, 'cleaner': 'minrho'}

        for row in run_experiment(this_rand, [cleaner], [params]):
            yield row
            pb.incr_and_errput()
Exemple #2
0
def zscore_run(randomizer, randomizer_name):
    """Yield result rows for the z-score cleaner evaluation.

    Sweeps every noise level in NOISES crossed with every threshold in
    ZSCORES. `randomizer` is applied to inject `percent_noise` noise;
    `randomizer_name` is only used for labeling output/progress messages.
    A falsy zscore entry (e.g. None) selects the baseline (no-op) cleaner.
    """
    pb = ProgressBar(len(ZSCORES) * len(NOISES))
    sys.stderr.write("Beginning zscore eval with %s randomization.\n" % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        # Bind percent_noise now; a bare closure would late-bind to the
        # loop's final value if run_experiment defers the call.
        this_rand = lambda d, p=percent_noise: randomizer(d, p)

        # Conditional expression instead of the fragile `x and a or b`
        # idiom, which falls through to `b` whenever `a` is falsy.
        cleaners = [RemoveDeviantRatings(zscore).scores if zscore
                    else BaselineCleaner().scores
                    for zscore in ZSCORES]
        parameters = [dict(cleaner='zscore', p=percent_noise, randomizer=randomizer_name, zscore=str(zscore))
                      for zscore in ZSCORES]

        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            pb.incr_and_errput()
Exemple #3
0
def svd_run(randomizer, randomizer_name):
    """Yield result rows for the SVD cleaner evaluation.

    Sweeps every noise level in NOISES against K SVD cleaners plus one
    baseline (k=None). `randomizer` injects `percent_noise` noise;
    `randomizer_name` is used only for labeling output/progress messages.
    """
    pb = ProgressBar((K + 1) * len(NOISES))
    sys.stderr.write("Beginning SVD eval with %s randomization.\n" % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        # Bind percent_noise as a default argument: a plain closure would
        # late-bind if run_experiment defers calling it.
        this_rand = lambda d, p=percent_noise: randomizer(d, p)

        # k=None labels the baseline cleaner; 1..K label the SVD cleaners,
        # matching the order of `cleaners` below.
        parameters = [{
            'cleaner': 'svd',
            'p_noise': percent_noise,
            'randomizer': randomizer_name,
            'k': str(k)
        } for k in [None] + range(1, K + 1)]
        cleaners = [BaselineCleaner().scores] + [c.scores for c in create_svd_cleaners(K)]

        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            pb.incr_and_errput()
from progress import ProgressBar

# Number of noisy/clean trials averaged per drop count.
REPEATS = 100


# Load the rating data and build one frame with head and modifier ratings
# stacked together.
heads, mods, whole, assoc = load_data()
concatted = pd.concat([heads, mods], ignore_index=True)

# Clean (pre-noise) reference aggregates to compare the noisy/cleaned
# aggregates against.
agg_concat_orig = combine_measures(aggregate_ratings(concatted))['mean']
agg_whole_orig = aggregate_ratings(whole)['mean']

output = []

# Evaluate dropping/replacing 1..25 subjects.
NUM_DROP = range(1, 26)
pb = ProgressBar(len(NUM_DROP) * REPEATS)
pb.errput()
for n in NUM_DROP:
    this_row = {}
    for i in xrange(REPEATS):
        # Inject noise by replacing n subjects, then clean by removing the
        # n most deviant subjects from the noisy data.
        noisy_concat = replace_subjects(concatted, n)
        noisy_whole = replace_subjects(whole, n)
        clean_concat = remove_most_deviant_subjects(noisy_concat, n)
        clean_whole = remove_most_deviant_subjects(noisy_whole, n)

        # Mean aggregates of the noisy data.
        agg_concat = combine_measures(aggregate_ratings(noisy_concat))['mean']
        agg_whole = aggregate_ratings(noisy_whole)['mean']

        # Mean aggregates after cleaning.
        agg_cl_concat = combine_measures(aggregate_ratings(clean_concat))['mean']
        agg_cl_whole = aggregate_ratings(clean_whole)['mean']

        # NOTE(review): snippet is truncated here — the pairs dict
        # continues past this excerpt.
        pairs = {
Exemple #5
0
# Flatten the non-null rows of the Synset column (each row may hold several
# whitespace-separated synset ids) into one flat list of ids.
non_null_rows = mappings.Synset[mappings.Synset.notnull()]
synsets = []
for row_value in non_null_rows:
    synsets.extend(row_value.split())

def fetch_image_urls(synset):
    """Fetch and return the image mapping rows for *synset*.

    Each returned element is a whitespace-split line (e.g. id/url fields)
    from the mapping endpoint; blank lines are dropped.
    """
    raw = fetch.fetch_data(MAPPING_URL % synset)
    return [line.split() for line in raw.split("\r\n") if line]

def fetch_hypos(synset):
    """Fetch and return the hyponym synset ids listed for *synset*.

    '-' characters (tree-indentation markers in the response) are stripped
    before splitting the CRLF-delimited payload into one id per element.
    """
    payload = fetch.fetch_data(HYPO_URL % synset)
    cleaned = payload.replace("-", "")
    return cleaned.split("\r\n")


pb = ProgressBar(len(synsets))
pb.errput()
for synset in synsets:
    image_urls = fetch_image_urls(synset)
    if len(image_urls) == 0:
        children_synsets = fetch_hypos(synset)
        children_urls = [fetch_image_urls(cs) for cs in children_synsets]
        image_urls = [y for x in children_urls for y in x]

    for imgid, url in image_urls:
        print "%s\t%s\t%s" % (synset, imgid, url)

    pb.incr_and_errput()



Exemple #6
0
from noisify import *
from progress import ProgressBar

# Number of noisy/clean trials averaged per drop count.
REPEATS = 100

# Load the rating data and build one frame with head and modifier ratings
# stacked together.
heads, mods, whole, assoc = load_data()
concatted = pd.concat([heads, mods], ignore_index=True)

# Clean (pre-noise) reference aggregates to compare the noisy/cleaned
# aggregates against.
agg_concat_orig = combine_measures(aggregate_ratings(concatted))['mean']
agg_whole_orig = aggregate_ratings(whole)['mean']

output = []

# Evaluate dropping/replacing 1..25 subjects.
NUM_DROP = range(1, 26)
pb = ProgressBar(len(NUM_DROP) * REPEATS)
pb.errput()
for n in NUM_DROP:
    this_row = {}
    for i in xrange(REPEATS):
        # Inject noise by replacing n subjects, then clean by removing the
        # n most deviant subjects from the noisy data.
        noisy_concat = replace_subjects(concatted, n)
        noisy_whole = replace_subjects(whole, n)
        clean_concat = remove_most_deviant_subjects(noisy_concat, n)
        clean_whole = remove_most_deviant_subjects(noisy_whole, n)

        # Mean aggregates of the noisy data.
        agg_concat = combine_measures(aggregate_ratings(noisy_concat))['mean']
        agg_whole = aggregate_ratings(noisy_whole)['mean']

        # Mean aggregates after cleaning.
        # NOTE(review): snippet is truncated shortly after this point —
        # the loop body continues past this excerpt.
        agg_cl_concat = combine_measures(
            aggregate_ratings(clean_concat))['mean']
        agg_cl_whole = aggregate_ratings(clean_whole)['mean']