Beispiel #1
0
def dropsubj_run():
    """Generate the experiment rows for every value in PERCENT_REMOVE.

    For each value ``p`` a randomizer built on ``replace_percent_subjects``
    and a single cleaner built on ``remove_percent_deviant_subjects`` are
    handed to ``run_experiment`` together with the parameter dict
    ``{'p': p, 'cleaner': 'devsubj'}``. Every row that ``run_experiment``
    produces is yielded unchanged.

    The progress bar is advanced (and written out) after each yielded row,
    i.e. when the consumer asks for the next one.
    """
    progress = ProgressBar(len(PERCENT_REMOVE))

    for p in PERCENT_REMOVE:
        # Both callables close over the current ``p``; they are used up by
        # run_experiment before the loop moves on to the next value.
        def randomize(data):
            return replace_percent_subjects(data, p)

        def drop_deviants(data):
            return remove_percent_deviant_subjects(data, p)

        settings = dict(p=p, cleaner='devsubj')

        for result in run_experiment(randomize, [drop_deviants], [settings]):
            yield result
            progress.incr_and_errput()
Beispiel #2
0
def minrho_run():
    """Generate the experiment rows for every value in NUM_DROP.

    A start message is written to stderr and the progress bar is displayed
    once before any work is done. For each value ``n`` a randomizer built on
    ``replace_subjects`` and a single cleaner built on
    ``remove_most_deviant_subjects`` are handed to ``run_experiment``
    together with the parameter dict ``{'n': n, 'cleaner': 'minrho'}``.
    Every row that ``run_experiment`` produces is yielded unchanged.

    The progress bar is advanced (and written out) after each yielded row,
    i.e. when the consumer asks for the next one.
    """
    progress = ProgressBar(len(NUM_DROP))
    sys.stderr.write("Beginning minrho eval.\n")
    progress.errput()

    for n in NUM_DROP:
        # Both callables close over the current ``n``; they are used up by
        # run_experiment before the loop moves on to the next value.
        def randomize(data):
            return replace_subjects(data, n)

        def drop_deviants(data):
            return remove_most_deviant_subjects(data, n)

        settings = dict(n=n, cleaner='minrho')

        for result in run_experiment(randomize, [drop_deviants], [settings]):
            yield result
            progress.incr_and_errput()
Beispiel #3
0
def zscore_run(randomizer, randomizer_name):
    """Generate the z-score cleaning experiment rows for every noise level.

    Args:
        randomizer: callable invoked as ``randomizer(d, percent_noise)``; it
            is wrapped into a one-argument callable for ``run_experiment``.
        randomizer_name: label written to the start message on stderr and
            stored under the ``'randomizer'`` key of every parameter dict.

    Yields:
        Each row produced by ``run_experiment``, unchanged. The progress bar
        is advanced after a row has been yielded, i.e. when the consumer asks
        for the next one.

    For every entry of ``NOISES`` one cleaner and one parameter dict is built
    per entry of ``ZSCORES`` (same order in both lists). A falsy z-score
    selects ``BaselineCleaner().scores``; any other value selects
    ``RemoveDeviantRatings(zscore).scores``.
    """
    pb = ProgressBar(len(ZSCORES) * len(NOISES))
    sys.stderr.write("Beginning zscore eval with %s randomization.\n" % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        this_rand = lambda d: randomizer(d, percent_noise)

        # A real conditional expression instead of ``x and a or b``: the
        # and/or form would silently pick the baseline whenever the selected
        # ``scores`` object happened to be falsy.
        cleaners = [
            RemoveDeviantRatings(zscore).scores if zscore else BaselineCleaner().scores
            for zscore in ZSCORES
        ]
        parameters = [
            dict(cleaner='zscore', p=percent_noise, randomizer=randomizer_name, zscore=str(zscore))
            for zscore in ZSCORES
        ]

        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            pb.incr_and_errput()
Beispiel #4
0
def svd_run(randomizer, randomizer_name):
    """Generate the SVD cleaning experiment rows for every noise level.

    Args:
        randomizer: callable invoked as ``randomizer(d, percent_noise)``; it
            is wrapped into a one-argument callable for ``run_experiment``.
        randomizer_name: label written to the start message on stderr and
            stored under the ``'randomizer'`` key of every parameter dict.

    Yields:
        Each row produced by ``run_experiment``, unchanged. The progress bar
        is advanced after a row has been yielded, i.e. when the consumer asks
        for the next one.

    For every entry of ``NOISES`` the cleaner list starts with
    ``BaselineCleaner().scores`` followed by the ``scores`` of each cleaner
    returned by ``create_svd_cleaners(K)``. The parameter dicts carry
    ``'k'`` as ``str(None)`` for the baseline and ``str(1)`` .. ``str(K)``
    for the rest.
    NOTE(review): this pairing assumes create_svd_cleaners(K) returns exactly
    K cleaners ordered k = 1..K -- confirm against its definition.
    """
    pb = ProgressBar((K + 1) * len(NOISES))
    sys.stderr.write("Beginning SVD eval with %s randomization.\n" % randomizer_name)
    pb.errput()

    # ``range`` is wrapped in list() so the concatenation also works where
    # range() returns a lazy sequence instead of a list. The values do not
    # depend on the noise level, so they are built once.
    k_values = [None] + list(range(1, K + 1))

    for percent_noise in NOISES:
        this_rand = lambda d: randomizer(d, percent_noise)

        parameters = [{
            'cleaner': 'svd',
            'p_noise': percent_noise,
            'randomizer': randomizer_name,
            'k': str(k)
        } for k in k_values]
        cleaners = [BaselineCleaner().scores] + [c.scores for c in create_svd_cleaners(K)]

        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            pb.incr_and_errput()
Beispiel #5
0
def fetch_image_urls(synset):
    """Return the mapping data for ``synset`` as a list of token lists.

    The text obtained from ``fetch.fetch_data(MAPPING_URL % synset)`` is cut
    at every CRLF sequence; empty pieces are skipped and every remaining
    piece is split on whitespace.
    """
    response = fetch.fetch_data(MAPPING_URL % synset)
    mappings = []
    for line in response.split("\r\n"):
        if line:
            mappings.append(line.split())
    return mappings

def fetch_hypos(synset):
    """Return the entries of the hyponym listing for ``synset``.

    The text obtained from ``fetch.fetch_data(HYPO_URL % synset)`` has every
    ``-`` character removed and is then cut at every CRLF sequence.

    Empty pieces (for example the one left after a trailing CRLF) are
    dropped, matching ``fetch_image_urls``, so that callers never issue a
    follow-up request for an empty synset id.
    """
    data = fetch.fetch_data(HYPO_URL % synset)
    return [entry for entry in data.replace("-", "").split("\r\n") if entry]


# Print one tab-separated "synset, image id, url" line per image mapping of
# every entry in ``synsets``, updating a progress bar after each synset.
pb = ProgressBar(len(synsets))
pb.errput()
for synset in synsets:
    image_urls = fetch_image_urls(synset)
    if not image_urls:
        # Nothing mapped to the synset itself: fall back to the mappings of
        # every synset listed by fetch_hypos, concatenated in listing order.
        children_synsets = fetch_hypos(synset)
        image_urls = [mapping
                      for cs in children_synsets
                      for mapping in fetch_image_urls(cs)]

    for imgid, url in image_urls:
        # Single parenthesised argument: same output whether ``print`` is a
        # statement or a function.
        print("%s\t%s\t%s" % (synset, imgid, url))

    pb.incr_and_errput()