def minrho_run():
    """Yield experiment rows for the min-rho cleaner at each drop count.

    For every n in NUM_DROP, noise is injected by replacing n subjects and
    cleaned by removing the n most deviant subjects.  One progress tick is
    written to stderr per experiment row.
    """
    progress = ProgressBar(len(NUM_DROP))
    sys.stderr.write("Beginning minrho eval.\n")
    progress.errput()
    for num_dropped in NUM_DROP:
        # Bind this iteration's drop count via default args so the callables
        # are self-contained (behavior is unchanged: they are consumed below).
        randomize = lambda d, n=num_dropped: replace_subjects(d, n)
        clean = lambda d, n=num_dropped: remove_most_deviant_subjects(d, n)
        params = {'n': num_dropped, 'cleaner': 'minrho'}
        for result_row in run_experiment(randomize, [clean], [params]):
            yield result_row
            progress.incr_and_errput()
def zscore_run(randomizer, randomizer_name):
    """Yield experiment rows for z-score-threshold cleaners at each noise level.

    randomizer: callable (data, percent_noise) -> noisy data.
    randomizer_name: label recorded in each row's parameter dict.

    For every noise percentage in NOISES, one cleaner per threshold in
    ZSCORES is evaluated; a falsy threshold (e.g. None) selects the
    BaselineCleaner.  One progress tick is written to stderr per row.
    """
    pb = ProgressBar(len(ZSCORES) * len(NOISES))
    sys.stderr.write("Beginning zscore eval with %s randomization.\n"
                     % randomizer_name)
    pb.errput()
    for percent_noise in NOISES:
        this_rand = lambda d, p=percent_noise: randomizer(d, p)
        # Fix: the original `zscore and X or Y` idiom silently falls through
        # to the baseline whenever X is falsy; use an explicit conditional
        # expression instead (PEP 308).
        cleaners = [RemoveDeviantRatings(zscore).scores if zscore
                    else BaselineCleaner().scores
                    for zscore in ZSCORES]
        parameters = [dict(cleaner='zscore',
                           p=percent_noise,
                           randomizer=randomizer_name,
                           zscore=str(zscore))
                      for zscore in ZSCORES]
        for row in run_experiment(this_rand, cleaners, parameters):
            yield row
            pb.incr_and_errput()
def svd_run(randomizer, randomizer_name):
    """Yield experiment rows comparing SVD cleaners (rank 1..K) to a baseline.

    randomizer: callable (data, percent_noise) -> noisy data.
    randomizer_name: label recorded in each row's parameter dict.

    For each noise level the baseline cleaner runs first (k labelled 'None'),
    followed by one SVD cleaner per rank.  One progress tick per row goes
    to stderr.
    """
    progress = ProgressBar((K + 1) * len(NOISES))
    sys.stderr.write("Beginning SVD eval with %s randomization.\n"
                     % randomizer_name)
    progress.errput()
    for percent_noise in NOISES:
        add_noise = lambda d, p=percent_noise: randomizer(d, p)
        # k == None tags the baseline (no-SVD) run in the parameter dicts.
        ranks = [None] + range(1, K + 1)
        parameters = [{'cleaner': 'svd',
                       'p_noise': percent_noise,
                       'randomizer': randomizer_name,
                       'k': str(k)}
                      for k in ranks]
        cleaners = [BaselineCleaner().scores]
        cleaners += [c.scores for c in create_svd_cleaners(K)]
        for row in run_experiment(add_noise, cleaners, parameters):
            yield row
            progress.incr_and_errput()
# Script body: measure how replacing n subjects with noise, then removing the
# n most deviant subjects, affects aggregate ratings.
from progress import ProgressBar

# Number of noisy/cleaned resampling repetitions per drop count.
REPEATS = 100

heads, mods, whole, assoc = load_data()
# Head and modifier ratings are evaluated jointly as one concatenated frame.
concatted = pd.concat([heads, mods], ignore_index=True)
# Noise-free reference aggregates to compare the noisy/cleaned runs against.
agg_concat_orig = combine_measures(aggregate_ratings(concatted))['mean']
agg_whole_orig = aggregate_ratings(whole)['mean']
output = []
# Evaluate dropping/replacing 1..25 subjects.
NUM_DROP = range(1, 26)
pb = ProgressBar(len(NUM_DROP) * REPEATS)
pb.errput()
for n in NUM_DROP:
    this_row = {}
    for i in xrange(REPEATS):
        # Inject noise by replacing n subjects in each ratings frame, then
        # clean by removing the n most deviant subjects.
        noisy_concat = replace_subjects(concatted, n)
        noisy_whole = replace_subjects(whole, n)
        clean_concat = remove_most_deviant_subjects(noisy_concat, n)
        clean_whole = remove_most_deviant_subjects(noisy_whole, n)
        # Aggregate mean scores for the noisy and the cleaned variants.
        agg_concat = combine_measures(aggregate_ratings(noisy_concat))['mean']
        agg_whole = aggregate_ratings(noisy_whole)['mean']
        agg_cl_concat = combine_measures(aggregate_ratings(clean_concat))['mean']
        agg_cl_whole = aggregate_ratings(clean_whole)['mean']
        # NOTE(review): this dict literal continues beyond the visible chunk.
        pairs = {
synsets = mappings.Synset[mappings.Synset.notnull()] synsets = [y for x in synsets.map(lambda z: z.split()) for y in x] def fetch_image_urls(synset): data = fetch.fetch_data(MAPPING_URL % synset) image_mappings = [y.split() for y in data.split("\r\n") if y] return image_mappings def fetch_hypos(synset): data = fetch.fetch_data(HYPO_URL % synset) return data.replace("-", "").split("\r\n") pb = ProgressBar(len(synsets)) pb.errput() for synset in synsets: image_urls = fetch_image_urls(synset) if len(image_urls) == 0: children_synsets = fetch_hypos(synset) children_urls = [fetch_image_urls(cs) for cs in children_synsets] image_urls = [y for x in children_urls for y in x] for imgid, url in image_urls: print "%s\t%s\t%s" % (synset, imgid, url) pb.incr_and_errput()
# Script body: same noisy-replacement / deviant-removal evaluation, driven by
# the noisify helpers.
from noisify import *
from progress import ProgressBar

# Number of noisy/cleaned resampling repetitions per drop count.
REPEATS = 100

heads, mods, whole, assoc = load_data()
# Head and modifier ratings are evaluated jointly as one concatenated frame.
concatted = pd.concat([heads, mods], ignore_index=True)
# Noise-free reference aggregates to compare the noisy/cleaned runs against.
agg_concat_orig = combine_measures(aggregate_ratings(concatted))['mean']
agg_whole_orig = aggregate_ratings(whole)['mean']
output = []
# Evaluate dropping/replacing 1..25 subjects.
NUM_DROP = range(1, 26)
pb = ProgressBar(len(NUM_DROP) * REPEATS)
pb.errput()
for n in NUM_DROP:
    this_row = {}
    for i in xrange(REPEATS):
        # Inject noise by replacing n subjects in each ratings frame, then
        # clean by removing the n most deviant subjects.
        noisy_concat = replace_subjects(concatted, n)
        noisy_whole = replace_subjects(whole, n)
        clean_concat = remove_most_deviant_subjects(noisy_concat, n)
        clean_whole = remove_most_deviant_subjects(noisy_whole, n)
        # Aggregate mean scores for the noisy and the cleaned variants.
        # NOTE(review): the loop body continues beyond the visible chunk.
        agg_concat = combine_measures(aggregate_ratings(noisy_concat))['mean']
        agg_whole = aggregate_ratings(noisy_whole)['mean']
        agg_cl_concat = combine_measures(
            aggregate_ratings(clean_concat))['mean']
        agg_cl_whole = aggregate_ratings(clean_whole)['mean']