# ===== Example #1 =====
import pandas as pd

from pandas.core.reshape import melt
from eval_cleanups import load_data, decrange, RemoveDeviantRatings, combine_measures, BaselineCleaner
from standard_cleanup import aggregate_ratings
from noisify import *
from scipy.stats import spearmanr, norm
from progress import ProgressBar

# Number of randomized repetitions per configuration (used to size the
# progress bar below: len(ZSCORES) * REPEATS * len(NOISES)).
REPEATS = 100


# Load the four rating datasets; heads and mods are concatenated into one
# frame so they can be aggregated together.  Exact column schema comes from
# eval_cleanups.load_data — not visible here.
heads, mods, whole, assoc = load_data()
concatted = pd.concat([heads, mods], ignore_index=True)

# Baseline aggregate scores before any noise/cleaning is applied.
# NOTE(review): assumes aggregate_ratings/combine_measures yield a frame
# with a 'mean' column — confirm against eval_cleanups/standard_cleanup.
agg_concat_orig = combine_measures(aggregate_ratings(concatted))['mean']
agg_whole_orig = aggregate_ratings(whole)['mean']

# ------ RANDOMIZE BY ZSCORES TEST --------

def mass_outside(zstar):
    """Two-tailed tail mass of the standard normal: P(|Z| >= |zstar|)."""
    one_tail = norm().cdf(-abs(zstar))
    return one_tail + one_tail

# Accumulates one result row per (zscore, noise, repeat) trial.
output = []
# first zscores
# Sweep of z-score cutoffs, None meaning "no z-score filtering"; decrange
# presumably yields a descending list of floats 4.0 → 1.0 — TODO confirm.
ZSCORES = [None] + decrange(4.0, 1.0, -0.25)
# Noise levels injected into the ratings for each trial.
NOISES = [0.0, 0.01, 0.05, 0.1, 0.25, 0.5]

# One progress tick per (zscore, repeat, noise) combination.
pb = ProgressBar(len(ZSCORES) * REPEATS * len(NOISES))
pb.errput()
# NOTE(review): the body of this loop is not part of this excerpt.
for zscore in ZSCORES:
# ===== Example #2 =====
    # judgements
    good_compounds = reduce(
        set.intersection,
        [set(x.compound) for x in [heads, mods, whole, assoc]])
    heads, mods, whole, assoc = [
        d[d.compound.map(good_compounds.__contains__)]
        for d in [heads, mods, whole, assoc]
    ]

    return heads, mods, whole, assoc


if __name__ == '__main__':
    # go ahead and sort the whole judgements and assoc measures
    heads, mods, whole_orig, assoc = load_data()
    whole = aggregate_ratings(whole_orig)
    # Combine modifier and head ratings into a single frame for cleaning.
    concatted = pd.concat([heads, mods], ignore_index=True)
    # NOTE(review): DataFrame.sort is the legacy pandas API (removed in
    # pandas 0.20+; sort_values is the modern equivalent) — this script
    # appears to target an old pandas/Python 2 environment.
    concatted_uncleaned_together = combine_measures(
        aggregate_ratings(concatted), 'prod').sort('compound')

    # Build the list of cleaning strategies to evaluate.
    # NOTE(review): RemoveDeviantSubjectCleaner and create_svd_cleaners are
    # not in the visible imports — presumably supplied by `from noisify
    # import *`; verify.
    setups = []
    setups += [BaselineCleaner()]
    setups += [
        RemoveDeviantSubjectCleaner(r) for r in decrange(0.10, 0.6, 0.05)
    ]
    setups += [RemoveDeviantRatings(z) for z in decrange(1.0, 4.0, 0.25)]
    # Disabled experimental cleaners kept for reference:
    #setups += [RebinCleaner(b) for b in ["1144477","1444447","1114777","1122233","1222223","1112333"]]
    setups += create_svd_cleaners(20)
    #setups += [FillCleaner(0), FillCleaner(1), FillCleaner(7)]

    # One result record per cleaner setup (rest of the block not in this excerpt).
    results = []
# ===== Example #3 =====
    good_compounds = reduce(
        set.intersection,
        [set(x.compound) for x in [heads, mods, whole, assoc]]
    )
    heads, mods, whole, assoc = [
        d[d.compound.map(good_compounds.__contains__)]
        for d in [heads, mods, whole, assoc]
    ]

    return heads, mods, whole, assoc


if __name__ == '__main__':
    # go ahead and sort the whole judgements and assoc measures
    heads, mods, whole_orig, assoc = load_data()
    whole = aggregate_ratings(whole_orig)
    # Combine modifier and head ratings into a single frame for cleaning.
    concatted = pd.concat([heads, mods], ignore_index=True)
    # NOTE(review): DataFrame.sort is the legacy pandas API (removed in
    # pandas 0.20+; sort_values is the modern equivalent) — this script
    # appears to target an old pandas/Python 2 environment.
    concatted_uncleaned_together = combine_measures(aggregate_ratings(concatted), 'prod').sort('compound')

    # Build the list of cleaning strategies to evaluate.
    # NOTE(review): RemoveDeviantSubjectCleaner and create_svd_cleaners are
    # not in the visible imports — presumably supplied by `from noisify
    # import *`; verify.
    setups = []
    setups += [BaselineCleaner()]
    setups += [RemoveDeviantSubjectCleaner(r) for r in decrange(0.10, 0.6, 0.05)]
    setups += [RemoveDeviantRatings(z) for z in decrange(1.0, 4.0, 0.25)]
    # Disabled experimental cleaners kept for reference:
    #setups += [RebinCleaner(b) for b in ["1144477","1444447","1114777","1122233","1222223","1112333"]]
    setups += create_svd_cleaners(20)
    #setups += [FillCleaner(0), FillCleaner(1), FillCleaner(7)]

    results = []
    # Tracks the distinct cleaner parameters seen across setups.
    parameters = set()

    # Whether heads/mods are concatenated before cleaning (block continues
    # beyond this excerpt).
    CONCAT_BEFORE = True