def total_rmse(highest_n=500):
    """Print and return the mean RMSE over the items with the largest ``lam`` sums.

    ``lam`` is summed along axis 0 and the ``highest_n`` columns with the
    largest sums are selected.  For each selected item and each group,
    ``get_rmse`` is called with that group's stored ratings for the item,
    ``Rtilde[group, item]`` and ``Rvar[group, item]``; the results are
    accumulated and a running mean is printed every 10th item.

    Args:
        highest_n: How many of the top items (by summed ``lam``) to evaluate.
            Clamped to the number of available items.  Defaults to 500.

    Returns:
        The mean of all ``get_rmse`` results, or ``None`` when no
        (group, item) pair was evaluated.
    """
    group_count = DataReader.nym_count()
    item_lam = lam.sum(axis=0)

    # Clamp so that fewer than `highest_n` items does not make argpartition
    # raise "kth out of bounds".
    # NOTE(review): assumes item_lam is 1-D (one entry per item) -- confirm.
    top_n = max(0, min(highest_n, len(item_lam)))
    if top_n:
        large_items = np.argpartition(item_lam, -top_n)[-top_n:]
    else:
        large_items = np.empty(0, dtype=int)

    with msg('Splitting group ratings'):
        # One row-subset of R per group, selected through P[group].
        group_ratings = [R[P[group]] for group in range(group_count)]

    rmse_sum = 0
    count = 0
    with msg('Getting rmse(s)'):
        for nth_item, item in enumerate(large_items):
            for group in range(group_count):
                # Every (group, item) pair is included; no filter on the mean.
                mean = Rtilde[group, item]
                # `.data` presumably yields only the stored (non-zero)
                # ratings of a sparse column -- verify against R's type.
                data = group_ratings[group][:, item].data
                var = Rvar[group, item]
                if var == 0:
                    # Substitute a small variance so get_rmse never sees 0.
                    var = 0.01
                rmse_sum += get_rmse(data, mean, var)
                count += 1

            # Progress report every 10th item; skipped while nothing has
            # been accumulated to avoid dividing by zero.
            if nth_item % 10 == 0 and count:
                mean_rmse = rmse_sum / count
                print(f'[{nth_item}, {count}] Mean RMSE: {mean_rmse}')

    return rmse_sum / count if count else None
def plot_nym_stat(thresh=thresh_default, inv=False, savefig=False,
                  outfile=outfile_default, begin=None, num=None,
                  stat_option=stat_option_default):
    """Scatter-plot one per-item statistic for every group (nym).

    The data comes from ``Data.get_nym_stats()``, indexed as
    ``[nym, item, column]``.  Column 0 is used as the x value, column 3 is
    compared against ``thresh`` and (square-rooted) sets the marker size,
    and ``stat_option`` picks the y value:

    * ``1`` -> column 1
    * ``2`` -> column 2
    * ``3`` -> square root of column 2

    Args:
        thresh: Minimum value of column 3 (labelled "no. ratings" in the
            plot title) for an item to be plotted.
        inv: If true, plot the reciprocal of every strictly positive y value.
        savefig: If true, save the figure to ``outfile`` instead of showing it.
        outfile: Destination path used when ``savefig`` is true.
        begin: Index of the first item to include; ``None`` means 0.
        num: Number of items to include; ``None`` means all from ``begin``.
        stat_option: Key into ``stat_options`` selecting the statistic.

    Raises:
        ValueError: If ``stat_option`` is not 1, 2 or 3.
    """
    stat_name = stat_options[stat_option]
    # Fail early with a clear message instead of hitting an unbound `y`
    # half-way through plotting.
    if stat_option not in (1, 2, 3):
        raise ValueError(
            f'unsupported stat_option {stat_option!r}; expected 1, 2 or 3')
    if inv:
        stat_name = f'inverse {stat_name}'

    fig, ax = plt.subplots()
    ax.set(
        # ylim=(0, None),
        title=f'{stat_name} of each group by item number (thresh no. ratings >= {thresh})',
        xlabel='item number',
        ylabel=stat_name)

    # One evenly spaced colour per group.
    nym_count = Data.nym_count()
    cm = plt.get_cmap('gist_rainbow')
    colors = [cm(1. * i / nym_count) for i in range(nym_count)]

    begin = 0 if begin is None else begin
    end = None if num is None else begin + num
    nym_stats = Data.get_nym_stats()[:, begin:end, :]

    for nym_n in range(nym_count):
        nym_n_stats = nym_stats[nym_n]
        with msg(f'plotting nym #{nym_n} {stat_name}'):
            valids = (nym_n_stats[:, 3] >= thresh)
            print(f'{valids.sum()} of {len(valids)} valid (thresh = {thresh})')

            x = nym_n_stats[:, 0][valids]
            # Compare by value: `is` on int literals tests object identity.
            if stat_option == 1:
                y = nym_n_stats[:, 1][valids]
            elif stat_option == 2:
                y = nym_n_stats[:, 2][valids]
            else:  # stat_option == 3
                y = np.sqrt(nym_n_stats[:, 2][valids])

            if inv:
                # Only invert strictly positive values; others stay as-is.
                positive = y > 0
                y[positive] = 1 / y[positive]

            # Marker size follows the square root of column 3.
            s = np.sqrt(nym_n_stats[:, 3][valids])
            ax.scatter(x, y, s=s, facecolors='none',
                       edgecolors=colors[nym_n], label=f'group {nym_n}')

    ax.legend()

    if savefig:
        with msg('Saving "{}" to "{}"'.format(ax.title.get_text(), outfile)):
            fig.savefig(outfile, dpi=150)
        plt.clf()
    else:
        plt.show()
# Command-line script: save one rating-distribution bar plot per group for a
# single item under figures/item<item>/.
import argparse
import os

from plot_item_dist import barplot_rating_dist
from datareader import DataReader as Data

parser = argparse.ArgumentParser(description="Plot distributions of each group for an item.")
parser.add_argument('item', help='item number to plot')
args = parser.parse_args()
# NOTE(review): `item` stays a string as parsed; confirm that
# barplot_rating_dist accepts it in that form.
item = args.item

fig_dir = f'figures/item{item}/'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(fig_dir, exist_ok=True)

# One figure per group, named item<item>_group<n>_dist.png.
for nym_n in range(Data.nym_count()):
    barplot_rating_dist(item, group=nym_n, savefig=f'{fig_dir}item{item}_group{nym_n}_dist.png')