コード例 #1
0
def total_rmse():
    """Accumulate RMSE over the highest-weight items and report a running mean.

    Items are ranked by the column sums of the module-level ``lam``; the (up
    to) 500 items with the largest sums are selected.  For each selected item
    and each group, ``get_rmse`` is called with that group's ratings for the
    item, the group/item entry of ``Rtilde`` (used as the mean) and the
    group/item entry of ``Rvar`` (used as the variance).  A progress line with
    the running mean is printed every 10th item.

    Relies on module-level names ``DataReader``, ``R``, ``P``, ``lam``,
    ``Rtilde``, ``Rvar``, ``msg`` and ``get_rmse``.
    NOTE(review): ``R`` is presumably a sparse ratings matrix (the code reads
    ``.data`` from a column slice) and ``P[group]`` a row selector for the
    group's users -- confirm against the definitions.

    Returns:
        The mean of all accumulated ``get_rmse`` values, or ``None`` when no
        (group, item) pair was evaluated.
    """
    group_count = DataReader.nym_count()
    rmse_sum = 0

    item_lam = lam.sum(axis=0)
    # np.argpartition raises ValueError if asked for more entries than exist
    # along the (last) axis it partitions, so cap the selection size.
    highest_n = min(500, item_lam.shape[-1])
    if highest_n:
        large_items = np.argpartition(item_lam, -highest_n)[-highest_n:]
    else:
        large_items = []

    with msg('Splitting group ratings'):
        group_ratings = [R[P[group]] for group in range(group_count)]

    with msg('Getting rmse(s)'):
        count = 0
        for nth_item, item in enumerate(large_items):
            for group in range(group_count):
                # NOTE: earlier experiments filtered on the group mean here
                # (2.5 < mean < 3.5, or mean > 4); currently every
                # (group, item) pair is included.
                mean = Rtilde[group, item]
                count += 1
                data = group_ratings[group][:, item].data
                var = Rvar[group, item]
                # Substitute a small positive value for a zero variance
                # before handing it to get_rmse.
                if var == 0:
                    var = 0.01
                rmse_sum += get_rmse(data, mean, var)

            # count stays 0 when there are no groups; skip the report rather
            # than divide by zero.
            if nth_item % 10 == 0 and count:
                print(f'[{nth_item}, {count}] Mean RMSE: {rmse_sum / count}')

    return rmse_sum / count if count else None
コード例 #2
0
def plot_nym_stat(thresh=thresh_default, inv=False, savefig=False, outfile=outfile_default, begin=None, num=None, stat_option=stat_option_default):
	"""Scatter-plot one per-item statistic for every group on a shared axes.

	The data comes from ``Data.get_nym_stats()``, indexed as
	``[group, item_row, column]``.  Column 0 is used as the x value, column 3
	is compared against ``thresh`` and (square-rooted) sets the marker size,
	and the y value depends on ``stat_option``:

	* ``1`` -- column 1
	* ``2`` -- column 2
	* ``3`` -- square root of column 2

	NOTE(review): the display name of each option comes from the module-level
	``stat_options``; column 3 is presumably the number of ratings (per the
	plot title) -- confirm against ``Data.get_nym_stats``.

	Args:
		thresh: Minimum column-3 value for an item row to be plotted.
		inv: If true, replace every positive y value with its reciprocal and
			prefix the statistic name with "inverse".
		savefig: If true, write the figure to ``outfile`` instead of showing it.
		outfile: Destination path used when ``savefig`` is true.
		begin: First item row to include (defaults to 0).
		num: Number of item rows to include (defaults to all rows from ``begin``).
		stat_option: Which statistic to plot; must be 1, 2 or 3.

	Raises:
		ValueError: If ``stat_option`` is not 1, 2 or 3.
	"""
	# Fail early with a clear message; previously an unsupported option left
	# `y` unassigned and surfaced later as an UnboundLocalError.
	if stat_option not in (1, 2, 3):
		raise ValueError(f'stat_option must be 1, 2 or 3, got {stat_option!r}')

	stat_name = stat_options[stat_option]
	if inv:
		stat_name = f'inverse {stat_name}'

	fig, ax = plt.subplots()
	ax.set(
		title=f'{stat_name} of each group by item number (thresh no. ratings >= {thresh})',
		xlabel='item number',
		ylabel=stat_name)

	nym_count = Data.nym_count()
	# One distinct colour per group, spread evenly across the colormap.
	cm = plt.get_cmap('gist_rainbow')
	colors = [cm(1.*i/nym_count) for i in range(nym_count)]

	begin = 0 if begin is None else begin
	end = None if num is None else begin + num
	nym_stats = Data.get_nym_stats()[:, begin:end, :]

	for nym_n in range(nym_count):
		nym_n_stats = nym_stats[nym_n]
		with msg(f'plotting nym #{nym_n} {stat_name}'):

			valids = (nym_n_stats[:, 3] >= thresh)
			print(f'{valids.sum()} of {len(valids)} valid (thresh = {thresh})')

			x = nym_n_stats[:, 0][valids]
			# Compare by value: `is` on integers tests identity and fails for
			# equal values that are not the same object (e.g. numpy integers).
			if stat_option == 1:
				y = nym_n_stats[:, 1][valids]
			elif stat_option == 2:
				y = nym_n_stats[:, 2][valids]
			else:  # stat_option == 3
				y = np.sqrt(nym_n_stats[:, 2][valids])

			if inv:
				# Only invert strictly positive values to avoid dividing by zero.
				positive = y > 0
				y[positive] = 1 / y[positive]
			s = np.sqrt(nym_n_stats[:, 3][valids])

			ax.scatter(x, y, s=s, facecolors='none', edgecolors=colors[nym_n], label=f'group {nym_n}')
	ax.legend()

	if savefig:
		with msg('Saving "{}" to "{}"'.format(ax.title.get_text(), outfile)):
			fig.savefig(outfile, dpi=150)
			# Close the figure so repeated calls do not accumulate open
			# figures in pyplot's registry (clf() only clears the contents).
			plt.close(fig)
	else:
		plt.show()
コード例 #3
0
import argparse
import os
from plot_item_dist import barplot_rating_dist
from datareader import DataReader as Data

# Command-line script: for the item given on the command line, save one
# rating-distribution bar plot per group under figures/item<item>/.
parser = argparse.ArgumentParser(description="Plot distributions of each group for an item.")
parser.add_argument('item', help='item number to plot')
args = parser.parse_args()

# NOTE(review): argparse delivers `item` as a string; confirm that
# barplot_rating_dist accepts a string item identifier.
item = args.item
fig_dir = f'figures/item{item}/'
# exist_ok=True replaces the separate existence check, which could race with
# another process creating the directory between the check and the call.
os.makedirs(fig_dir, exist_ok=True)

for nym_n in range(Data.nym_count()):
    barplot_rating_dist(item, group=nym_n, savefig=f'{fig_dir}item{item}_group{nym_n}_dist.png')