def train(self, test_users=None, b=None):
    """Fit one score vector per user from a block-partitioned kernel.

    ``self.K`` is indexed first by block number and then by
    ``[row, columns]``; a global kernel index ``a`` is looked up in block
    ``a // block_size`` at local row ``a % block_size``.

    For every user a quadratic program is built from the kernel restricted
    to the user's items and handed to ``solver.qp``; the solution weights
    the corresponding kernel rows and ``self.q_`` is subtracted to give
    ``self.model[user]``.

    Args:
        test_users: mapping ``user -> sequence of item ids``. Stored
            unchanged in ``self.test_users``.
        b: optional cap; when given only the first ``b`` items of each
            user are used.

    Returns:
        ``self``, with ``self.model`` rebuilt from scratch.
    """
    block_size = 5000
    self.model = {}
    self.test_users = test_users
    if test_users is None:
        # NOTE(review): a range carries no per-user item lists, so the
        # indexing below fails on this path -- confirm the intended fallback.
        test_users = range(self.n_users)

    if b is not None:
        tu = {u: set(test_users[u][:b]) for u in test_users}
    else:
        tu = {u: set(test_users[u]) for u in test_users}

    for i, u in enumerate(tu):
        if self.verbose and (i + 1) % 100 == 0:
            print("%d/%d" % (i + 1, len(test_users)))
            utils.fl()

        # Translate item ids into global kernel indices.
        Xp = [int(self.K_indexing[str(idx)]) for idx in tu[u]]
        # Floor division: the block number is used as a sequence index and
        # must stay an int even when "/" means true division.
        mtx_indexes = [a // block_size for a in Xp]
        pos_indexes = [a % block_size for a in Xp]
        npos = len(Xp)

        # Kernel restricted to the user's items.
        kp = np.zeros((npos, npos))
        for j in range(npos):
            kp[j] = self.K[mtx_indexes[j]][pos_indexes[j], Xp]
        kp = co.matrix(kp)

        kn = self.q_[Xp, :]
        P = kp + self.lambda_p * utc.identity(npos)
        q = -kn
        # Presumably cvxopt conventions (G x <= h, A x = b_eq): non-negative
        # weights that sum to one -- confirm against the utc helpers.
        G = -utc.identity(npos)
        h = utc.zeroes_vec(npos)
        A = utc.ones_vec(npos).T
        # Separate name so the ``b`` parameter is not rebound.
        b_eq = co.matrix(1.0)
        solver.options['show_progress'] = False
        sol = solver.qp(P, q, G, h, A, b_eq)

        # Full kernel rows of the user's items, one row per item.
        band = np.zeros((npos, len(self.inverse_indexing)))
        for j in range(npos):
            band[j] = self.K[mtx_indexes[j]][pos_indexes[j], :]
        self.model[u] = (co.matrix(band.T) * (sol['x'])) - self.q_
    return self
def train(self, test_users=None, b=None, selection=None):
    """Fit per-user scores restricted to a candidate selection.

    For each user the kernel sub-matrix over the user's items defines a
    quadratic program solved with ``solver.qp``. Scores are then computed
    only for the indices in ``selection[user]``; every other entry of
    ``self.model[user]`` is ``-inf``.

    Args:
        test_users: mapping ``user -> sequence of kernel indices``. Stored
            unchanged in ``self.test_users``.
        b: optional cap on how many leading items of each user are used.
        selection: mapping ``user -> candidate indices``. It is indexed for
            every processed user, so the ``None`` default only works when
            there are no users.

    Returns:
        ``self``.
    """
    self.model = {}
    self.test_users = test_users
    if test_users is None:
        test_users = range(self.n_users)

    positives = {}
    for user in test_users:
        items = test_users[user]
        positives[user] = set(items if b is None else items[:b])

    for seen, user in enumerate(positives, 1):
        if self.verbose and seen % 100 == 0:
            print("%d/%d" % (seen, len(test_users)))
            utils.fl()

        pos_items = list(positives[user])
        n_pos = len(pos_items)

        # Kernel restricted to the user's items, filled row by row.
        gram = np.zeros((n_pos, n_pos))
        for r in range(n_pos):
            gram[r] = self.K[pos_items[r], pos_items].todense()
        gram = co.matrix(gram)

        linear = -self.q_[pos_items, :]
        P = gram + self.lambda_p * utc.identity(n_pos)
        G = -utc.identity(n_pos)
        h = utc.zeroes_vec(n_pos)
        A = utc.ones_vec(n_pos).T
        rhs = co.matrix(1.0)
        solver.options['show_progress'] = False
        sol = solver.qp(P, linear, G, h, A, rhs)

        candidates = selection[user]
        scores = (self.K[np.array(pos_items)[:, None], candidates].T).dot(
            sol['x']) - self.q_[[a for a in candidates]]
        # Items outside the selection keep -inf.
        self.model[user] = np.zeros(len(self.items_list)) - np.inf
        self.model[user][candidates] = scores[:, 0]
    return self
def build_selection(k, p2s_test, global_popularity, t2t_id, S, p2t, lengths,
                    test_seed, P, power):
    """Pick ``k`` candidates for every user of one test seed.

    Users whose title (``p2t[str(user)]``) is empty or missing from
    ``t2t_id`` get the first ``k`` entries of ``global_popularity``. For the
    others, row ``t2t_id[title]`` of ``S`` is raised to ``power``, divided by
    ``lengths``, multiplied by ``P`` and passed to ``utils.scores_sorter``
    together with ``k``.

    Returns:
        dict mapping each user in ``p2s_test[test_seed]`` to its selection.
    """
    selection = {}
    for user in p2s_test[test_seed]:
        title = p2t[str(user)]
        if title != "" and title in t2t_id:
            row = S[t2t_id[title], :]**power / lengths
            selection[user] = utils.scores_sorter(P.dot(row), k)
        else:
            # No usable title: fall back to the popularity ranking.
            selection[user] = global_popularity[:k]
    print("selection built")
    utils.fl()
    return selection
def multiline_xy_plot(x, ys, xlabel, ylabel, legend_labels, filename,
                      y_errorbars=None, save_figure=True):
    """Plot several y-series against a shared x on one axes.

    Args:
        x: x values shared by all series.
        ys: list of y series (must be a ``list``; asserted).
        xlabel, ylabel: axis label keys; each is passed through ``fl``.
        legend_labels: one legend entry per series, indexed like ``ys``.
        filename: where the figure is written when ``save_figure`` is true.
        y_errorbars: accepted but currently unused.
        save_figure: whether to call ``fig.savefig(filename)``.

    Returns:
        ``(ax, fig)`` -- note the order.
    """
    assert isinstance(ys, list)
    palette = brewer2mpl.get_map('Set1', 'qualitative', 9)
    plotter = Ppl(palette, alpha=1)
    fig, ax = plt.subplots(1)

    ax.set_xlabel(fl(xlabel))
    ax.set_ylabel(fl(ylabel))
    ax.yaxis.get_major_formatter().set_powerlimits((0, 1))

    for series_no, series in enumerate(ys):
        plotter.plot(ax, x, series, linewidth=2,
                     label=legend_labels[series_no])
    plotter.legend(ax, loc=0, frameon=False)

    if not save_figure:
        return ax, fig
    fig.savefig(filename)
    return ax, fig
def plot_image_matrix(filename, jaccard_matrix, x_ticklabels=None,
                      y_ticklabels=None):
    """Render a 2-D matrix as a colour mesh and save it to ``filename``.

    NaN cells of ``jaccard_matrix`` are masked before plotting. Ticks are
    placed at ``0..shape`` on both axes (one more tick than cells).

    Args:
        filename: output path handed to ``fig.savefig``.
        jaccard_matrix: 2-D array; ``shape[0]`` drives the y ticks and
            ``shape[1]`` the x ticks.
        x_ticklabels, y_ticklabels: optional tick labels. When ``None`` the
            axis keeps its default labels.

    Returns:
        ``(fig, ax)``.
    """
    colormap = brewer2mpl.get_map('Set1', 'qualitative', 9)
    p = Ppl(colormap, alpha=1)
    fig, ax = plt.subplots(1)
    masked_jaccard = np.ma.masked_where(np.isnan(jaccard_matrix),
                                        jaccard_matrix)
    p.pcolormesh(fig, ax, masked_jaccard)

    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement or a function.
    print(x_ticklabels)
    print(y_ticklabels)

    ax.set_yticks(list(range(jaccard_matrix.shape[0] + 1)))
    ax.set_xticks(list(range(jaccard_matrix.shape[1] + 1)))
    # The labels default to None, which the tick-label setters do not
    # accept as a sequence; only override when labels were supplied.
    if y_ticklabels is not None:
        ax.set_yticklabels(y_ticklabels)
    if x_ticklabels is not None:
        ax.set_xticklabels(x_ticklabels)

    ax.set_ylabel(fl('ratioagent'))
    ax.set_xlabel(fl('ratiolatency'))
    fig.savefig(filename)
    return fig, ax
def make_scatter_plot_for_labelled_data(data_frame, x_name, y_name, labels,
                                        filename, colormap,
                                        x_function='dummy',
                                        y_function='dummy', legend=False,
                                        point_size=5, omit_largest=0,
                                        labels_to_plot=None):
    """Scatter two columns of ``data_frame``, one colour per cluster label.

    Originally created for issue_28.

    Clusters are ordered by size, largest first; the ``omit_largest``
    biggest ones are skipped. Each remaining cluster that is in
    ``labels_to_plot`` is drawn with colour ``colormap.mpl_colors[label]``
    and a legend entry ``"C<label>: <cluster size>"``. Later (smaller)
    clusters get a higher z-order.

    Args:
        data_frame: table indexed by column name and by boolean mask.
        x_name, y_name: column names; also passed to ``fl`` for the labels.
        labels: per-row cluster labels; must support ``labels == value``
            as a mask, and each label is used as an index into the colours.
        filename: output path for ``fig.savefig``.
        colormap: object exposing ``mpl_colors``; also given to ``Ppl``.
        x_function, y_function: name of a callable applied to the data and
            axis limits. ``'dummy'`` is the identity defined below; any
            other name must be resolvable from this function's scope.
        legend: draw a legend when true.
        point_size: marker size.
        omit_largest: number of largest clusters to leave out.
        labels_to_plot: labels to draw; empty or ``None`` means all.

    Returns:
        None. The figure is written to ``filename``.
    """
    unique_labels = set(labels)
    if not labels_to_plot:
        labels_to_plot = unique_labels
    assert omit_largest < max(unique_labels), "omit_largest must be smaller than number of clusters"
    colors = colormap.mpl_colors

    def dummy(a):
        return a

    # NOTE(security): eval() runs whatever expression the caller passes as
    # x_function / y_function. Only use with trusted, hard-coded names.
    # It must be evaluated here so that the local ``dummy`` is visible.
    x_transform = eval(x_function)
    y_transform = eval(y_function)

    p = Ppl(colormap, alpha=1)
    fig, ax = plt.subplots(1)
    ax.set_xlim([x_transform(min(data_frame[x_name])),
                 x_transform(max(data_frame[x_name]))])
    ax.set_ylim([y_transform(min(data_frame[y_name])),
                 y_transform(max(data_frame[y_name]))])
    # The former "x_label += ' (log)'" statements were removed: x_label and
    # y_label were never assigned, so selecting 'log' raised before any
    # plotting, and the axis labels come from fl() below anyway.
    ax.set_xlabel(fl(x_name))
    ax.set_ylabel(fl(y_name))
    ax.xaxis.get_major_formatter().set_powerlimits((0, 1))
    ax.yaxis.get_major_formatter().set_powerlimits((0, 1))

    # (size, label) pairs, largest cluster first.
    by_size = sorted(
        ((len(labels[labels == label]), label) for label in unique_labels),
        reverse=True)

    # Keeping size and label paired guarantees the legend reports the size
    # of the cluster actually drawn, also when omit_largest > 0.
    for order_to_plot, (size, group) in enumerate(by_size[omit_largest:]):
        if group not in labels_to_plot:
            continue
        in_group = labels == group
        x = x_transform(data_frame[in_group][x_name])
        y = y_transform(data_frame[in_group][y_name])
        p.scatter(ax, x, y, label='C%s: %s' % (group, size), s=point_size,
                  linewidth=0, zorder=order_to_plot, color=colors[group])

    if legend:
        legend_box = p.legend(ax, loc=0, fancybox=True, markerscale=5,
                              frameon=False)
        legend_box.set_zorder(100)
    fig.savefig(filename)
def mkplot(fitness):
    """Plot mean parameter values against binned values of one fitness column.

    Reads the module-level frames ``f`` and ``p`` and the output prefix
    ``folder``. Rows are kept when ``f[fitness]`` is below its 95th
    percentile and ``f.overshoot < 5``. The kept fitness values are split
    into 50 histogram bins; for every bin edge the mean of ``p`` is taken
    over rows whose fitness lies within one bin width of that edge. One
    line per column of ``p`` is drawn into ``folder + '<fitness>.png'``.

    Args:
        fitness: name of a column of ``f``.
    """
    mask = f[fitness] < f[fitness].quantile(0.95)
    mask &= f.overshoot < 5
    masked_fit = f[mask][fitness]
    masked_par = p[mask]

    bins = np.histogram(masked_fit, bins=50)[1]
    ws = bins[1] - bins[0]

    # Real lists rather than map(): multiline_xy_plot asserts that ys is a
    # list and indexes legend_labels, which lazy map objects do not allow.
    window_means = [
        masked_par[(masked_fit > edge - ws) & (masked_fit < edge + ws)].mean()
        for edge in bins
    ]
    means = concat(window_means, axis=1).transpose()
    means_as_list = [list(means[column]) for column in means.columns]
    legend_labels = [fl(column) for column in means.columns]

    # NOTE(review): multiline_xy_plot passes xlabel through fl() again, so
    # the label is formatted twice -- confirm fl tolerates that.
    xlabel = fl(fitness)
    ylabel = ''
    filename = folder + '%s.png' % fitness
    multiline_xy_plot(bins, means_as_list, xlabel, ylabel, legend_labels,
                      filename)
def calc_and_plot(ratio_direction):
    """Plot every fitness column against both latency ranges, per ratio band.

    Uses the module-level frames ``f`` and ``p``, the ranges ``ratio_range``,
    ``ssmmlatencyrange`` and ``sclatencyrange``, the mask helpers
    ``get_ssmmlat_mask`` / ``get_sclat_mask`` and the output prefix
    ``folder``. For each column of ``f`` two figures are written, one per
    latency kind, each with one line per ``(lower, upper)`` ratio band.

    Args:
        ratio_direction: used in the legend text (via ``fl``) and in the
            output file names.
    """
    for fitness in f.columns:
        ssmm_ys = []
        sc_ys = []
        legend_labels = []

        for ratio_lower, ratio_upper in zip_to_tuples(ratio_range):
            ratio_mask = (ratio_lower < p.ratio) & (p.ratio < ratio_upper)

            def ssmm_mean(l):
                # Mean of f over a 20-wide latency window within the band.
                return f[get_ssmmlat_mask(l, l + 20) & ratio_mask].mean()

            def sc_mean(l):
                return f[get_sclat_mask(l, l + 20) & ratio_mask].mean()

            ssmm_lat_range = concat(map(ssmm_mean, ssmmlatencyrange),
                                    axis=1).transpose()
            ssmm_ys.append(ssmm_lat_range[fitness])

            sc_lat_range = concat(map(sc_mean, sclatencyrange),
                                  axis=1).transpose()
            sc_ys.append(sc_lat_range[fitness])

            band_label = r'$\displaystyle %s < %s < %s$' % (
                round(ratio_lower, 1),
                fl(ratio_direction, mathmode=False),
                round(ratio_upper, 1))
            legend_labels.append(band_label)

        filename = '%s_%s_%s_mmlatency.png' % (folder, ratio_direction,
                                               fitness)
        multiline_xy_plot(ssmm_lat_range.index, ssmm_ys,
                          xlabel='ssmm_latency_mu', ylabel=fitness,
                          legend_labels=legend_labels, filename=filename)

        filename = '%s_%s_%s_sclatency.png' % (folder, ratio_direction,
                                               fitness)
        multiline_xy_plot(sc_lat_range.index, sc_ys,
                          xlabel='sc_latency_mu', ylabel=fitness,
                          legend_labels=legend_labels, filename=filename)
sys.path.append( os.path.dirname(os.path.dirname(os.path.dirname( os.path.abspath(__file__))))) import utils ''' File used to build the Kernel between songs: songs are represented with playlists they belongs to. In each element of the kernel matrix we will have the cosine similarity between representation of two songs ''' #s2p = utils.jload("../data/validation/s2p.json") s2p = utils.jload("./s2p.json") for s in s2p: s2p[s] = set(s2p[s]) print "Creating s2d...", utils.fl() s2d = {} for ss in s2p: s = int(ss) if s not in s2d: s2d[s] = {} for p in s2p[ss]: if p not in s2d[s]: s2d[s][p] = 1. else: s2d[s][p] += 1. print "done!" utils.fl() print "Creating norms...", utils.fl()
def train(self, test_users=None, bucket=None):
    """Fit per-user scores from a weighted sum of three kernels.

    ``self.Klist[0]`` is indexed directly by kernel index, while
    ``self.Klist[1]`` and ``self.Klist[2]`` are densified once and indexed
    through the ``self.k2ar`` / ``self.k2al`` lookups. The three parts are
    weighted by ``self.mu[0..2]``. For each user the combined kernel over
    the user's items defines a quadratic program solved with ``solver.qp``.

    Args:
        test_users: mapping ``user -> sequence of item ids``; when ``None``
            ``self.test_users`` becomes ``range(self.n_users)``.
        bucket: optional cap on how many leading items per user are used.

    Returns:
        ``self``, with ``self.model[user]`` set for every processed user.
    """
    start = time.time()
    self.model = {}
    K1 = self.Klist[1].todense()
    K2 = self.Klist[2].todense()

    self.test_users = test_users
    if test_users is None:
        self.test_users = range(self.n_users)

    if bucket is None:
        tu = {u: set(self.test_users[u]) for u in self.test_users}
    else:
        tu = {u: set(self.test_users[u][:bucket]) for u in self.test_users}

    for seen, user in enumerate(tu, 1):
        if self.verbose and seen % 10 == 0:
            print("%d/%d" % (seen, len(self.test_users)))
            print(str(time.time() - start))
            utils.fl()

        # Index of every item in each of the three kernels.
        rows0 = [int(self.s2k[item]) for item in tu[user]]
        rows1 = [self.k2ar[r] for r in rows0]
        rows2 = [self.k2al[r] for r in rows0]
        n_pos = len(rows0)

        # Combined kernel restricted to the user's items, built per row.
        gram = np.zeros((n_pos, n_pos))
        for j in range(n_pos):
            gram[j] = self.mu[0] * self.Klist[0][rows0[j], rows0].todense().reshape(
                (n_pos, ))
            gram[j] = gram[j] + self.mu[1] * K1[rows1[j], rows1].reshape(
                (n_pos, ))
            gram[j] = gram[j] + self.mu[2] * K2[rows2[j], rows2].reshape(
                (n_pos, ))
        gram = co.matrix(gram)

        P = gram + self.lambda_p * utc.identity(n_pos)
        q = -self.q_[rows0, :]
        G = -utc.identity(n_pos)
        h = utc.zeroes_vec(n_pos)
        A = utc.ones_vec(n_pos).T
        b = co.matrix(1.0)
        solver.options['show_progress'] = False
        sol = solver.qp(P, q, G, h, A, b)

        # Combined kernel rows of the user's items against all columns.
        K = co.matrix(self.mu[0] * self.Klist[0][rows0, :].todense())
        col1 = np.array(rows1).reshape((len(rows1), 1))
        col2 = np.array(rows2).reshape((len(rows2), 1))
        K = K + co.matrix(self.mu[1] * K1[col1, self.k2ar])
        K = K + co.matrix(self.mu[2] * K2[col2, self.k2al])
        self.model[user] = (K.T * sol['x']) - self.q_
    return self
def train(self, test_users=None, b=None):
    """Fit per-user scores from a kernel whose zero entries are imputed.

    Wherever the kernel value between two indices is exactly ``0.`` it is
    replaced by ``self.imp_dic["<larger>,<smaller>"]``, keyed on the two
    ``len(self.u2i[self.inverse_indexing[index]])`` counts. Imputation is
    applied to the user-item sub-matrix (upper triangle, mirrored) before
    solving the quadratic program, and to the full rows used for scoring.

    Args:
        test_users: mapping ``user -> sequence of item ids``. Stored
            unchanged in ``self.test_users``.
        b: optional cap on how many leading items per user are used.

    Returns:
        ``self``.
    """
    start = time.time()
    self.model = {}
    self.test_users = test_users
    if test_users is None:
        test_users = range(self.n_users)

    positives = {}
    for user in test_users:
        items = test_users[user]
        positives[user] = set(items if b is None else items[:b])

    def support(kernel_index):
        # Number of entries u2i holds for the item behind a kernel index.
        return len(self.u2i[self.inverse_indexing[kernel_index]])

    def imputed(x, z):
        # imp_dic keys are "<larger>,<smaller>".
        mx, mn = (x, z) if x >= z else (z, x)
        return self.imp_dic["%d,%d" % (mx, mn)]

    for seen, user in enumerate(positives, 1):
        if self.verbose and seen % 10 == 0:
            print("%d/%d" % (seen, len(test_users)))
            print(str(time.time() - start))
            utils.fl()

        rows = [int(self.K_indexing[str(item)]) for item in positives[user]]
        n_pos = len(rows)

        gram = np.zeros((n_pos, n_pos))
        for j in range(n_pos):
            gram[j] = self.K[rows[j], rows].todense()

        # Impute zero entries above the diagonal and mirror them.
        for r in range(n_pos):
            x = support(rows[r])
            for c in range(r + 1, n_pos):
                if gram[r, c] != 0.:
                    continue
                gram[r, c] = imputed(x, support(rows[c]))
                gram[c, r] = gram[r, c]
        gram = co.matrix(gram)

        P = gram + self.lambda_p * utc.identity(n_pos)
        q = -self.q_[rows, :]
        G = -utc.identity(n_pos)
        h = utc.zeroes_vec(n_pos)
        A = utc.ones_vec(n_pos).T
        rhs = co.matrix(1.0)
        solver.options['show_progress'] = False
        sol = solver.qp(P, q, G, h, A, rhs)

        # Full kernel rows of the user's items, with the same imputation.
        K_imp = co.matrix(self.K[rows, :].todense())
        n_rows, n_cols = K_imp.size
        for r in range(n_rows):
            x = support(rows[r])
            for c in range(n_cols):
                if K_imp[r, c] == 0.:
                    K_imp[r, c] = imputed(x, support(c))
        self.model[user] = (K_imp.T * sol['x']) - self.q_
    return self
def train(self, test_users=None, b=None, test_titles=None, c=0):
    """Fit per-user scores with one extra QP variable derived from a title.

    For every user the row ``self.Beta[test_titles[user]]`` is appended to
    the user's items as an additional variable of the quadratic program:
    its kernel values against the items are ``K[items, :] . beta^T``, its
    self-similarity is fixed to 1, and its weight is additionally bounded
    through the last row of ``G`` / ``h`` using ``c``.

    Args:
        test_users: mapping ``user -> sequence of kernel indices``. Stored
            unchanged in ``self.test_users``.
        b: optional cap on how many leading items per user are used.
        test_titles: mapping ``user -> index into self.Beta``. It is indexed
            for every processed user, so the ``None`` default only works
            when there are no users.
        c: bound stored in ``self.c`` and written to the last entry of ``h``.

    Returns:
        ``self``, with ``self.model[user]`` a flat array per user.
    """
    self.model = {}
    self.test_users = test_users
    self.c = c
    if test_users is None:
        # NOTE(review): a range carries no per-user item lists, so the
        # indexing below fails on this path -- confirm the intended fallback.
        test_users = range(self.n_users)
    tu = {}
    if b is not None:
        for u in test_users:
            tu[u] = set(test_users[u][:b])
    else:
        for u in test_users:
            tu[u] = set(test_users[u])
    for i, u in enumerate(tu):
        t = time.time()
        beta = self.Beta[test_titles[u]]
        # Kernel of the title representation against every column of K.
        beta_K = self.K.dot(beta.T).todense()
        # "% 1" is always 0, so progress is printed for every user.
        if self.verbose and (i + 1) % 1 == 0:
            print("%d/%d" % (i + 1, len(test_users)))
            utils.fl()
        Xp = np.array([idx for idx in tu[u]])
        # One extra variable for the title row.
        npos = len(Xp) + 1
        kp = np.zeros((npos, npos))
        # Top-left block: kernel between the user's items.
        kp[:npos - 1, :npos - 1] = self.K[Xp[:, None], Xp].todense()
        # Last row: title-vs-item kernel values followed by a 1 on the
        # diagonal.
        kp[npos - 1, :] = np.concatenate(
            (self.K[Xp, :].dot(beta.T).todense(), np.ones(
                (1, 1)))).flatten()
        # Mirror the last row into the last column.
        kp[:npos - 1, npos - 1] = kp[npos - 1, :npos - 1]
        # Linear-term entry of the extra variable: beta . q_.
        q0 = (co.matrix(beta.todense()) * self.q_)[0, 0]
        kp = co.matrix(kp)
        kn = co.matrix(
            np.concatenate(
                (np.array(self.q_[list(Xp), :]), np.array([[q0]]))))
        I = self.lambda_p * utc.identity(npos)
        P = kp + I
        q = -kn
        # G has npos + 1 rows: -I for all variables plus one row selecting
        # the last variable. Presumably cvxopt's G x <= h convention, i.e.
        # x >= 0 and x[-1] <= c -- confirm.
        G = np.vstack((-np.eye(npos), np.zeros(npos)))
        G[npos, -1] = 1
        G = co.matrix(G)
        h = np.zeros(npos + 1)
        h[-1] = self.c
        h = co.matrix(h)
        A = utc.ones_vec(npos).T
        # Rebinds the ``b`` parameter; harmless because ``tu`` is already
        # built at this point.
        b = co.matrix(1.0)
        solver.options['show_progress'] = False
        sol = solver.qp(P, q, G, h, A, b)
        # Item part weighted by all but the last solution entry, plus the
        # title part weighted by the last entry, minus q_.
        self.model[u] = np.array((self.K[Xp, :].T).dot(sol['x'][:-1]) +
                                 beta_K * sol['x'][-1] - self.q_).flatten()
        # Per-user elapsed time, printed regardless of ``self.verbose``.
        print(time.time() - t)
    return self