def _unique_sparse_rows(stacked):
    """
    Find the distinct rows of a CSR matrix.

    The matrix is put into canonical form in place (duplicates summed,
    explicit zeros dropped, column indices sorted) so that equal rows have
    identical stored entries and can be used as dictionary keys.

    Returns
    -------
    first_idxs : int array with the row index of the first occurrence of each
        distinct row, in order of first appearance.
    inverse : int array with one entry per row of `stacked`, giving the
        position in `first_idxs` of the row it duplicates.
    """
    stacked.sum_duplicates()
    stacked.eliminate_zeros()
    stacked.sort_indices()

    indptr, indices, data = stacked.indptr, stacked.indices, stacked.data

    position_of = {}
    first_idxs = []
    inverse = np.empty(stacked.shape[0], dtype=int)

    for r in range(stacked.shape[0]):
        lo, hi = indptr[r], indptr[r + 1]
        key = (tuple(indices[lo:hi].tolist()), tuple(data[lo:hi].tolist()))
        pos = position_of.get(key)
        if pos is None:
            pos = len(first_idxs)
            position_of[key] = pos
            first_idxs.append(r)
        inverse[r] = pos

    return np.array(first_idxs, dtype=int), inverse


def get_unique_locations(obs_coords_0, obs_coords_1, mu_0=None, mu_1=None):
    """
    Merge the coordinates of both items of each compared pair into a single
    list of unique locations.

    The rows of `obs_coords_0` are stacked on top of the rows of
    `obs_coords_1`, duplicate rows are collapsed, and every original row is
    mapped to the index of its unique location.

    Parameters
    ----------
    obs_coords_0 : 2-D array or scipy sparse matrix holding the coordinates of
        the first item of each pair, one row per pair.
    obs_coords_1 : same, for the second item of each pair.
    mu_0, mu_1 : optional arrays with one entry per row of `obs_coords_0` and
        `obs_coords_1` respectively. They are only used when both are given.

    Returns
    -------
    obs_coords : the unique locations. For dense input the rows follow the
        sorted order produced by `np.unique`; for sparse input a CSC matrix
        is returned with rows in order of first appearance.
    pref_v : for each row of `obs_coords_0`, its row index in `obs_coords`.
    pref_u : for each row of `obs_coords_1`, its row index in `obs_coords`.
    mu_vu : only returned when both `mu_0` and `mu_1` are given; the entry of
        the concatenated `(mu_0, mu_1)` at the first occurrence of each
        unique location.
    """
    n_first = obs_coords_0.shape[0]

    if issparse(obs_coords_0) or issparse(obs_coords_1):
        # Imported locally so the block does not depend on which scipy.sparse
        # names the module already imports.
        from scipy.sparse import csr_matrix, vstack

        # Stack the two lists row-wise (either input may be dense here) so
        # that one index space covers both, exactly as in the dense branch.
        stacked = vstack((csr_matrix(obs_coords_0), csr_matrix(obs_coords_1)),
                         format='csr')
        uidxs, pref_vu = _unique_sparse_rows(stacked)
        obs_coords = stacked[uidxs].tocsc()
    else:
        coord_rows_0 = coord_arr_to_1d(obs_coords_0)
        coord_rows_1 = coord_arr_to_1d(obs_coords_1)

        all_coord_rows = np.concatenate((coord_rows_0, coord_rows_1), axis=0)
        # uidxs: first occurrence of each unique location in the stacked list;
        # pref_vu: unique-location index of every stacked row.
        _, uidxs, pref_vu = np.unique(all_coord_rows, return_index=True,
                                      return_inverse=True)

        # Record the coordinates of all points that were compared
        obs_coords = np.concatenate((obs_coords_0, obs_coords_1), axis=0)[uidxs]

    # Split the mapping back into the first and second item of each pair
    pref_v = pref_vu[:n_first]
    pref_u = pref_vu[n_first:]

    if mu_0 is not None and mu_1 is not None:
        mu_vu = np.concatenate((mu_0, mu_1), axis=0)[uidxs]
        return obs_coords, pref_v, pref_u, mu_vu

    return obs_coords, pref_v, pref_u
def _count_observations(self, obs_coords, _, poscounts, totals):
    '''
    Record the observed pairs on the model and return their counts.

    obs_coords - a tuple with two elements, the first containing the list of
    coordinates for the first items in each pair, and the second containing
    the coordinates of the second item in the pair.
    _ - unused; kept so the signature stays the same.
    poscounts - array with one entry per pair, used as the positive count for
    that pair. It is not modified.
    totals - array with one entry per pair giving the total count for that
    pair; it is cast to int.

    Sets self.obs_uidxs, self.obs_coords, self.pref_v, self.pref_u and
    self.n_obs.

    If self.features is set, the entries of obs_coords are stored directly as
    pref_v / pref_u (presumably they are item indices into self.features --
    verify against the caller) and (poscounts, totals) is returned without
    merging duplicates.

    Otherwise the coordinates are reduced to unique locations, duplicate
    pairs are merged by summing their counts, and the merged
    (pos_counts, total_counts) are returned. Pairs whose summed total is zero
    are dropped.
    '''
    obs_coords_0 = np.array(obs_coords[0])
    obs_coords_1 = np.array(obs_coords[1])
    if obs_coords_0.ndim == 1:
        obs_coords_0 = obs_coords_0[:, np.newaxis]
    if obs_coords_1.ndim == 1:
        obs_coords_1 = obs_coords_1[:, np.newaxis]

    totals = totals.astype(int)

    if self.features is not None:
        self.obs_uidxs = np.arange(self.features.shape[0])
        self.pref_v = obs_coords_0.flatten()
        self.pref_u = obs_coords_1.flatten()
        self.n_obs = len(self.pref_v)
        self.obs_coords = self.features
        return poscounts, totals

    # TODO: This code could be merged with get_unique_locations()
    ravelled_coords_0 = coord_arr_to_1d(obs_coords_0)
    ravelled_coords_1 = coord_arr_to_1d(obs_coords_1)
    n_pairs = len(ravelled_coords_0)

    # Assign every location an integer key: its rank among the unique locations
    all_ravelled_coords = np.concatenate((ravelled_coords_0, ravelled_coords_1), axis=0)
    uravelled_coords, origidxs, keys = np.unique(
        all_ravelled_coords, return_index=True, return_inverse=True)
    keys_0 = keys[:n_pairs]
    keys_1 = keys[n_pairs:]

    # Put every pair into one canonical order -- the larger key first -- so
    # that (a, b) and (b, a) fall into the same cell and can be counted
    # together. A reversed pair has its positive count flipped.
    # Work on a copy: the flip must not alter the caller's array.
    poscounts = np.array(poscounts)
    swap = keys_0 < keys_1
    poscounts[swap] = totals[swap] - poscounts[swap]
    keys_0, keys_1 = np.where(swap, keys_1, keys_0), np.where(swap, keys_0, keys_1)

    # Converting the COO matrices to dense arrays sums the entries of
    # duplicate (key_0, key_1) pairs.
    grid_obs_counts = coo_matrix((totals, (keys_0, keys_1))).toarray()
    grid_obs_pos_counts = coo_matrix((poscounts, (keys_0, keys_1))).toarray()

    # Coordinate key pairs with duplicate pairs removed
    nonzero_v, nonzero_u = grid_obs_counts.nonzero()
    nonzero_all = np.concatenate((nonzero_v, nonzero_u), axis=0)
    # Renumber the keys so only locations in a surviving pair are kept
    ukeys, pref_vu = np.unique(nonzero_all, return_inverse=True)

    # Indexes of unique observation locations into the original input data
    self.obs_uidxs = origidxs[ukeys]

    # Record the coordinates of all points that were compared
    self.obs_coords = coord_arr_from_1d(
        uravelled_coords[ukeys], obs_coords_0.dtype,
        dims=(len(ukeys), obs_coords_0.shape[1]))

    # Record the indexes into the list of coordinates for the pairs that were compared
    n_unique_pairs = len(nonzero_v)
    self.pref_v = pref_vu[:n_unique_pairs]
    self.pref_u = pref_vu[n_unique_pairs:]
    self.n_obs = len(self.pref_v)

    # Return the counts for each of the observed pairs
    pos_counts = grid_obs_pos_counts[nonzero_v, nonzero_u]
    total_counts = grid_obs_counts[nonzero_v, nonzero_u]
    return pos_counts, total_counts
# Fit each model on the training pairs, then report rank correlation with the
# ground truth f and the error of the predicted preferences on the test pairs.
for modelkey, model in models.items():
    print("--- Running model %s ---" % modelkey)

    model.fit(pair1idxs[trainidxs], pair2idxs[trainidxs], item_features,
              prefs[trainidxs], optimize=False)
    print("Final lower bound: %f" % model.lowerbound())

    f_means[modelkey] = model.obs_f

    # Predict at all locations
    fpred, vpred = model.predict_f(item_features)

    # Compare the observation point values with the ground truth: for each
    # observed location, look up the true f at the matching item.
    obs_coords_1d = coord_arr_to_1d(model.obs_coords)
    test_coords_1d = coord_arr_to_1d(item_features)
    f_obs = [f[(test_coords_1d == obs_coords_1d[i]).flatten()][0]
             for i in range(model.obs_coords.shape[0])]
    print("Kendall's tau (observations): %.3f"
          % kendalltau(f_obs, model.obs_f.flatten())[0])

    # Evaluate the accuracy of the predictions
    print("Kendall's tau (test): %.3f" % kendalltau(f_test, fpred)[0])

    # True preference label for each test pair: 1 if the first item has the
    # larger ground-truth value.
    t = (f[pair1idxs[testidxs]] > f[pair2idxs[testidxs]]).astype(int)
    rho_pred, var_rho_pred = model.predict(item_features, pair1idxs[testidxs],
                                           pair2idxs[testidxs])
    rho_pred = rho_pred.flatten()
    t_pred = np.round(rho_pred)

    # The Brier score is the mean squared difference between the predicted
    # probability and the outcome; no square root is taken.
    print("Brier score of %.3f" % np.mean((t - rho_pred) ** 2))
# Seed NumPy's global random generator when asked, so the run is repeatable.
if fix_seeds:
    np.random.seed(1)

N, nx, ny, labels, xvals, yvals, f, K = gen_synthetic_classifications()

# Hold out a tenth of the labels, drawn without replacement, as test data.
Ctest = int(len(labels) * 0.1)
testids = np.random.choice(len(labels), size=Ctest, replace=False)
testidxs = np.zeros(len(labels), dtype=bool)
testidxs[testids] = True
trainidxs = ~testidxs

# Keep a single test point per distinct (x, y) location.
xvals_test = xvals[testidxs].ravel()
yvals_test = yvals[testidxs].ravel()
_, uidxs = np.unique(
    coord_arr_to_1d(
        np.concatenate(
            (xvals_test.reshape(-1, 1), yvals_test.reshape(-1, 1)),
            axis=1,
        )
    ),
    return_index=True,
)
xvals_test = xvals_test[uidxs].reshape(-1, 1)
yvals_test = yvals_test[uidxs].reshape(-1, 1)
f_test = f[testidxs][uidxs]

models = {}

# Alternatives left by the author: np.random.randint(1, 100, 2) or [10, 10]
ls_initial = [112]

model = GPClassifierVB(
    2,
    z0=0.5,
    shape_s0=1,
    rate_s0=1,
    ls_initial=ls_initial,
)
# model.verbose = True  (left disabled here)
model.max_iter_VB = 1000
model.min_iter_VB = 5
model.uselowerbound = True
model.delay = 1
# Seed NumPy's global random generator when asked, so the run is repeatable.
if fix_seeds:
    np.random.seed(1)

N, nx, ny, labels, xvals, yvals, f, K = gen_synthetic_classifications()

# Hold out a tenth of the labels, drawn without replacement, as test data.
Ctest = int(len(labels) * 0.1)
testids = np.random.choice(len(labels), size=Ctest, replace=False)
testidxs = np.zeros(len(labels), dtype=bool)
testidxs[testids] = True
trainidxs = ~testidxs

# Keep a single test point per distinct (x, y) location.
xvals_test = xvals[testidxs].ravel()
yvals_test = yvals[testidxs].ravel()
_, uidxs = np.unique(
    coord_arr_to_1d(
        np.concatenate(
            (xvals_test.reshape(-1, 1), yvals_test.reshape(-1, 1)),
            axis=1,
        )
    ),
    return_index=True,
)
xvals_test = xvals_test[uidxs].reshape(-1, 1)
yvals_test = yvals_test[uidxs].reshape(-1, 1)
f_test = f[testidxs][uidxs]

models = {}

# Alternatives left by the author: np.random.randint(1, 100, 2) or [10, 10]
ls_initial = [112]

model = GPClassifierVB(
    2,
    z0=0.5,
    shape_s0=1,
    rate_s0=1,
    ls_initial=ls_initial,
)
model.verbose = True
model.max_iter_VB = 1000
model.min_iter_VB = 5
model.uselowerbound = True
model.delay = 1