Example no. 1
import numpy as np
from scipy.sparse import issparse, vstack

# coord_arr_to_1d is a helper defined alongside this function in the project;
# it ravels each row of a coordinate array into a single comparable value.

def get_unique_locations(obs_coords_0, obs_coords_1, mu_0=None, mu_1=None):
    if issparse(obs_coords_0) or issparse(obs_coords_1):
        uidxs_0 = []  # indices of the unique rows within obs_coords_0
        pref_vu = []  # maps each input row to the index of its unique location

        for r in range(obs_coords_0.shape[0]):
            print("%i out of %i" % (r, obs_coords_0.shape[0]))
            row = obs_coords_0.getrow(r)
            # the row is a duplicate if it differs in no column from an earlier unique row
            matches = np.array([(row != obs_coords_0.getrow(u)).nnz == 0 for u in uidxs_0])
            if not matches.any():
                uidxs_0.append(r)
                pref_vu.append(len(uidxs_0) - 1)
            else:
                pref_vu.append(np.flatnonzero(matches)[0])

        n_unique_0 = len(uidxs_0)
        uidxs_1 = []  # unique rows of obs_coords_1 that do not already occur in obs_coords_0
        for r in range(obs_coords_1.shape[0]):
            print("%i out of %i" % (r, obs_coords_1.shape[0]))
            row = obs_coords_1.getrow(r)
            matches = np.array([(row != obs_coords_0.getrow(u)).nnz == 0 for u in uidxs_0])
            if not matches.any():
                matches_1 = np.array([(row != obs_coords_1.getrow(u)).nnz == 0 for u in uidxs_1])
                if not matches_1.any():
                    uidxs_1.append(r)
                    pref_vu.append(n_unique_0 + len(uidxs_1) - 1)
                else:
                    pref_vu.append(n_unique_0 + np.flatnonzero(matches_1)[0])
            else:
                pref_vu.append(np.flatnonzero(matches)[0])

        # stack the unique rows from both inputs into a single sparse matrix
        obs_coords = vstack((obs_coords_0[uidxs_0], obs_coords_1[uidxs_1]), format='csc')
        uidxs = np.concatenate((uidxs_0, np.array(uidxs_1, dtype=int) + obs_coords_0.shape[0]))
        pref_vu = np.array(pref_vu)
    else:
        coord_rows_0 = coord_arr_to_1d(obs_coords_0)
        coord_rows_1 = coord_arr_to_1d(obs_coords_1)
        all_coord_rows = np.concatenate((coord_rows_0, coord_rows_1), axis=0)
        _, uidxs, pref_vu = np.unique(
            all_coord_rows, return_index=True,
            return_inverse=True)  # get unique locations

        # Record the coordinates of all points that were compared
        obs_coords = np.concatenate((obs_coords_0, obs_coords_1),
                                    axis=0)[uidxs]

    # Record the indexes into the list of coordinates for the pairs that were compared
    pref_v = pref_vu[:obs_coords_0.shape[0]]
    pref_u = pref_vu[obs_coords_0.shape[0]:]

    if mu_0 is not None and mu_1 is not None:
        mu_vu = np.concatenate((mu_0, mu_1), axis=0)[uidxs]
        return obs_coords, pref_v, pref_u, mu_vu
    else:
        return obs_coords, pref_v, pref_u
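In the dense case above, the heavy lifting is done by np.unique over the ravelled coordinate rows. Below is a minimal self-contained sketch of the same trick, with np.unique(axis=0) standing in for the project's coord_arr_to_1d helper and made-up coordinates:

import numpy as np

# two sides of three preference pairs; each row is an (x, y) coordinate
coords_0 = np.array([[0, 0], [1, 2], [0, 0]])
coords_1 = np.array([[1, 2], [3, 4], [3, 4]])

all_coords = np.concatenate((coords_0, coords_1), axis=0)
_, uidxs, pref_vu = np.unique(all_coords, axis=0, return_index=True, return_inverse=True)

obs_coords = all_coords[uidxs]        # unique locations only
pref_v = pref_vu[:coords_0.shape[0]]  # index of the first item in each pair
pref_u = pref_vu[coords_0.shape[0]:]  # index of the second item in each pair

print(obs_coords)      # [[0 0] [1 2] [3 4]]
print(pref_v, pref_u)  # [0 1 0] [1 2 2]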
Example no. 2
    def _count_observations(self, obs_coords, _, poscounts, totals):
        '''
        obs_coords - a tuple of two elements: the first contains the coordinates
        of the first item in each pair, the second the coordinates of the second
        item in each pair.
        '''
        obs_coords_0 = np.array(obs_coords[0])
        obs_coords_1 = np.array(obs_coords[1])
        if obs_coords_0.ndim == 1:
            obs_coords_0 = obs_coords_0[:, np.newaxis]
        if obs_coords_1.ndim == 1:
            obs_coords_1 = obs_coords_1[:, np.newaxis]

        # duplicate locations should be merged and the number of duplicates counted
        #poscounts = poscounts.astype(int)
        totals = totals.astype(int)

        if self.features is not None:
            self.obs_uidxs = np.arange(self.features.shape[0])
            self.pref_v = obs_coords_0.flatten()
            self.pref_u = obs_coords_1.flatten()
            self.n_obs = len(self.pref_v)
            self.obs_coords = self.features
            return poscounts, totals
        else:
            # TODO: This code could be merged with get_unique_locations()
            ravelled_coords_0 = coord_arr_to_1d(obs_coords_0)  # ravel each coordinate row to a single value
            ravelled_coords_1 = coord_arr_to_1d(obs_coords_1)

            # get unique keys
            all_ravelled_coords = np.concatenate((ravelled_coords_0, ravelled_coords_1), axis=0)
            uravelled_coords, origidxs, keys = np.unique(all_ravelled_coords, return_index=True, return_inverse=True)

            keys_0 = keys[:len(ravelled_coords_0)]
            keys_1 = keys[len(ravelled_coords_0):]

            # Put each pair into a canonical order (larger key first) so that duplicate
            # location pairs observed in either order can be counted together
            idxs_to_swap = keys_0 < keys_1
            swap_keys_0 = keys_0[idxs_to_swap]
            # a positive count for (v, u) is a negative count for (u, v)
            poscounts[idxs_to_swap] = totals[idxs_to_swap] - poscounts[idxs_to_swap]

            keys_0[idxs_to_swap] = keys_1[idxs_to_swap]
            keys_1[idxs_to_swap] = swap_keys_0

            # coo_matrix sums the values of duplicate (row, col) entries on conversion,
            # which merges the counts for repeated pairs
            grid_obs_counts = coo_matrix((totals, (keys_0, keys_1))).toarray()
            grid_obs_pos_counts = coo_matrix((poscounts, (keys_0, keys_1))).toarray()

            nonzero_v, nonzero_u = grid_obs_counts.nonzero()  # the observed pairs, duplicates now merged

            nonzero_all = np.concatenate((nonzero_v, nonzero_u), axis=0)
            ukeys, pref_vu = np.unique(nonzero_all, return_inverse=True) # get unique locations

            self.obs_uidxs = origidxs[ukeys] # indexes of unique observation locations into the original input data

            # Record the coordinates of all points that were compared
            self.obs_coords = coord_arr_from_1d(uravelled_coords[ukeys], obs_coords_0.dtype,
                                                dims=(len(ukeys), obs_coords_0.shape[1]))

            # Record the indexes into the list of coordinates for the pairs that were compared
            self.pref_v = pref_vu[:len(nonzero_v)]
            self.pref_u = pref_vu[len(nonzero_v):]
            self.n_obs = len(self.pref_v)

            # Return the counts for each of the observed pairs
            pos_counts = grid_obs_pos_counts[nonzero_v, nonzero_u]
            total_counts = grid_obs_counts[nonzero_v, nonzero_u]
            return pos_counts, total_counts
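The pair-merging above relies on a scipy behaviour that is easy to miss: when a coo_matrix is built with repeated (row, col) index pairs, the duplicate values are summed on conversion. A small sketch with made-up counts, showing how canonical ordering plus coo_matrix collapses repeated pairs:

import numpy as np
from scipy.sparse import coo_matrix

# the pair (2, 5) observed twice, plus once the other way round as (5, 2)
keys_0 = np.array([2, 2, 5])
keys_1 = np.array([5, 5, 2])
totals = np.array([3, 1, 2])
poscounts = np.array([2, 1, 0])

# canonical order: larger key first, so (2, 5) and (5, 2) land in the same cell
swap = keys_0 < keys_1
poscounts[swap] = totals[swap] - poscounts[swap]  # a win for v over u is a loss for u over v
keys_0[swap], keys_1[swap] = keys_1[swap], keys_0[swap]

grid_totals = coo_matrix((totals, (keys_0, keys_1))).toarray()  # duplicate entries are summed
grid_pos = coo_matrix((poscounts, (keys_0, keys_1))).toarray()

v, u = grid_totals.nonzero()
print(grid_totals[v, u], grid_pos[v, u])  # [6] [1]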
Example no. 3
 for modelkey in models:
     model = models[modelkey]
     
     print(("--- Running model %s ---" % modelkey))
     
     model.fit(pair1idxs[trainidxs], pair2idxs[trainidxs], item_features, prefs[trainidxs], optimize=False)
     print(("Final lower bound: %f" % model.lowerbound()))
     
     f_means[modelkey] = model.obs_f
     
     # Predict at all locations
     fpred, vpred = model.predict_f(item_features)
     
     # Compare the observation point values with the ground truth
     obs_coords_1d = coord_arr_to_1d(model.obs_coords)
     test_coords_1d = coord_arr_to_1d(item_features)
     f_obs = [f[(test_coords_1d == obs_coords_1d[i]).flatten()][0] for i in range(model.obs_coords.shape[0])]
     print("Kendall's tau (observations): %.3f" % kendalltau(f_obs, model.obs_f.flatten())[0])
         
     # Evaluate the accuracy of the predictions
     #print("RMSE of %f" % np.sqrt(np.mean((f - fpred) ** 2)))
     #print("NLPD of %f" % -np.sum(norm.logpdf(f, loc=fpred, scale=vpred ** 0.5)))
     print("Kendall's tau (test): %.3f" % kendalltau(f_test, fpred)[0])
         
     t = (f[pair1idxs[testidxs]] > f[pair2idxs[testidxs]]).astype(int)
     rho_pred, var_rho_pred = model.predict(item_features, pair1idxs[testidxs], pair2idxs[testidxs])
     rho_pred = rho_pred.flatten()
     t_pred = np.round(rho_pred)
     
     print(("Brier score of %.3f" % np.sqrt(np.mean((t-rho_pred)**2))))
Example no. 4
 # make sure the simulation is repeatable
 if fix_seeds:
     np.random.seed(1)
 
 N, nx, ny, labels, xvals, yvals, f, K = gen_synthetic_classifications()
     
 # separate training and test data
 Ctest = int(len(labels) * 0.1)
 testids = np.random.choice(labels.shape[0], Ctest, replace=False)
 testidxs = np.zeros(labels.shape[0], dtype=bool)
 testidxs[testids] = True
 trainidxs = np.invert(testidxs)
 
 xvals_test = xvals[testidxs].flatten()
 yvals_test = yvals[testidxs].flatten()
 _, uidxs = np.unique(coord_arr_to_1d(np.concatenate((xvals_test[:, np.newaxis], yvals_test[:, np.newaxis]), axis=1)),
                      return_index=True)
 xvals_test = xvals_test[uidxs][:, np.newaxis]
 yvals_test = yvals_test[uidxs][:, np.newaxis]
 f_test = f[testidxs][uidxs]
 
 models = {}
 
 ls_initial = [112]  # alternatives: np.random.randint(1, 100, 2) or [10, 10]
 
 model = GPClassifierVB(2, z0=0.5, shape_s0=1, rate_s0=1, ls_initial=ls_initial)
 #model.verbose = True
 model.max_iter_VB = 1000
 model.min_iter_VB = 5
 model.uselowerbound = True
 model.delay = 1
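The test-point de-duplication in this snippet can be sketched in isolation. As before, np.unique(axis=0) stands in for the coord_arr_to_1d round-trip, and the arrays are made up:

import numpy as np

xy_test = np.array([[0, 1], [2, 3], [0, 1], [4, 5]])  # two rows coincide
f_all = np.array([0.5, -0.2, 0.5, 1.3])               # ground truth at each point

_, uidxs = np.unique(xy_test, axis=0, return_index=True)
xy_unique = xy_test[uidxs]  # duplicate coordinates removed
f_unique = f_all[uidxs]     # ground truth reindexed to stay aligned

print(xy_unique.shape, f_unique)  # (3, 2) [ 0.5 -0.2  1.3]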
Example no. 5
 # make sure the simulation is repeatable
 if fix_seeds:
     np.random.seed(1)
 
 N, nx, ny, labels, xvals, yvals, f, K = gen_synthetic_classifications()
     
 # separate training and test data
 Ctest = int(len(labels) * 0.1)
 testids = np.random.choice(labels.shape[0], Ctest, replace=False)
 testidxs = np.zeros(labels.shape[0], dtype=bool)
 testidxs[testids] = True
 trainidxs = np.invert(testidxs)
 
 xvals_test = xvals[testidxs].flatten()
 yvals_test = yvals[testidxs].flatten()
 _, uidxs = np.unique(coord_arr_to_1d(np.concatenate((xvals_test[:, np.newaxis], yvals_test[:, np.newaxis]), axis=1)),
                      return_index=True)
 xvals_test = xvals_test[uidxs][:, np.newaxis]
 yvals_test = yvals_test[uidxs][:, np.newaxis]
 f_test = f[testidxs][uidxs]
 
 models = {}
 
 ls_initial = [112]  # alternatives: np.random.randint(1, 100, 2) or [10, 10]
 
 model = GPClassifierVB(2, z0=0.5, shape_s0=1, rate_s0=1, ls_initial=ls_initial)
 model.verbose = True
 model.max_iter_VB = 1000
 model.min_iter_VB = 5
 model.uselowerbound = True
 model.delay = 1