# Log-probability used by the MCMC sampler: minus one half of the chi squared
# returned by get_chi2.  This is a closure defined inside a model method, so it
# captures `self`.
def logp(populations=self.populations, mu=self.mu):
    return -0.5 * get_chi2(populations, self.predictions, self.measurements,
                           self.uncertainties, mu=mu)
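# The closure above captures `self`, so it lives inside a model class, and the
# trace access below (model.mcmc.trace("alpha")) suggests a PyMC2-style sampler.
# The sketch below shows how such a log-probability could be attached as a
# pymc.potential; it is illustrative only.  The toy data, the softmax-style
# alpha-to-populations mapping (standing in for get_populations_from_alpha), and
# the _chi2_sketch helper are assumptions, not this module's actual API.
def _example_logp_potential():
    import numpy as np
    import pymc

    # Toy problem: 3 frames, 2 observables.
    predictions = np.array([[1.0, 2.0], [2.0, 1.0], [1.5, 1.5]])
    measurements = np.array([1.4, 1.6])
    uncertainties = np.array([0.2, 0.2])

    def _chi2_sketch(populations):
        # Chi squared of the population-averaged predictions (stand-in for get_chi2).
        mu = predictions.T.dot(populations)
        return np.sum(((mu - measurements) / uncertainties) ** 2)

    # Unconstrained alpha variables, mapped to normalized populations inside the potential.
    alpha = pymc.Uninformative("alpha", value=np.zeros(predictions.shape[0]))

    @pymc.potential
    def logp(alpha=alpha):
        populations = np.exp(alpha - alpha.max())
        populations /= populations.sum()
        return -0.5 * _chi2_sketch(populations)

    mcmc = pymc.MCMC([alpha, logp])
    mcmc.sample(iter=2000, burn=1000, thin=2)
    return mcmc.trace("alpha")[:]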
def cross_validated_mcmc(predictions, measurements, uncertainties, model_factory,
                         bootstrap_index_list, num_samples=50000, burn=None, thin=1):
    """Fit a model on each training set, evaluate it on the held-out test set,
    and return the chi squared scores.

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    model_factory : callable
        A function that takes predictions, measurements, and uncertainties
        as input and returns a BELT model.
    bootstrap_index_list : list of integer numpy arrays
        A list of numpy arrays of frame indices that mark the different
        training and test sets.  With a single trajectory,
        bootstrap_index_list will look something like the following:
        [np.array([0, 1, 2, ..., n / 2]), np.array([n / 2 + 1, ..., n - 1])]

    Returns
    -------
    train_chi, test_chi : ndarray
        Training and test chi squared scores, one entry per cross validation fold.
    """
    if burn is None:
        burn = num_samples // 2  # Integer division: burn-in must be an integer number of samples.

    all_indices = np.concatenate(bootstrap_index_list)
    test_chi = []
    train_chi = []
    for j, test_ind in enumerate(bootstrap_index_list):
        # The test indices are the k-fold splits of the data; the training set
        # is ALL the remaining data, so the training set is larger than the test set.
        train_ind = np.setdiff1d(all_indices, test_ind)
        test_data = predictions[test_ind].copy()
        train_data = predictions[train_ind].copy()

        # Uniform prior populations over the test frames.
        test_prior_pops = np.ones_like(test_data[:, 0])
        test_prior_pops /= test_prior_pops.sum()

        print("Building model for round %d of cross validation." % j)
        model = model_factory(train_data, measurements, uncertainties)
        model.sample(num_samples, burn=burn, thin=thin)

        # Calculate the chi2 error on the training data.
        train_chi2_j = []
        for alpha in model.mcmc.trace("alpha"):
            # The training set reuses the model's prior_pops, which has the correct shape.
            p = get_populations_from_alpha(alpha, train_data, model.prior_pops)
            chi2 = get_chi2(p, train_data, measurements, uncertainties)
            train_chi2_j.append(chi2)

        # Calculate the chi2 error on the test data.
        test_chi2_j = []
        for alpha in model.mcmc.trace("alpha"):
            # The test set needs its own (uniform) prior_pops with the correct shape.
            p = get_populations_from_alpha(alpha, test_data, test_prior_pops)
            chi2 = get_chi2(p, test_data, measurements, uncertainties)
            test_chi2_j.append(chi2)

        test_chi.append(np.mean(test_chi2_j))
        train_chi.append(np.mean(train_chi2_j))

    test_chi = np.array(test_chi)
    train_chi = np.array(train_chi)
    return train_chi, test_chi
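# Hypothetical usage sketch for cross_validated_mcmc: a two-fold split over the frames
# of a synthetic data set.  The data below are made up, and `model_factory` is left as
# an argument because the constructor of the package's actual BELT model class is not
# assumed here; pass e.g. `lambda p, m, u: YourBELTModel(p, m, u)` with a real model class.
def _example_cross_validation(model_factory):
    import numpy as np

    num_frames, num_measurements = 100, 3
    predictions = np.random.normal(size=(num_frames, num_measurements))
    measurements = np.zeros(num_measurements)
    uncertainties = np.ones(num_measurements)

    # Two folds: first half of the frames versus second half.
    bootstrap_index_list = [np.arange(0, num_frames // 2),
                            np.arange(num_frames // 2, num_frames)]

    train_chi, test_chi = cross_validated_mcmc(predictions, measurements, uncertainties,
                                               model_factory, bootstrap_index_list,
                                               num_samples=10000)
    print("train chi2 per fold: %s" % train_chi)
    print("test chi2 per fold:  %s" % test_chi)
    return train_chi, test_chi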