def cf(self, estimator='landy-szalay', n_iter=1, clobber=False,
       random_oversample=None, save_steps_file=None, name='cf'):
    """Compute the angular correlation function without error estimation.

    Calls ``corr.two_point_angular`` ``n_iter`` times on the objects
    selected by ``self._use`` together with the current random sample,
    averages the per-iteration results and stores the average in
    ``self._cfs[name]`` with an all-zero error array.

    Parameters
    ----------
    estimator : str
        Forwarded to ``corr.two_point_angular`` as ``method`` and
        recorded on the stored CorrelationFunction.
    n_iter : int
        Number of iterations to average over (must be >= 1).  When it is
        greater than 1 ``self.generate_random_sample()`` is called after
        every iteration.
    clobber : bool
        Allow an existing entry ``self._cfs[name]`` to be replaced.
    random_oversample : optional
        Forwarded unchanged to the internal setup check.
    save_steps_file : optional
        When not None, the running average and the DD counts are stored
        and ``self.save_cfs`` is called after every iteration.
    name : str
        Key under which the result is stored in ``self._cfs``.

    Raises
    ------
    ValueError
        If ``name`` already exists and ``clobber`` is False, or if
        ``n_iter`` is smaller than 1.
    """
    if (name in self._cfs) and not clobber:
        raise ValueError("CorrelationFunction.cf says: There's already"
                         " a CF by that name. Please choose another or "
                         "overwrite by calling with clobber=True")
    # With zero iterations the final division would silently yield NaNs.
    if n_iter < 1:
        raise ValueError("AngularCatalog.cf says: n_iter must be at "
                         "least 1 (got %r)" % (n_iter,))

    # Make sure that we have everything we need and fix anything missing
    # that's fixable.
    self.__check_cf_setup(random_oversample=random_oversample,
                          need_subregions=False, check_trees=True)

    # Make a new CorrelationFunction instance and set the basic info.
    info = {'name': name,
            'cf_type': 'no_error',
            'ngals': self._n_objects,
            'theta_bin_object': copy.deepcopy(self._theta_bins),
            'estimator': estimator}
    self._cfs[name] = cfclass.CorrelationFunction(**info)
    centers, edges = self._cfs[name].get_thetas(unit='degrees')
    nbins = len(centers)

    # Running sum of the per-iteration correlation functions.
    cf_sum = np.zeros(nbins)
    DD = np.zeros(nbins)
    # Single-string print() call behaves identically on Python 2 and 3.
    print("AngularCatalog.cf says: doing a CF calculation without error "
          "estimation")
    iterations = {}
    for it in np.arange(n_iter):
        this_cf, this_dd = corr.two_point_angular(
            self._ra[self._use], self._dec[self._use], edges,
            BT_D=self._data_tree, BT_R=self._random_tree,
            method=estimator,
            ra_R=self._ra_random, dec_R=self._dec_random,
            return_DD=True)
        iterations[it] = this_cf
        cf_sum += this_cf
        # Only the most recent DD is kept.
        # NOTE(review): the halving presumably undoes double counting of
        # pairs inside corr.two_point_angular -- confirm.
        DD = this_dd / 2.
        if save_steps_file is not None:
            # Store the average over the iterations finished so far.
            self._cfs[name].set_cf(cf_sum / (it + 1), np.zeros(nbins),
                                   iterations=iterations)
            self._cfs[name].set_DD(DD)
            # NOTE(review): this used to be save_cf(..., cf_keys=name);
            # changed to the save_cfs(..., cf_keys=[name]) form used by
            # the other CF methods -- confirm against save_cfs.
            self.save_cfs(save_steps_file, cf_keys=[name])
        if n_iter > 1:
            # NOTE(review): self._random_tree is passed to the next
            # iteration unchanged here; presumably generate_random_sample
            # refreshes it -- confirm.
            self.generate_random_sample()

    # Divide out the number of iterations.
    cf_sum /= n_iter

    # Make sure we've stored everything properly even if we're not saving
    # (this now includes the DD counts, which used to be dropped).
    self._cfs[name].set_cf(cf_sum, np.zeros(nbins), iterations=iterations)
    self._cfs[name].set_DD(DD)
def cf_block_bootstrap(self, n_boots=10, ignore_regions=None,
                       estimator='landy-szalay', random_oversample=None,
                       bootstrap_oversample=1, save_steps_file=None,
                       name='block_bootstrap', clobber=False):
    """Compute the correlation function with a block bootstrap.

    Uses the subdivided mask to bootstrap on blocks (sub-regions) rather
    than on single galaxies: for every boot a set of sub-regions is drawn
    with ``rand.choice`` and the data and randoms belonging to the drawn
    regions are passed to ``corr.two_point_angular``.  The running mean
    and standard deviation over the boots are stored in
    ``self._cfs[name]``.

    Parameters
    ----------
    n_boots : int
        Number of bootstrap realisations.
    ignore_regions : sequence or None
        Sub-region numbers that must not be drawn.  ``None`` means that
        no region is ignored.  Region ``-1`` is always excluded.
    estimator : str
        Forwarded to ``corr.two_point_angular`` as ``method``.
    random_oversample : optional
        Forwarded unchanged to the internal setup check.
    bootstrap_oversample : number
        Each boot draws ``len(use_regions) * bootstrap_oversample``
        regions.
    save_steps_file : optional
        When not None, ``self.save_cfs`` is called after every boot.
    name : str
        Key under which the result is stored in ``self._cfs``.
    clobber : bool
        Allow an existing entry ``self._cfs[name]`` to be replaced.

    Raises
    ------
    ValueError
        If ``name`` already exists and ``clobber`` is False.
    """
    if (name in self._cfs) and not clobber:
        raise ValueError("CorrelationFunction.cf_block_bootstrap says: "
                         "There's already a CF by that name. Please "
                         "choose another or overwrite by calling with "
                         "clobber=True")
    # The default used to reach the membership test below as None and
    # raise a TypeError.
    if ignore_regions is None:
        ignore_regions = []

    # Check to make sure I have everything that I need.
    self.__check_cf_setup(masked=True, need_subregions=True,
                          random_oversample=random_oversample,
                          check_trees=False)

    # Make a new CorrelationFunction instance and set the basic info.
    # NOTE(review): cf_type is 'jackknife' although this is a block
    # bootstrap -- looks like a copy/paste leftover, but the set of valid
    # cf_type values is not visible here, so it is left unchanged.
    info = {'name': name,
            'cf_type': 'jackknife',
            'ngals': self._n_objects,
            'theta_bin_object': copy.deepcopy(self._theta_bins),
            'estimator': estimator}
    self._cfs[name] = cfclass.CorrelationFunction(**info)
    centers, edges = self._cfs[name].get_thetas(unit='degrees')
    nbins = len(centers)

    # Single-string print() calls behave identically on Python 2 and 3.
    print("block boots done with setup")

    # Figure out which subregions we should be using.
    regions = np.asarray(list(set(self._subregion_number)))
    use_regions = np.array([r for r in regions
                            if (r not in ignore_regions) and (r != -1)])

    # Figure out where the randoms are.
    random_subregions = self._image_mask.return_subregions(
        self._ra_random, self._dec_random)

    # Indices of the members of each sub-region we need, for the data
    # and for the randoms.
    # NOTE(review): unlike cf_jackknife, self._use is not applied to the
    # data here -- confirm whether that is intended.
    indices = {}
    random_indices = {}
    for r in use_regions:
        indices[r] = np.where(self._subregion_number == r)[0]
        random_indices[r] = np.where(random_subregions == r)[0]

    # Loop through the bootstraps.
    block_bootstrap_boots = {}
    n_choose = len(use_regions) * bootstrap_oversample
    temp = np.zeros((n_boots, nbins))
    # Leading empty integer array keeps np.concatenate valid (and the
    # result an integer index array) even when no region is drawn.
    no_indices = np.array([], dtype=int)
    print("block boots looping through boots")
    for i in np.arange(n_boots):
        this_boot = rand.choice(use_regions, size=n_choose)
        # One concatenate per array instead of growing it region by
        # region.
        this_boot_indices = np.concatenate(
            [no_indices] + [indices[region] for region in this_boot])
        this_boot_random_indices = np.concatenate(
            [no_indices] + [random_indices[region] for region in this_boot])

        print("calculating boot %s" % i)
        temp[i] = corr.two_point_angular(
            self._ra[this_boot_indices], self._dec[this_boot_indices],
            edges, method=estimator,
            ra_R=self._ra_random[this_boot_random_indices],
            dec_R=self._dec_random[this_boot_random_indices])
        block_bootstrap_boots[i] = temp[i]

        # Statistics over the boots finished so far; after the last boot
        # this is the final result.
        cf = np.nanmean(temp[0:i+1], axis=0)
        cf_err = np.nanstd(temp[0:i+1], axis=0)
        self._cfs[name].set_cf(cf, cf_err,
                               iterations=block_bootstrap_boots)
        if save_steps_file is not None:
            self.save_cfs(save_steps_file, cf_keys=[name])
def cf_jackknife(self, ignore_regions=None, estimator='landy-szalay',
                 random_oversample=None, save_steps_file=None,
                 name='jackknife', clobber=False):
    """Compute the correlation function with jackknife resampling.

    Takes a divided mask and performs the correlation function
    calculation on the field with each usable sub-region removed in
    turn.  The mean and standard deviation (``np.nanmean`` /
    ``np.nanstd``) over the jackknife samples are stored in
    ``self._cfs[name]``.

    Parameters
    ----------
    ignore_regions : sequence or None
        Sub-region numbers that are excluded from every sample.  ``None``
        (the default) means that no region is ignored.  Region ``-1`` is
        always excluded.
    estimator : str
        Forwarded to ``corr.two_point_angular`` as ``method``.
    random_oversample : optional
        Forwarded unchanged to the internal setup check.
    save_steps_file : optional
        When not None, the running result is stored and
        ``self.save_cfs`` is called after every jackknife sample.
    name : str
        Key under which the result is stored in ``self._cfs``.
    clobber : bool
        Allow an existing entry ``self._cfs[name]`` to be replaced.

    Raises
    ------
    ValueError
        If ``name`` already exists and ``clobber`` is False.
    """
    if (name in self._cfs) and not clobber:
        raise ValueError("CorrelationFunction.cf_jackknife says: "
                         "There's already a CF by that name. Please "
                         "choose another or overwrite by calling with "
                         "clobber=True")
    if ignore_regions is None:
        ignore_regions = []

    # Check to make sure we have everything we need.
    self.__check_cf_setup(need_subregions=True, check_trees=False,
                          random_oversample=random_oversample)

    # Make a new CorrelationFunction instance and set the basic info.
    info = {'name': name,
            'cf_type': 'jackknife',
            'ngals': self._n_objects,
            'theta_bin_object': copy.deepcopy(self._theta_bins),
            'estimator': estimator}
    self._cfs[name] = cfclass.CorrelationFunction(**info)
    centers, edges = self._cfs[name].get_thetas(unit='degrees')
    # Bin count taken from the CorrelationFunction just created, as in
    # the other CF methods (this used to read self._cf_thetas).
    nbins = len(centers)

    # Pull out the unique subregion numbers and figure out which to use.
    regions = np.asarray(list(set(self._subregion_number)))
    use_regions = np.array([r for r in regions
                            if (r not in ignore_regions) and (r != -1)])
    n_jacks = len(use_regions)

    # Figure out where the randoms are.
    subregions = np.asarray(self._subregion_number)
    random_subregions = np.asarray(self._image_mask.return_subregions(
        self._ra_random, self._dec_random))

    # Boolean selections of everything that lies in a usable sub-region.
    # Plain comparisons always give a full-length array, whereas the
    # np.ma ``.mask`` attribute can collapse to the scalar ``nomask``
    # when no element ends up masked.
    valid_subregion = subregions != -1
    random_valid_subregion = random_subregions != -1
    for bad_reg in ignore_regions:
        valid_subregion = valid_subregion & (subregions != bad_reg)
        random_valid_subregion = (random_valid_subregion
                                  & (random_subregions != bad_reg))

    # Now loop through the regions that you should be using and
    # calculate the correlation function leaving out each.
    jackknife_jacks = {}
    temp = np.zeros((n_jacks, nbins))
    for i, r in enumerate(use_regions):
        # Selection for the data: usable, not in region r, and flagged
        # for use.
        this_jackknife = valid_subregion & (subregions != r) & self._use
        # Selection for the randoms.
        random_this_jackknife = (random_subregions != r) & random_valid_subregion

        # Do the calculation for this jackknife and store it.
        # Single-string print() behaves identically on Python 2 and 3.
        print("calculating jackknife %s" % i)
        jackknife_jacks[r] = corr.two_point_angular(
            self._ra[this_jackknife], self._dec[this_jackknife],
            edges, method=estimator,
            ra_R=self._ra_random[random_this_jackknife],
            dec_R=self._dec_random[random_this_jackknife])
        temp[i] = jackknife_jacks[r]
        if save_steps_file is not None:
            # Store and save the statistics over the samples so far.
            jackknife_cf = np.nanmean(temp[0:i+1], axis=0)
            jackknife_cf_err = np.nanstd(temp[0:i+1], axis=0)
            self._cfs[name].set_cf(jackknife_cf, jackknife_cf_err,
                                   iterations=jackknife_jacks)
            self.save_cfs(save_steps_file, cf_keys=[name])

    # Now that we have all of the jackknifes, calculate the mean and
    # the scatter and store them together with the individual samples.
    jackknife_cf = np.nanmean(temp, axis=0)
    jackknife_cf_err = np.nanstd(temp, axis=0)
    self._cfs[name].set_cf(jackknife_cf, jackknife_cf_err,
                           iterations=jackknife_jacks)
def cf_bootstrap(self, n_boots=10, bootstrap_oversample=1,
                 random_oversample=None, estimator='landy-szalay',
                 save_steps_file=None, name='galaxy_bootstrap',
                 clobber=False):
    """Compute the correlation function with single-galaxy bootstrapping.

    For every boot, ``bootstrap_oversample * self._n_objects`` objects
    are drawn with replacement from the objects selected by
    ``self._use`` and passed to ``corr.two_point_angular``.  The mean and
    standard deviation (``np.nanmean`` / ``np.nanstd``) over the boots
    are stored in ``self._cfs[name]`` together with the RR counts.

    Parameters
    ----------
    n_boots : int
        Number of bootstrap realisations.
    bootstrap_oversample : int
        Multiplier on the number of objects drawn per boot.
    random_oversample : optional
        Forwarded unchanged to the internal setup check.
    estimator : str
        Forwarded to ``corr.two_point_angular`` as ``method``.
    save_steps_file : optional
        When not None, the running result is stored and
        ``self.save_cfs`` is called after every boot.
    name : str
        Key under which the result is stored in ``self._cfs``.
    clobber : bool
        Allow an existing entry ``self._cfs[name]`` to be replaced.

    Raises
    ------
    ValueError
        If ``name`` already exists and ``clobber`` is False.
    """
    if (name in self._cfs) and not clobber:
        raise ValueError("CorrelationFunction.cf_bootstrap says: "
                         "There's already a CF by that name. Please "
                         "choose another or overwrite by calling with "
                         "clobber=True")

    # Check that everything is set up.
    self.__check_cf_setup(need_subregions=False, check_trees=False,
                          random_oversample=random_oversample)

    # Make a new CorrelationFunction instance and set the basic info.
    info = {'name': name,
            'cf_type': 'single_galaxy_bootstrap',
            'ngals': self._n_objects,
            'theta_bin_object': copy.deepcopy(self._theta_bins),
            'estimator': estimator}
    self._cfs[name] = cfclass.CorrelationFunction(**info)
    centers, edges = self._cfs[name].get_thetas(unit='degrees')
    nbins = len(centers)

    # Make an array so it's easy to average over the boots.
    temp = np.zeros((n_boots, nbins))

    # The RR counts returned by one boot are handed back in for the next
    # so they don't have to be calculated every time.
    rr = None

    # A holder for the boots that will be passed to the
    # CorrelationFunction as the iterations.
    bootstrap_boots = {}

    # The usable coordinates do not change between boots.
    ra_use = self._ra[self._use]
    dec_use = self._dec[self._use]

    print("AngularCatalog.cf_bootstrap says: doing a bootstrap "
          "CF calculation")

    # Loop through the boots.
    for i in np.arange(n_boots):
        # Give a progress report (single-string print() behaves
        # identically on Python 2 and 3).
        print("calculating boot %s" % i)

        # Choose the right number of galaxies *with replacement*.
        # NOTE(review): assumes self._n_objects equals the number of
        # objects selected by self._use -- confirm.
        ind = np.random.randint(0, self._n_objects,
                                bootstrap_oversample * self._n_objects)
        ra_b = ra_use[ind]
        dec_b = dec_use[ind]

        # Calculate this boot.
        # NOTE(review): BT_D=self._data_tree is passed alongside the
        # resampled coordinates; if corr.two_point_angular prefers a
        # supplied tree over ra/dec, every boot would use the full
        # catalogue -- confirm against corr.
        bootstrap_boots[i], rr = corr.two_point_angular(
            ra_b, dec_b, edges,
            BT_D=self._data_tree, BT_R=self._random_tree,
            method=estimator,
            ra_R=self._ra_random, dec_R=self._dec_random,
            RR=rr, return_RR=True)

        # Store what we have.
        temp[i] = bootstrap_boots[i]
        if save_steps_file is not None:
            # Put the running result on the CorrelationFunction before
            # saving; it used to be computed and then discarded.
            bootstrap_cf = np.nanmean(temp[0:i+1], axis=0)
            bootstrap_cf_err = np.nanstd(temp[0:i+1], axis=0)
            self._cfs[name].set_cf(bootstrap_cf, bootstrap_cf_err,
                                   iterations=bootstrap_boots)
            self.save_cfs(save_steps_file, cf_keys=[name])

    # Now we're done - do the final storage.
    bootstrap_cf = np.nanmean(temp, axis=0)
    bootstrap_cf_err = np.nanstd(temp, axis=0)
    self._cfs[name].set_cf(bootstrap_cf, bootstrap_cf_err,
                           iterations=bootstrap_boots)
    self._cfs[name].set_counts(RR=rr)