def test_fits():
    try:
        import fitsio
    except ImportError:
        print('Skipping FITS tests, since fitsio is not installed')
        return

    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data','Aardvark.fit')
    config = treecorr.read_config('Aardvark.yaml')
    config['verbose'] = 1

    # Just test a few random particular values
    cat1 = treecorr.Catalog(file_name, config)
    np.testing.assert_equal(len(cat1.ra), 390935)
    np.testing.assert_equal(cat1.nobj, 390935)
    np.testing.assert_almost_equal(cat1.ra[0], 56.4195 * (pi/180.))
    np.testing.assert_almost_equal(cat1.ra[390934], 78.4782 * (pi/180.))
    np.testing.assert_almost_equal(cat1.dec[290333], 83.1579 * (pi/180.))
    np.testing.assert_almost_equal(cat1.g1[46392], 0.0005066675)
    np.testing.assert_almost_equal(cat1.g2[46392], -0.0001006742)
    np.testing.assert_almost_equal(cat1.k[46392], -0.0008628797)

    # The catalog doesn't have x, y, or w, but test that functionality as well.
    del config['ra_col']
    del config['dec_col']
    config['x_col'] = 'RA'
    config['y_col'] = 'DEC'
    config['w_col'] = 'MU'
    config['flag_col'] = 'INDEX'
    config['ignore_flag'] = 64
    cat2 = treecorr.Catalog(file_name, config)
    np.testing.assert_almost_equal(cat2.x[390934], 78.4782, decimal=4)
    np.testing.assert_almost_equal(cat2.y[290333], 83.1579, decimal=4)
    np.testing.assert_almost_equal(cat2.w[46392], 0.)         # index = 1200379
    np.testing.assert_almost_equal(cat2.w[46393], 0.9995946)  # index = 1200386

    # Test using a limited set of rows
    config['first_row'] = 101
    config['last_row'] = 50000
    cat3 = treecorr.Catalog(file_name, config)
    np.testing.assert_equal(len(cat3.x), 49900)
    np.testing.assert_equal(cat3.ntot, 49900)
    np.testing.assert_equal(cat3.nobj, sum(cat3.w != 0))
    np.testing.assert_equal(cat3.sumw, sum(cat3.w))
    np.testing.assert_equal(cat3.sumw, sum(cat2.w[100:50000]))
    np.testing.assert_almost_equal(cat3.g1[46292], 0.0005066675)
    np.testing.assert_almost_equal(cat3.g2[46292], -0.0001006742)
    np.testing.assert_almost_equal(cat3.k[46292], -0.0008628797)

    cat4 = treecorr.read_catalogs(config, key='file_name', is_rand=True)[0]
    np.testing.assert_equal(len(cat4.x), 49900)
    np.testing.assert_equal(cat4.ntot, 49900)
    np.testing.assert_equal(cat4.nobj, sum(cat4.w != 0))
    np.testing.assert_equal(cat4.sumw, sum(cat4.w))
    np.testing.assert_equal(cat4.sumw, sum(cat2.w[100:50000]))
    assert cat4.g1 is None
    assert cat4.g2 is None
    assert cat4.k is None
def test_direct_perp():
    # This is the same as the above test, but using the perpendicular distance metric
    get_from_wiki('nn_perp_data.dat')
    get_from_wiki('nn_perp_rand.dat')
    ngal = 100
    s = 10.
    numpy.random.seed(8675309)
    x1 = numpy.random.normal(312, s, (ngal,) )
    y1 = numpy.random.normal(728, s, (ngal,) )
    z1 = numpy.random.normal(-932, s, (ngal,) )
    r1 = numpy.sqrt( x1*x1 + y1*y1 + z1*z1 )
    dec1 = numpy.arcsin(z1/r1)
    ra1 = numpy.arctan2(y1,x1)
    cat1 = treecorr.Catalog(ra=ra1, dec=dec1, r=r1, ra_units='rad', dec_units='rad')

    x2 = numpy.random.normal(312, s, (ngal,) )
    y2 = numpy.random.normal(728, s, (ngal,) )
    z2 = numpy.random.normal(-932, s, (ngal,) )
    r2 = numpy.sqrt( x2*x2 + y2*y2 + z2*z2 )
    dec2 = numpy.arcsin(z2/r2)
    ra2 = numpy.arctan2(y2,x2)
    cat2 = treecorr.Catalog(ra=ra2, dec=dec2, r=r2, ra_units='rad', dec_units='rad')

    min_sep = 1.
    max_sep = 50.
    nbins = 50
    dd = treecorr.NNCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0.)
    dd.process(cat1, cat2, metric='Rperp')
    print('dd.npairs = ',dd.npairs)

    log_min_sep = numpy.log(min_sep)
    log_max_sep = numpy.log(max_sep)
    true_npairs = numpy.zeros(nbins)
    bin_size = (log_max_sep - log_min_sep) / nbins
    for i in range(ngal):
        for j in range(ngal):
            rsq = (x1[i]-x2[j])**2 + (y1[i]-y2[j])**2 + (z1[i]-z2[j])**2
            rsq -= (r1[i] - r2[j])**2
            logr = 0.5 * numpy.log(rsq)
            k = int(numpy.floor( (logr-log_min_sep) / bin_size ))
            if k < 0: continue
            if k >= nbins: continue
            true_npairs[k] += 1

    print('true_npairs = ',true_npairs)
    print('diff = ',dd.npairs - true_npairs)
    numpy.testing.assert_array_equal(dd.npairs, true_npairs)

    # Can also specify coords directly as x,y,z
    cat1 = treecorr.Catalog(x=x1, y=y1, z=z1)
    cat2 = treecorr.Catalog(x=x2, y=y2, z=z2)
    dd.process(cat1, cat2, metric='Rperp')
    numpy.testing.assert_array_equal(dd.npairs, true_npairs)
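# Note (illustrative sketch, not part of the original test): the brute-force loop
# above encodes the Rperp metric being tested: the line-of-sight separation
# (r1 - r2) is subtracted in quadrature from the full 3-D chord separation.
# The helper name rperp_sep is ours, not a TreeCorr function.
def rperp_sep(p1, r1, p2, r2):
    """Return rp with rp^2 = d^2 - (r1 - r2)^2, where d is the 3-D distance
    between the points and r1, r2 are their distances from the observer.
    """
    d2 = numpy.sum((numpy.asarray(p1) - numpy.asarray(p2))**2)
    return numpy.sqrt(d2 - (r1 - r2)**2)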
def test_fits():
    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data','Aardvark.fit')
    config = treecorr.read_config('Aardvark.yaml')
    config['verbose'] = 1

    # Just test a few random particular values
    cat1 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_equal(len(cat1.ra), 390935)
    numpy.testing.assert_equal(cat1.nobj, 390935)
    numpy.testing.assert_almost_equal(cat1.ra[0], 56.4195 * (pi/180.))
    numpy.testing.assert_almost_equal(cat1.ra[390934], 78.4782 * (pi/180.))
    numpy.testing.assert_almost_equal(cat1.dec[290333], 83.1579 * (pi/180.))
    numpy.testing.assert_almost_equal(cat1.g1[46392], 0.0005066675)
    numpy.testing.assert_almost_equal(cat1.g2[46392], -0.0001006742)
    numpy.testing.assert_almost_equal(cat1.k[46392], -0.0008628797)

    # The catalog doesn't have x, y, or w, but test that functionality as well.
    del config['ra_col']
    del config['dec_col']
    config['x_col'] = 'RA'
    config['y_col'] = 'DEC'
    config['w_col'] = 'MU'
    config['flag_col'] = 'INDEX'
    config['ignore_flag'] = 64
    cat2 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_almost_equal(cat2.x[390934], 78.4782, decimal=4)
    numpy.testing.assert_almost_equal(cat2.y[290333], 83.1579, decimal=4)
    numpy.testing.assert_almost_equal(cat2.w[46392], 0.)         # index = 1200379
    numpy.testing.assert_almost_equal(cat2.w[46393], 0.9995946)  # index = 1200386

    # Test using a limited set of rows
    config['first_row'] = 101
    config['last_row'] = 50000
    cat3 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_equal(len(cat3.x), 49900)
    numpy.testing.assert_equal(cat3.ntot, 49900)
    numpy.testing.assert_equal(cat3.nobj, sum(cat3.w != 0))
    numpy.testing.assert_equal(cat3.sumw, sum(cat3.w))
    numpy.testing.assert_equal(cat3.sumw, sum(cat2.w[100:50000]))
    numpy.testing.assert_almost_equal(cat3.g1[46292], 0.0005066675)
    numpy.testing.assert_almost_equal(cat3.g2[46292], -0.0001006742)
    numpy.testing.assert_almost_equal(cat3.k[46292], -0.0008628797)
def test_fits():
    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data', 'Aardvark.fit')
    config = treecorr.read_config('Aardvark.params')

    # Just test a few random particular values
    cat1 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_equal(len(cat1.ra), 390935)
    numpy.testing.assert_equal(cat1.nobj, 390935)
    numpy.testing.assert_almost_equal(cat1.ra[0], 56.4195 * (pi / 180.))
    numpy.testing.assert_almost_equal(cat1.ra[390934], 78.4782 * (pi / 180.))
    numpy.testing.assert_almost_equal(cat1.dec[290333], 83.1579 * (pi / 180.))
    numpy.testing.assert_almost_equal(cat1.g1[46392], 0.0005066675)
    numpy.testing.assert_almost_equal(cat1.g2[46392], -0.0001006742)
    numpy.testing.assert_almost_equal(cat1.k[46392], -0.0008628797)

    # The catalog doesn't have x, y, or w, but test that functionality as well.
    del config['ra_col']
    del config['dec_col']
    config['x_col'] = 'RA'
    config['y_col'] = 'DEC'
    config['w_col'] = 'MU'
    config['flag_col'] = 'INDEX'
    config['ignore_flag'] = 64
    cat2 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_almost_equal(cat2.x[390934], 78.4782, decimal=4)
    numpy.testing.assert_almost_equal(cat2.y[290333], 83.1579, decimal=4)
    numpy.testing.assert_almost_equal(cat2.w[46392], 0.)         # index = 1200379
    numpy.testing.assert_almost_equal(cat2.w[46393], 0.9995946)  # index = 1200386

    # Test using a limited set of rows
    config['first_row'] = 101
    config['last_row'] = 50000
    cat3 = treecorr.Catalog(file_name, config)
    numpy.testing.assert_equal(len(cat3.x), 49900)
    numpy.testing.assert_equal(cat3.ntot, 49900)
    numpy.testing.assert_equal(cat3.nobj, sum(cat3.w != 0))
    numpy.testing.assert_equal(cat3.sumw, sum(cat3.w))
    numpy.testing.assert_equal(cat3.sumw, sum(cat2.w[100:50000]))
    numpy.testing.assert_almost_equal(cat3.g1[46292], 0.0005066675)
    numpy.testing.assert_almost_equal(cat3.g2[46292], -0.0001006742)
    numpy.testing.assert_almost_equal(cat3.k[46292], -0.0008628797)
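# The config-dict construction above can equivalently be written with explicit
# keyword arguments.  A minimal sketch (ours) using the Aardvark column names
# (RA, DEC, GAMMA1, GAMMA2, KAPPA); it mirrors the kind of mapping that
# Aardvark.params sets up, but does not reproduce its exact option values.
def fits_catalog_kwargs_sketch():
    file_name = os.path.join('data', 'Aardvark.fit')
    cat = treecorr.Catalog(file_name,
                           ra_col='RA', dec_col='DEC',
                           ra_units='deg', dec_units='deg',
                           g1_col='GAMMA1', g2_col='GAMMA2', k_col='KAPPA')
    return cat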
def setup():
    from test_helper import get_from_wiki
    file_name = os.path.join('data', 'Aardvark.fit')
    patch_file = os.path.join('data', 'mpi_patches.fits')

    # Make sure we have Aardvark.fit
    get_from_wiki('Aardvark.fit')

    # And all the tests will use these patches.  Make them once and save them.
    # For a real-life example, this might be made once and saved.
    # Or it might be made from a smaller version of the catalog:
    # either with the every_nth option, or maybe on a redmagic catalog or similar,
    # which would be smaller than the full source catalog, etc.
    if not os.path.exists(patch_file):
        part_cat = treecorr.Catalog(file_name,
                                    ra_col='RA', dec_col='DEC',
                                    ra_units='deg', dec_units='deg',
                                    npatch=8)
        part_cat.write_patch_centers(patch_file)
        del part_cat
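# Reusing the saved centers (a sketch, under the same column assumptions as
# setup() above): passing patch_centers makes every catalog built this way share
# a consistent patch assignment, which is the point of writing the file once.
def load_with_patches():
    file_name = os.path.join('data', 'Aardvark.fit')
    patch_file = os.path.join('data', 'mpi_patches.fits')
    cat = treecorr.Catalog(file_name,
                           ra_col='RA', dec_col='DEC',
                           ra_units='deg', dec_units='deg',
                           patch_centers=patch_file)
    return cat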
def download():
    # The download can be a bit slow, and the files need to be merged to get something
    # that includes both ra/dec and e1,e2.  So we did this block once and uploaded
    # the results to the wiki.  Normal test running can just get the result from the wiki.

    # Download the public DES SV files (if not already downloaded)
    host = 'http://desdr-server.ncsa.illinois.edu/despublic/sva1_files/'
    lens_file = 'redmagic_sva1_public_v6.3_faint.fits.gz'
    get_from_wiki(lens_file, host=host)
    lens_file = os.path.join('data', lens_file)
    ngmix_file = 'sva1_gold_r1.0_ngmix.fits.gz'
    get_from_wiki(ngmix_file, host=host)
    ngmix_file = os.path.join('data', ngmix_file)
    info_file = 'sva1_gold_r1.0_wlinfo.fits.gz'
    get_from_wiki(info_file, host=host)
    info_file = os.path.join('data', info_file)

    source_file = os.path.join('data', 'sva1_gold_r1.0_merged.fits')
    if not os.path.exists(source_file):
        print('Reading ngmix_data')
        ngmix_data = fitsio.read(ngmix_file)
        print('Reading info_data')
        info_data = fitsio.read(info_file)

        col_names = ['RA', 'DEC']
        cols = [info_data[n] for n in col_names]          # These come from wlinfo
        col_names += ['E_1', 'E_2', 'W']
        cols += [ngmix_data[n] for n in col_names[2:]]    # These are in ngmix

        # combine the two sensitivity estimates
        col_names += ['SENS']
        cols += [(ngmix_data['SENS_1'] + ngmix_data['SENS_2']) / 2.]

        # Save time by cutting to only flag != 0 objects here.
        use = info_data['NGMIX_FLAG'] == 0
        print('total number of galaxies = ', len(use))
        print('number to use = ', np.sum(use))
        cols = [col[use] for col in cols]
        print('writing merged file: ', source_file)
        treecorr.util.gen_write(source_file, col_names, cols, file_type='FITS')

    return source_file, lens_file
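# A possible follow-on (illustrative only): load the merged source file into a
# Catalog using the column names written by download().  Whether the shear
# components need sign flips or the SENS correction applied depends on the
# catalog's conventions, so neither is asserted here.
def load_source_catalog(source_file):
    return treecorr.Catalog(source_file,
                            ra_col='RA', dec_col='DEC',
                            ra_units='deg', dec_units='deg',
                            g1_col='E_1', g2_col='E_2', w_col='W')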
def test_fits():
    try:
        import fitsio
    except ImportError:
        print('Skipping FITS tests, since fitsio is not installed')
        return

    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data','Aardvark.fit')
    config = treecorr.read_config('Aardvark.yaml')
    config['verbose'] = 1
    config['kk_file_name'] = 'kk.fits'
    config['gg_file_name'] = 'gg.fits'

    # Just test a few random particular values
    cat1 = treecorr.Catalog(file_name, config)
    np.testing.assert_equal(len(cat1.ra), 390935)
    np.testing.assert_equal(cat1.nobj, 390935)
    np.testing.assert_almost_equal(cat1.ra[0], 56.4195 * (pi/180.))
    np.testing.assert_almost_equal(cat1.ra[390934], 78.4782 * (pi/180.))
    np.testing.assert_almost_equal(cat1.dec[290333], 83.1579 * (pi/180.))
    np.testing.assert_almost_equal(cat1.g1[46392], 0.0005066675)
    np.testing.assert_almost_equal(cat1.g2[46392], -0.0001006742)
    np.testing.assert_almost_equal(cat1.k[46392], -0.0008628797)

    assert_raises(ValueError, treecorr.Catalog, file_name, config, ra_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, dec_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, r_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, w_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, wpos_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, flag_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, k_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, ra_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, dec_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, x_col='x')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, y_col='y')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, z_col='z')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, ra_col='0', dec_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, k_col='0')
    assert_raises(TypeError, treecorr.Catalog, file_name, config, x_units='arcmin')
    assert_raises(TypeError, treecorr.Catalog, file_name, config, y_units='arcmin')
    del config['ra_units']
    assert_raises(TypeError, treecorr.Catalog, file_name, config)
    del config['dec_units']
    assert_raises(TypeError, treecorr.Catalog, file_name, config, ra_units='deg')

    # The catalog doesn't have x, y, or w, but test that functionality as well.
    del config['ra_col']
    del config['dec_col']
    config['x_col'] = 'RA'
    config['y_col'] = 'DEC'
    config['w_col'] = 'MU'
    config['flag_col'] = 'INDEX'
    config['ignore_flag'] = 64
    cat2 = treecorr.Catalog(file_name, config)
    np.testing.assert_almost_equal(cat2.x[390934], 78.4782, decimal=4)
    np.testing.assert_almost_equal(cat2.y[290333], 83.1579, decimal=4)
    np.testing.assert_almost_equal(cat2.w[46392], 0.)         # index = 1200379
    np.testing.assert_almost_equal(cat2.w[46393], 0.9995946)  # index = 1200386

    assert_raises(ValueError, treecorr.Catalog, file_name, config, x_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, y_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, z_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, ra_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, dec_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, r_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, w_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, wpos_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, flag_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, k_col='invalid')

    # Test using a limited set of rows
    config['first_row'] = 101
    config['last_row'] = 50000
    cat3 = treecorr.Catalog(file_name, config)
    np.testing.assert_equal(len(cat3.x), 49900)
    np.testing.assert_equal(cat3.ntot, 49900)
    np.testing.assert_equal(cat3.nobj, sum(cat3.w != 0))
    np.testing.assert_equal(cat3.sumw, sum(cat3.w))
    np.testing.assert_equal(cat3.sumw, sum(cat2.w[100:50000]))
    np.testing.assert_almost_equal(cat3.g1[46292], 0.0005066675)
    np.testing.assert_almost_equal(cat3.g2[46292], -0.0001006742)
    np.testing.assert_almost_equal(cat3.k[46292], -0.0008628797)

    cat4 = treecorr.read_catalogs(config, key='file_name', is_rand=True)[0]
    np.testing.assert_equal(len(cat4.x), 49900)
    np.testing.assert_equal(cat4.ntot, 49900)
    np.testing.assert_equal(cat4.nobj, sum(cat4.w != 0))
    np.testing.assert_equal(cat4.sumw, sum(cat4.w))
    np.testing.assert_equal(cat4.sumw, sum(cat2.w[100:50000]))
    assert cat4.g1 is None
    assert cat4.g2 is None
    assert cat4.k is None

    do_pickle(cat1)
    do_pickle(cat2)
    do_pickle(cat3)
    do_pickle(cat4)

    assert_raises(ValueError, treecorr.Catalog, file_name, config, first_row=-10)
    assert_raises(ValueError, treecorr.Catalog, file_name, config, first_row=0)
    assert_raises(ValueError, treecorr.Catalog, file_name, config, first_row=60000)
    assert_raises(ValueError, treecorr.Catalog, file_name, config, first_row=50001)

    assert_raises(TypeError, treecorr.read_catalogs, config)
    assert_raises(TypeError, treecorr.read_catalogs, config, key='file_name', list_key='file_name')

    # If gg output not given, it is still invalid to only have one or the other of g1,g2.
    del config['gg_file_name']
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='0')
    assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='0')
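# Note on flag_col/ignore_flag above (our own illustration, not TreeCorr code):
# the cut acts as a bitmask on the flag column, zeroing the weight of any row
# whose flag shares a bit with ignore_flag (64 here) rather than dropping it.
# That is why cat2.w[46392] == 0 while the row is still present in the catalog.
def ignore_flag_sketch():
    flag = np.array([0, 64, 65, 2, 66])
    w = np.ones(5)
    w[(flag & 64) != 0] = 0.
    assert list(w) == [1., 0., 0., 1., 0.]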
def test_dessv():
    try:
        import fitsio
    except ImportError:
        print('Skipping dessv test, since fitsio is not installed')
        return

    #treecorr.set_omp_threads(1);
    get_from_wiki('des_sv.fits')
    file_name = os.path.join('data', 'des_sv.fits')
    cat = treecorr.Catalog(file_name, ra_col='ra', dec_col='dec',
                           ra_units='deg', dec_units='deg')

    # Use an odd number to make sure we force some of the shuffle bits in InitializeCenters
    # to happen.
    npatch = 43
    field = cat.getNField()
    t0 = time.time()
    patches = field.run_kmeans(npatch)
    t1 = time.time()
    print('patches = ', np.unique(patches))
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch - 1

    # KMeans minimizes the total inertia.
    # Check this value and the rms size, which should also be quite small.
    xyz = np.array([cat.x, cat.y, cat.z]).T
    cen = np.array([xyz[patches == i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches == i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches == i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches == i) for i in range(npatch)])
    print('With standard algorithm:')
    print('time = ', t1 - t0)
    print('total inertia = ', np.sum(inertia))
    print('mean inertia = ', np.mean(inertia))
    print('rms inertia = ', np.std(inertia))
    print('mean size = ', np.mean(sizes))
    print('rms size = ', np.std(sizes))
    assert np.sum(inertia) < 200.    # This is specific to this particular field and npatch.
    assert np.std(inertia) < 0.3 * np.mean(inertia)  # rms is usually < 0.2 * mean
    assert np.std(sizes) < 0.1 * np.mean(sizes)      # sizes have even less spread usually.

    # Should all have similar number of points.  Nothing is required here though.
    print('mean counts = ', np.mean(counts))
    print('min counts = ', np.min(counts))
    print('max counts = ', np.max(counts))

    # Check the alternate algorithm.  rms inertia should be lower.
    t0 = time.time()
    patches = field.run_kmeans(npatch, alt=True)
    t1 = time.time()
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch - 1

    cen = np.array([xyz[patches == i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches == i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches == i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches == i) for i in range(npatch)])
    print('With alternate algorithm:')
    print('time = ', t1 - t0)
    print('total inertia = ', np.sum(inertia))
    print('mean inertia = ', np.mean(inertia))
    print('rms inertia = ', np.std(inertia))
    print('mean size = ', np.mean(sizes))
    print('rms size = ', np.std(sizes))
    assert np.sum(inertia) < 200.   # Total shouldn't increase much. (And often decreases.)
    assert np.std(inertia) < 0.1 * np.mean(inertia)  # rms should be even smaller here.
    assert np.std(sizes) < 0.1 * np.mean(sizes)      # This is only a little bit smaller.

    # This doesn't keep the counts as equal as the standard algorithm.
    print('mean counts = ', np.mean(counts))
    print('min counts = ', np.min(counts))
    print('max counts = ', np.max(counts))

    # Finally, use a field with lots of top level cells to check the other branch in
    # InitializeCenters.
    field = cat.getNField(min_top=10)
    t0 = time.time()
    patches = field.run_kmeans(npatch)
    t1 = time.time()
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch - 1

    cen = np.array([xyz[patches == i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches == i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches == i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches == i) for i in range(npatch)])

    # This doesn't give as good an initialization, so these are a bit worse usually.
    print('With min_top=10:')
    print('time = ', t1 - t0)
    print('total inertia = ', np.sum(inertia))
    print('mean inertia = ', np.mean(inertia))
    print('rms inertia = ', np.std(inertia))
    print('mean size = ', np.mean(sizes))
    print('rms size = ', np.std(sizes))
    assert np.sum(inertia) < 210.
    assert np.std(inertia) < 0.4 * np.mean(inertia)  # I've seen over 0.3 x mean here.
    assert np.std(sizes) < 0.15 * np.mean(sizes)
    print('mean counts = ', np.mean(counts))
    print('min counts = ', np.min(counts))
    print('max counts = ', np.max(counts))
def test_perp_minmax():
    """This test is based on a bug report from Erika Wagoner where the lowest bins were
    getting spuriously high w(rp) values.  It stemmed from a subtlety about how large
    rp can be compared to minsep.  The maximum rp between points in two cells can be
    more than the rp between the cell centers plus s1 + s2.  So this test checks that
    when the min and max are expanded a bit, the number of pairs doesn't change much
    in the bins that used to be the min and max.
    """
    # Just use Erika's files for data and rand.
    config = {
        'ra_col' : 1,
        'dec_col' : 2,
        'ra_units' : 'deg',
        'dec_units' : 'deg',
        'r_col' : 3,
        'min_sep' : 20,
        'bin_size' : 0.036652,
        'nbins' : 50,
        'verbose' : 1
    }

    # Speed up for nosetests runs
    if __name__ != "__main__":
        config['nbins'] = 5
        config['bin_size'] = 0.1

    get_from_wiki('nn_perp_data.dat')
    dcat = treecorr.Catalog('data/nn_perp_data.dat', config)

    dd1 = treecorr.NNCorrelation(config)
    dd1.process(dcat, metric='Rperp')

    lower_min_sep = config['min_sep'] * numpy.exp(-2.*config['bin_size'])
    more_nbins = config['nbins'] + 4
    dd2 = treecorr.NNCorrelation(config, min_sep=lower_min_sep, nbins=more_nbins)
    dd2.process(dcat, metric='Rperp')

    print('dd1 npairs = ',dd1.npairs)
    print('dd2 npairs = ',dd2.npairs[2:-2])
    # First a basic sanity check.  The values not near the edge should be identical.
    numpy.testing.assert_equal(dd1.npairs[2:-2], dd2.npairs[4:-4])
    # The edge bins may differ slightly from the binning approximations (bin_slop and such),
    # but the differences should be very small.  (When Erika reported the problem, the
    # differences were a few percent, which ended up making a big difference in the
    # correlation function.)
    numpy.testing.assert_almost_equal(dd1.npairs / dd2.npairs[2:-2], 1., decimal=4)

    if __name__ == '__main__':
        # If we're running from the command line, go ahead and finish the calculation.
        # This catalog has 10^6 objects, which takes quite a while.  I should really
        # investigate how to speed up the Rperp distance calculation.  Probably by
        # having a faster over- and under-estimate first, and then only doing the full
        # calculation when it seems like we will actually need it.
        # Anyway, until then, let's not take forever by using last_row=200000
        get_from_wiki('nn_perp_rand.dat')
        rcat = treecorr.Catalog('data/nn_perp_rand.dat', config, last_row=200000)

        rr1 = treecorr.NNCorrelation(config)
        rr1.process(rcat, metric='Rperp')
        rr2 = treecorr.NNCorrelation(config, min_sep=lower_min_sep, nbins=more_nbins)
        rr2.process(rcat, metric='Rperp')
        print('rr1 npairs = ',rr1.npairs)
        print('rr2 npairs = ',rr2.npairs[2:-2])
        numpy.testing.assert_almost_equal(rr1.npairs / rr2.npairs[2:-2], 1., decimal=4)

        dr1 = treecorr.NNCorrelation(config)
        dr1.process(dcat, rcat, metric='Rperp')
        dr2 = treecorr.NNCorrelation(config, min_sep=lower_min_sep, nbins=more_nbins)
        dr2.process(dcat, rcat, metric='Rperp')
        print('dr1 npairs = ',dr1.npairs)
        print('dr2 npairs = ',dr2.npairs[2:-2])
        numpy.testing.assert_almost_equal(dr1.npairs / dr2.npairs[2:-2], 1., decimal=4)

        xi1, varxi1 = dd1.calculateXi(rr1, dr1)
        xi2, varxi2 = dd2.calculateXi(rr2, dr2)
        print('xi1 = ',xi1)
        print('xi2 = ',xi2[2:-2])
        numpy.testing.assert_almost_equal(xi1 / xi2[2:-2], 1., decimal=2)

        # Check that we get the same result with the corr2 executable.
        import subprocess
        corr2_exe = get_script_name('corr2')
        p = subprocess.Popen( [corr2_exe,"nn_rperp.yaml"] )
        p.communicate()
        corr2_output = numpy.genfromtxt(os.path.join('output','nn_rperp.out'),
                                        names=True, skip_header=1)
        print('xi = ',xi1)
        print('from corr2 output = ',corr2_output['xi'])
        print('ratio = ',corr2_output['xi']/xi1)
        print('diff = ',corr2_output['xi']-xi1)
        numpy.testing.assert_almost_equal(corr2_output['xi']/xi1, 1., decimal=3)
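# Toy numeric check (ours, not from the test) of the docstring's point: a point
# moving by 1 unit within a cell can change rp by more than 1 unit, so the naive
# bound rp_max <= rp(centers) + s1 + s2 does not hold for this metric, and the
# bin edges need extra slop.
def rperp_slop_sketch():
    def rp(p1, p2):
        d2 = numpy.sum((p1 - p2)**2)
        dr = numpy.sqrt(numpy.sum(p1**2)) - numpy.sqrt(numpy.sum(p2**2))
        return numpy.sqrt(d2 - dr**2)
    p1 = numpy.array([100., 0., 0.])
    p2 = numpy.array([130., 5., 0.])
    rp0 = rp(p1, p2)                            # ~4.38
    rp1 = rp(numpy.array([100., -1., 0.]), p2)  # ~5.53
    assert rp1 - rp0 > 1.   # a shift of 1 moved rp by ~1.14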
def test_aardvark():
    # Eric Suchyta did a brute force calculation of the Aardvark catalog, so it is useful to
    # compare the output from my code with that.
    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data', 'Aardvark.fit')
    config = treecorr.read_config('Aardvark.params')
    cat1 = treecorr.Catalog(file_name, config)
    gg = treecorr.GGCorrelation(config)
    gg.process(cat1)

    direct_file_name = os.path.join('data', 'Aardvark.direct')
    direct_data = numpy.genfromtxt(direct_file_name)
    direct_xip = direct_data[:, 3]
    direct_xim = direct_data[:, 4]

    #print('gg.xip = ',gg.xip)
    #print('direct.xip = ',direct_xip)
    xip_err = gg.xip - direct_xip
    print('xip_err = ', xip_err)
    print('max = ', max(abs(xip_err)))
    assert max(abs(xip_err)) < 2.e-7
    print('xip_im = ', gg.xip_im)
    print('max = ', max(abs(gg.xip_im)))
    assert max(abs(gg.xip_im)) < 3.e-7

    xim_err = gg.xim - direct_xim
    print('xim_err = ', xim_err)
    print('max = ', max(abs(xim_err)))
    assert max(abs(xim_err)) < 1.e-7
    print('xim_im = ', gg.xim_im)
    print('max = ', max(abs(gg.xim_im)))
    assert max(abs(gg.xim_im)) < 1.e-7

    # However, after some back and forth about the calculation, we concluded that Eric hadn't
    # done the spherical trig correctly to get the shears relative to the great circle joining
    # the two positions.  So let's compare with my own brute force calculation (i.e. using
    # bin_slop = 0):
    # This also has the advantage that the radial bins are done the same way -- uniformly
    # spaced in log of the chord distance, rather than the great circle distance.
    bs0_file_name = os.path.join('data', 'Aardvark.bs0')
    bs0_data = numpy.genfromtxt(bs0_file_name)
    bs0_xip = bs0_data[:, 2]
    bs0_xim = bs0_data[:, 3]

    #print('gg.xip = ',gg.xip)
    #print('bs0.xip = ',bs0_xip)
    xip_err = gg.xip - bs0_xip
    print('xip_err = ', xip_err)
    print('max = ', max(abs(xip_err)))
    assert max(abs(xip_err)) < 1.e-7

    xim_err = gg.xim - bs0_xim
    print('xim_err = ', xim_err)
    print('max = ', max(abs(xim_err)))
    assert max(abs(xim_err)) < 5.e-8

    # Check that we get the same result using the corr2 executable:
    # Note: This is the only test of the corr2 executable that we do with nosetests.
    # The other similar tests are blocked out with: if __name__ == '__main__':
    import subprocess
    corr2_exe = get_script_name('corr2')
    p = subprocess.Popen([corr2_exe, "Aardvark.params"])
    p.communicate()
    corr2_output = numpy.genfromtxt(os.path.join('output', 'Aardvark.out'), names=True)
    print('gg.xip = ', gg.xip)
    print('from corr2 output = ', corr2_output['xip'])
    print('ratio = ', corr2_output['xip'] / gg.xip)
    print('diff = ', corr2_output['xip'] - gg.xip)
    numpy.testing.assert_almost_equal(corr2_output['xip'] / gg.xip, 1., decimal=3)
    print('gg.xim = ', gg.xim)
    print('from corr2 output = ', corr2_output['xim'])
    print('ratio = ', corr2_output['xim'] / gg.xim)
    print('diff = ', corr2_output['xim'] - gg.xim)
    numpy.testing.assert_almost_equal(corr2_output['xim'] / gg.xim, 1., decimal=3)
    print('xip_im from corr2 output = ', corr2_output['xip_im'])
    print('max err = ', max(abs(corr2_output['xip_im'])))
    assert max(abs(corr2_output['xip_im'])) < 3.e-7
    print('xim_im from corr2 output = ', corr2_output['xim_im'])
    print('max err = ', max(abs(corr2_output['xim_im'])))
    assert max(abs(corr2_output['xim_im'])) < 1.e-7

    # As bin_slop decreases, the agreement should get even better.
    # This test is slow, so only do it if running test_gg.py directly.
    if __name__ == '__main__':
        config['bin_slop'] = 0.2
        gg = treecorr.GGCorrelation(config)
        gg.process(cat1)
        #print('gg.xip = ',gg.xip)
        #print('bs0.xip = ',bs0_xip)
        xip_err = gg.xip - bs0_xip
        print('xip_err = ', xip_err)
        print('max = ', max(abs(xip_err)))
        assert max(abs(xip_err)) < 1.e-8
        xim_err = gg.xim - bs0_xim
        print('xim_err = ', xim_err)
        print('max = ', max(abs(xim_err)))
        assert max(abs(xim_err)) < 1.e-8
def test_aardvark():
    # Eric Suchyta did a brute force calculation of the Aardvark catalog, so it is useful to
    # compare the output from my code with that.
    get_from_wiki('Aardvark.fit')
    file_name = os.path.join('data','Aardvark.fit')
    config = treecorr.read_config('Aardvark.yaml')
    cat1 = treecorr.Catalog(file_name, config)
    gg = treecorr.GGCorrelation(config)
    gg.process(cat1)

    direct_file_name = os.path.join('data','Aardvark.direct')
    direct_data = numpy.genfromtxt(direct_file_name)
    direct_xip = direct_data[:,3]
    direct_xim = direct_data[:,4]

    #print('gg.xip = ',gg.xip)
    #print('direct.xip = ',direct_xip)
    xip_err = gg.xip - direct_xip
    print('xip_err = ',xip_err)
    print('max = ',max(abs(xip_err)))
    assert max(abs(xip_err)) < 2.e-7
    print('xip_im = ',gg.xip_im)
    print('max = ',max(abs(gg.xip_im)))
    assert max(abs(gg.xip_im)) < 3.e-7

    xim_err = gg.xim - direct_xim
    print('xim_err = ',xim_err)
    print('max = ',max(abs(xim_err)))
    assert max(abs(xim_err)) < 1.e-7
    print('xim_im = ',gg.xim_im)
    print('max = ',max(abs(gg.xim_im)))
    assert max(abs(gg.xim_im)) < 1.e-7

    # However, after some back and forth about the calculation, we concluded that Eric hadn't
    # done the spherical trig correctly to get the shears relative to the great circle joining
    # the two positions.  So let's compare with my own brute force calculation (i.e. using
    # bin_slop = 0):
    # This also has the advantage that the radial bins are done the same way -- uniformly
    # spaced in log of the chord distance, rather than the great circle distance.
    bs0_file_name = os.path.join('data','Aardvark.bs0')
    bs0_data = numpy.genfromtxt(bs0_file_name)
    bs0_xip = bs0_data[:,2]
    bs0_xim = bs0_data[:,3]

    #print('gg.xip = ',gg.xip)
    #print('bs0.xip = ',bs0_xip)
    xip_err = gg.xip - bs0_xip
    print('xip_err = ',xip_err)
    print('max = ',max(abs(xip_err)))
    assert max(abs(xip_err)) < 1.e-7

    xim_err = gg.xim - bs0_xim
    print('xim_err = ',xim_err)
    print('max = ',max(abs(xim_err)))
    assert max(abs(xim_err)) < 5.e-8

    # Check that we get the same result using the corr2 executable:
    # Note: This is the only test of the corr2 executable that we do with nosetests.
    # The other similar tests are blocked out with: if __name__ == '__main__':
    import subprocess
    corr2_exe = get_script_name('corr2')
    p = subprocess.Popen( [corr2_exe,"Aardvark.yaml"] )
    p.communicate()
    corr2_output = numpy.genfromtxt(os.path.join('output','Aardvark.out'), names=True)
    print('gg.xip = ',gg.xip)
    print('from corr2 output = ',corr2_output['xip'])
    print('ratio = ',corr2_output['xip']/gg.xip)
    print('diff = ',corr2_output['xip']-gg.xip)
    numpy.testing.assert_almost_equal(corr2_output['xip']/gg.xip, 1., decimal=3)
    print('gg.xim = ',gg.xim)
    print('from corr2 output = ',corr2_output['xim'])
    print('ratio = ',corr2_output['xim']/gg.xim)
    print('diff = ',corr2_output['xim']-gg.xim)
    numpy.testing.assert_almost_equal(corr2_output['xim']/gg.xim, 1., decimal=3)
    print('xip_im from corr2 output = ',corr2_output['xip_im'])
    print('max err = ',max(abs(corr2_output['xip_im'])))
    assert max(abs(corr2_output['xip_im'])) < 3.e-7
    print('xim_im from corr2 output = ',corr2_output['xim_im'])
    print('max err = ',max(abs(corr2_output['xim_im'])))
    assert max(abs(corr2_output['xim_im'])) < 1.e-7

    # As bin_slop decreases, the agreement should get even better.
    # This test is slow, so only do it if running test_gg.py directly.
    if __name__ == '__main__':
        config['bin_slop'] = 0.2
        gg = treecorr.GGCorrelation(config)
        gg.process(cat1)
        #print('gg.xip = ',gg.xip)
        #print('bs0.xip = ',bs0_xip)
        xip_err = gg.xip - bs0_xip
        print('xip_err = ',xip_err)
        print('max = ',max(abs(xip_err)))
        assert max(abs(xip_err)) < 1.e-8
        xim_err = gg.xim - bs0_xim
        print('xim_err = ',xim_err)
        print('max = ',max(abs(xim_err)))
        assert max(abs(xim_err)) < 1.e-8
def test_fits_reader():
    try:
        import fitsio
    except ImportError:
        print('Skipping FitsReader tests, since fitsio not installed.')
        return

    get_from_wiki('Aardvark.fit')
    r = FitsReader(os.path.join('data', 'Aardvark.fit'))

    # Check things not allowed if not in context
    with assert_raises(RuntimeError):
        r.read(['RA'], slice(0, 10, 2), 1)
    with assert_raises(RuntimeError):
        r.read('RA')
    with assert_raises(RuntimeError):
        r.row_count('DEC', 1)
    with assert_raises(RuntimeError):
        r.row_count()
    with assert_raises(RuntimeError):
        r.names(1)
    with assert_raises(RuntimeError):
        r.names()
    with assert_raises(RuntimeError):
        1 in r

    with r:
        assert_raises(ValueError, r.check_valid_ext, 'invalid')
        assert_raises(ValueError, r.check_valid_ext, 0)
        r.check_valid_ext('AARDWOLF')
        r.check_valid_ext(1)

        # Default ext is 1
        assert r.default_ext == 1

        # Default ext is "in" reader
        assert 1 in r

        # Probably can slice, but depends on installed fitsio version
        assert r.can_slice == (fitsio.__version__ > '1.0.6')

        s = slice(0, 10, 2)
        for ext in [1, 'AARDWOLF']:
            data = r.read(['RA'], s, ext)
            dec = r.read('DEC', s, ext)
            assert data['RA'].size == 5
            assert dec.size == 5

            assert r.row_count('RA', ext) == 390935
            assert r.row_count('GAMMA1', ext) == 390935
            assert set(r.names(ext)) == set("INDEX RA DEC Z EPSILON GAMMA1 GAMMA2 KAPPA MU".split())
            assert set(r.names(ext)) == set(r.names())

        # Can read without slice or ext to use defaults
        assert r.row_count() == 390935
        g2 = r.read('GAMMA2')
        assert len(g2) == 390935
        d = r.read(['KAPPA', 'MU'])
        assert len(d['KAPPA']) == 390935
        assert len(d['MU']) == 390935

        # check we can also index by an integer array, not just a slice
        d = r.read(['DEC'], np.arange(10), 'AARDWOLF')
        assert d.size == 10

    if sys.version_info < (3,): return  # mock only available on python 3
    from unittest import mock

    # Again check things not allowed if not in context
    with assert_raises(RuntimeError):
        r.read(['RA'], slice(0, 10, 2), 1)
    with assert_raises(RuntimeError):
        r.read('RA')
    with assert_raises(RuntimeError):
        r.row_count('DEC', 1)
    with assert_raises(RuntimeError):
        r.row_count()
    with assert_raises(RuntimeError):
        r.names(1)
    with assert_raises(RuntimeError):
        r.names()
    with assert_raises(RuntimeError):
        1 in r

    # Regardless of the system's fitsio version, check the two cases in code.
    with mock.patch('fitsio.__version__', '1.0.6'):
        with FitsReader(os.path.join('data', 'Aardvark.fit')) as r:
            assert not r.can_slice
    with mock.patch('fitsio.__version__', '1.1.0'):
        with FitsReader(os.path.join('data', 'Aardvark.fit')) as r:
            assert r.can_slice
def test_hdf_reader():
    try:
        import h5py
    except ImportError:
        print('Skipping HdfReader tests, since h5py not installed.')
        return

    get_from_wiki('Aardvark.hdf5')
    r = HdfReader(os.path.join('data', 'Aardvark.hdf5'))

    # Check things not allowed if not in context
    with assert_raises(RuntimeError):
        r.read(['RA'], slice(0, 10, 2), '/')
    with assert_raises(RuntimeError):
        r.read('RA')
    with assert_raises(RuntimeError):
        r.row_count('DEC', '/')
    with assert_raises(RuntimeError):
        r.row_count('DEC')
    with assert_raises(RuntimeError):
        r.names('/')
    with assert_raises(RuntimeError):
        r.names()
    with assert_raises(RuntimeError):
        '/' in r

    with r:
        # '/' is the only extension in this file.
        # TODO: Add an hdf5 example with other valid choices for ext
        assert_raises(ValueError, r.check_valid_ext, 'invalid')
        r.check_valid_ext('/')

        # Default ext is '/'
        assert r.default_ext == '/'

        # Default ext is "in" reader
        assert '/' in r

        # Can always slice
        assert r.can_slice

        s = slice(0, 10, 2)
        data = r.read(['RA'], s)
        dec = r.read('DEC', s)
        assert data['RA'].size == 5
        assert dec.size == 5

        assert r.row_count('RA') == 390935
        assert r.row_count('RA', '/') == 390935
        assert r.row_count('GAMMA1') == 390935
        # Unlike the other readers, this needs a column name.
        assert_raises(TypeError, r.row_count)

        assert set(r.names()) == set("INDEX RA DEC Z EPSILON GAMMA1 GAMMA2 KAPPA MU".split())
        assert set(r.names('/')) == set(r.names())

    # Again check things not allowed if not in context
    with assert_raises(RuntimeError):
        r.read(['RA'], slice(0, 10, 2), '/')
    with assert_raises(RuntimeError):
        r.read('RA')
    with assert_raises(RuntimeError):
        r.row_count('DEC', '/')
    with assert_raises(RuntimeError):
        r.row_count('DEC')
    with assert_raises(RuntimeError):
        r.names('/')
    with assert_raises(RuntimeError):
        r.names()
    with assert_raises(RuntimeError):
        '/' in r
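# Both readers share the same context-manager protocol, which is what the
# RuntimeError checks above enforce: all access must happen inside a `with`
# block.  A minimal usage sketch (same file and calls as the test above):
def hdf_reader_usage_sketch():
    with HdfReader(os.path.join('data', 'Aardvark.hdf5')) as r:
        ra = r.read('RA')          # full column from the default ext '/'
        nrows = r.row_count('RA')  # HdfReader needs a column name here
    return ra, nrows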
def test_dessv():
    try:
        import fitsio
    except ImportError:
        print('Skipping dessv test, since fitsio is not installed')
        return

    #treecorr.set_omp_threads(1);
    get_from_wiki('des_sv.fits')
    file_name = os.path.join('data','des_sv.fits')
    cat = treecorr.Catalog(file_name, ra_col='ra', dec_col='dec',
                           ra_units='deg', dec_units='deg')

    # Use an odd number to make sure we force some of the shuffle bits in InitializeCenters
    # to happen.
    npatch = 43
    field = cat.getNField()
    t0 = time.time()
    patches = field.run_kmeans(npatch)
    t1 = time.time()
    print('patches = ',np.unique(patches))
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch-1

    # KMeans minimizes the total inertia.
    # Check this value and the rms size, which should also be quite small.
    xyz = np.array([cat.x, cat.y, cat.z]).T
    cen = np.array([xyz[patches==i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches==i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches==i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches==i) for i in range(npatch)])
    print('With standard algorithm:')
    print('time = ',t1-t0)
    print('total inertia = ',np.sum(inertia))
    print('mean inertia = ',np.mean(inertia))
    print('rms inertia = ',np.std(inertia))
    print('mean size = ',np.mean(sizes))
    print('rms size = ',np.std(sizes))
    assert np.sum(inertia) < 200.  # This is specific to this particular field and npatch.
    assert np.std(inertia) < 0.2 * np.mean(inertia)  # rms is usually well below the mean
    assert np.std(sizes) < 0.1 * np.mean(sizes)      # sizes have even less spread usually.

    # Should all have similar number of points.  Nothing is required here though.
    print('mean counts = ',np.mean(counts))
    print('min counts = ',np.min(counts))
    print('max counts = ',np.max(counts))

    # Check the alternate algorithm.  rms inertia should be lower.
    t0 = time.time()
    patches = field.run_kmeans(npatch, alt=True)
    t1 = time.time()
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch-1

    cen = np.array([xyz[patches==i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches==i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches==i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches==i) for i in range(npatch)])
    print('With alternate algorithm:')
    print('time = ',t1-t0)
    print('total inertia = ',np.sum(inertia))
    print('mean inertia = ',np.mean(inertia))
    print('rms inertia = ',np.std(inertia))
    print('mean size = ',np.mean(sizes))
    print('rms size = ',np.std(sizes))
    assert np.sum(inertia) < 200.  # Total shouldn't increase much. (And often decreases.)
    assert np.std(inertia) < 0.1 * np.mean(inertia)  # rms should be even smaller here.
    assert np.std(sizes) < 0.1 * np.mean(sizes)      # This is only a little bit smaller.

    # This doesn't keep the counts as equal as the standard algorithm.
    print('mean counts = ',np.mean(counts))
    print('min counts = ',np.min(counts))
    print('max counts = ',np.max(counts))

    # Finally, use a field with lots of top level cells to check the other branch in
    # InitializeCenters.
    field = cat.getNField(min_top=10)
    t0 = time.time()
    patches = field.run_kmeans(npatch)
    t1 = time.time()
    assert len(patches) == cat.ntot
    assert min(patches) == 0
    assert max(patches) == npatch-1

    cen = np.array([xyz[patches==i].mean(axis=0) for i in range(npatch)])
    inertia = np.array([np.sum((xyz[patches==i] - cen[i])**2) for i in range(npatch)])
    sizes = np.array([np.mean((xyz[patches==i] - cen[i])**2) for i in range(npatch)])**0.5
    sizes *= 180. / np.pi * 60.  # convert to arcmin
    counts = np.array([np.sum(patches==i) for i in range(npatch)])

    # This doesn't give as good an initialization, so these are a bit worse usually.
    print('With min_top=10:')
    print('time = ',t1-t0)
    print('total inertia = ',np.sum(inertia))
    print('mean inertia = ',np.mean(inertia))
    print('rms inertia = ',np.std(inertia))
    print('mean size = ',np.mean(sizes))
    print('rms size = ',np.std(sizes))
    assert np.sum(inertia) < 210.
    assert np.std(inertia) < 0.4 * np.mean(inertia)  # I've seen over 0.3 x mean here.
    assert np.std(sizes) < 0.1 * np.mean(sizes)
    print('mean counts = ',np.mean(counts))
    print('min counts = ',np.min(counts))
    print('max counts = ',np.max(counts))