def run_fit_helper(
        true_comp,
        starcounts,
        measurement_error,
        burnin_step=None,
        run_name='default',
        trace_orbit_func=None,
):
    """
    Synthesise stars from `true_comp` and fit a single component to them.

    Output paths are labelled with the major Python version and `run_name`.
    Logging is directed to a per-run log file, the converted table is
    written to file, and the result of `gf.fit_comp` is returned unchanged.
    """
    label = (sys.version[0], run_name)
    fits_path = 'temp_data/{}_compfitter_{}.fits'.format(*label)
    log_path = 'logs/{}_compfitter_{}.log'.format(*label)
    plots_path = 'temp_plots/{}_compfitter_{}'.format(*label)

    logging.basicConfig(filename=log_path, filemode='w', level=logging.INFO)

    synth = SynthData(
        pars=true_comp.get_pars(),
        starcounts=starcounts,
        measurement_error=measurement_error,
    )
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(
        synth.table, write_table=True, filename=fits_path,
    )

    return gf.fit_comp(
        data=synth.table,
        plot_it=True,
        burnin_steps=burnin_step,
        plot_dir=plots_path,
        save_dir='temp_data/',
        trace_orbit_func=trace_orbit_func,
    )
def test_convertSynthTableToCart():
    """
    Check that the measured current-day cartesian values (with negligible
    measurement error) match the true current-day cartesian values.
    """
    age = 40.
    pars = np.array([
        [0., 0., 0., 0., 0., 0., 10., 5., age],
    ])

    # A single component of 50 stars with a negligible measurement error
    synth = SynthData(
        pars=pars,
        starcounts=[50],
        Components=SphereComponent,
        measurement_error=1e-10,
    )
    synth.synthesise_everything()

    # Convert (in place) astrometry to cartesian
    tabletool.convert_table_astro2cart(synth.table)

    # Each lower-case '<dim>_now' column is compared against the
    # upper-case column of the same dimension
    for axis in 'xyzuvw':
        now_col = axis + '_now'
        cart_col = axis.upper()
        assert np.allclose(synth.table[now_col], synth.table[cart_col])
def test_swigImplementation():
    """
    Compare the swigged C overlap implementation (`c_lno`) against the
    python one (`p_lno`) using stars synthesised from a single component.
    """
    dx = 2.
    dv = 2.
    # Diagonal covariance: dx**2 for the first three dimensions, dv**2
    # for the last three
    covmatrix = np.diag(np.repeat([dx**2, dv**2], 3))
    comp = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': covmatrix,
        'age': 1e-10,
    })

    nstars = 100
    synth = SynthData(pars=comp.get_pars(), starcounts=nstars)
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)
    star_data = tabletool.build_data_dict_from_table(synth.table)

    python_lnos = p_lno(
        comp.get_covmatrix(), comp.get_mean(),
        star_data['covs'], star_data['means'],
    )
    swig_lnos = c_lno(
        comp.get_covmatrix(), comp.get_mean(),
        star_data['covs'], star_data['means'], nstars,
    )

    assert np.allclose(python_lnos, swig_lnos)
    assert np.isfinite(python_lnos).all()
    assert np.isfinite(swig_lnos).all()
def test_different_component_forms():
    """Check a synthetic data set can mix components of different forms"""
    tiny_age = 1e-10

    sphere = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': np.eye(6) * 4,
        'age': tiny_age,
    })
    ellip = EllipComponent(attributes={
        'mean': np.full(6, 10.),
        'covmatrix': np.eye(6) * 9,
        'age': tiny_age,
    })

    starcounts = [100, 100]
    synth = SynthData(
        pars=[sphere.get_pars(), ellip.get_pars()],
        starcounts=starcounts,
        Components=[SphereComponent, EllipComponent],
    )
    synth.synthesise_everything()

    # Every requested star should appear in the table
    assert len(synth.table) == np.sum(starcounts)
def run_fit_helper(true_comp, starcounts, measurement_error,
                   burnin_step=None,
                   run_name='default',
                   trace_orbit_func=None,
                   ):
    """
    Build synthetic data from `true_comp`, then run `gf.fit_comp` on it.

    File names carry the major Python version and `run_name`. The
    converted table is stored under 'temp_data/', the log under 'logs/'
    and plots under 'temp_plots/'. Returns whatever `gf.fit_comp` returns.
    """
    stem = '{}_compfitter_{}'.format(sys.version[0], run_name)
    table_file = 'temp_data/' + stem + '.fits'
    plot_dir = 'temp_plots/' + stem

    logging.basicConfig(level=logging.INFO,
                        filename='logs/' + stem + '.log',
                        filemode='w')

    synthetic = SynthData(pars=true_comp.get_pars(),
                          starcounts=starcounts,
                          measurement_error=measurement_error)
    synthetic.synthesise_everything()
    tabletool.convert_table_astro2cart(synthetic.table,
                                       write_table=True,
                                       filename=table_file)

    fit_result = gf.fit_comp(data=synthetic.table,
                             plot_it=True,
                             burnin_steps=burnin_step,
                             plot_dir=plot_dir,
                             save_dir='temp_data/',
                             trace_orbit_func=trace_orbit_func)
    return fit_result
def test_background_component():
    """Create an artificial association of two stars at opposite corners
    of a 6D box, and check the background stars generated around them.

    The two stars sit at 0.25 and 0.75 in every dimension. The asserts
    expect the background to span roughly 0 to 1 in each dimension, and
    the table to hold `background_density` background stars on top of the
    two association stars.
    """
    background_density = 100
    narrow_dx = 1e-10
    narrow_dv = 1e-10
    tiny_age = 1e-10

    # One effectively point-like component per corner: upper first, then
    # lower
    corner_pars = [
        np.hstack((np.zeros(6) + centre, narrow_dx, narrow_dv, tiny_age))
        for centre in (0.75, 0.25)
    ]

    synth = SynthData(pars=corner_pars,
                      starcounts=[1, 1],
                      background_density=background_density)
    synth.generate_all_init_cartesian()

    # Skip the first two rows (the association stars) so that only the
    # background stars are inspected
    bg_means = tabletool.build_data_dict_from_table(
        synth.table[2:],
        main_colnames=[dim + '0' for dim in 'xyzuvw'],
        only_means=True,
    )

    assert np.allclose(0.5, np.mean(bg_means, axis=0), atol=0.1)
    assert np.allclose(1.0, np.max(bg_means, axis=0), atol=0.1)
    assert np.allclose(0.0, np.min(bg_means, axis=0), atol=0.1)
    assert len(synth.table) == background_density + 2
def test_swigImplementation():
    """
    Check that the swigged C implementation of the log overlaps agrees
    with the python implementation, and that both give finite values.
    """
    nstars = 100
    spread_pos = 2.
    spread_vel = 2.

    cov = np.identity(6)
    cov[:3, :3] *= spread_pos**2
    cov[3:, 3:] *= spread_vel**2

    attributes = {
        'mean': np.zeros(6),
        'covmatrix': cov,
        'age': 1e-10,
    }
    true_comp = SphereComponent(attributes=attributes)

    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    stars = tabletool.build_data_dict_from_table(synth_data.table)

    from_python = p_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                        stars['covs'], stars['means'])
    from_c = c_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   stars['covs'], stars['means'], nstars)

    assert np.allclose(from_python, from_c)
    for lnos in (from_python, from_c):
        assert np.isfinite(lnos).all()
def test_expectation():
    """
    Super basic: generate stars from two well separated components and
    check that `em.expectation` allocates each star to its own component.
    """
    age = 1e-5
    all_pars = [
        np.array([0, 0, 0, 0, 0, 0, 5., 2., age]),
        np.array([100., 0, 0, 20, 0, 0, 5., 2., age]),
    ]
    comps = [SphereComponent(pars) for pars in all_pars]

    starcounts = [100, 100]
    synth = SynthData(pars=all_pars, starcounts=starcounts)
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)

    # Rows of the first component get [1, 0], rows of the second [0, 1]
    expected_memb_probs = np.repeat(np.eye(2), starcounts, axis=0)

    fitted_memb_probs = em.expectation(
        tabletool.build_data_dict_from_table(synth.table),
        comps,
    )

    assert np.allclose(expected_memb_probs, fitted_memb_probs, atol=1e-10)
def test_convertSynthTableToCart():
    """
    Synthesise stars with a negligible measurement error, convert the
    astrometry to cartesian, and check the converted values reproduce
    the current-day cartesian values stored at synthesis.
    """
    settings = {
        'pars': np.array([
            [0., 0., 0., 0., 0., 0., 10., 5., 40.],
        ]),
        'starcounts': [50],
        'Components': SphereComponent,
        'measurement_error': 1e-10,
    }

    # Generate synthetic data
    synth_data = SynthData(**settings)
    synth_data.synthesise_everything()

    # Convert (in place) astrometry to cartesian
    tabletool.convert_table_astro2cart(synth_data.table)

    # Compare each '<dim>_now' column with its upper-case counterpart
    column_pairs = [(dim.lower() + '_now', dim) for dim in 'XYZUVW']
    for now_name, cart_name in column_pairs:
        assert np.allclose(synth_data.table[now_name],
                           synth_data.table[cart_name])
def test_pythonFuncs():
    """
    Check that the python overlap implementations agree with each other.

    `co1` and `co2` are evaluated star by star and compared against the
    exponential of the log overlaps from `p_lno`: first against the true
    component, then against a single star of the sample.

    TODO: remove the requirements of file, have data stored in file?
    """
    dx = 2.
    dv = 2.
    covmatrix = np.identity(6)
    covmatrix[:3, :3] *= dx ** 2
    covmatrix[3:, 3:] *= dv ** 2
    true_comp = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': covmatrix,
        'age': 1e-10,
    })

    synth = SynthData(pars=true_comp.get_pars(), starcounts=100)
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)
    star_data = tabletool.build_data_dict_from_table(synth.table)
    star_covs = star_data['covs']
    star_means = star_data['means']

    def assert_overlaps_agree(ref_cov, ref_mean):
        # Evaluate all three implementations against the same reference
        # distribution and require pairwise agreement
        first = []
        second = []
        for scov, smn in zip(star_covs, star_means):
            first.append(co1(ref_cov, ref_mean, scov, smn))
            second.append(co2(ref_cov, ref_mean, scov, smn))
        first = np.array(first)
        second = np.array(second)
        third = np.exp(p_lno(ref_cov, ref_mean, star_covs, star_means))
        assert np.allclose(first, second)
        assert np.allclose(second, third)
        assert np.allclose(first, third)

    # Test overlap with true component
    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()
    assert_overlaps_agree(group_cov, group_mean)

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    assert_overlaps_agree(star_covs[15], star_means[15])
def test_execution_simple_fit():
    """
    Smoke test: run a short fit of one component plus background.

    No assertion is made on the fitted values; the test only checks that
    the whole pipeline executes.
    """
    run_name = 'quickdirty'

    # Banner marking the start of this test in the log
    rule = 60 * '-'
    side = 15 * '-'
    logging.info(rule)
    logging.info(side + '{:^30}'.format('TEST: ' + run_name) + side)
    logging.info(rule)

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = savedir + 'log.log'
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV, age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcount = 100
    background_density = 1e-9
    ncomps = sphere_comp_pars.shape[0]

    synth = SynthData(
        pars=sphere_comp_pars,
        starcounts=[starcount],
        Components=SphereComponent,
        background_density=background_density,
    )
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)
    table = synth.table

    background_count = len(table) - starcount

    # Every star gets the same background log overlap
    table['background_log_overlap'] = (
        len(table) * [np.log(background_density)]
    )
    table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    # Deliberately short burnin and sampling to keep the run quick
    best_comps, med_and_spans, memb_probs = expectmax.fit_many_comps(
        data=table,
        ncomps=ncomps,
        rdir=savedir,
        burnin=10,
        sampling_steps=10,
        trace_orbit_func=dummy_trace_orbit_func,
        use_background=True,
        ignore_stable_comps=False,
        max_em_iterations=200,
    )
def test_generateInitXYZUVW():
    """Check that the initial xyzuvw of a large sample of stars averages
    to the mean of the component that generated them"""
    nstars = int(1e6)
    synth = SynthData(pars=PARS[:1],
                      starcounts=(nstars,),
                      Components=COMPONENTS)
    synth.generate_all_init_cartesian()

    comp = SphereComponent(PARS[0])

    # Initial positions and velocities are stored in the '<dim>0' columns
    init_colnames = [dim + '0' for dim in 'xyzuvw']
    sample_mean = np.mean(synth.extract_data_as_array(init_colnames), axis=0)

    assert np.allclose(comp.get_mean(), sample_mean, atol=0.1)
def test_generateInitXYZUVW():
    """Generate a million stars from the first component and check that
    their mean initial xyzuvw matches the component mean (to within 0.1)"""
    starcounts = (int(1e6),)
    colnames = [dim + '0' for dim in 'xyzuvw']

    synth_data = SynthData(
        pars=PARS[:1],
        starcounts=starcounts,
        Components=COMPONENTS,
    )
    synth_data.generate_all_init_cartesian()

    reference = SphereComponent(PARS[0])
    init_xyzuvw = synth_data.extract_data_as_array(colnames)

    expected_mean = reference.get_mean()
    measured_mean = np.mean(init_xyzuvw, axis=0)
    assert np.allclose(expected_mean, measured_mean, atol=0.1)
def test_lnprob_func():
    """
    Generate two components and a synthetic data set from each one.

    Confirms that each data set gives a larger lnprob for the component
    it was drawn from than for the other component, and that the
    mirrored combinations agree to within 20%.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }

    comps = {}
    data = {}
    for comp_name, comp_mean in comp_means.items():
        comp = SphereComponent(attributes={
            'mean': comp_mean,
            'covmatrix': comp_covmatrix,
            'age': tiny_age
        })
        synth = SynthData(pars=[comp.get_pars()],
                          starcounts=star_count,
                          measurement_error=measurement_error)
        synth.synthesise_everything()
        tabletool.convert_table_astro2cart(synth.table)
        data[comp_name] = tabletool.build_data_dict_from_table(synth.table)
        comps[comp_name] = comp

    def lnprob_of(comp_name, data_name):
        # lnprob of the data set `data_name` under component `comp_name`
        return likelihood.lnprob_func(pars=comps[comp_name].get_pars(),
                                      data=data[data_name])

    comp1_on_data1 = lnprob_of('comp1', 'comp1')
    comp2_on_data1 = lnprob_of('comp2', 'comp1')
    comp1_on_data2 = lnprob_of('comp1', 'comp2')
    comp2_on_data2 = lnprob_of('comp2', 'comp2')

    print(comp1_on_data1)
    print(comp2_on_data1)
    print(comp1_on_data2)
    print(comp2_on_data2)

    assert comp1_on_data1 > comp2_on_data1
    assert comp2_on_data2 > comp1_on_data2

    # Check that the different realisations only differ by 20%
    assert np.isclose(comp1_on_data1, comp2_on_data2, rtol=2e-1)
    assert np.isclose(comp1_on_data2, comp2_on_data1, rtol=2e-1)
def test_measureXYZUVW():
    """Check that measuring astrometry from xyzuvw_now behaves properly.

    An extremely compact component is used as the case study, so that
    all stars have more or less the same true values and the spread in
    the measured columns can be compared with the reported errors.
    """
    # Shrink the entries at indices 6, 7 and 8 of the first parameter set
    compact_pars = np.copy(PARS[0])
    for par_ix in (6, 7, 8):
        compact_pars[par_ix] = 1e-15

    synth = SynthData(pars=np.array([compact_pars]),
                      starcounts=[1000],
                      Components=COMPONENTS)
    synth.generate_all_init_cartesian()
    synth.project_stars()
    synth.measure_astrometry()

    for colname in SynthData.DEFAULT_ASTR_COLNAMES:
        error_col = colname + '_error'
        reference_error = synth.GERROR[error_col]

        # The reported error column should match the reference error
        assert np.allclose(reference_error, synth.table[error_col])

        # Check spread of data is similar to the reference error; a
        # large tolerance is used so a small number of stars suffices
        measured_spread = np.std(synth.table[colname])
        assert np.isclose(reference_error, measured_spread, rtol=1e-1)
def test_artificialMeasurement():
    """Ensure that scaling the measurement uncertainty scales the
    reported uncertainties appropriately, and that the offsets in the
    data due to error scale with the input error"""
    error_scales = [
        ('perf', 1e-10),
        ('good', 1e-1),
        ('norm', 1.0),
        ('bad', 1e1),
    ]
    scale_of = dict(error_scales)

    synths = {}
    for label, scale in error_scales:
        # Re-seed before each synthesis so every run starts from the
        # same random state
        np.random.seed(1)
        synth = SynthData(pars=PARS[:1],
                          starcounts=[100],
                          measurement_error=scale,
                          Components=COMPONENTS)
        synth.synthesise_everything()
        synths[label] = synth

    # Assert that measurement errors are stored correctly in columns
    # (every run except the first, 'perf')
    for label, scale in error_scales[1:]:
        assert np.allclose(
            synths[label].table['radial_velocity_error'],
            scale * SynthData.GERROR['radial_velocity_error'],
        )

    def mean_rv_offset(label):
        # Mean absolute radial velocity difference from the 'perf' run
        perfect_rv = synths['perf'].table['radial_velocity']
        measured_rv = synths[label].table['radial_velocity']
        return np.mean(np.abs(perfect_rv - measured_rv))

    # Get reference for degree of offset expected
    norm_offset = mean_rv_offset('norm')
    bad_offset = mean_rv_offset('bad')
    good_offset = mean_rv_offset('good')

    # Check the average offset scales with incorporated measurement error
    assert np.isclose(norm_offset * scale_of['bad'], bad_offset)
    assert np.isclose(norm_offset * scale_of['good'], good_offset)
def test_lnprob_func():
    """
    Build two components and draw a synthetic data set from each.

    Confirms that the lnprob of a data set is larger for the component
    that generated it than for the other one, and that the mirrored
    combinations agree to within 10%.
    """
    dim = 6
    tiny_age = 1e-10
    shared_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }

    comps = {}
    data = {}
    for name in comp_means:
        comps_attributes = {
            'mean': comp_means[name],
            'covmatrix': shared_covmatrix,
            'age': tiny_age
        }
        comp = SphereComponent(attributes=comps_attributes)
        synth_data = SynthData(pars=[comp.get_pars()],
                               starcounts=500,
                               measurement_error=1e-10)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[name] = tabletool.build_data_dict_from_table(synth_data.table)
        comps[name] = comp

    # lnprobs[(component, data set)], evaluated in a fixed order
    lnprobs = {}
    for data_name in ('comp1', 'comp2'):
        for comp_name in ('comp1', 'comp2'):
            lnprobs[(comp_name, data_name)] = likelihood.lnprob_func(
                pars=comps[comp_name].get_pars(),
                data=data[data_name],
            )

    assert lnprobs[('comp1', 'comp1')] > lnprobs[('comp2', 'comp1')]
    assert lnprobs[('comp2', 'comp2')] > lnprobs[('comp1', 'comp2')]

    # Check that the different realisations only differ by 10%
    assert np.isclose(lnprobs[('comp1', 'comp1')],
                      lnprobs[('comp2', 'comp2')], rtol=1e-1)
    assert np.isclose(lnprobs[('comp1', 'comp2')],
                      lnprobs[('comp2', 'comp1')], rtol=1e-1)
def test_initialisation():
    """Basic sanity check that SynthData stores its inputs as expected.

    Three ways of supplying the inputs are covered: the full lists of
    parameters and starcounts, a single parameter set with a scalar
    starcount, and a float starcount, which is expected to be reported
    as an integer count.
    """
    sd = SynthData(pars=PARS, starcounts=STARCOUNTS, Components=COMPONENTS)
    assert np.allclose(PARS, sd.pars)
    assert sd.ncomps == len(PARS)
    assert np.allclose(PARS[0], sd.components[0].get_pars())
    assert np.allclose(np.array(STARCOUNTS), sd.starcounts)

    # A single parameter set with a scalar starcount
    sd2 = SynthData(pars=PARS[0], starcounts=STARCOUNTS[0],
                    Components=COMPONENTS)
    assert np.allclose(np.array([STARCOUNTS[0]]), sd2.starcounts)

    # A float starcount
    starcounts = 50.
    sd3 = SynthData(pars=PARS[0], starcounts=starcounts,
                    Components=COMPONENTS)
    # Use the builtin int: the `np.int` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError
    assert np.allclose(np.array([int(starcounts)]), sd3.starcounts)
def test_multiple_synth_components():
    """Check initialising with multiple components works.

    A scalar starcount for two components must be rejected with an
    AssertionError. With matching starcounts, the table must hold every
    star, and the mean initial position of each block of rows must lie
    near the mean of the corresponding component.
    """
    age = 1e-10
    dx = 5.
    dv = 2.
    first_pars = np.array([10, 20, 30, 40, 50, 60, dx, dv, age])
    first_comp = SphereComponent(first_pars)
    second_pars = np.array([0., 0., 0, 0, 0, 0, dx, dv, age])
    second_comp = SphereComponent(second_pars)
    starcounts = [100, 100]
    all_pars = [first_pars, second_pars]

    # Two components but only one starcount: construction should fail
    try:
        SynthData(pars=all_pars,
                  starcounts=starcounts[0],
                  Components=SphereComponent)
    except AssertionError:
        pass
    else:
        raise UserWarning('AssertionError should have been thrown by synthdata')

    synth = SynthData(pars=all_pars,
                      starcounts=starcounts,
                      Components=SphereComponent)
    synth.synthesise_everything()

    assert len(synth.table) == np.sum(starcounts)

    init_means = tabletool.build_data_dict_from_table(
        synth.table,
        main_colnames=[dim + '0' for dim in 'xyzuvw'],
        only_means=True
    )
    split = starcounts[0]
    assert np.allclose(second_comp.get_mean(),
                       init_means[split:].mean(axis=0), atol=2.)
    assert np.allclose(first_comp.get_mean(),
                       init_means[:split].mean(axis=0), atol=2.)
def test_projectStars():
    """Check that the mean of stars after projection matches the mean of
    the component after projection.

    Stars are generated from the first component and projected to the
    current day. The mean of their '<dim>_now' columns is then compared
    (to within 1.0) against the current-day mean of the component.
    """
    starcounts = (int(1e3),)
    sd = SynthData(pars=PARS[:1], starcounts=starcounts,
                   Components=COMPONENTS)
    sd.generate_all_init_cartesian()
    sd.project_stars()

    # Only the projected mean is needed; the covariance is ignored
    comp_mean_now, _ = sd.components[0].get_currentday_projection()

    # Columns must be listed in x, y, z, u, v, w order so that each one
    # lines up with the matching entry of the component mean (the
    # previous 'xzyuvw' ordering swapped the y and z columns)
    final_xyzuvw = sd.extract_data_as_array(
        [dim + '_now' for dim in 'xyzuvw']
    )
    assert np.allclose(comp_mean_now, final_xyzuvw.mean(axis=0), atol=1.)
def test_maximisation_gradient_descent_with_multiprocessing_tech():
    """
    Added by MZ 2020 - 07 - 13

    Check that `expectmax.maximisation` runs with the 'Nelder-Mead'
    optimisation method and `nprocess_ncomp` enabled.

    NOTE: this does not test whether maximisation returns appropriate
    results, only that the code runs without errors. This is mainly to
    test multiprocessing.
    """
    age = 1e-5
    true_pars = np.array([0, 0, 0, 0, 0, 0, 5., 2., age])
    comp1 = SphereComponent(true_pars)
    starcounts = [100, ]

    synth = SynthData(pars=[true_pars, ], starcounts=starcounts)
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)

    # With a single component, every star is a certain member of it
    true_memb_probs = np.ones((np.sum(starcounts), 1))

    ncomps = len(starcounts)

    # Start the fit from the true parameters perturbed by random noise
    noise = np.random.rand(true_pars.shape[0]) * 5
    all_init_pars = [true_pars + noise]

    new_comps, all_samples, _, all_init_pos, success_mask = \
        expectmax.maximisation(
            synth.table,
            ncomps,
            true_memb_probs,
            100,
            'iter00',
            all_init_pars,
            optimisation_method='Nelder-Mead',
            nprocess_ncomp=True,
        )
def test_storeTable():
    """Check that a table written with `store_table` can be read back"""
    out_path = 'temp_data/test_storeTable_output.fits'

    synth = SynthData(pars=PARS, starcounts=STARCOUNTS,
                      Components=COMPONENTS)
    synth.synthesise_everything()
    synth.store_table(filename=out_path, overwrite=True)

    # The parallax column serves as the round-trip check
    reloaded = Table.read(out_path)
    assert np.allclose(synth.table['parallax'], reloaded['parallax'])
def test_storeAndLoad():
    """Check that storing through `synthesise_everything` works as
    expected.

    The table is first stored with `overwrite=True`. Storing it again at
    the same `filename` with `overwrite=False` is expected to raise an
    IOError, and the test fails if it does not.
    """
    filename = 'temp_data/test_synthesiseEverything_output.fits'
    sd = SynthData(pars=PARS, starcounts=STARCOUNTS, Components=COMPONENTS)
    sd.synthesise_everything(filename=filename, overwrite=True)

    # Trying to store table at `filename` without overwrite throws error.
    # The `else` branch makes the test fail when no error is raised;
    # previously a silent success went unnoticed.
    try:
        sd.synthesise_everything(filename=filename, overwrite=False)
    except IOError:
        pass
    else:
        raise AssertionError(
            'IOError should have been raised when storing to an existing '
            'file without overwrite'
        )
def test_fit_one_comp_with_background():
    """
    Synthesise one component plus background stars, fit it with
    `expectmax.fit_many_comps` and check what is recovered.

    Checks the fitted parameters, the fraction of association members
    recovered, the fraction of background stars mistaken for members,
    and the consistency of the reported membership probabilities.

    Takes a while...
    """
    run_name = 'background'

    # Banner marking the start of this test in the log
    rule = 60 * '-'
    side = 15 * '-'
    logging.info(rule)
    logging.info(side + '{:^30}'.format('TEST: ' + run_name) + side)
    logging.info(rule)

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = savedir + 'log.log'
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV, age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcount = 200
    background_density = 1e-9
    ncomps = sphere_comp_pars.shape[0]

    synth = SynthData(
        pars=sphere_comp_pars,
        starcounts=[starcount],
        Components=SphereComponent,
        background_density=background_density,
    )
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table)
    table = synth.table

    background_count = len(table) - starcount
    logging.info('Generated {} background stars'.format(background_count))

    # Every star gets the same background log overlap
    table['background_log_overlap'] = (
        len(table) * [np.log(background_density)]
    )
    table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = expectmax.fit_many_comps(
        data=table,
        ncomps=ncomps,
        rdir=savedir,
        burnin=500,
        sampling_steps=5000,
        trace_orbit_func=dummy_trace_orbit_func,
        use_background=True,
        ignore_stable_comps=False,
        max_em_iterations=200,
    )

    # Check parameters are close
    assert np.allclose(sphere_comp_pars, best_comps[0].get_pars(), atol=1.5)

    # The first `starcount` rows are the association stars, the rest are
    # background; column 0 holds the membership to the fitted component
    member_probs = memb_probs[:starcount, 0]
    background_probs = memb_probs[starcount:, 0]

    # Check most assoc members are correctly classified
    recovery_count_threshold = 0.95 * starcount
    recovery_count_actual = np.sum(member_probs > 0.5)
    assert recovery_count_threshold < recovery_count_actual

    # Check most background stars are correctly classified: fewer than
    # 5% of them may be classified as members
    contamination_count_threshold = 0.05 * len(background_probs)
    contamination_count_actual = np.sum(background_probs > 0.5)
    assert contamination_count_threshold > contamination_count_actual

    # Check reported membership probabilities are consistent with the
    # recovery rate (within 5%)
    mean_membership_confidence = np.mean(member_probs)
    assert np.isclose(recovery_count_actual / starcount,
                      mean_membership_confidence, atol=0.05)
if (config.config['data_savefile'] != '' and os.path.isfile(config.config['data_savefile'])): log_message('Loading pre-prepared data') datafile = config.config['data_savefile'] data_table = tabletool.load(datafile) historical = 'c_XU' in data_table.colnames # Otherwise, perform entire process else: # Construct synthetic data if required if config.synth is not None: log_message('Getting synthetic data') datafile = config.config['data_savefile'] if not os.path.exists(datafile) and config.config['pickup_prev_run']: synth_data = SynthData(pars=config.synth['pars'], starcounts=config.synth['starcounts'], Components=Component) synth_data.synthesise_everything(filename=datafile, overwrite=True) np.save(rdir + 'true_synth_pars.npy', config.synth['pars']) np.save(rdir + 'true_synth_starcounts.npy', config.synth['starcounts']) else: log_message('Synthetic data already exists') else: datafile = config.config['data_loadfile'] assert os.path.exists(datafile) # Read in data as table log_message('Read data into table') data_table = tabletool.read(datafile)
def test_fit_many_comps():
    """
    Synthesise two well separated components, fit them starting from
    random membership probabilities, and check that memberships and
    component parameters are recovered.

    Takes a while... maybe this belongs in integration unit_tests
    """
    run_name = 'stationary'

    # Banner marking the start of this test in the log
    rule = 60 * '-'
    side = 15 * '-'
    logging.info(rule)
    logging.info(side + '{:^30}'.format('TEST: ' + run_name) + side)
    logging.info(rule)

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = savedir + 'log.log'
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV, age,
        [-50, -50, -50, 0, 0, 0, 10., 5, uniform_age],
        [50, 50, 50, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcounts = [20, 50]
    ncomps = sphere_comp_pars.shape[0]
    nstars = np.sum(starcounts)

    # True memberships: each consecutive block of rows belongs entirely
    # to one component
    true_memb_probs = np.zeros((nstars, ncomps))
    block_start = 0
    for comp_ix, count in enumerate(starcounts):
        true_memb_probs[block_start:block_start + count, comp_ix] = 1.0
        block_start += count

    # Initialise some random membership probabilities, normalised such
    # that each row sums to 1
    raw_probs = np.random.rand(nstars, ncomps)
    row_totals = raw_probs.sum(axis=1)
    init_memb_probs = (raw_probs.T / row_totals).T

    synth = SynthData(
        pars=sphere_comp_pars,
        starcounts=starcounts,
        Components=SphereComponent,
    )
    synth.synthesise_everything()
    tabletool.convert_table_astro2cart(synth.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = expectmax.fit_many_comps(
        data=synth.table,
        ncomps=ncomps,
        rdir=savedir,
        init_memb_probs=init_memb_probs,
        trace_orbit_func=dummy_trace_orbit_func,
        ignore_stable_comps=False,
    )

    # Reorder the fitted components with the permutation reported as
    # best by expectmax before comparing against the truth
    perm = expectmax.get_best_permutation(memb_probs, true_memb_probs)
    logging.info('Best permutation is: {}'.format(perm))

    assert np.allclose(true_memb_probs, memb_probs[:, perm])

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))

        assert np.allclose(origin.get_mean(),
                           best_comp.get_mean(),
                           atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(),
                           best_comp.get_age(),
                           atol=1.)
def test_pythonFuncs():
    """
    Cross-check the three python overlap routines (`co1`, `co2` and the
    exponential of `p_lno`) on stars synthesised from one component.

    TODO: remove the requirements of file, have data stored in file?
    """
    spread_pos = 2.
    spread_vel = 2.
    cov = np.identity(6)
    cov[:3, :3] *= spread_pos**2
    cov[3:, 3:] *= spread_vel**2
    true_comp = SphereComponent(
        attributes={
            'mean': np.zeros(6),
            'covmatrix': cov,
            'age': 1e-10,
        })

    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    stars = tabletool.build_data_dict_from_table(synth_data.table)

    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()

    # Two reference distributions are tested in turn:
    #  1. the true component
    #  2. a neighbouring star (with the aim of testing tiny overlap
    #     values). Note that most overlaps go to 0, but the log
    #     overlaps retain the information
    references = [
        (group_cov, group_mean),
        (stars['covs'][15], stars['means'][15]),
    ]

    for ref_cov, ref_mean in references:
        from_co1 = []
        from_co2 = []
        for scov, smn in zip(stars['covs'], stars['means']):
            from_co1.append(co1(ref_cov, ref_mean, scov, smn))
            from_co2.append(co2(ref_cov, ref_mean, scov, smn))
        from_co1 = np.array(from_co1)
        from_co2 = np.array(from_co2)

        lnos = p_lno(ref_cov, ref_mean, stars['covs'], stars['means'])
        from_lno = np.exp(lnos)

        assert np.allclose(from_co1, from_co2)
        assert np.allclose(from_co2, from_lno)
        assert np.allclose(from_co1, from_lno)
from chronostar.synthdata import SynthData
from chronostar import tabletool
from chronostar import compfitter

if __name__ == '__main__':
    # Profile a single component fit on synthetic data and load the
    # resulting statistics, sorted by cumulative time
    logging.basicConfig(filename='compfitter.log', level=logging.INFO)

    save_dir = ''
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    starcount = 100
    error_frac = 1.
    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])

    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    # NOTE: the profiled statement below is a string that refers to
    # `data` and `compfitter` by name, so these names must not change
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'compfitter.stat'
    profiled_statement = (
        "compfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='',"
        "save_dir='')"
    )
    cProfile.run(profiled_statement, stat_file)

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
my_free_pars[7] = 2 # dV or dU? km/s my_free_pars[8] = 2 # dV or dU? km/s my_free_pars[9] = 0.9 # Set quaternians to 1(?) my_free_pars[10] = 0.3 my_free_pars[11] = 0.7 my_free_pars[12] = 0.5 my_free_pars[13] = XU_CORR # Set XU correlation to XU_CORR my_free_pars[-1] = 19. # Age in Myr print('my_free_pars', my_free_pars) # my_free_comp = EllipComponent(pars=my_free_pars) # # print('my_free_comp', my_free_comp) my_synth_data = SynthData(pars=my_free_pars, starcounts=NSTARS, Components=EllipComponent) #~ my_synth_data.generate_all_init_cartesian() my_synth_data.synthesise_everything() # Don't actually need everything # mean_colnames = [el for el in 'XYZUVW'] # mean_colnames = [el+'0' for el in 'xyzuvw'] # Use this for initial star positions mean_colnames = [el + '_now' for el in 'xyzuvw' ] # Use this for current day star positions, # will need to uncomment synthesise_everything() though means = tt.build_data_dict_from_table( my_synth_data.table[:], main_colnames=mean_colnames, only_means=True, )
# and the data prep has already been done if (config.config['data_savefile'] != '' and os.path.isfile(config.config['data_savefile'])): log_message('Loading pre-prepared data') data_table = tabletool.load(config.config['data_savefile']) historical = 'c_XU' in data_table.colnames # Otherwise, perform entire process else: # Construct synthetic data if required datafile = config.config['data_loadfile'] if config.synth is not None: log_message('Getting synthetic data') if not os.path.exists(datafile) and config.config['pickup_prev_run']: synth_data = SynthData(pars=config.synth['pars'], starcounts=config.synth['starcounts'], Components=Component) synth_data.synthesise_everything(filename=datafile, overwrite=True) else: log_message('Synthetic data already exists') assert os.path.exists(datafile) # Read in data as table log_message('Read data into table') data_table = tabletool.read(datafile) historical = 'c_XU' in data_table.colnames # If data cuts provided, then apply them if config.config['banyan_assoc_name'] != '':
def run_fit_helper(true_comp, starcounts, measurement_error,
                   burnin_step=None,
                   run_name='default',
                   trace_orbit_func=None,
                   Component=EllipComponent,
                   init_pars=None):
    """
    Synthesise data from a known component, fit it, then store and plot
    the outcome.

    All artefacts (synthetic data table, log file, plots, stored
    components) are written beneath
    ``temp_data/<py_vers>_compfitter_<run_name>/``.

    Parameters
    ----------
    true_comp:
        Component whose ``get_pars()`` seeds the synthetic data.
    starcounts:
        Forwarded to ``SynthData`` as ``starcounts``.
    measurement_error:
        Forwarded to ``SynthData`` as ``measurement_error``.
    burnin_step:
        Forwarded to ``cf.fit_comp`` as ``burnin_steps``.
    run_name: str
        Label used to build the output directory name.
    trace_orbit_func:
        Forwarded to ``cf.fit_comp``.
    Component:
        Component class used both for synthesis and for the fit.
    init_pars:
        Optional initial parameters; when given they are converted with
        ``Component.internalise`` before being handed to the fitter.

    Returns
    -------
    The unmodified return value of ``cf.fit_comp``; its first element is
    treated here as the best-fit component.
    """
    py_vers = sys.version[0]
    save_dir = 'temp_data/%s_compfitter_%s/' % (py_vers, run_name)
    data_filename = save_dir + 'synth_data.fits'
    log_filename = save_dir + 'log.log'
    plot_dir = save_dir
    print("---------", save_dir)

    # makedirs (rather than mkdir) so that a missing parent 'temp_data/'
    # directory does not abort the run.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    logging.basicConfig(level=logging.INFO,
                        filename=log_filename,
                        filemode='w')

    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error,
                           Components=Component)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    print("newPars ------------------------------ \n", init_pars)
    if init_pars is None:
        internal_pars = None
    else:
        internal_pars = Component.internalise(init_pars)

    res = cf.fit_comp(data=synth_data.table,
                      plot_it=True,
                      burnin_steps=burnin_step,
                      store_burnin_chains=True,
                      plot_dir=plot_dir,
                      save_dir=save_dir,
                      trace_orbit_func=trace_orbit_func,
                      optimisation_method='emcee',
                      Component=Component,
                      init_pars=internal_pars)

    best_comp = res[0]
    comps_filename = save_dir + 'true_and_best_comp.py'
    # NOTE(review): this stores via EllipComponent regardless of the
    # `Component` argument -- confirm whether `Component` was intended.
    EllipComponent.store_raw_components(comps_filename,
                                        [true_comp, best_comp])

    star_pars = tabletool.build_data_dict_from_table(synth_data.table)
    plot_results(true_comp, best_fit_comp=best_comp, star_pars=star_pars,
                 plt_dir=save_dir)

    return res
def test_synthesiseEverything():
    """A lone synthesise_everything() call yields as many rows as stars requested"""
    synth = SynthData(Components=COMPONENTS, starcounts=STARCOUNTS, pars=PARS)
    synth.synthesise_everything()

    # The table length must match the total of the requested star counts
    n_requested = np.sum(STARCOUNTS)
    n_rows = len(synth.table)
    assert np.isclose(n_requested, n_rows)
from chronostar.synthdata import SynthData
from chronostar import tabletool
from chronostar import compfitter

if __name__ == '__main__':
    # Profile a single-component fit on freshly synthesised data and load
    # the resulting statistics sorted by cumulative time.
    logging.basicConfig(level=logging.INFO,
                        filename='temp_logs/groupfitter.log')
    save_dir = 'temp_data/'
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    error_frac = 1.

    # Build the synthetic stars and convert them to the dict form handed
    # to the fitter.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'stat_dumps/groupfitter.stat'
    # The statement string is executed by cProfile in the __main__
    # namespace, so it must name a module that is actually imported here:
    # `compfitter` (the previous `groupfitter` is not imported above).
    cProfile.run(
        "compfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='temp_plots/',"
        "save_dir='temp_data/')",
        stat_file,
    )

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
def test_fit_stability_mixed_comps():
    """
    Have a fit with some iterations that have a mix of stable and
    unstable comps.

    Three (effectively zero-age) spherical components are synthesised:
    one far from the others and two that overlap. The initial
    memberships are random except for the distant component, whose stars
    are initialised correctly so that it can become stable quickly.

    TODO: Maybe give 2 similar comps tiny age but overlapping origins
    """
    run_name = 'mixed_stability'

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(
        PY_VERS, run_name)

    # Configure logging BEFORE the first logging call: a logging.info()
    # on an unconfigured root logger installs a default handler, after
    # which basicConfig() silently does nothing.
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X,  Y,  Z,  U,  V,  W, dX, dV, age,
        [50, 0, 0, 0, 50, 0, 10., 5, uniform_age],  # Very distant (and stable) comp
        [0, -20, 0, 0, -5, 0, 10., 5, uniform_age],  # Overlapping comp 1
        [0, 20, 0, 0, 5, 0, 10., 5, uniform_age],  # Overlapping comp 2
    ])
    starcounts = [50, 100, 200]
    ncomps = sphere_comp_pars.shape[0]

    # True memberships: stars are ordered by component, so each component
    # owns one contiguous block of rows.
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i, count in enumerate(starcounts):
        true_memb_probs[start:start + count, i] = 1.0
        start += count

    # Initialise some random membership probabilities
    # which will serve as our starting guess
    init_memb_probs = np.random.rand(np.sum(starcounts), ncomps)

    # To aid a component in quickly becoming stable, initialise the
    # memberships correctly for stars belonging to this component
    init_memb_probs[:starcounts[0]] = 0.
    init_memb_probs[:starcounts[0], 0] = 1.
    init_memb_probs[starcounts[0]:, 0] = 0.

    # Normalising such that each row sums to 1
    init_memb_probs = (init_memb_probs.T / init_memb_probs.sum(axis=1)).T

    synth_data = SynthData(
        pars=sphere_comp_pars,
        starcounts=starcounts,
        Components=SphereComponent,
    )
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]
    SphereComponent.store_raw_components(savedir + 'origins.npy', origins)

    best_comps, _, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table,
                                 ncomps=ncomps,
                                 rdir=savedir,
                                 init_memb_probs=init_memb_probs,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 ignore_stable_comps=True)

    # The fitted components come back in no guaranteed order
    perm = expectmax.get_best_permutation(memb_probs, true_memb_probs)
    logging.info('Best permutation is: {}'.format(perm))

    # Calculate the membership difference, we divide by 2 since
    # incorrectly allocated stars are double counted
    total_diff = 0.5 * np.sum(np.abs(true_memb_probs - memb_probs[:, perm]))

    # Assert that the membership difference is less than 10% of all stars
    assert total_diff < 0.1 * np.sum(starcounts)

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent) and
                isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars: {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(),
                           best_comp.get_mean(),
                           atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(),
                           best_comp.get_age(),
                           atol=1.)
def test_fit_many_comps():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Two well separated, (effectively) zero-age spherical components are
    synthesised and fitted; the recovered memberships and component
    parameters are compared against the inputs.

    Takes a while... maybe this belongs in integration unit_tests
    """
    run_name = 'stationary'
    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(PY_VERS,
                                                                 run_name)
    # Defined locally (as the sibling tests do) so that the basicConfig
    # call below cannot hit an undefined name.
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(PY_VERS,
                                                              run_name)
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X,  Y,  Z,  U,  V,  W, dX, dV, age,
        [-50, -50, -50, 0, 0, 0, 10., 5, uniform_age],
        [50, 50, 50, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcounts = [200, 200]
    ncomps = sphere_comp_pars.shape[0]

    # True memberships: stars are ordered by component, so each component
    # owns one contiguous block of rows (derived from `starcounts` rather
    # than hard-coded row indices).
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i, count in enumerate(starcounts):
        true_memb_probs[start:start + count, i] = 1.
        start += count

    synth_data = SynthData(pars=sphere_comp_pars, starcounts=starcounts,
                           Components=SphereComponent,
                           )
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table,
                                 ncomps=ncomps,
                                 rdir=savedir,
                                 trace_orbit_func=dummy_trace_orbit_func, )

    # compare fit with input; if not close, check whether flipping the
    # component order fixes things
    if not np.allclose(true_memb_probs, memb_probs):
        memb_probs = memb_probs[:, ::-1]
        best_comps = best_comps[::-1]
        assert np.allclose(true_memb_probs, memb_probs)

    for origin, best_comp in zip(origins, best_comps):
        assert (isinstance(origin, SphereComponent) and
                isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars: {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(),
                           best_comp.get_mean(),
                           atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(),
                           best_comp.get_age(),
                           atol=1.)
def test_2comps_and_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in integration unit_tests

    Performance of test is a bit tricky to calibrate. Since we are
    skipping any temporal evolution for speed reasons, we model two
    isotropic Gaussians. Now if these Gaussians are too far apart,
    NaiveFit will gravitate to one of the Gaussians during the
    1 component fit, and then struggle to discover the second Gaussian.

    If the Gaussians are too close, then both will be characterised by
    the 1 component fit, and the BIC will decide two Gaussian components
    are overkill.

    I think I've addressed this by having the two groups have
    large number of stars.
    """
    run_name = '2comps_and_background'

    savedir = 'temp_data/{}_naive_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_naive_{}_data.fits'.format(PY_VERS,
                                                             run_name)
    log_filename = 'temp_data/{}_naive_{}/log.log'.format(PY_VERS,
                                                          run_name)

    # Configure logging BEFORE the first logging call: a logging.info()
    # on an unconfigured root logger installs a default handler, after
    # which basicConfig() silently does nothing.
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    ### INITIALISE SYNTHETIC DATA ###

    # DON'T CHANGE THE AGE! BECAUSE THIS TEST DOESN'T USE ANY ORBIT
    # INTEGRATION!!!
    # Note: if peaks are too far apart, it will be difficult for
    # chronostar to identify the 2nd when moving from a 1-component
    # to a 2-component fit.
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X,  Y,  Z,  U,  V,  W, dX, dV, age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
        [30, 0, 0, 0, 5, 0, 10., 5, uniform_age],
    ])
    starcounts = [100, 150]
    ncomps = sphere_comp_pars.shape[0]
    nstars = np.sum(starcounts)

    background_density = 1e-9

    # True memberships: stars are ordered by component, so each component
    # owns one contiguous block of rows.
    true_memb_probs = np.zeros((nstars, ncomps))
    start = 0
    for i, count in enumerate(starcounts):
        true_memb_probs[start:start + count, i] = 1.0
        start += count

    try:
        # Check if the synth data has already been constructed
        tabletool.build_data_dict_from_table(data_filename)
    except Exception:
        # The exception type raised for a missing/unreadable file is not
        # visible from here, so catch broadly -- but no longer swallow
        # KeyboardInterrupt/SystemExit as the bare `except:` did.
        synth_data = SynthData(
            pars=sphere_comp_pars, starcounts=starcounts,
            Components=SphereComponent,
            background_density=background_density,
        )
        synth_data.synthesise_everything()

        tabletool.convert_table_astro2cart(synth_data.table,
                                           write_table=True,
                                           filename=data_filename)

        # insert background densities
        synth_data.table['background_log_overlap'] = \
            len(synth_data.table) * [np.log(background_density)]

        synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    ### SET UP PARAMETER FILE ###
    fit_pars = {
        'results_dir': savedir,
        'data_table': data_filename,
        'trace_orbit_func': 'dummy_trace_orbit_func',
        'return_results': True,
        'par_log_file': savedir + 'fit_pars.log',
        'overwrite_prev_run': True,
        'nthreads': 3,
    }

    ### INITIALISE AND RUN A NAIVE FIT ###
    naivefit = NaiveFit(fit_pars=fit_pars)
    result, score = naivefit.run_fit()

    best_comps = result['comps']
    memb_probs = result['memb_probs']

    # Check membership has ncomps + 1 (bg) columns
    n_fitted_comps = memb_probs.shape[-1] - 1
    assert ncomps == n_fitted_comps

    ### CHECK RESULT ###
    # No guarantee of order, so check if result is permutated
    # also we drop the bg memberships for permutation reasons
    perm = expectmax.get_best_permutation(memb_probs[:nstars, :ncomps],
                                          true_memb_probs)

    memb_probs = memb_probs[:nstars]

    logging.info('Best permutation is: {}'.format(perm))

    n_misclassified_stars = np.sum(
        np.abs(true_memb_probs - np.round(memb_probs[:, perm])))

    # Check fewer than 15% of association stars are misclassified.
    # A failure must fail the test; it used to be swallowed by dropping
    # into pdb.
    misclassified_pct = n_misclassified_stars / nstars * 100
    assert misclassified_pct < 15, (
        '{}% of association stars misclassified'.format(misclassified_pct))

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent) and
                isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars: {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(),
                           best_comp.get_mean(),
                           atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.5)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.5)
        assert np.allclose(origin.get_age(),
                           best_comp.get_age(),
                           atol=1.)
def test_fit_one_comp_with_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    One (effectively) zero-age spherical component plus a uniform
    background is synthesised and fitted with ``use_background=True``.

    Takes a while... maybe this belongs in integration unit_tests
    """
    run_name = 'background'
    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(PY_VERS,
                                                                 run_name)
    # Defined locally (as the sibling tests do) so that the basicConfig
    # call below cannot hit an undefined name.
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(PY_VERS,
                                                              run_name)
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X,  Y,  Z,  U,  V,  W, dX, dV, age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcount = 100

    background_density = 1e-9

    ncomps = sphere_comp_pars.shape[0]

    synth_data = SynthData(pars=sphere_comp_pars, starcounts=[starcount],
                           Components=SphereComponent,
                           background_density=background_density,
                           )
    synth_data.synthesise_everything()

    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    # insert background densities
    synth_data.table['background_log_overlap'] = \
        len(synth_data.table) * [np.log(background_density)]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table,
                                 ncomps=ncomps,
                                 rdir=savedir,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 use_background=True)

    # NOTE(review): this early return predates this edit and leaves every
    # check below unreachable. It is kept because the thresholds could
    # not be validated here; the checks themselves have been corrected
    # (undefined `starcounts`, hard-coded 100, inverted contamination
    # comparison) so they are ready to enable once the return is removed.
    return best_comps, med_and_spans, memb_probs

    # Check parameters are close
    assert np.allclose(sphere_comp_pars, best_comps[0].get_pars(),
                       atol=1.)

    # Check most assoc members are correctly classified
    recovery_count_threshold = 0.95 * starcount
    recovery_count_actual = np.sum(np.round(memb_probs[:starcount, 0]))
    assert recovery_count_threshold < recovery_count_actual

    # Check most background stars are correctly classified, i.e. the
    # number of background stars assigned to the component stays below 5%
    contamination_count_threshold = 0.05 * len(memb_probs[starcount:])
    contamination_count_actual = np.sum(np.round(memb_probs[starcount:, 0]))
    assert contamination_count_actual < contamination_count_threshold

    # Check reported membership probabilities are consistent with recovery
    # rate (within 5%)
    mean_membership_confidence = np.mean(memb_probs[:starcount, 0])
    assert np.isclose(recovery_count_actual / float(starcount),
                      mean_membership_confidence,
                      atol=0.05)