def test_convertAstrTableToCart():
    """
    Check astrometry-to-cartesian conversion against a historical table.

    Reference cartesian means and covariance matrices are built straight
    from the cartesian columns of the historical table. The current table
    has its cartesian columns generated from its astrometric columns (with
    `write_table=False`), after which the same quantities are built from
    it. Both sets must agree, as must the historical `dX` column and the
    freshly generated `X_error` column.
    """
    reference_path = '../data/paper1/historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    reference_table = Table.read(reference_path)
    current_path = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    current_table = Table.read(current_path)

    # Drop stars that have gone through any binary checking
    row_subset = slice(100, 300)
    reference_table = Table(reference_table[row_subset])
    current_table = Table(current_table[row_subset])

    # Reference means and covs, read from the historical column layout
    reference_data = tabletool.build_data_dict_from_table(
        table=reference_table,
        cartesian=True,
        historical=True,
    )

    # Regenerate the cartesian columns, then read them back
    tabletool.convert_table_astro2cart(table=current_table, write_table=False)
    converted_data = tabletool.build_data_dict_from_table(
        current_table,
        cartesian=True,
    )

    assert np.allclose(reference_data['means'], converted_data['means'])
    assert np.allclose(reference_table['dX'], current_table['X_error'])
    assert np.allclose(reference_data['covs'], converted_data['covs'])
def test_convertAstrTableToCart():
    """
    Check astrometry-to-cartesian conversion against a historical table.

    Reference cartesian means and covariance matrices come from the
    cartesian columns of the table at `HIST_FILE_NAME`. The table at
    `CURR_FILE_NAME` has its cartesian columns generated from its
    astrometric columns (with `write_table=False`), and the same quantities
    are then built from it. Both sets must agree, as must the historical
    `dX` column and the freshly generated `X_error` column.

    All rows of both tables are compared; no row subset is taken.
    """
    reference_table = Table.read(HIST_FILE_NAME)
    current_table = Table.read(CURR_FILE_NAME)

    # Reference means and covs, read from the historical column layout
    reference_data = tabletool.build_data_dict_from_table(
        table=reference_table,
        cartesian=True,
        historical=True,
    )

    # Regenerate the cartesian columns, then read them back
    tabletool.convert_table_astro2cart(table=current_table, write_table=False)
    converted_data = tabletool.build_data_dict_from_table(
        current_table,
        cartesian=True,
    )

    assert np.allclose(reference_data['means'], converted_data['means'])
    assert np.allclose(reference_table['dX'], current_table['X_error'])
    assert np.allclose(reference_data['covs'], converted_data['covs'])
def plot_comps_and_stars(
        dim1,
        dim2,
        star_pars,
        comps,
        Component=SphereComponent,
        star_orbits=False,
        star_age=0.,
        star_then=False,
        comp_kwargs={},
):
    """
    Resolve the stellar data and components inputs for plotting.

    Parameters
    ----------
    star_pars: str or data dict
        If a string, it is passed to
        `tabletool.build_data_dict_from_table` and replaced by the result.
    comps: str or list of components
        If a string, it is passed to `Component.load_raw_components` and
        replaced by the result.
    Component: class
        Class used to load components when `comps` is a string.

    The remaining parameters (`dim1`, `dim2`, `star_orbits`, `star_age`,
    `star_then`, `comp_kwargs`) are not used by the code visible here.
    """
    # NOTE(review): `comp_kwargs={}` is a shared mutable default. It is left
    # unchanged because nothing visible mutates it; confirm before relying
    # on it.

    # The loaded data must be bound back to `star_pars`; previously the
    # return value was dropped and the filename string was kept instead.
    if isinstance(star_pars, str):
        star_pars = tabletool.build_data_dict_from_table(star_pars)
    if isinstance(comps, str):
        comps = Component.load_raw_components(comps)
def test_expectation():
    """
    Super basic membership check.

    Synthesises stars from two components, then confirms that the
    expectation step assigns the first block of stars to the first
    component and the remaining stars to the second.
    """
    age = 1e-5
    pars_a = np.array([0, 0, 0, 0, 0, 0, 5., 2., age])
    comp_a = SphereComponent(pars_a)
    pars_b = np.array([100., 0, 0, 20, 0, 0, 5., 2., age])
    comp_b = SphereComponent(pars_b)
    starcounts = [100, 100]

    synth_data = SynthData(pars=[pars_a, pars_b], starcounts=starcounts)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    # Stars are expected in component order: first block -> column 0,
    # the rest -> column 1
    n_first = starcounts[0]
    expected_memb_probs = np.zeros((np.sum(starcounts), 2))
    expected_memb_probs[:n_first, 0] = 1.
    expected_memb_probs[n_first:, 1] = 1.

    data_dict = tabletool.build_data_dict_from_table(synth_data.table)
    fitted_memb_probs = em.expectation(data_dict, [comp_a, comp_b])

    assert np.allclose(expected_memb_probs, fitted_memb_probs, atol=1e-10)
def get_region(assoc_name, gagne_reference_data=None):
    """
    Get a 6D box centred on a known association, spanning double its extent.

    Parameters
    ----------
    assoc_name: str
        Name of the association; must appear in the 'Moving group' column
        of the reference table.
    gagne_reference_data: str
        Filename of the reference table. Defaults to the bundled BANYAN
        table under '../data/'.

    Returns
    -------
    box_lower_bound, box_upper_bound: arrays
        Per-dimension bounds of the box: the centre of the members' means
        minus/plus their full span.

    Raises
    ------
    UserWarning
        If `assoc_name` is not present in the 'Moving group' column.
    """
    if gagne_reference_data is None:
        gagne_reference_data =\
            '../data/gagne_bonafide_full_kinematics_with_lit_and_best_radial_velocity' \
            '_comb_binars_with_banyan_radec.fits'
    gagne_table = tabletool.read(gagne_reference_data)

    if assoc_name not in set(gagne_table['Moving group']):
        # Call form of `raise`: the `raise Exc, msg` form is Python 2 only
        raise UserWarning(
            'Association name must be one of:\n{}\nReceived: "{}"'.format(
                list(set(gagne_table['Moving group'])), assoc_name
            )
        )

    # Extract all stars
    subtable = gagne_table[np.where(gagne_table['Moving group'] == assoc_name)]
    star_means = tabletool.build_data_dict_from_table(subtable, only_means=True)

    # nan-aware extrema, so stars with missing values do not poison the bounds
    data_upper_bound = np.nanmax(star_means, axis=0)
    data_lower_bound = np.nanmin(star_means, axis=0)
    data_span = data_upper_bound - data_lower_bound
    data_centre = 0.5 * (data_upper_bound + data_lower_bound)

    # Set up boundaries of box that span double the association
    box_lower_bound = data_centre - data_span
    box_upper_bound = data_centre + data_span
    return box_lower_bound, box_upper_bound
def test_background_component():
    """
    Create artificial association composed of two stars at opposite
    vertices of unit 6D rectangle. Then base background density
    distribution on that.
    """
    background_density = 100

    # Since the background doubles the span of the data, placing the two
    # stars at 0.25 and 0.75 should make the background extend from 0 to 1
    # in each dimension, which greatly simplifies reasoning about densities
    # and starcounts.
    tiny = 1e-10
    scalar_pars = (tiny, tiny, tiny)    # dx, dv, age
    upper_pars = np.hstack((np.zeros(6) + 0.75,) + scalar_pars)
    lower_pars = np.hstack((np.zeros(6) + 0.25,) + scalar_pars)

    synth_data = SynthData(
        pars=[upper_pars, lower_pars],
        starcounts=[1, 1],
        background_density=background_density,
    )
    synth_data.generate_all_init_cartesian()

    # Skip the first two rows (the association stars); the rest is background
    init_colnames = [dim + '0' for dim in 'xyzuvw']
    bg_means = tabletool.build_data_dict_from_table(
        synth_data.table[2:],
        main_colnames=init_colnames,
        only_means=True,
    )

    assert np.allclose(0.5, np.mean(bg_means, axis=0), atol=0.1)
    assert np.allclose(1.0, np.max(bg_means, axis=0), atol=0.1)
    assert np.allclose(0.0, np.min(bg_means, axis=0), atol=0.1)
    assert len(synth_data.table) == background_density + 2
def test_multiple_synth_components():
    """Check initialising with multiple components works"""
    age = 1e-10
    dx = 5.
    dv = 2.
    pars_a = np.array([10, 20, 30, 40, 50, 60, dx, dv, age])
    comp_a = SphereComponent(pars_a)
    pars_b = np.array([0., 0., 0, 0, 0, 0, dx, dv, age])
    comp_b = SphereComponent(pars_b)
    starcounts = [100, 100]
    n_first = starcounts[0]

    # A single starcount for two components must be rejected
    try:
        SynthData(pars=[pars_a, pars_b],
                  starcounts=n_first,
                  Components=SphereComponent)
    except AssertionError:
        pass
    else:
        raise UserWarning('AssertionError should have been thrown by synthdata')

    synth_data = SynthData(pars=[pars_a, pars_b],
                           starcounts=starcounts,
                           Components=SphereComponent)
    synth_data.synthesise_everything()
    assert len(synth_data.table) == np.sum(starcounts)

    init_colnames = [dim + '0' for dim in 'xyzuvw']
    init_means = tabletool.build_data_dict_from_table(
        synth_data.table,
        main_colnames=init_colnames,
        only_means=True,
    )

    # Rows are expected in component order: first block from the first
    # component, the remainder from the second
    assert np.allclose(comp_b.get_mean(),
                       init_means[n_first:].mean(axis=0),
                       atol=2.)
    assert np.allclose(comp_a.get_mean(),
                       init_means[:n_first].mean(axis=0),
                       atol=2.)
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    dx = 2.
    dv = 2.

    # Block-diagonal covariance: dx^2 on the first three dims, dv^2 on the
    # last three
    comp_cov = np.identity(6)
    comp_cov[:3, :3] *= dx**2
    comp_cov[3:, 3:] *= dv**2

    comp = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': comp_cov,
        'age': 1e-10,
    })

    nstars = 100
    synth_data = SynthData(pars=comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    star_covs = star_data['covs']
    star_means = star_data['means']

    python_lnols = p_lno(comp.get_covmatrix(), comp.get_mean(),
                         star_covs, star_means)
    swig_lnols = c_lno(comp.get_covmatrix(), comp.get_mean(),
                       star_covs, star_means, nstars)

    assert np.allclose(python_lnols, swig_lnols)
    assert np.isfinite(python_lnols).all()
    assert np.isfinite(swig_lnols).all()
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    pos_spread = 2.
    vel_spread = 2.

    # Block-diagonal covariance: position block scaled by pos_spread^2,
    # velocity block by vel_spread^2
    covmatrix = np.identity(6)
    covmatrix[:3, :3] *= pos_spread**2
    covmatrix[3:, 3:] *= vel_spread**2

    attributes = {
        'mean': np.zeros(6),
        'covmatrix': covmatrix,
        'age': 1e-10,
    }
    reference_comp = SphereComponent(attributes=attributes)

    nstars = 100
    synth_data = SynthData(pars=reference_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    star_data = tabletool.build_data_dict_from_table(synth_data.table)

    lnols_python = p_lno(
        reference_comp.get_covmatrix(),
        reference_comp.get_mean(),
        star_data['covs'],
        star_data['means'],
    )
    lnols_c = c_lno(
        reference_comp.get_covmatrix(),
        reference_comp.get_mean(),
        star_data['covs'],
        star_data['means'],
        nstars,
    )

    assert np.allclose(lnols_python, lnols_c)
    assert np.isfinite(lnols_python).all()
    assert np.isfinite(lnols_c).all()
def test_pythonFuncs():
    """
    Check that the three python overlap implementations agree.

    `co1` and `co2` are evaluated star by star, `p_lno` on the whole
    sample (and exponentiated); all three must match, first against the
    true component and then against a single star of the sample.

    TODO: remove the requirements of file, have data stored in file?
    """
    dx = 2.
    dv = 2.
    comp_cov = np.identity(6)
    comp_cov[:3, :3] *= dx ** 2
    comp_cov[3:, 3:] *= dv ** 2
    true_comp = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': comp_cov,
        'age': 1e-10,
    })

    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    star_covs = star_data['covs']
    star_means = star_data['means']

    def check_against(ref_cov, ref_mean):
        # Evaluate the per-star implementations, then the vectorised one,
        # and require pairwise agreement
        per_star_1 = []
        per_star_2 = []
        for star_cov, star_mean in zip(star_covs, star_means):
            per_star_1.append(co1(ref_cov, ref_mean, star_cov, star_mean))
            per_star_2.append(co2(ref_cov, ref_mean, star_cov, star_mean))
        per_star_1 = np.array(per_star_1)
        per_star_2 = np.array(per_star_2)
        vectorised = np.exp(p_lno(ref_cov, ref_mean, star_covs, star_means))

        assert np.allclose(per_star_1, per_star_2)
        assert np.allclose(per_star_2, vectorised)
        assert np.allclose(per_star_1, vectorised)

    # Test overlap with true component
    check_against(true_comp.get_covmatrix(), true_comp.get_mean())

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    check_against(star_covs[15], star_means[15])
def approxCurrentDayDistribution(self):
    """
    Approximate the current-day distribution from the stellar means.

    Returns the membership-weighted mean of the cartesian means built from
    `self.data`, and their membership-weighted covariance (`ddof=0.`).
    """
    star_means = tabletool.build_data_dict_from_table(
        self.data,
        cartesian=True,
        only_means=True,
    )
    weighted_mean = np.average(
        star_means, axis=0, weights=self.membership_probs,
    )
    # np.cov expects one variable per row, hence the transpose
    weighted_cov = np.cov(
        star_means.T, ddof=0., aweights=self.membership_probs,
    )
    return weighted_mean, weighted_cov
def test_get_lnoverlaps():
    """
    Confirms that star-component overlaps get smaller as stars get
    further away.

    A component `sphere_comp` is generated, followed by three "stars"
    sharing its covariance matrix. The first also shares its mean; the
    other two are offset in X by increasing amounts. The log overlaps
    must come out in descending order.
    """
    dim = 6
    sphere_comp = SphereComponent(attributes={
        'mean': np.zeros(dim),
        'covmatrix': np.identity(dim),
        'age': 1e-10,
    })

    x_offsets = [0., 1., 10.]
    star_comps = [
        SphereComponent(attributes={
            'mean': sphere_comp.get_mean()
                    + np.array([x_offset, 0., 0., 0., 0., 0.]),
            'covmatrix': sphere_comp.get_covmatrix(),
            'age': sphere_comp.get_age(),
        })
        for x_offset in x_offsets
    ]

    # Build a minimal table and write each star's mean and covariance
    # into its cartesian columns
    nstars = len(star_comps)
    star_table = Table(data=np.arange(nstars).reshape(nstars, 1),
                       names=['name'])
    tabletool.append_cart_cols_to_table(star_table)
    for star_comp, row in zip(star_comps, star_table):
        tabletool.insert_data_into_row(
            row,
            star_comp.get_mean(),
            star_comp.get_covmatrix(),
            cartesian=True,
        )

    star_data = tabletool.build_data_dict_from_table(star_table)
    ln_overlaps = likelihood.get_lnoverlaps(sphere_comp, data=star_data)

    # Checks that ln_overlaps is descending
    assert np.allclose(ln_overlaps, sorted(ln_overlaps)[::-1])
def test_lnprob_func():
    """
    Generates two components and a synthetic data set from each.

    Confirms that each data set yields a larger lnprob for the component
    it was drawn from than for the other component, and that the matching
    and mismatching lnprobs of the two realisations agree to within 20%.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }

    comps = {}
    data = {}
    for comp_name, comp_mean in comp_means.items():
        comp = SphereComponent(attributes={
            'mean': comp_mean,
            'covmatrix': comp_covmatrix,
            'age': tiny_age
        })
        synth_data = SynthData(pars=[comp.get_pars()],
                               starcounts=star_count,
                               measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(
            synth_data.table)
        comps[comp_name] = comp

    def lnprob(comp_name, data_name):
        # lnprob of the named component's parameters given the named data
        return likelihood.lnprob_func(pars=comps[comp_name].get_pars(),
                                      data=data[data_name])

    lnprob_comp1_data1 = lnprob('comp1', 'comp1')
    lnprob_comp2_data1 = lnprob('comp2', 'comp1')
    lnprob_comp1_data2 = lnprob('comp1', 'comp2')
    lnprob_comp2_data2 = lnprob('comp2', 'comp2')

    print(lnprob_comp1_data1)
    print(lnprob_comp2_data1)
    print(lnprob_comp1_data2)
    print(lnprob_comp2_data2)

    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 20%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=2e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=2e-1)
def test_get_lnoverlaps():
    """
    Confirms that star-component overlaps get smaller as stars get
    further away.

    A component `sphere_comp` is generated, followed by three "stars"
    sharing its covariance matrix. The first also shares its mean; the
    other two are offset in X by increasing amounts. The log overlaps
    must come out in descending order.
    """
    dim = 6
    comp_attributes = {
        'mean': np.zeros(dim),
        'covmatrix': np.identity(dim),
        'age': 1e-10,
    }
    sphere_comp = SphereComponent(attributes=comp_attributes)

    star_comps = []
    for x_shift in (0., 1., 10.):
        shift = np.array([x_shift, 0., 0., 0., 0., 0.])
        star_comps.append(SphereComponent(attributes={
            'mean': sphere_comp.get_mean() + shift,
            'covmatrix': sphere_comp.get_covmatrix(),
            'age': sphere_comp.get_age(),
        }))

    # Build a minimal table and write each star's mean and covariance
    # into its cartesian columns
    nstars = len(star_comps)
    name_column = np.arange(nstars).reshape(nstars, 1)
    star_table = Table(data=name_column, names=['name'])
    tabletool.append_cart_cols_to_table(star_table)
    for star_comp, row in zip(star_comps, star_table):
        tabletool.insert_data_into_row(row,
                                       star_comp.get_mean(),
                                       star_comp.get_covmatrix(),
                                       cartesian=True)

    star_data = tabletool.build_data_dict_from_table(star_table)
    ln_overlaps = likelihood.get_lnoverlaps(sphere_comp, data=star_data)

    # Checks that ln_overlaps is descending
    descending = sorted(ln_overlaps)[::-1]
    assert np.allclose(ln_overlaps, descending)
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices
    matches previous implementation
    """
    legacy = loadDictFromTable(HIST_FILE_NAME)

    # Historical tables use their own column names for values, errors
    # and correlations
    (main_colnames,
     error_colnames,
     corr_colnames) = tabletool.get_historical_cart_colnames()

    selected_rows = legacy['table'][legacy['indices']]
    rebuilt = tabletool.build_data_dict_from_table(
        selected_rows,
        main_colnames=main_colnames,
        error_colnames=error_colnames,
        corr_colnames=corr_colnames,
    )

    assert np.allclose(legacy['xyzuvw'], rebuilt['means'])
    assert np.allclose(legacy['xyzuvw_cov'], rebuilt['covs'])
def test_lnprob_func():
    """
    Generates two components and a synthetic data set from each.

    Confirms that each data set yields a larger lnprob for the component
    it was drawn from than for the other component, and that the matching
    and mismatching lnprobs of the two realisations agree to within 10%.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }

    comps = {}
    data = {}
    for comp_name, comp_mean in comp_means.items():
        comp = SphereComponent(attributes={
            'mean': comp_mean,
            'covmatrix': comp_covmatrix,
            'age': tiny_age
        })
        synth_data = SynthData(pars=[comp.get_pars()],
                               starcounts=star_count,
                               measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(
            synth_data.table)
        comps[comp_name] = comp

    pars1 = comps['comp1'].get_pars()
    lnprob_comp1_data1 = likelihood.lnprob_func(pars=pars1,
                                                data=data['comp1'])
    pars2 = comps['comp2'].get_pars()
    lnprob_comp2_data1 = likelihood.lnprob_func(pars=pars2,
                                                data=data['comp1'])
    pars1 = comps['comp1'].get_pars()
    lnprob_comp1_data2 = likelihood.lnprob_func(pars=pars1,
                                                data=data['comp2'])
    pars2 = comps['comp2'].get_pars()
    lnprob_comp2_data2 = likelihood.lnprob_func(pars=pars2,
                                                data=data['comp2'])

    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 10%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=1e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=1e-1)
def getZfromOrigins(origins, star_pars):
    """
    Build the true membership array from the originating components.

    Stars are assumed to be ordered by origin: the first `origins[0].nstars`
    rows belong to the first origin, the next `origins[1].nstars` to the
    second, and so on. Any stars left over are assigned to a trailing
    background column.

    Parameters
    ----------
    origins: str or list of objects with an `nstars` attribute
        If a string, it is passed to `SphereComponent.load_components`.
    star_pars: str or dict
        If a string, it is passed to `tt.build_data_dict_from_table`.
        The dict must hold an array under 'means' whose first dimension is
        the number of stars.

    Returns
    -------
    z: [nstars, ngroups (+1)] float array
        1. where a star belongs to the column's origin (or background),
        0. elsewhere. The extra column is present only when the total
        number of stars differs from the sum of the origins' `nstars`.
    """
    # isinstance (rather than an exact type match) also accepts str
    # subclasses, e.g. filenames held as numpy.str_
    if isinstance(origins, str):
        origins = SphereComponent.load_components(origins)
    if isinstance(star_pars, str):
        star_pars = tt.build_data_dict_from_table(star_pars)

    nstars = star_pars['means'].shape[0]
    ngroups = len(origins)
    nassoc_stars = sum(o.nstars for o in origins)
    using_bg = bool(nstars != nassoc_stars)

    z = np.zeros((nstars, ngroups + int(using_bg)))

    # set association members' memberships to 1
    stars_so_far = 0
    for i, o in enumerate(origins):
        z[stars_so_far:stars_so_far + o.nstars, i] = 1.
        stars_so_far += o.nstars

    # set remaining stars as members of background
    if using_bg:
        z[stars_so_far:, -1] = 1.
    return z
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices
    matches previous implementation
    """
    filename_historical = '../data/paper1/' \
                          'historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    legacy = loadDictFromTable(filename_historical)

    # Historical tables use their own column names for values, errors
    # and correlations
    colnames = tabletool.get_historical_cart_colnames()
    main_colnames, error_colnames, corr_colnames = colnames

    selected_rows = legacy['table'][legacy['indices']]
    rebuilt = tabletool.build_data_dict_from_table(
        selected_rows,
        main_colnames=main_colnames,
        error_colnames=error_colnames,
        corr_colnames=corr_colnames,
    )

    assert np.allclose(legacy['xyzuvw'], rebuilt['means'])
    assert np.allclose(legacy['xyzuvw_cov'], rebuilt['covs'])
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices
    matches previous implementation
    """
    filename_historical = '../data/paper1/' \
                          'historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    previous = loadDictFromTable(filename_historical)

    # Column names used by the historical table layout
    main_cols, error_cols, corr_cols = \
        tabletool.get_historical_cart_colnames()

    # Only the rows flagged by the previous implementation are rebuilt
    table_rows = previous['table'][previous['indices']]
    current = tabletool.build_data_dict_from_table(table_rows,
                                                   main_colnames=main_cols,
                                                   error_colnames=error_cols,
                                                   corr_colnames=corr_cols)

    assert np.allclose(previous['xyzuvw'], current['means'])
    assert np.allclose(previous['xyzuvw_cov'], current['covs'])
def test_build_data_from_incomplete_table():
    """
    Sometimes rows will be missing data, e.g. from when binaries have
    been merged. build_data_dict_from_table should detect the presence of
    nans and skip them
    """
    # build a dummy table of data
    nstars = 10
    ndim = 6
    bad_row_ix = (np.array([0, 3, 4]), )

    means = np.random.rand(nstars, ndim)
    covs = np.array([np.eye(ndim, ndim) for _ in range(nstars)])
    nan_mask = np.zeros(nstars, dtype=bool)

    # check bad data are within index range
    assert np.all(bad_row_ix[0] < nstars)

    # Blank out the covariance matrices of the chosen rows
    nan_mask[bad_row_ix] = True
    covs[nan_mask] = np.nan

    dummy_table = Table()
    dummy_table['names'] = np.arange(nstars)
    tabletool.append_cart_cols_to_table(dummy_table)
    for row, mean, cov in zip(dummy_table, means, covs):
        tabletool.insert_data_into_row(row, mean, cov)

    star_pars = tabletool.build_data_dict_from_table(dummy_table)

    for key in ('means', 'covs'):
        assert not np.any(np.isnan(star_pars[key]))

    # check the correct number of rows have been returned
    n_complete = np.sum(np.logical_not(nan_mask))
    for key in ('means', 'covs'):
        assert len(star_pars[key]) == n_complete
def approxCurrentDayDistribution(self):
    """
    Approximate the current-day distribution from the stellar means.

    Returns
    -------
    mean_of_means: membership-weighted average of the cartesian means
        built from `self.data`
    cov_of_means: membership-weighted covariance of those means
        (`ddof=0.`)
    """
    build_kwargs = {'cartesian': True, 'only_means': True}
    star_means = tabletool.build_data_dict_from_table(self.data,
                                                      **build_kwargs)
    mean_of_means = np.average(star_means,
                               axis=0,
                               weights=self.membership_probs)
    # np.cov expects one variable per row, hence the transpose
    cov_of_means = np.cov(star_means.T,
                          ddof=0.,
                          aweights=self.membership_probs)
    return mean_of_means, cov_of_means
# This table is masked. Unmask: data_table = data_table.filled() print('DATA READ', len(data_table)) historical = 'c_XU' in data_table.colnames ############################################################################ ############ COMPONENT OVERLAPS ############################################ ############################################################################ print('Create data dict') # Create data dict data_dict = tabletool.build_data_dict_from_table( data_table, get_background_overlaps=True, historical=historical, ) # Create components comps = SphereComponent.load_raw_components(comps_filename) # COMPONENT OVERLAPS overlaps = expectmax.get_all_lnoverlaps(data_dict, comps) print('overlaps.shape', overlaps.shape, len(comps)) # MEMBERSHIP PROBABILITIES membership_probabilities = np.array( [expectmax.calc_membership_probs(ol) for ol in overlaps]) # Create a table
def get_region(assoc_name, pos_margin=30., vel_margin=5.,
               scale_margin=None, gagne_reference_data=None):
    """
    Get a 6D box surrounding a known association with members from BANYAN

    Parameters
    ----------
    assoc_name: str
        Name of the association as listed in BANYAN table. One of:
        {'118 Tau', '32 Orionis', 'AB Doradus', 'Carina', 'Carina-Near',
        'Columba', 'Coma Ber', 'Corona Australis', 'Hyades', 'IC 2391',
        'IC 2602', 'Lower Centaurus-Crux', 'Octans', 'Platais 8',
        'Pleiades', 'TW Hya', 'Taurus', 'Tucana-Horologium',
        'Upper Centaurus Lupus', 'Upper CrA', 'Upper Scorpius',
        'Ursa Major', 'beta Pictoris', 'chi{ 1 For (Alessi 13)',
        'epsilon Cha', 'eta Cha', 'rho Ophiuci'}
    pos_margin: float {30.}
        Margin in position space around known members from which new
        candidate members are included. Ignored if `scale_margin` is set.
    vel_margin: float {5.}
        Margin in velocity space around known members from which new
        candidate members are included. Ignored if `scale_margin` is set.
    scale_margin: float {None}
        If provided, the margin on each side is half this factor times the
        span of the members in each dimension, so a value of 1 doubles the
        total span.
    gagne_reference_data: str
        filename to BANYAN table

    Returns
    -------
    box_lower_bounds: [6] float array
        The lower bounds of the 6D box [X,Y,Z,U,V,W]
    box_upper_bounds: [6] float array
        The upper bounds of the 6D box [X,Y,Z,U,V,W]

    Raises
    ------
    UserWarning
        If `assoc_name` is not in the table's 'Moving group' column.
    """
    if gagne_reference_data is None:
        gagne_reference_data =\
            '../data/gagne_bonafide_full_kinematics_with_lit_and_best_radial_velocity' \
            '_comb_binars_with_banyan_radec.fits'
    gagne_table = tabletool.read(gagne_reference_data)

    if assoc_name not in set(gagne_table['Moving group']):
        known_names = list(set(gagne_table['Moving group']))
        message = 'Association name must be one of:\n{}\nReceived: "{}"'.format(
            known_names, assoc_name)
        raise UserWarning(message)

    # Extract all stars
    is_member = gagne_table['Moving group'] == assoc_name
    subtable = gagne_table[np.where(is_member)]
    logging.info('Initial membership list has {} members'.format(
        len(subtable)))

    # nan-aware extrema of the members' means in each dimension
    star_means = tabletool.build_data_dict_from_table(subtable,
                                                      only_means=True)
    data_upper_bound = np.nanmax(star_means, axis=0)
    data_lower_bound = np.nanmin(star_means, axis=0)
    logging.info('Stars span from {} to {}'.format(
        np.round(data_lower_bound), np.round(data_upper_bound)))

    if scale_margin is None:
        # Set margin based on provided (or default) constant amounts:
        # three position dimensions followed by three velocity dimensions
        margin = np.array(3 * [pos_margin] + 3 * [vel_margin])
    else:
        # Scale the margin with the data span.
        # scale_margin of 1 would double total span (1 + 1)
        data_span = data_upper_bound - data_lower_bound
        margin = 0.5 * scale_margin * data_span

    box_lower_bound = data_lower_bound - margin
    box_upper_bound = data_upper_bound + margin

    logging.info('Range extended.\nLower: {}\nUpper: {}'.format(
        np.round(box_lower_bound), np.round(box_upper_bound)))
    return box_lower_bound, box_upper_bound
print('DATA_TABLE READ', len(data_table)) # Compute overlaps only for the part of the data (chunk) # Every 100k stars take about 2 days, so I only want about that many stars in each run, in case something # goes wrong N = 10 # that many chunks NI = int(sys.argv[1] ) # take this chunk #TODO: update this number for every run! print('NI=%d' % NI) # TAKE ONLY the i-th part of the data indices_chunks = np.array_split(range(len(data_table)), N) data_table = data_table[indices_chunks[NI]] data_dict = tabletool.build_data_dict_from_table( data_table, get_background_overlaps=False, # bg overlap not available yet historical=historical, ) star_means = data_dict['means'] star_covs = data_dict['covs'] # PREPARE BACKGROUND DATA print('Read background Gaia data') background_means = tabletool.build_data_dict_from_table( '/home/tcrun/chronostar/data/gaia_cartesian_full_6d_table.fits', only_means=True, ) # Inverting the vertical values star_means = np.copy(star_means) star_means[:, 2] *= -1
col_name=bg_lnol_colname) if config.config['overwrite_datafile']: data_table.write(datafile, overwrite=True) elif config.config['data_savefile'] != '': data_table.write(config.config['data_savefile'], overwrite=True) # LOAD DATA DICT FROM ORIGINAL DATA # LOAD DATA DICT FROM MANIPULATED DATA # THEN CALCULATE MEMB PROBS BOTH TIMES # Convert data table into numpy arrays of mean and covariance matrices log_message('Building data dictionary') data_dict = tabletool.build_data_dict_from_table( data_table, get_background_overlaps=config.config['include_background_distribution'], historical=historical, ) # Save data_dict of the original data # np.save('data_dict_original.npy', data_dict) # MEMB PROBS for data WITH RV available data_dict_original = np.load('data_dict_original.npy') memb_probs_with_rv = expectmax.expectation(data=data_dict_original, comps=bp_comp_with_rv) print(memb_probs_with_rv) # Some values are nan. Mask out all stars that have any of values in their covariance matrices # equal to nan. mask = [~np.any(np.isnan(x)) for x in data_dict['covs']]
# Set up trace_orbit_func. Maybe move this into compfitter. if global_pars['trace_orbit_func'] == 'dummy_trace_orbit_func': global_pars['trace_orbit_func'] = traceorbit.dummy_trace_orbit_func elif global_pars['trace_orbit_func'] == 'epicyclic': log_message('trace_orbit: epicyclic') global_pars['trace_orbit_func'] = traceorbit.trace_epicyclic_orbit else: global_pars['trace_orbit_func'] = traceorbit.trace_cartesian_orbit ################################## ### READ DATA #################### ################################## # Stellar data #~ data_dict = tabletool.build_data_dict_from_table(global_pars['data_table'], mask_good=mask_good) data_dict = tabletool.build_data_dict_from_table( global_pars['data_table'], get_background_overlaps=global_pars['use_background']) #~ print('ONECOME', len(data_dict['means']), global_pars['data_table']) # Membership: memb_probs is what we get from the expectation step if os.path.exists(local_pars['filename_membership']): memb_probs = np.load(local_pars['filename_membership']) else: # This is first run and we have to start somewhere nstars = data_dict['means'].shape[0] init_memb_probs = np.ones((nstars, ncomps)) / ncomps print('MEMB PROBS INIT EQUAL') # Add background if global_pars['use_background']: memb_probs = np.hstack((init_memb_probs, np.zeros((nstars, 1))))
historical = 'c_XU' in data_table.colnames # If data cuts provided, then apply them if config.config['banyan_assoc_name'] != '': bounds = get_region(config.config['banyan_assoc_name']) elif config.data_bound is not None: bounds = (config.data_bound['lower_bound'], config.data_bound['upper_bound']) else: bounds = None if bounds is not None: log_message('Applying data cuts') star_means = tabletool.build_data_dict_from_table( datafile, main_colnames=config.cart_colnames.get('main_colnames', None), only_means=True, historical=historical, ) data_mask = np.where( np.all(star_means < bounds[1], axis=1) & np.all(star_means > bounds[0], axis=1)) data_table = data_table[data_mask] log_message('Data table has {} rows'.format(len(data_table))) # By the end of this, data will be a astropy table # with cartesian data written in # columns in default way. if config.config['convert_to_cartesian']: # Performs conversion in place (in memory) on `data_table` if (not 'c_XU' in data_table.colnames and
def run_fit_helper(true_comp, starcounts, measurement_error,
                   burnin_step=None, run_name='default',
                   trace_orbit_func=None, Component=EllipComponent,
                   init_pars=None):
    """
    Synthesise data from `true_comp`, fit a component to it and plot.

    Output (synthetic data table, log file, fit products and plots) is
    written beneath 'temp_data/<python major version>_compfitter_<run_name>/'.

    Parameters
    ----------
    true_comp: component
        Component whose `get_pars()` seeds the synthetic data.
    starcounts, measurement_error:
        Forwarded to `SynthData`.
    burnin_step: int {None}
        Forwarded to `cf.fit_comp` as `burnin_steps`.
    run_name: str {'default'}
        Used to name the output directory.
    trace_orbit_func: callable {None}
        Forwarded to `cf.fit_comp`.
    Component: class {EllipComponent}
        Component class used for synthesis and fitting.
    init_pars: array {None}
        If provided, converted with `Component.internalise` and passed to
        the fit as its initial parameters.

    Returns
    -------
    res:
        Whatever `cf.fit_comp` returns; its first element is treated as
        the best-fit component.
    """
    py_vers = sys.version[0]
    save_dir = 'temp_data/%s_compfitter_%s/' % (py_vers, run_name)
    data_filename = save_dir + 'synth_data.fits'
    plot_dir = save_dir
    print("---------", save_dir)

    # makedirs (not mkdir) so a missing parent 'temp_data/' is created too
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    log_filename = save_dir + 'log.log'
    logging.basicConfig(level=logging.INFO, filename=log_filename,
                        filemode='w')

    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error,
                           Components=Component)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    print("newPars ------------------------------ \n", init_pars)
    if init_pars is None:
        internal_pars = None
    else:
        internal_pars = Component.internalise(init_pars)

    res = cf.fit_comp(data=synth_data.table,
                      plot_it=True,
                      burnin_steps=burnin_step,
                      store_burnin_chains=True,
                      plot_dir=plot_dir,
                      save_dir=save_dir,
                      trace_orbit_func=trace_orbit_func,
                      optimisation_method='emcee',
                      Component=Component,
                      init_pars=internal_pars)

    comps_filename = save_dir + 'true_and_best_comp.py'
    best_comp = res[0]
    # NOTE(review): components are stored through EllipComponent even when
    # a different `Component` class is passed in - confirm this is intended.
    EllipComponent.store_raw_components(comps_filename,
                                        [true_comp, best_comp])

    star_pars = tabletool.build_data_dict_from_table(synth_data.table)
    plot_results(true_comp, best_fit_comp=best_comp, star_pars=star_pars,
                 plt_dir=save_dir)
    return res
from astropy.table import Table import numpy as np import sys sys.path.insert(0, '..') from chronostar import tabletool orig_table_path = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits' orig_table = Table.read(orig_table_path) res_dir = '../results/beta_Pictoris_with_gaia_small_inv2/6/E/final/' final_memb = np.load(res_dir + 'final_membership.npy') recons_star_pars, table_ixs =\ tabletool.build_data_dict_from_table(orig_table, return_table_ixs=True) # -------------------------------------------------- # -- Insert/replace membership probabilities ------ # -------------------------------------------------- # need to add new column for comp_F # simpler just to remove all membership probability columns and append # to end, this keeps them together without reshuffling of columns existing_colnames = ['comp_' + char for char in 'ABCDE'] + ['comp_background'] print(existing_colnames) for colname in existing_colnames: del orig_table[colname] new_colnames = ['comp_' + char for char in 'ABCDEF'] + ['comp_background']
# Profiling script: synthesise a small data set, run a single component fit
# under cProfile and print the most expensive calls.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='temp_logs/groupfitter.log')
    save_dir = 'temp_data/'
    # NOTE(review): the four savefile names below are not used anywhere
    # within this block
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    # presumably (X, Y, Z, U, V, W, dX, dV, age) -- confirm against SynthData
    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    # NOTE(review): error_frac is not used within this block
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'stat_dumps/groupfitter.stat'
    # The statement is passed as a string, so the names it mentions (`data`,
    # `groupfitter`) are looked up at run time and must stay module-level
    # names. The fit's return value (best_fit, chain, lnprob) is discarded.
    cProfile.run(
        "groupfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='temp_plots/',"
        "save_dir='temp_data/')",
        stat_file,
    )

    # Report the collected profile, ordered by cumulative time; the float
    # argument restricts the printout to a fraction of the entries
    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.1)
) print("Applying tick parameters") for ax in fig.axes: ax.tick_params(direction='in', labelsize='x-large', top=True, right=True) print("... saving") plt.savefig(plot_name) if PLOT_BPMG_REAL: # PLOTTING ITERATION 6E # star_pars_file = '../../data/beta_Pictoris_with_gaia_small_xyzuvw.fits' table_file = '../../data/paper1/beta_Pictoris_corrected_everything.fits' bpmg_table = Table.read(table_file) star_pars, table_ixs = tabletool.build_data_dict_from_table( bpmg_table, return_table_ixs=True) nstars = len(table_ixs[0]) fit_name = 'bpmg_and_nearby' rdir = '../../results/beta_Pictoris_with_gaia_small_inv2/6/E/final/' memb_file = rdir + 'final_membership.npy' comp_file = rdir + 'final_comps.npy' z = np.load(memb_file) comps = SphereComponent.load_raw_components(comp_file) # Assign markers based on BANYAN membership banyan_markers = np.array(nstars * ['.']) banyan_membs = bpmg_table['banyan_assoc'][table_ixs] # Assign markers to each star (via `banyan_markers`) whilst concurrently
mpl.use('Agg') import matplotlib.pyplot as plt import numpy as np import sys sys.path.insert(0, '..') from chronostar.component import SphereComponent from chronostar import tabletool from chronostar import likelihood from chronostar import expectmax component_file = '../results/all_nonbg_scocen_comps.npy' membership_file = '../results/all_scocen_total_membership.npy' joined_table = '../data/scocen/joined_scocen_no_duplicates.fit' star_pars = tabletool.build_data_dict_from_table(joined_table, historical=True) all_comps = SphereComponent.load_raw_components(component_file) init_z = np.load(membership_file) # pop manually determined duplicates if True: all_comps.pop(9) all_comps.pop(6) init_z = init_z[(np.array([0, 1, 2, 3, 4, 5, 7, 8]), )] print(len(all_comps)) print(len(init_z)) init_z.shape = (1, -1) memberships = expectmax.expectation(star_pars, all_comps,
# Profiling script: synthesise a small data set, run a single component fit
# under cProfile and print the most expensive calls. All outputs are written
# relative to the current working directory.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='compfitter.log')
    save_dir = ''
    # NOTE(review): the four savefile names below are not used anywhere
    # within this block
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    # presumably (X, Y, Z, U, V, W, dX, dV, age) -- confirm against SynthData
    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    # NOTE(review): error_frac is not used within this block
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'compfitter.stat'
    # The statement is passed as a string, so the names it mentions (`data`,
    # `compfitter`) are looked up at run time and must stay module-level
    # names. The fit's return value (best_fit, chain, lnprob) is discarded.
    cProfile.run(
        "compfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='',"
        "save_dir='')",
        stat_file,
    )

    # Report the collected profile, ordered by cumulative time; the float
    # argument restricts the printout to a fraction of the entries
    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.3)
config.config['banyan_assoc_name'], pos_margin=config.advanced.get('pos_margin', 30.), vel_margin=config.advanced.get('vel_margin', 5.), scale_margin=config.advanced.get('scale_margin', None), ) elif config.data_bound is not None: bounds = (config.data_bound['lower_bound'], config.data_bound['upper_bound']) else: bounds = None if bounds is not None: log_message('Applying data cuts') star_means = tabletool.build_data_dict_from_table( datafile, main_colnames=config.cart_colnames.get('main_colnames', None), only_means=True, historical=historical, ) data_mask = np.where( np.all(star_means < bounds[1], axis=1) & np.all(star_means > bounds[0], axis=1)) data_table = data_table[data_mask] log_message('Data table has {} rows'.format(len(data_table))) # By the end of this, data will be a astropy table # with cartesian data written in # columns in default way. if config.config['convert_to_cartesian']: log_message('Trying to convert to cartesian') # Performs conversion in place (in memory) on `data_table` if (not 'c_XU' in data_table.colnames
def test_pythonFuncs():
    """
    Cross-check the three overlap implementations against one another.

    Stars are synthesised from a single component with a negligible age.
    The overlap of every star is evaluated with `co1`, `co2` and the
    exponential of `p_lno`, first against the component itself and then
    against one of the stars, and all three results must agree.

    TODO: remove the requirements of file, have data stored in file?
    """
    position_sdev = 2.
    velocity_sdev = 2.
    comp_cov = np.identity(6)
    comp_cov[:3, :3] *= position_sdev**2
    comp_cov[3:, 3:] *= velocity_sdev**2
    true_comp = SphereComponent(attributes={
        'mean': np.zeros(6),
        'covmatrix': comp_cov,
        'age': 1e-10,
    })

    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=100)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    star_covs = star_data['covs']
    star_means = star_data['means']

    def assert_overlaps_agree(ref_cov, ref_mean):
        """Compare co1, co2 and exp(p_lno) for each star against a reference."""
        first, second = [], []
        for star_cov, star_mean in zip(star_covs, star_means):
            first.append(co1(ref_cov, ref_mean, star_cov, star_mean))
            second.append(co2(ref_cov, ref_mean, star_cov, star_mean))
        first = np.array(first)
        second = np.array(second)
        third = np.exp(p_lno(ref_cov, ref_mean, star_covs, star_means))

        assert np.allclose(first, second)
        assert np.allclose(second, third)
        assert np.allclose(first, third)

    # Overlap of every star with the true component
    comp_mean = true_comp.get_mean()
    assert_overlaps_agree(true_comp.get_covmatrix(), comp_mean)

    # Overlap of every star with a neighbouring star, which exercises tiny
    # overlap values: most overlaps go to 0, while the log overlaps retain
    # the information
    neighbour_ix = 15
    assert_overlaps_agree(star_covs[neighbour_ix], star_means[neighbour_ix])
my_synth_data = SynthData(pars=my_free_pars, starcounts=NSTARS, Components=EllipComponent) #~ my_synth_data.generate_all_init_cartesian() my_synth_data.synthesise_everything() # Don't actually need everything # mean_colnames = [el for el in 'XYZUVW'] # mean_colnames = [el+'0' for el in 'xyzuvw'] # Use this for initial star positions mean_colnames = [el + '_now' for el in 'xyzuvw' ] # Use this for current day star positions, # will need to uncomment synthesise_everything() though means = tt.build_data_dict_from_table( my_synth_data.table[:], main_colnames=mean_colnames, only_means=True, ) my_table = my_synth_data.table plt.clf() # plt.plot(means[:,0], means[:,3], '.') plt.plot(my_table['x_now'], my_table['u_now'], '.', c='r') plt.plot(my_table['x0'], my_table['u0'], '.', c='b') my_synth_data.components[0].plot('X', 'U', comp_then=True, comp_now=True, comp_orbit=True) plt.xlabel('X')
# for step_ix in range(burnin_chain.shape[1]): # lims = 6 * [None] stride = 20 nplots = int(nsteps / stride) print('Construction {} plots in total'.format(nplots)) # Some constants dims = [(0, 1), (0, 3), (1, 4), (2, 5)] labels = 'XYZUVW' units = 3 * ['pc'] + 3 * ['km/s'] base_figure_file = 'base_figure.pkl' star_data_file = data_dir + 'synth_for_plot_data.fit' star_data = tabletool.build_data_dict_from_table(star_data_file) # Set up base subplots, plotting everything that is the same across iterative # plots. We will then store this via Pickle to save time base_fig, base_ax = plt.subplots(nrows=2, ncols=2) base_fig.set_size_inches(8, 8) base_fig.set_tight_layout(True) lims = 6 * [None] for ax, (dim1, dim2) in zip(base_ax.flatten(), dims): true_comp.plot(ax=ax, dim1=dim1, dim2=dim2, comp_now=False, comp_then=True, comp_orbit=True,
# origins = np.array(origins.item()) # weights = np.array([origin.nstars for origin in origins]) # for dim1, dim2 in ('xy', 'uv', 'xu', 'yv', 'zw', 'xw'): # plt.clf() # fp.plotPaneWithHists(dim1, dim2, star_pars=star_pars_file, # groups=origins, weights=weights, # group_now=True, with_bg=with_bg, # no_bg_covs=with_bg, # ) # plt.savefig(rdir + 'pre_plot_{}{}.pdf'.format(dim1,dim2)) # Now choose if handling incremental fit or plain fit true_memb = None ncomps = 1 if type(data_file) is str: star_pars = tt.build_data_dict_from_table(data_file) print("nstars: {}".format(star_pars['means'].shape[0])) while os.path.isdir(rdir + '{}/'.format(ncomps)): print("ncomps: {}".format(ncomps)) if ncomps == 1: plotEveryIter(rdir + '{}/'.format(ncomps), star_pars, bg_hists, true_memb=true_memb) else: for i in range(ncomps - 1): print("sub directory {}".format(chr(ord('A') + i))) subrdir = rdir + '{}/{}/'.format(ncomps, chr(ord('A') + i)) if os.path.isdir(subrdir): plotEveryIter(subrdir, star_pars,
def test_2comps_and_background():
    """
    Synthesise a file with negligible error, retrieve initial parameters

    Takes a while... maybe this belongs in integration unit_tests

    Performance of test is a bit tricky to calibrate. Since we are skipping
    any temporal evolution for speed reasons, we model two isotropic
    Gaussians. Now if these Gaussians are too far apart, NaiveFit will
    gravitate to one of the Gaussians during the 1 component fit, and then
    struggle to discover the second Gaussian.

    If the Gaussians are too close, then both will be characterised by the
    1 component fit, and the BIC will decide two Gaussian components are
    overkill.

    I think I've addressed this by having the two groups have large number
    of stars.
    """
    run_name = '2comps_and_background'

    savedir = 'temp_data/{}_naive_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_naive_{}_data.fits'.format(PY_VERS, run_name)
    log_filename = 'temp_data/{}_naive_{}/log.log'.format(PY_VERS, run_name)

    # Configure logging BEFORE the first logging call: a module-level
    # logging.info() on an unconfigured root logger installs a default
    # handler, after which basicConfig() would silently do nothing and the
    # log file would never be written.
    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)
    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    ### INITIALISE SYNTHETIC DATA ###

    # DON'T CHANGE THE AGE! BECAUSE THIS TEST DOESN'T USE ANY ORBIT INTEGRATION!!!
    # Note: if peaks are too far apart, it will be difficult for
    # chronostar to identify the 2nd when moving from a 1-component
    # to a 2-component fit.
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV, age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
        [30, 0, 0, 0, 5, 0, 10., 5, uniform_age],
    ])
    starcounts = [100, 150]
    ncomps = sphere_comp_pars.shape[0]
    nstars = np.sum(starcounts)

    background_density = 1e-9

    # initialise z appropriately
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i in range(ncomps):
        true_memb_probs[start:start + starcounts[i], i] = 1.0
        start += starcounts[i]

    try:
        # Check if the synth data has already been constructed; the result
        # itself is not needed, only whether the table can be read
        tabletool.build_data_dict_from_table(data_filename)
    except Exception:
        # Any failure to load is treated as "not built yet". Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        synth_data = SynthData(
            pars=sphere_comp_pars,
            starcounts=starcounts,
            Components=SphereComponent,
            background_density=background_density,
        )
        synth_data.synthesise_everything()

        tabletool.convert_table_astro2cart(synth_data.table,
                                           write_table=True,
                                           filename=data_filename)

        # insert background densities
        synth_data.table['background_log_overlap'] =\
            len(synth_data.table) * [np.log(background_density)]

        synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    ### SET UP PARAMETER FILE ###
    fit_pars = {
        'results_dir': savedir,
        'data_table': data_filename,
        'trace_orbit_func': 'dummy_trace_orbit_func',
        'return_results': True,
        'par_log_file': savedir + 'fit_pars.log',
        'overwrite_prev_run': True,
        # 'nthreads':18,
        'nthreads': 3,
    }

    ### INITIALISE AND RUN A NAIVE FIT ###
    naivefit = NaiveFit(fit_pars=fit_pars)
    result, score = naivefit.run_fit()

    best_comps = result['comps']
    memb_probs = result['memb_probs']

    # Check membership has ncomps + 1 (bg) columns
    n_fitted_comps = memb_probs.shape[-1] - 1
    assert ncomps == n_fitted_comps

    ### CHECK RESULT ###
    # No guarantee of order, so check if result is permutated
    # also we drop the bg memberships for permutation reasons
    perm = expectmax.get_best_permutation(memb_probs[:nstars, :ncomps],
                                          true_memb_probs)

    memb_probs = memb_probs[:nstars]

    logging.info('Best permutation is: {}'.format(perm))

    n_misclassified_stars = np.sum(
        np.abs(true_memb_probs - np.round(memb_probs[:, perm])))

    # Check fewer than 15% of association stars are misclassified.
    # A plain assert (instead of dropping into pdb on failure) keeps
    # unattended test runs from hanging on an interactive prompt.
    assert n_misclassified_stars / nstars * 100 < 15, \
        '{} of {} association stars misclassified'.format(
            n_misclassified_stars, nstars)

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(), best_comp.get_mean(), atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.5)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.5)
        assert np.allclose(origin.get_age(), best_comp.get_age(), atol=1.)
col_name=bg_lnol_colname) if config.config['overwrite_datafile']: data_table.write(datafile, overwrite=True) elif config.config['data_savefile'] != '': data_table.write(config.config['data_savefile'], overwrite=True) # LOAD DATA DICT FROM ORIGINAL DATA # LOAD DATA DICT FROM MANIPULATED DATA # THEN CALCULATE MEMB PROBS BOTH TIMES # Convert data table into numpy arrays of mean and covariance matrices log_message('Building data dictionary') data_dict = tabletool.build_data_dict_from_table( data_table, get_background_overlaps=config.config['include_background_distribution'], historical=historical, ) # Save data_dict of the original data # np.save('data_dict_original.npy', data_dict) # MEMB PROBS for data WITH RV available data_dict_original=np.load('data_dict_original.npy') memb_probs_with_rv = expectmax.expectation(data=data_dict_original, comps=bp_comp_with_rv) print(memb_probs_with_rv) # Some values are nan. Mask out all stars that have any of values in their covariance matrices # equal to nan.
def fit_comp(data, memb_probs=None, init_pos=None, init_pars=None,
             burnin_steps=1000, Component=SphereComponent, plot_it=False,
             pool=None, convergence_tol=0.25, plot_dir='', save_dir='',
             sampling_steps=None, max_iter=None, trace_orbit_func=None):
    """Fits a single 6D gaussian to a weighted set (by membership
    probabilities) of stellar phase-space positions.

    Stores the final sampling chain and lnprob in `save_dir`, but also
    returns the best fit (walker step corresponding to maximum lnprob),
    sampling chain and lnprob.

    If neither init_pos nor init_pars are provided, then the weighted
    mean and covariance of the provided data set are calculated, then
    used to generate a sample parameter list (using Component). Walkers
    are then initialised around this parameter list.

    Parameters
    ----------
    data: dict -or- astropy.table.Table -or- path to astropy.table.Table
        if dict, should have following structure:
            'means': [nstars,6] float array_like
                the central estimates of star phase-space properties
            'covs': [nstars,6,6] float array_like
                the phase-space covariance matrices of stars
            'bg_lnols': [nstars] float array_like (opt.)
                the log overlaps of stars with whatever pdf describes
                the background distribution of stars.
        if table, see tabletool.build_data_dict_from_table to see
        table requirements.
    memb_probs: [nstars] float array_like
        Membership probability (from 0.0 to 1.0) for each star to the
        component being fitted. If left as None, every star is given
        a membership probability of 1.
    init_pos: [nwalkers, npars] array
        The precise locations at which to initiate the walkers. Generally
        the saved locations from a previous, yet similar run.
    init_pars: [npars] array
        the position in parameter space about which walkers should be
        initialised. The standard deviation about each parameter is
        hardcoded as INIT_SDEV
    burnin_steps: int {1000}
        Number of steps per each burnin iteration
    Component: Implementation of AbstractComponent {Sphere Component}
        The class used to convert raw parametrisation of a model to
        actual model attributes.
    plot_it: bool {False}
        Whether to generate plots of the lnprob in 'plot_dir'
    pool: MPIPool object {None}
        pool of threads to execute walker steps concurrently
    convergence_tol: float {0.25}
        How many standard deviations an lnprob chain is allowed to vary
        from its mean over the course of a burnin stage and still be
        considered "converged". Default value allows the median of the
        final 20 steps to differ by 0.25 of its standard deviations from
        the median of the first 20 steps.
    plot_dir: str {''}
        The directory in which to store plots. A trailing '/' is added
        if missing.
    save_dir: str {''}
        The directory in which to store results and/or byproducts of fit.
        A trailing '/' is added if missing.
    sampling_steps: int {None}
        If this is set, after convergence, a sampling stage will be
        entered. Only do this if a very fine map of the parameter
        distributions is required, since the burnin stage already
        characterises a converged solution for "burnin_steps".
    max_iter: int {None}
        The maximum iterations permitted to run. (Useful for expectation
        maximisation implementation triggering an abandonment of rubbish
        components). If left as None, then run will continue until
        convergence.
    trace_orbit_func: function {None}
        A function to trace cartesian orbits through the Galactic
        potential. If left as None, will use
        traceorbit.trace_cartesian_orbit (base signature of any alternate
        function on this ones)

    Returns
    -------
    best_component
        The component model which yielded the highest posterior
        probability
    chain
        [nwalkers, nsteps, npars] array of all samples
    probability
        [nwalkers, nsteps] array of probabilities for each sample
    """
    # TIDYING INPUT
    if not isinstance(data, dict):
        data = tabletool.build_data_dict_from_table(data)
    if memb_probs is None:
        memb_probs = np.ones(len(data['means']))

    # Ensure both directories have a single trailing '/', since file names
    # are appended by plain string concatenation below. Without this, a
    # `save_dir` of 'results' would write 'resultsfinal_chain.npy'.
    if plot_dir != '':
        plot_dir = plot_dir.rstrip('/') + '/'
    if save_dir != '':
        save_dir = save_dir.rstrip('/') + '/'
    # makedirs so that nested, not yet existing plot directories work too
    if plot_it and plot_dir != '' and not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    npars = len(Component.PARAMETER_FORMAT)
    nwalkers = 2*npars

    # Initialise the emcee sampler
    if init_pos is None:
        init_pos = get_init_emcee_pos(data=data, memb_probs=memb_probs,
                                      init_pars=init_pars,
                                      Component=Component,
                                      nwalkers=nwalkers)
    sampler = emcee.EnsembleSampler(
        nwalkers, npars, lnprob_func,
        args=[data, memb_probs, trace_orbit_func],
        pool=pool,
    )

    # PERFORM BURN IN
    state = None
    converged = False
    cnt = 0
    logging.info("Beginning burnin loop")
    # Accumulates the lnprob of every burnin segment side by side
    burnin_lnprob_res = np.zeros((nwalkers, 0))

    # burn in until converged or the (optional) max_iter is reached
    while (not converged) and cnt != max_iter:
        logging.info("Burning in cnt: {}".format(cnt))
        sampler.reset()
        init_pos, lnprob, state = sampler.run_mcmc(init_pos, burnin_steps,
                                                   state)
        converged = burnin_convergence(sampler.lnprobability,
                                       tol=convergence_tol)
        logging.info("Burnin status: {}".format(converged))

        if plot_it and plt_avail:
            plt.clf()
            plt.plot(sampler.lnprobability.T)
            plt.savefig(plot_dir+"burnin_lnprobT{:02}.png".format(cnt))

        # If about to burnin again, help out the struggling walkers (those
        # below the 33rd percentile in lnprob) by shifting them to the
        # best walker's position
        if not converged:
            best_ix = np.argmax(lnprob)
            poor_walkers = lnprob < np.percentile(lnprob, 33)
            init_pos[poor_walkers] = init_pos[best_ix]

        burnin_lnprob_res = np.hstack((
            burnin_lnprob_res, sampler.lnprobability
        ))
        cnt += 1

    logging.info("Burnt in, with convergence: {}".format(converged))
    if plot_it and plt_avail:
        plt.clf()
        plt.plot(burnin_lnprob_res.T)
        plt.savefig(plot_dir+"burnin_lnprobT.png")

    # SAMPLING STAGE
    if not sampling_steps:
        logging.info("Taking final burnin segment as sampling stage")
    else:
        logging.info("Entering sampling stage for {} steps".format(
            sampling_steps
        ))
        sampler.reset()
        # Don't need to keep track of any outputs
        sampler.run_mcmc(init_pos, sampling_steps, state)
        logging.info("Sampling done")

    # save the chain for later inspection
    np.save(save_dir+"final_chain.npy", sampler.chain)
    np.save(save_dir+"final_lnprob.npy", sampler.lnprobability)

    if plot_it and plt_avail:
        logging.info("Plotting final lnprob")
        plt.clf()
        plt.plot(sampler.lnprobability.T)
        plt.savefig(plot_dir+"lnprobT.png")
        logging.info("Plotting done")

    # Identify the best component: argmax works on the flattened lnprob
    # array, hence the lookup in the flattened chain
    final_best_ix = np.argmax(sampler.lnprobability)
    best_sample = sampler.flatchain[final_best_ix]
    best_component = Component(emcee_pars=best_sample)

    # Determining the median and span of each parameter
    med_and_span = calc_med_and_span(sampler.chain)
    logging.info("Results:\n{}".format(med_and_span))

    return best_component, sampler.chain, sampler.lnprobability
dim1=dim1, dim2=dim2, comp_now=True, comp_then=True, color='blue') best_comp.plot(ax=ax, dim1=dim1, dim2=dim2, comp_now=True, comp_then=True, color='red') # Fancy pancy animation type stuff Writer = animation.writers['ffmpeg'] writer = Writer(fps=20, metadata=dict(artist='Me'), bitrate=1800) data = tabletool.build_data_dict_from_table(datafile) figsize = 10 fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(figsize, figsize)) fig.set_tight_layout(True) ani = matplotlib.animation.FuncAnimation(fig, animate, frames=N_MAX_ITERS, repeat=True) save_filename = 'convergence_movie.mp4' ani.save(save_filename, writer=writer)