Example #1
def test_convertAstrTableToCart():
    """
    Using a historical table, confirm that the cartesian conversion yields
    the same results, i.e. that the cartesian means and covariance matrices
    are identical.

    Gets historical cartesian data by building data from the table's
    cartesian columns.

    Gets updated cartesian data by building astrometric data from the table's
    columns, converting to cartesian (stored back into the table), then
    building data from the newly inserted cartesian columns.
    """
    hist_filename = '../data/paper1/historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    hist_table = Table.read(hist_filename)

    curr_filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    curr_table = Table.read(curr_filename)
    # Drop stars that have gone through any binary checking
    hist_table = Table(hist_table[100:300])
    curr_table = Table(curr_table[100:300])

    # load in original means and covs
    orig_cart_data =\
        tabletool.build_data_dict_from_table(table=hist_table, cartesian=True,
                                             historical=True)

    tabletool.convert_table_astro2cart(table=curr_table, write_table=False)

    cart_data = tabletool.build_data_dict_from_table(curr_table, cartesian=True)

    assert np.allclose(orig_cart_data['means'], cart_data['means'])
    assert np.allclose(hist_table['dX'], curr_table['X_error'])
    assert np.allclose(orig_cart_data['covs'], cart_data['covs'])
Example #2
def test_convertAstrTableToCart():
    """
    Using a historical table, confirm that the cartesian conversion yields
    the same results, i.e. that the cartesian means and covariance matrices
    are identical.

    Gets historical cartesian data by building data from the table's
    cartesian columns.

    Gets updated cartesian data by building astrometric data from the table's
    columns, converting to cartesian (stored back into the table), then
    building data from the newly inserted cartesian columns.
    """
    # hist_filename = '../data/paper1/historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    hist_table = Table.read(HIST_FILE_NAME)

    # curr_filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    curr_table = Table.read(CURR_FILE_NAME)
    # Drop stars that have gone through any binary checking
    # hist_table = Table(hist_table[100:300])
    # curr_table = Table(curr_table[100:300])

    # load in original means and covs
    orig_cart_data =\
        tabletool.build_data_dict_from_table(table=hist_table, cartesian=True,
                                             historical=True)

    tabletool.convert_table_astro2cart(table=curr_table, write_table=False)

    cart_data = tabletool.build_data_dict_from_table(curr_table,
                                                     cartesian=True)

    assert np.allclose(orig_cart_data['means'], cart_data['means'])
    assert np.allclose(hist_table['dX'], curr_table['X_error'])
    assert np.allclose(orig_cart_data['covs'], cart_data['covs'])
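
Note: every example in this listing consumes the dict returned by
build_data_dict_from_table. Per the docstring reproduced in Example #41, it
carries 'means' ([nstars, 6]) and 'covs' ([nstars, 6, 6]). A minimal sketch
with placeholder values:

import numpy as np

# Hypothetical stand-in for the dict build_data_dict_from_table returns;
# shapes follow the docstring in Example #41.
data = {
    'means': np.zeros((3, 6)),               # central phase-space estimates
    'covs': np.array(3 * [np.identity(6)]),  # per-star covariance matrices
}
assert data['means'].shape == (3, 6)
assert data['covs'].shape == (3, 6, 6)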
Example #3
def plot_comps_and_stars(
    dim1,
    dim2,
    star_pars,
    comps,
    Component=SphereComponent,
    star_orbits=False,
    star_age=0.,
    star_then=False,
    comp_kwargs={},
):
    if type(star_pars) is str:
        star_pars = tabletool.build_data_dict_from_table(star_pars)
    if type(comps) is str:
        comps = Component.load_raw_components(comps)
Example #4
def test_expectation():
    """
    Super basic: generates stars from two synthetic associations and checks
    that membership allocation is correct.
    """

    age = 1e-5
    ass_pars1 = np.array([0, 0, 0, 0, 0, 0, 5., 2., age])
    comp1 = SphereComponent(ass_pars1)
    ass_pars2 = np.array([100., 0, 0, 20, 0, 0, 5., 2., age])
    comp2 = SphereComponent(ass_pars2)
    starcounts = [100,100]
    synth_data = SynthData(pars=[ass_pars1, ass_pars2],
                           starcounts=starcounts)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    true_memb_probs = np.zeros((np.sum(starcounts), 2))
    true_memb_probs[:starcounts[0], 0] = 1.
    true_memb_probs[starcounts[0]:, 1] = 1.

    # star_means, star_covs = tabletool.buildDataFromTable(synth_data.astr_table)
    # all_lnols = em.getAllLnOverlaps(
    #         synth_data.astr_table, [comp1, comp2]
    # )

    fitted_memb_probs = em.expectation(
            tabletool.build_data_dict_from_table(synth_data.table),
            [comp1, comp2]
    )

    assert np.allclose(true_memb_probs, fitted_memb_probs, atol=1e-10)
Example #5
def get_region(assoc_name, gagne_reference_data=None):
    if gagne_reference_data is None:
        gagne_reference_data =\
            '../data/gagne_bonafide_full_kinematics_with_lit_and_best_radial_velocity' \
            '_comb_binars_with_banyan_radec.fits'

    gagne_table = tabletool.read(gagne_reference_data)

    if assoc_name not in set(gagne_table['Moving group']):
        raise UserWarning(
            'Association name must be one of:\n{}\nReceived: "{}"'.format(
                list(set(gagne_table['Moving group'])), assoc_name
            )
        )

    # Dummy comment

    # Extract all stars
    subtable = gagne_table[np.where(gagne_table['Moving group'] == assoc_name)]

    star_means = tabletool.build_data_dict_from_table(subtable, only_means=True)

    data_upper_bound = np.nanmax(star_means, axis=0)
    data_lower_bound = np.nanmin(star_means, axis=0)

    data_span = data_upper_bound - data_lower_bound
    data_centre = 0.5 * (data_upper_bound + data_lower_bound)

    # Set up boundaries of box that span double the association
    box_lower_bound = data_centre - data_span
    box_upper_bound = data_centre + data_span
    return box_lower_bound, box_upper_bound
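
Note: since the returned bounds are centre -/+ span, the box spans double the
association in every dimension. A toy check of that arithmetic, independent
of chronostar:

import numpy as np

star_means = np.array([[0., 10.],
                       [4., 30.]])            # two stars, two dimensions
upper = np.nanmax(star_means, axis=0)         # [ 4. 30.]
lower = np.nanmin(star_means, axis=0)         # [ 0. 10.]
span = upper - lower                          # [ 4. 20.]
centre = 0.5 * (upper + lower)                # [ 2. 20.]
assert np.allclose(centre - span, [-2., 0.])  # box lower bound
assert np.allclose(centre + span, [6., 40.])  # box upper bound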
Example #6
def test_background_component():
    """Create artificial association composed of two stars at opposite vertices
    of unit 6D rectangle. Then base background density distribution on that."""
    background_density = 100

    # Since the background doubles the span of the data, setting the means as
    # follows makes the background extend from 0 to 1 in each dimension,
    # which greatly simplifies reasoning about densities and starcounts.
    upper_mean = np.zeros(6) + 0.75
    lower_mean = np.zeros(6) + 0.25
    narrow_dx = 1e-10
    narrow_dv = 1e-10
    tiny_age = 1e-10
    upper_pars = np.hstack((upper_mean, narrow_dx, narrow_dv, tiny_age))
    lower_pars = np.hstack((lower_mean, narrow_dx, narrow_dv, tiny_age))

    starcounts = [1,1]

    synth_data = SynthData(pars=[upper_pars, lower_pars],
                           starcounts=starcounts,
                           background_density=background_density)
    synth_data.generate_all_init_cartesian()

    means = tabletool.build_data_dict_from_table(
            synth_data.table[2:],
            main_colnames=[el+'0' for el in 'xyzuvw'],
            only_means=True,
    )
    assert np.allclose(0.5, np.mean(means, axis=0), atol=0.1)
    assert np.allclose(1.0, np.max(means, axis=0), atol=0.1)
    assert np.allclose(0.0, np.min(means, axis=0), atol=0.1)
    assert len(synth_data.table) == background_density + 2
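
Note: the final assert holds because the background box here has unit 6D
volume, so, assuming the background count is density times box volume (as the
test implies), the expected background count equals background_density:

import numpy as np

background_density = 100
box_lower = np.zeros(6)                      # background spans 0..1 per dim
box_upper = np.ones(6)
box_volume = np.prod(box_upper - box_lower)  # 1.0 for the unit 6D box
expected_bg_count = background_density * box_volume
assert expected_bg_count == 100              # matches len(table) == density + 2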
Example #7
def test_multiple_synth_components():
    """Check initialising with multiple components works"""
    age = 1e-10
    dx = 5.
    dv = 2.
    ass_pars1 = np.array([10, 20, 30, 40, 50, 60, dx, dv, age])
    comp1 = SphereComponent(ass_pars1)
    ass_pars2 = np.array([0., 0., 0, 0, 0, 0, dx, dv, age])
    comp2 = SphereComponent(ass_pars2)
    starcounts = [100, 100]
    try:
        synth_data = SynthData(pars=[ass_pars1, ass_pars2],
                               starcounts=starcounts[0],
                               Components=SphereComponent)
        raise UserWarning('AssertionError should have been thrown by synthdata')
    except AssertionError:
        pass

    synth_data = SynthData(pars=[ass_pars1, ass_pars2],
                           starcounts=starcounts,
                           Components=SphereComponent)
    synth_data.synthesise_everything()

    assert len(synth_data.table) == np.sum(starcounts)
    means = tabletool.build_data_dict_from_table(
            synth_data.table,
            main_colnames=[el+'0' for el in 'xyzuvw'],
            only_means=True
    )
    assert np.allclose(comp2.get_mean(), means[starcounts[0]:].mean(axis=0),
                       atol=2.)
    assert np.allclose(comp1.get_mean(), means[:starcounts[0]].mean(axis=0),
                       atol=2.)
Example #8
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx**2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(
        attributes={
            'mean': true_comp_mean,
            'covmatrix': true_comp_covmatrix,
            'age': true_comp_age,
        })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)

    p_lnos = p_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'])
    c_lnos = c_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'], nstars)

    assert np.allclose(p_lnos, c_lnos)
    assert np.isfinite(p_lnos).all()
    assert np.isfinite(c_lnos).all()
Example #9
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3,:3] *= true_comp_dx**2
    true_comp_covmatrix[3:,3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(attributes={
        'mean':true_comp_mean,
        'covmatrix':true_comp_covmatrix,
        'age':true_comp_age,
    })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)

    p_lnos = p_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'])
    c_lnos = c_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'], nstars)

    assert np.allclose(p_lnos, c_lnos)
    assert np.isfinite(p_lnos).all()
    assert np.isfinite(c_lnos).all()
Example #10
def test_pythonFuncs():
    """
    TODO: remove the requirements of file, have data stored in file?
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx ** 2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv ** 2
    true_comp_age = 1e-10
    true_comp = SphereComponent(attributes={
        'mean': true_comp_mean,
        'covmatrix': true_comp_covmatrix,
        'age': true_comp_age,
    })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    # star_data['means'] = star_data['means']
    # star_data['covs'] = star_data['covs']
    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()

    # Test overlap with true component
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'], star_data['means'])):
        co1s.append(co1(group_cov, group_mean, scov, smn))
        co2s.append(co2(group_cov, group_mean, scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    co3s = np.exp(p_lno(group_cov, group_mean,
                        star_data['covs'], star_data['means']))
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'], star_data['means'])):
        co1s.append(co1(star_data['covs'][15], star_data['means'][15],
                        scov, smn))
        co2s.append(co2(star_data['covs'][15], star_data['means'][15],
                        scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    lnos = p_lno(star_data['covs'][15], star_data['means'][15],
                 star_data['covs'], star_data['means'])
    co3s = np.exp(lnos)
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)
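
Note: the quantity co1, co2 and exp(p_lno) should agree on is presumably the
Gaussian overlap integral, which has a standard closed form: the product
integral of N(mu1, S1) and N(mu2, S2) is N(mu1 | mu2, S1 + S2). A sketch of
that identity (the identity itself is standard; that chronostar normalises
the same way is an assumption):

import numpy as np
from scipy.stats import multivariate_normal

def gaussian_overlap(mu1, cov1, mu2, cov2):
    # integral of N(x | mu1, cov1) * N(x | mu2, cov2) dx
    # equals N(mu1 | mu2, cov1 + cov2)
    return multivariate_normal.pdf(mu1, mean=mu2, cov=cov1 + cov2)

# e.g. two identical unit-covariance Gaussians in 6D:
mu, cov = np.zeros(6), np.identity(6)
print(gaussian_overlap(mu, cov, mu, cov))    # (4*pi)**-3, roughly 5.0e-4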
Example #11
 def approxCurrentDayDistribution(self):
     means = tabletool.build_data_dict_from_table(self.data,
                                                  cartesian=True,
                                                  only_means=True)
     mean_of_means = np.average(means,
                                axis=0,
                                weights=self.membership_probs)
     cov_of_means = np.cov(means.T, ddof=0., aweights=self.membership_probs)
     return mean_of_means, cov_of_means
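
Note: the membership-weighted moments behave as expected; a toy check of
np.average/np.cov with weights (values hypothetical):

import numpy as np

means = np.array([[0., 0.], [2., 2.], [10., 10.]])
weights = np.array([1., 1., 0.])             # third star carries no weight
mean_of_means = np.average(means, axis=0, weights=weights)
assert np.allclose(mean_of_means, [1., 1.])
cov_of_means = np.cov(means.T, ddof=0, aweights=weights)
assert np.allclose(cov_of_means, [[1., 1.], [1., 1.]])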
Example #12
def test_get_lnoverlaps():
    """
    Confirms that star-component overlaps get smaller as stars get further
    away.

    First generates a component `sphere_comp`. Then generates three stars.
    The first one is identical to `sphere_comp` in mean and covmatrix.
    The other two share the same covmatrix yet are separated in X.
    We check that the overlap integral is smaller for the more separated
    stars.
    """
    dim = 6
    mean = np.zeros(dim)
    covmatrix = np.identity(dim)
    age = 1e-10
    sphere_comp = SphereComponent(attributes={
        'mean': mean,
        'covmatrix': covmatrix,
        'age': age,
    })

    dx_offsets = [0., 1., 10.]

    star_comps = []
    for dx_offset in dx_offsets:
        star = SphereComponent(attributes={
            'mean': (sphere_comp.get_mean()
                     + np.array([dx_offset, 0., 0., 0., 0., 0.])),
            'covmatrix': sphere_comp.get_covmatrix(),
            'age': sphere_comp.get_age(),
        })
        star_comps.append(star)

    nstars = len(star_comps)
    dummy_table = Table(data=np.arange(nstars).reshape(nstars, 1),
                        names=['name'])
    tabletool.append_cart_cols_to_table(dummy_table)

    for star_comp, row in zip(star_comps, dummy_table):
        tabletool.insert_data_into_row(
            row,
            star_comp.get_mean(),
            star_comp.get_covmatrix(),
            cartesian=True,
        )
    dummy_data = tabletool.build_data_dict_from_table(dummy_table)
    ln_overlaps = likelihood.get_lnoverlaps(sphere_comp, data=dummy_data)

    # Checks that ln_overlaps is descending
    assert np.allclose(ln_overlaps, sorted(ln_overlaps)[::-1])
Example #13
def test_lnprob_func():
    """
    Generates two components. Generates a synthetic data set based on the
    first component. Confirms that the lnprob is larger for the first
    component than the second.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }
    comps = {}
    data = {}

    for comp_name in comp_means.keys():
        comp = SphereComponent(attributes={
            'mean':comp_means[comp_name],
            'covmatrix':comp_covmatrix,
            'age':tiny_age
        })

        synth_data = SynthData(pars=[comp.get_pars()], starcounts=star_count,
                                measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(synth_data.table)
        comps[comp_name] = comp

    lnprob_comp1_data1 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp2_data1 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp1_data2 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp2'])
    lnprob_comp2_data2 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp2'])
    
    print(lnprob_comp1_data1)
    print(lnprob_comp2_data1)
    print(lnprob_comp1_data2)
    print(lnprob_comp2_data2)
    
    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 20%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=2e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=2e-1)
Example #14
def test_get_lnoverlaps():
    """
    Confirms that star-component overlaps get smaller as stars get further
    away.

    First generates a component `sphere_comp`. Then generates three stars.
    The first one is identical to `sphere_comp` in mean and covmatrix.
    The other two share the same covmatrix yet are separated in X.
    We check that the overlap integral is smaller for the more separated
    stars.
    """
    dim = 6
    mean = np.zeros(dim)
    covmatrix = np.identity(dim)
    age = 1e-10
    sphere_comp = SphereComponent(attributes={
        'mean':mean,
        'covmatrix':covmatrix,
        'age':age,
    })

    dx_offsets = [0., 1., 10.]

    star_comps = []
    for dx_offset in dx_offsets:
        star = SphereComponent(attributes={
            'mean':sphere_comp.get_mean()+np.array([dx_offset,0.,0.,0.,0.,0.]),
            'covmatrix':sphere_comp.get_covmatrix(),
            'age':sphere_comp.get_age(),
        })
        star_comps.append(star)

    nstars = len(star_comps)
    dummy_table = Table(data=np.arange(nstars).reshape(nstars,1),
                        names=['name'])
    tabletool.append_cart_cols_to_table(dummy_table)

    for star_comp, row in zip(star_comps, dummy_table):
        tabletool.insert_data_into_row(row,
                                       star_comp.get_mean(),
                                       star_comp.get_covmatrix(),
                                       cartesian=True,
                                       )
    dummy_data = tabletool.build_data_dict_from_table(dummy_table)
    ln_overlaps = likelihood.get_lnoverlaps(sphere_comp, data=dummy_data)

    # Checks that ln_overlaps is descending
    assert np.allclose(ln_overlaps, sorted(ln_overlaps)[::-1])
Example #15
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices matches
    previous implementation
    """
    orig_star_pars = loadDictFromTable(HIST_FILE_NAME)
    main_colnames, error_colnames, corr_colnames =\
        tabletool.get_historical_cart_colnames()
    data = tabletool.build_data_dict_from_table(
        orig_star_pars['table'][orig_star_pars['indices']],
        main_colnames=main_colnames,
        error_colnames=error_colnames,
        corr_colnames=corr_colnames)

    assert np.allclose(orig_star_pars['xyzuvw'], data['means'])
    assert np.allclose(orig_star_pars['xyzuvw_cov'], data['covs'])
Example #16
def test_lnprob_func():
    """
    Generates two components. Generates a synthetic data set based on the
    first component. Confirms that the lnprob is larger for the first
    component than the second.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }
    comps = {}
    data = {}

    for comp_name in comp_means.keys():
        comp = SphereComponent(attributes={
            'mean':comp_means[comp_name],
            'covmatrix':comp_covmatrix,
            'age':tiny_age
        })

        synth_data = SynthData(pars=[comp.get_pars()], starcounts=star_count,
                                measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(synth_data.table)
        comps[comp_name] = comp

    lnprob_comp1_data1 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp2_data1 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp1_data2 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp2'])
    lnprob_comp2_data2 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp2'])
    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 10%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=1e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=1e-1)
Example #17
def getZfromOrigins(origins, star_pars):
    if type(origins) is str:
        origins = SphereComponent.load_components(origins)
    if type(star_pars) is str:
        star_pars = tt.build_data_dict_from_table(star_pars)
    nstars = star_pars['means'].shape[0]
    ngroups = len(origins)
    nassoc_stars = np.sum([o.nstars for o in origins])
    using_bg = nstars != nassoc_stars
    z = np.zeros((nstars, ngroups + using_bg))
    stars_so_far = 0
    # set association members' memberships to 1
    for i, o in enumerate(origins):
        z[stars_so_far:stars_so_far + o.nstars, i] = 1.
        stars_so_far += o.nstars
    # set remaining stars as members of background
    if using_bg:
        z[stars_so_far:, -1] = 1.
    return z
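
A minimal usage sketch, with a hypothetical stand-in exposing only the nstars
attribute that getZfromOrigins reads:

import numpy as np

class FakeOrigin:
    # hypothetical stand-in for a chronostar origin component
    def __init__(self, nstars):
        self.nstars = nstars

origins = [FakeOrigin(3), FakeOrigin(2)]
star_pars = {'means': np.zeros((7, 6))}  # 7 stars: 5 members + 2 background
z = getZfromOrigins(origins, star_pars)
assert z.shape == (7, 3)                 # two components + background column
assert np.all(z.sum(axis=1) == 1.)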
Example #18
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices matches
    previous implementation
    """
    filename_historical = '../data/paper1/' \
                          'historical_beta_Pictoris_with_gaia_small_everything_final.fits'

    orig_star_pars = loadDictFromTable(filename_historical)
    main_colnames, error_colnames, corr_colnames =\
        tabletool.get_historical_cart_colnames()
    data = tabletool.build_data_dict_from_table(
        orig_star_pars['table'][orig_star_pars['indices']],
        main_colnames=main_colnames,
        error_colnames=error_colnames,
        corr_colnames=corr_colnames)

    assert np.allclose(orig_star_pars['xyzuvw'], data['means'])
    assert np.allclose(orig_star_pars['xyzuvw_cov'], data['covs'])
Example #19
def test_convertTableXYZUVWToArray():
    """
    Check that generating cartesian means and covariance matrices matches
    previous implementation
    """
    filename_historical = '../data/paper1/' \
                          'historical_beta_Pictoris_with_gaia_small_everything_final.fits'

    orig_star_pars = loadDictFromTable(filename_historical)
    main_colnames, error_colnames, corr_colnames =\
        tabletool.get_historical_cart_colnames()
    data = tabletool.build_data_dict_from_table(
            orig_star_pars['table'][orig_star_pars['indices']],
            main_colnames=main_colnames,
            error_colnames=error_colnames,
            corr_colnames=corr_colnames
    )

    assert np.allclose(orig_star_pars['xyzuvw'], data['means'])
    assert np.allclose(orig_star_pars['xyzuvw_cov'], data['covs'])
Example #20
def test_build_data_from_incomplete_table():
    """
    Sometimes rows will be missing data, e.g. from when binaries
    have been merged. build_data_dict_from_table should detect the
    presence of nans and skip those rows.
    """
    # build a dummy table of data
    NSTARS = 10
    NDIM = 6
    missing_row_ix = (np.array([0, 3, 4]), )
    means = np.random.rand(NSTARS, NDIM)
    covs = np.array(NSTARS * [np.eye(NDIM, NDIM)])
    nan_mask = np.array(NSTARS * [False])

    # check bad data are within index range
    assert np.all(missing_row_ix[0] < NSTARS)
    nan_mask[missing_row_ix] = True
    covs[nan_mask] = np.nan

    names = np.arange(NSTARS)

    dummy_table = Table()
    dummy_table['names'] = names

    tabletool.append_cart_cols_to_table(dummy_table)

    for row, mean, cov in zip(dummy_table, means, covs):
        tabletool.insert_data_into_row(row, mean, cov)

    star_pars = tabletool.build_data_dict_from_table(dummy_table)

    assert not np.any(np.isnan(star_pars['means']))
    assert not np.any(np.isnan(star_pars['covs']))

    # check the correct number of rows have been returned
    assert len(star_pars['means']) == np.sum(np.logical_not(nan_mask))
    assert len(star_pars['covs']) == np.sum(np.logical_not(nan_mask))
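
Note: the row-skipping behaviour being tested is equivalent to a plain
finite-row mask; a sketch using only numpy:

import numpy as np

covs = np.array(5 * [np.eye(6)])
covs[[0, 3]] = np.nan                    # corrupt two rows
finite_mask = np.all(np.isfinite(covs.reshape(len(covs), -1)), axis=1)
clean_covs = covs[finite_mask]
assert len(clean_covs) == 3
assert not np.any(np.isnan(clean_covs))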
Example #21
 def approxCurrentDayDistribution(self):
     means = tabletool.build_data_dict_from_table(self.data, cartesian=True,
                                                  only_means=True)
     mean_of_means = np.average(means, axis=0, weights=self.membership_probs)
     cov_of_means = np.cov(means.T, ddof=0., aweights=self.membership_probs)
     return mean_of_means, cov_of_means
Example #22
# This table is masked. Unmask:
data_table = data_table.filled()

print('DATA READ', len(data_table))
historical = 'c_XU' in data_table.colnames

############################################################################
############ COMPONENT OVERLAPS ############################################
############################################################################

print('Create data dict')
# Create data dict
data_dict = tabletool.build_data_dict_from_table(
    data_table,
    get_background_overlaps=True,
    historical=historical,
)

# Create components
comps = SphereComponent.load_raw_components(comps_filename)

# COMPONENT OVERLAPS
overlaps = expectmax.get_all_lnoverlaps(data_dict, comps)
print('overlaps.shape', overlaps.shape, len(comps))

# MEMBERSHIP PROBABILITIES
membership_probabilities = np.array(
    [expectmax.calc_membership_probs(ol) for ol in overlaps])

# Create a table
Example #23
def get_region(assoc_name,
               pos_margin=30.,
               vel_margin=5.,
               scale_margin=None,
               gagne_reference_data=None):
    """
    Get a 6D box surrounding a known association with members from BANYAN

    Parameters
    ----------
    assoc_name: str
        Name of the association as listed in BANYAN table. One of:
        {'118 Tau', '32 Orionis', 'AB Doradus', 'Carina', 'Carina-Near',
        'Columba', 'Coma Ber', 'Corona Australis', 'Hyades', 'IC 2391',
        'IC 2602', 'Lower Centaurus-Crux', 'Octans', 'Platais 8',
        'Pleiades', 'TW Hya', 'Taurus', 'Tucana-Horologium',
        'Upper Centaurus Lupus', 'Upper CrA', 'Upper Scorpius',
        'Ursa Major', 'beta Pictoris', 'chi 1 For (Alessi 13)',
        'epsilon Cha', 'eta Cha', 'rho Ophiuci'}

    pos_margin: float {30.}
        Margin in position space around known members from which new candidate
        members are included
    vel_margin: float {5.}
        Margin in velocity space around known members from which new candidate
        members are included
    gagne_reference_data: str
        filename to BANYAN table

    Returns
    -------
    box_lower_bounds: [6] float array
        The lower bounds of the 6D box [X,Y,Z,U,V,W]
    box_upper_bounds: [6] float array
        The upper bounds of the 6D box [X,Y,Z,U,V,W]
    """

    if gagne_reference_data is None:
        gagne_reference_data =\
            '../data/gagne_bonafide_full_kinematics_with_lit_and_best_radial_velocity' \
            '_comb_binars_with_banyan_radec.fits'

    gagne_table = tabletool.read(gagne_reference_data)

    if assoc_name not in set(gagne_table['Moving group']):
        raise UserWarning(
            'Association name must be one of:\n{}\nReceived: "{}"'.format(
                list(set(gagne_table['Moving group'])), assoc_name))

    # Extract all stars
    subtable = gagne_table[np.where(gagne_table['Moving group'] == assoc_name)]
    logging.info('Initial membership list has {} members'.format(
        len(subtable)))

    star_means = tabletool.build_data_dict_from_table(subtable,
                                                      only_means=True)

    data_upper_bound = np.nanmax(star_means, axis=0)
    data_lower_bound = np.nanmin(star_means, axis=0)
    logging.info('Stars span from {} to {}'.format(np.round(data_lower_bound),
                                                   np.round(data_upper_bound)))

    # First, try to scale box margins based on the data span;
    # scale_margin of 1 would double total span (1 + 1)
    if scale_margin is not None:
        data_span = data_upper_bound - data_lower_bound
        box_margin = 0.5 * scale_margin * data_span

        # Set up boundaries of box that span double the association
        box_lower_bound = data_lower_bound - box_margin
        box_upper_bound = data_upper_bound + box_margin

    # Set margin based on provided (or default) constant amounts
    else:
        data_margin = np.array(3 * [pos_margin] + 3 * [vel_margin])
        box_lower_bound = data_lower_bound - data_margin
        box_upper_bound = data_upper_bound + data_margin

    logging.info('Range extended.\nLower: {}\nUpper: {}'.format(
        np.round(box_lower_bound), np.round(box_upper_bound)))

    return box_lower_bound, box_upper_bound
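
Note: the two margin modes reduce to simple arithmetic on the data bounds; a
toy illustration with made-up bounds:

import numpy as np

data_lower = np.array([0., 0., 0., -2., -2., -2.])
data_upper = np.array([10., 10., 10., 2., 2., 2.])

# Constant margins: 30 pc in position, 5 km/s in velocity
margin = np.array(3 * [30.] + 3 * [5.])
assert np.allclose(data_lower - margin, [-30., -30., -30., -7., -7., -7.])

# scale_margin=1: box_margin = 0.5 * 1 * span, i.e. the box doubles the span
span = data_upper - data_lower
assert np.allclose(data_lower - 0.5 * span, [-5., -5., -5., -4., -4., -4.])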
Example #24
    print('DATA_TABLE READ', len(data_table))

    # Compute overlaps only for the part of the data (chunk)
    # Every 100k stars takes about 2 days, so I only want about that many
    # stars in each run, in case something goes wrong
    N = 10  # that many chunks
    NI = int(sys.argv[1])  # take this chunk. TODO: update this number for every run!
    print('NI=%d' % NI)
    # TAKE ONLY the i-th part of the data
    indices_chunks = np.array_split(range(len(data_table)), N)
    data_table = data_table[indices_chunks[NI]]

    data_dict = tabletool.build_data_dict_from_table(
        data_table,
        get_background_overlaps=False,  # bg overlap not available yet
        historical=historical,
    )
    star_means = data_dict['means']
    star_covs = data_dict['covs']

    # PREPARE BACKGROUND DATA
    print('Read background Gaia data')
    background_means = tabletool.build_data_dict_from_table(
        '/home/tcrun/chronostar/data/gaia_cartesian_full_6d_table.fits',
        only_means=True,
    )

    # Inverting the vertical values
    star_means = np.copy(star_means)
    star_means[:, 2] *= -1
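
Note: np.array_split tolerates uneven divisions, which is what makes the
chunking above safe for arbitrary table lengths:

import numpy as np

N = 10
indices_chunks = np.array_split(range(25), N)  # 25 rows into 10 chunks
assert len(indices_chunks) == N
assert [len(c) for c in indices_chunks] == 5 * [3] + 5 * [2]
assert sum(len(c) for c in indices_chunks) == 25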
Example #25
                                        col_name=bg_lnol_colname)

if config.config['overwrite_datafile']:
    data_table.write(datafile, overwrite=True)
elif config.config['data_savefile'] != '':
    data_table.write(config.config['data_savefile'], overwrite=True)

# LOAD DATA DICT FROM ORIGINAL DATA
# LOAD DATA DICT FROM MANIPULATED DATA
# THEN CALCULATE MEMB PROBS BOTH TIMES

# Convert data table into numpy arrays of mean and covariance matrices
log_message('Building data dictionary')
data_dict = tabletool.build_data_dict_from_table(
    data_table,
    get_background_overlaps=config.config['include_background_distribution'],
    historical=historical,
)

# Save data_dict of the original data
# np.save('data_dict_original.npy', data_dict)

# MEMB PROBS for data WITH RV available
data_dict_original = np.load('data_dict_original.npy')
memb_probs_with_rv = expectmax.expectation(data=data_dict_original,
                                           comps=bp_comp_with_rv)
print(memb_probs_with_rv)

# Some values are nan. Mask out all stars that have any values in their
# covariance matrices equal to nan.
mask = [~np.any(np.isnan(x)) for x in data_dict['covs']]
Example #26
# Set up trace_orbit_func. Maybe move this into compfitter.
if global_pars['trace_orbit_func'] == 'dummy_trace_orbit_func':
    global_pars['trace_orbit_func'] = traceorbit.dummy_trace_orbit_func
elif global_pars['trace_orbit_func'] == 'epicyclic':
    log_message('trace_orbit: epicyclic')
    global_pars['trace_orbit_func'] = traceorbit.trace_epicyclic_orbit
else:
    global_pars['trace_orbit_func'] = traceorbit.trace_cartesian_orbit

##################################
### READ DATA ####################
##################################
# Stellar data
#~ data_dict = tabletool.build_data_dict_from_table(global_pars['data_table'], mask_good=mask_good)
data_dict = tabletool.build_data_dict_from_table(
    global_pars['data_table'],
    get_background_overlaps=global_pars['use_background'])
#~ print('ONECOME', len(data_dict['means']), global_pars['data_table'])

# Membership: memb_probs is what we get from the expectation step
if os.path.exists(local_pars['filename_membership']):
    memb_probs = np.load(local_pars['filename_membership'])
else:
    # This is first run and we have to start somewhere
    nstars = data_dict['means'].shape[0]
    init_memb_probs = np.ones((nstars, ncomps)) / ncomps
    print('MEMB PROBS INIT EQUAL')

    # Add background
    if global_pars['use_background']:
        memb_probs = np.hstack((init_memb_probs, np.zeros((nstars, 1))))
Example #27
    historical = 'c_XU' in data_table.colnames

    # If data cuts provided, then apply them
    if config.config['banyan_assoc_name'] != '':
        bounds = get_region(config.config['banyan_assoc_name'])
    elif config.data_bound is not None:
        bounds = (config.data_bound['lower_bound'],
                  config.data_bound['upper_bound'])
    else:
        bounds = None

    if bounds is not None:
        log_message('Applying data cuts')
        star_means = tabletool.build_data_dict_from_table(
                datafile,
                main_colnames=config.cart_colnames.get('main_colnames', None),
                only_means=True,
                historical=historical,
        )
        data_mask = np.where(
                np.all(star_means < bounds[1], axis=1)
                & np.all(star_means > bounds[0], axis=1))
        data_table = data_table[data_mask]
    log_message('Data table has {} rows'.format(len(data_table)))


    # By the end of this, data will be an astropy table with cartesian data
    # written into columns in the default way.
    if config.config['convert_to_cartesian']:
        # Performs conversion in place (in memory) on `data_table`
        if (not 'c_XU' in data_table.colnames and
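
Note: the data-cut mask used above reduces to elementwise bound checks; a toy
illustration with hypothetical 2D bounds:

import numpy as np

star_means = np.array([[1., 1.], [5., 5.], [1., 9.]])
lower, upper = np.zeros(2), np.array([4., 10.])
data_mask = np.where(np.all(star_means < upper, axis=1)
                     & np.all(star_means > lower, axis=1))
assert np.array_equal(data_mask[0], [0, 2])    # middle star is cut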
Example #28
def run_fit_helper(true_comp,
                   starcounts,
                   measurement_error,
                   burnin_step=None,
                   run_name='default',
                   trace_orbit_func=None,
                   Component=EllipComponent,
                   init_pars=None):
    py_vers = sys.version[0]
    save_dir = 'temp_data/%s_compfitter_%s/' % (py_vers, run_name)
    data_filename = save_dir + 'synth_data.fits'
    plot_dir = save_dir
    print("---------", save_dir)

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    log_filename = save_dir + 'log.log'

    logging.basicConfig(level=logging.INFO,
                        filename=log_filename,
                        filemode='w')

    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error,
                           Components=Component)

    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    print("newPars ------------------------------ \n", init_pars)
    if init_pars is None:
        internal_pars = None
    else:
        internal_pars = Component.internalise(init_pars)

    res = cf.fit_comp(data=synth_data.table,
                      plot_it=True,
                      burnin_steps=burnin_step,
                      store_burnin_chains=True,
                      plot_dir=plot_dir,
                      save_dir=save_dir,
                      trace_orbit_func=trace_orbit_func,
                      optimisation_method='emcee',
                      Component=Component,
                      init_pars=internal_pars)

    comps_filename = save_dir + 'true_and_best_comp.py'
    best_comp = res[0]
    EllipComponent.store_raw_components(comps_filename, [true_comp, best_comp])

    star_pars = tabletool.build_data_dict_from_table(synth_data.table)
    plot_results(true_comp,
                 best_fit_comp=res[0],
                 star_pars=star_pars,
                 plt_dir=save_dir)

    return res
Example #29
from astropy.table import Table
import numpy as np

import sys
sys.path.insert(0, '..')
from chronostar import tabletool

orig_table_path = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'

orig_table = Table.read(orig_table_path)

res_dir = '../results/beta_Pictoris_with_gaia_small_inv2/6/E/final/'
final_memb = np.load(res_dir + 'final_membership.npy')

recons_star_pars, table_ixs =\
    tabletool.build_data_dict_from_table(orig_table, return_table_ixs=True)

# --------------------------------------------------
# --  Insert/replace membership probabilities ------
# --------------------------------------------------

# need to add new column for comp_F
# simpler just to remove all membership probability columns and append
# to end, this keeps them together without reshuffling of columns
existing_colnames = ['comp_' + char for char in 'ABCDE'] + ['comp_background']
print(existing_colnames)

for colname in existing_colnames:
    del orig_table[colname]

new_colnames = ['comp_' + char for char in 'ABCDEF'] + ['comp_background']
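
A sketch of how the new columns might then be appended, assuming final_memb
is an [nstars, 7] array whose columns are ordered A..F then background
(shapes hypothetical):

import numpy as np
from astropy.table import Table

final_memb = np.random.rand(4, 7)              # 4 stars, 6 comps + background
final_memb /= final_memb.sum(axis=1, keepdims=True)

table = Table({'names': np.arange(4)})
colnames = ['comp_' + char for char in 'ABCDEF'] + ['comp_background']
for col_ix, colname in enumerate(colnames):
    table[colname] = final_memb[:, col_ix]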
Example #30
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='temp_logs/groupfitter.log')
    save_dir = 'temp_data/'
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'stat_dumps/groupfitter.stat'
    # best_fit, chain, lnprob = \
    cProfile.run(
        "groupfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='temp_plots/',"
        "save_dir='temp_data/')",
        stat_file,
    )

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.1)

Example #31
        )
        print("Applying tick parameters")
        for ax in fig.axes:
            ax.tick_params(direction='in',
                           labelsize='x-large',
                           top=True,
                           right=True)
        print("... saving")
        plt.savefig(plot_name)

if PLOT_BPMG_REAL:
    # PLOTTING ITERATION 6E
    # star_pars_file = '../../data/beta_Pictoris_with_gaia_small_xyzuvw.fits'
    table_file = '../../data/paper1/beta_Pictoris_corrected_everything.fits'
    bpmg_table = Table.read(table_file)
    star_pars, table_ixs = tabletool.build_data_dict_from_table(
        bpmg_table, return_table_ixs=True)
    nstars = len(table_ixs[0])
    fit_name = 'bpmg_and_nearby'
    rdir = '../../results/beta_Pictoris_with_gaia_small_inv2/6/E/final/'

    memb_file = rdir + 'final_membership.npy'
    comp_file = rdir + 'final_comps.npy'

    z = np.load(memb_file)
    comps = SphereComponent.load_raw_components(comp_file)

    # Assign markers based on BANYAN membership
    banyan_markers = np.array(nstars * ['.'])
    banyan_membs = bpmg_table['banyan_assoc'][table_ixs]

    # Assign markers to each star (via `banyan_markers`) whilst concurrently
Example #32
mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.insert(0, '..')

from chronostar.component import SphereComponent
from chronostar import tabletool
from chronostar import likelihood
from chronostar import expectmax

component_file = '../results/all_nonbg_scocen_comps.npy'
membership_file = '../results/all_scocen_total_membership.npy'
joined_table = '../data/scocen/joined_scocen_no_duplicates.fit'

star_pars = tabletool.build_data_dict_from_table(joined_table, historical=True)
all_comps = SphereComponent.load_raw_components(component_file)
init_z = np.load(membership_file)

# pop manually determined duplicates
if True:
    all_comps.pop(9)
    all_comps.pop(6)
    init_z = init_z[(np.array([0, 1, 2, 3, 4, 5, 7, 8]), )]

print(len(all_comps))
print(len(init_z))
init_z.shape = (1, -1)

memberships = expectmax.expectation(star_pars,
                                    all_comps,
Example #33
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='compfitter.log')
    save_dir = ''
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'compfitter.stat'
    # best_fit, chain, lnprob = \
    cProfile.run(
        "compfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='',"
        "save_dir='')",
        stat_file,
    )

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.3)

Example #34
            config.config['banyan_assoc_name'],
            pos_margin=config.advanced.get('pos_margin', 30.),
            vel_margin=config.advanced.get('vel_margin', 5.),
            scale_margin=config.advanced.get('scale_margin', None),
        )
    elif config.data_bound is not None:
        bounds = (config.data_bound['lower_bound'],
                  config.data_bound['upper_bound'])
    else:
        bounds = None

    if bounds is not None:
        log_message('Applying data cuts')
        star_means = tabletool.build_data_dict_from_table(
            datafile,
            main_colnames=config.cart_colnames.get('main_colnames', None),
            only_means=True,
            historical=historical,
        )
        data_mask = np.where(
            np.all(star_means < bounds[1], axis=1)
            & np.all(star_means > bounds[0], axis=1))
        data_table = data_table[data_mask]
    log_message('Data table has {} rows'.format(len(data_table)))

    # By the end of this, data will be an astropy table with cartesian data
    # written into columns in the default way.
    if config.config['convert_to_cartesian']:
        log_message('Trying to convert to cartesian')
        # Performs conversion in place (in memory) on `data_table`
        if (not 'c_XU' in data_table.colnames
Example #35
def test_pythonFuncs():
    """
    TODO: remove the requirements of file, have data stored in file?
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx**2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(
        attributes={
            'mean': true_comp_mean,
            'covmatrix': true_comp_covmatrix,
            'age': true_comp_age,
        })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    # star_data['means'] = star_data['means']
    # star_data['covs'] = star_data['covs']
    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()

    # Test overlap with true component
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'],
                                        star_data['means'])):
        co1s.append(co1(group_cov, group_mean, scov, smn))
        co2s.append(co2(group_cov, group_mean, scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    co3s = np.exp(
        p_lno(group_cov, group_mean, star_data['covs'], star_data['means']))
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'],
                                        star_data['means'])):
        co1s.append(
            co1(star_data['covs'][15], star_data['means'][15], scov, smn))
        co2s.append(
            co2(star_data['covs'][15], star_data['means'][15], scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    lnos = p_lno(star_data['covs'][15], star_data['means'][15],
                 star_data['covs'], star_data['means'])
    co3s = np.exp(lnos)
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)
Example #36
my_synth_data = SynthData(pars=my_free_pars,
                          starcounts=NSTARS,
                          Components=EllipComponent)
#~ my_synth_data.generate_all_init_cartesian()
my_synth_data.synthesise_everything()  # Don't actually need everything

# mean_colnames = [el for el in 'XYZUVW']
# mean_colnames = [el+'0' for el in 'xyzuvw']     # Use this for initial star positions
# Use this for current day star positions;
# will need to uncomment synthesise_everything() though
mean_colnames = [el + '_now' for el in 'xyzuvw']

means = tt.build_data_dict_from_table(
    my_synth_data.table[:],
    main_colnames=mean_colnames,
    only_means=True,
)

my_table = my_synth_data.table

plt.clf()
# plt.plot(means[:,0], means[:,3], '.')
plt.plot(my_table['x_now'], my_table['u_now'], '.', c='r')
plt.plot(my_table['x0'], my_table['u0'], '.', c='b')
my_synth_data.components[0].plot('X',
                                 'U',
                                 comp_then=True,
                                 comp_now=True,
                                 comp_orbit=True)
plt.xlabel('X')
Example #37
# for step_ix in range(burnin_chain.shape[1]):
# lims = 6 * [None]

stride = 20
nplots = int(nsteps / stride)
print('Constructing {} plots in total'.format(nplots))

# Some constants
dims = [(0, 1), (0, 3), (1, 4), (2, 5)]
labels = 'XYZUVW'
units = 3 * ['pc'] + 3 * ['km/s']
base_figure_file = 'base_figure.pkl'
star_data_file = data_dir + 'synth_for_plot_data.fit'

star_data = tabletool.build_data_dict_from_table(star_data_file)

# Set up base subplots, plotting everything that is the same across iterative
# plots. We will then store this via Pickle to save time
base_fig, base_ax = plt.subplots(nrows=2, ncols=2)
base_fig.set_size_inches(8, 8)
base_fig.set_tight_layout(True)

lims = 6 * [None]
for ax, (dim1, dim2) in zip(base_ax.flatten(), dims):
    true_comp.plot(ax=ax,
                   dim1=dim1,
                   dim2=dim2,
                   comp_now=False,
                   comp_then=True,
                   comp_orbit=True,
Example #38
#         origins = np.array(origins.item())
#     weights = np.array([origin.nstars for origin in origins])
#     for dim1, dim2 in ('xy', 'uv', 'xu', 'yv', 'zw', 'xw'):
#         plt.clf()
#         fp.plotPaneWithHists(dim1, dim2, star_pars=star_pars_file,
#                              groups=origins, weights=weights,
#                              group_now=True, with_bg=with_bg,
#                              no_bg_covs=with_bg,
#                              )
#         plt.savefig(rdir + 'pre_plot_{}{}.pdf'.format(dim1,dim2))

# Now choose if handling incremental fit or plain fit
true_memb = None
ncomps = 1
if type(data_file) is str:
    star_pars = tt.build_data_dict_from_table(data_file)
print("nstars: {}".format(star_pars['means'].shape[0]))
while os.path.isdir(rdir + '{}/'.format(ncomps)):
    print("ncomps: {}".format(ncomps))
    if ncomps == 1:
        plotEveryIter(rdir + '{}/'.format(ncomps),
                      star_pars,
                      bg_hists,
                      true_memb=true_memb)
    else:
        for i in range(ncomps - 1):
            print("sub directory {}".format(chr(ord('A') + i)))
            subrdir = rdir + '{}/{}/'.format(ncomps, chr(ord('A') + i))
            if os.path.isdir(subrdir):
                plotEveryIter(subrdir,
                              star_pars,
Example #39
def test_2comps_and_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in integration unit_tests

    Performance of the test is a bit tricky to calibrate. Since we are skipping
    any temporal evolution for speed reasons, we model two
    isotropic Gaussians. Now if these Gaussians are too far apart, NaiveFit
    will gravitate to one of the Gaussians during the 1 component fit, and then
    struggle to discover the second Gaussian.

    If the Gaussians are too close, then both will be characterised by the
    1-component fit, and the BIC will decide two Gaussian components are
    overkill.

    I think I've addressed this by having the two groups have a
    large number of stars.
    """
    using_bg = True

    run_name = '2comps_and_background'

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_naive_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_naive_{}_data.fits'.format(PY_VERS, run_name)
    log_filename = 'temp_data/{}_naive_{}/log.log'.format(PY_VERS, run_name)

    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)

    ### INITIALISE SYNTHETIC DATA ###

    # DON'T CHANGE THE AGE! BECAUSE THIS TEST DOESN'T USE ANY ORBIT INTEGRATION!!!
    # Note: if peaks are too far apart, it will be difficult for
    # chronostar to identify the 2nd when moving from a 1-component
    # to a 2-component fit.
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        #  X,  Y, Z, U, V, W, dX, dV,  age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
        [30, 0, 0, 0, 5, 0, 10., 5, uniform_age],
    ])
    starcounts = [100, 150]
    ncomps = sphere_comp_pars.shape[0]
    nstars = np.sum(starcounts)

    background_density = 1e-9

    # initialise z appropriately
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i in range(ncomps):
        true_memb_probs[start:start + starcounts[i], i] = 1.0
        start += starcounts[i]

    try:
        # Check if the synth data has already been constructed
        data_dict = tabletool.build_data_dict_from_table(data_filename)
    except:
        synth_data = SynthData(
            pars=sphere_comp_pars,
            starcounts=starcounts,
            Components=SphereComponent,
            background_density=background_density,
        )
        synth_data.synthesise_everything()

        tabletool.convert_table_astro2cart(synth_data.table,
                                           write_table=True,
                                           filename=data_filename)

        background_count = len(synth_data.table) - np.sum(starcounts)
        # insert background densities
        synth_data.table['background_log_overlap'] =\
            len(synth_data.table) * [np.log(background_density)]

        synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    ### SET UP PARAMETER FILE ###
    fit_pars = {
        'results_dir': savedir,
        'data_table': data_filename,
        'trace_orbit_func': 'dummy_trace_orbit_func',
        'return_results': True,
        'par_log_file': savedir + 'fit_pars.log',
        'overwrite_prev_run': True,
        # 'nthreads':18,
        'nthreads': 3,
    }

    ### INITIALISE AND RUN A NAIVE FIT ###
    naivefit = NaiveFit(fit_pars=fit_pars)
    result, score = naivefit.run_fit()

    best_comps = result['comps']
    memb_probs = result['memb_probs']

    # Check membership has ncomps + 1 (bg) columns
    n_fitted_comps = memb_probs.shape[-1] - 1
    assert ncomps == n_fitted_comps

    ### CHECK RESULT ###
    # No guarantee of order, so check if result is permutated
    #  also we drop the bg memberships for permutation reasons
    perm = expectmax.get_best_permutation(memb_probs[:nstars, :ncomps],
                                          true_memb_probs)

    memb_probs = memb_probs[:nstars]

    logging.info('Best permutation is: {}'.format(perm))

    n_misclassified_stars = np.sum(
        np.abs(true_memb_probs - np.round(memb_probs[:, perm])))

    # Check fewer than 15% of association stars are misclassified
    try:
        assert n_misclassified_stars / nstars * 100 < 15
    except AssertionError:
        import pdb
        pdb.set_trace()

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(), best_comp.get_mean(), atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.5)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.5)
        assert np.allclose(origin.get_age(), best_comp.get_age(), atol=1.)
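
Note: get_best_permutation presumably searches component orderings for the
best agreement with the reference memberships; a brute-force sketch of that
idea (not chronostar's implementation):

import itertools
import numpy as np

def best_permutation_sketch(memb_probs, true_memb_probs):
    # keep the column ordering with the greatest elementwise agreement
    ncomps = true_memb_probs.shape[1]
    return max(itertools.permutations(range(ncomps)),
               key=lambda p: np.sum(memb_probs[:, list(p)] * true_memb_probs))

true_z = np.array([[1., 0.], [0., 1.]])
swapped = true_z[:, ::-1]
assert best_permutation_sketch(swapped, true_z) == (1, 0)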
Example #40
                                        col_name=bg_lnol_colname)

if config.config['overwrite_datafile']:
    data_table.write(datafile, overwrite=True)
elif config.config['data_savefile'] != '':
    data_table.write(config.config['data_savefile'], overwrite=True)

# LOAD DATA DICT FROM ORIGINAL DATA
# LOAD DATA DICT FROM MANIPULATED DATA
# THEN CALCULATE MEMB PROBS BOTH TIMES

# Convert data table into numpy arrays of mean and covariance matrices
log_message('Building data dictionary')
data_dict = tabletool.build_data_dict_from_table(
        data_table,
        get_background_overlaps=config.config['include_background_distribution'],
        historical=historical,
)


# Save data_dict of the original data
# np.save('data_dict_original.npy', data_dict)

# MEMB PROBS for data WITH RV available
data_dict_original=np.load('data_dict_original.npy')
memb_probs_with_rv = expectmax.expectation(data=data_dict_original, comps=bp_comp_with_rv)
print(memb_probs_with_rv)


# Some values are nan. Mask out all stars that have any values in their
# covariance matrices equal to nan.
Example #41
def fit_comp(data, memb_probs=None, init_pos=None, init_pars=None,
             burnin_steps=1000, Component=SphereComponent, plot_it=False,
             pool=None, convergence_tol=0.25, plot_dir='', save_dir='',
             sampling_steps=None, max_iter=None, trace_orbit_func=None):
    """Fits a single 6D gaussian to a weighted set (by membership
    probabilities) of stellar phase-space positions.

    Stores the final sampling chain and lnprob in `save_dir`, but also
    returns the best fit (walker step corresponding to maximum lnprob),
    sampling chain and lnprob.

    If neither init_pos nor init_pars are provided, then the weighted
    mean and covariance of the provided data set are calculated, then
    used to generate a sample parameter list (using Component). Walkers
    are then initialised around this parameter list.

    Parameters
    ----------
    data: dict -or- astropy.table.Table -or- path to astropy.table.Table
        if dict, should have following structure:
            'means': [nstars,6] float array_like
                the central estimates of star phase-space properties
            'covs': [nstars,6,6] float array_like
                the phase-space covariance matrices of stars
            'bg_lnols': [nstars] float array_like (opt.)
                the log overlaps of stars with whatever pdf describes
                the background distribution of stars.
        if table, see tabletool.build_data_dict_from_table to see
        table requirements.
    memb_probs: [nstars] float array_like
        Membership probability (from 0.0 to 1.0) for each star to the
        component being fitted.
    init_pos: [nwalkers, npars] array
        The precise locations at which to initialise the walkers.
        Typically the saved positions from a previous, similar run.
    init_pars: [npars] array
        The position in parameter space about which walkers should be
        initialised. The standard deviation about each parameter is
        hardcoded as INIT_SDEV
    burnin_steps: int {1000}
        Number of steps in each burnin iteration
    Component: Implementation of AbstractComponent {SphereComponent}
        The class used to convert raw parametrisation of a model to
        actual model attributes.
    plot_it: bool {False}
        Whether to generate plots of the lnprob in 'plot_dir'
    pool: MPIPool object {None}
        pool of threads to execute walker steps concurrently
    convergence_tol: float {0.25}
        How many standard deviations an lnprob chain is allowed to vary
        from its mean over the course of a burnin stage and still be
        considered "converged". The default allows the median of the
        final 20 steps to differ from the median of the first 20 steps
        by 0.25 standard deviations.
    plot_dir: str {''}
        The directory in which to store plots
    save_dir: str {''}
        The directory in which to store results and/or byproducts of fit
    sampling_steps: int {None}
        If this is set, a sampling stage will be entered after
        convergence. Only do this if a very fine map of the parameter
        distributions is required, since the burnin stage already
        characterises a converged solution over "burnin_steps" steps.
    max_iter: int {None}
        The maximum number of burnin iterations permitted. (Useful when
        the expectation maximisation implementation needs to abandon
        rubbish components.) If left as None, the run will continue
        until convergence.
    trace_orbit_func: function {None}
        A function to trace cartesian orbits through the Galactic potential.
        If left as None, will use traceorbit.trace_cartesian_orbit (any
        alternate function should match this one's signature).

    Returns
    -------
    best_component
        The component model which yielded the highest posterior probability
    chain
        [nwalkers, nsteps, npars] array of all samples
    probability
        [nwalkers, nsteps] array of probabilities for each sample
    """
    # TIDYING INPUT
    if not isinstance(data, dict):
        data = tabletool.build_data_dict_from_table(data)
    if memb_probs is None:
        memb_probs = np.ones(len(data['means']))
    # Ensure plot_dir has a single trailing '/'
    if plot_dir != '':
        plot_dir = plot_dir.rstrip('/') + '/'
    if plot_it and plot_dir != '':
        if not os.path.exists(plot_dir):
            os.mkdir(plot_dir)
    npars = len(Component.PARAMETER_FORMAT)
    nwalkers = 2*npars

    # Initialise the emcee sampler
    if init_pos is None:
        init_pos = get_init_emcee_pos(data=data, memb_probs=memb_probs,
                                      init_pars=init_pars, Component=Component,
                                      nwalkers=nwalkers)
    sampler = emcee.EnsembleSampler(
            nwalkers, npars, lnprob_func,
            args=[data, memb_probs, trace_orbit_func],
            pool=pool,
    )

    # PERFORM BURN IN
    state = None
    converged = False
    cnt = 0
    logging.info("Beginning burnin loop")
    burnin_lnprob_res = np.zeros((nwalkers, 0))

    # burn in until converged or the (optional) max_iter is reached
    while (not converged) and cnt != max_iter:
        logging.info("Burning in cnt: {}".format(cnt))
        sampler.reset()
        init_pos, lnprob, state = sampler.run_mcmc(init_pos, burnin_steps, state)
        converged = burnin_convergence(sampler.lnprobability,
                                       tol=convergence_tol)
        logging.info("Burnin status: {}".format(converged))

        if plot_it and plt_avail:
            plt.clf()
            plt.plot(sampler.lnprobability.T)
            plt.savefig(plot_dir+"burnin_lnprobT{:02}.png".format(cnt))

        # If about to burnin again, help out the struggling walkers by shifting
        # them to the best walker's position
        if not converged:
            best_ix = np.argmax(lnprob)
            poor_ixs = np.where(lnprob < np.percentile(lnprob, 33))[0]
            for ix in poor_ixs:
                init_pos[ix] = init_pos[best_ix]

        burnin_lnprob_res = np.hstack((
            burnin_lnprob_res, sampler.lnprobability
        ))
        cnt += 1

    logging.info("Burnt in, with convergence: {}".format(converged))
    if plot_it and plt_avail:
        plt.clf()
        plt.plot(burnin_lnprob_res.T)
        plt.savefig(plot_dir+"burnin_lnprobT.png")

    # SAMPLING STAGE
    if not sampling_steps:
        logging.info("Taking final burnin segment as sampling stage")
    else:
        logging.info("Entering sampling stage for {} steps".format(
            sampling_steps
        ))
        sampler.reset()
        # Don't need to keep track of any outputs
        sampler.run_mcmc(init_pos, sampling_steps, state)
        logging.info("Sampling done")

    # save the chain for later inspection
    np.save(save_dir+"final_chain.npy", sampler.chain)
    np.save(save_dir+"final_lnprob.npy", sampler.lnprobability)

    if plot_it and plt_avail:
        logging.info("Plotting final lnprob")
        plt.clf()
        plt.plot(sampler.lnprobability.T)
        plt.savefig(plot_dir+"lnprobT.png")
        logging.info("Plotting done")

    # Identify the best component
    final_best_ix = np.argmax(sampler.lnprobability)
    best_sample = sampler.flatchain[final_best_ix]
    best_component = Component(emcee_pars=best_sample)

    # Determine the median and span of each parameter
    med_and_span = calc_med_and_span(sampler.chain)
    logging.info("Results:\n{}".format(med_and_span))

    return best_component, sampler.chain, sampler.lnprobability
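
# --- Illustrative sketch (hypothetical; burnin_convergence is not shown) ---
# One plausible reading of the convergence_tol docstring above: average the
# lnprob chain over walkers, then require the medians of the first and last
# `window` steps to agree within `tol` standard deviations of the chain.
# Assumes numpy is imported as np, as in fit_comp above.
def burnin_convergence_sketch(lnprob, tol=0.25, window=20):
    """lnprob: [nwalkers, nsteps] array from a burnin segment."""
    mean_chain = np.mean(lnprob, axis=0)        # average over walkers
    start_med = np.median(mean_chain[:window])  # early behaviour
    end_med = np.median(mean_chain[-window:])   # late behaviour
    return np.abs(end_med - start_med) < tol * np.std(mean_chain)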
Ejemplo n.º 42
0
                       dim1=dim1,
                       dim2=dim2,
                       comp_now=True,
                       comp_then=True,
                       color='blue')
        best_comp.plot(ax=ax,
                       dim1=dim1,
                       dim2=dim2,
                       comp_now=True,
                       comp_then=True,
                       color='red')


# Fancy-pants animation setup
Writer = animation.writers['ffmpeg']
writer = Writer(fps=20, metadata=dict(artist='Me'), bitrate=1800)

data = tabletool.build_data_dict_from_table(datafile)

figsize = 10
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(figsize, figsize))
fig.set_tight_layout(True)

ani = animation.FuncAnimation(fig,
                              animate,
                              frames=N_MAX_ITERS,
                              repeat=True)

save_filename = 'convergence_movie.mp4'
ani.save(save_filename, writer=writer)
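
# --- Illustrative sketch (hypothetical; the real animate() is truncated above) ---
# FuncAnimation invokes its callback once per frame with the frame index.
# A minimal callback for this script's 2x2 grid of axes might look like:
def animate_sketch(i):
    for ax in axes.flatten():
        ax.clear()
    # ... redraw the fitted components and stars for EM iteration i ...
    fig.suptitle('EM iteration {}'.format(i))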