Example #1
def test_expectation():
    """
    Super basic: generates stars for two association components and checks
    that the membership allocation is correct
    """

    age = 1e-5
    ass_pars1 = np.array([0, 0, 0, 0, 0, 0, 5., 2., age])
    comp1 = SphereComponent(ass_pars1)
    ass_pars2 = np.array([100., 0, 0, 20, 0, 0, 5., 2., age])
    comp2 = SphereComponent(ass_pars2)
    starcounts = [100,100]
    synth_data = SynthData(pars=[ass_pars1, ass_pars2],
                           starcounts=starcounts)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    true_memb_probs = np.zeros((np.sum(starcounts), 2))
    true_memb_probs[:starcounts[0], 0] = 1.
    true_memb_probs[starcounts[0]:, 1] = 1.

    # star_means, star_covs = tabletool.buildDataFromTable(synth_data.astr_table)
    # all_lnols = em.getAllLnOverlaps(
    #         synth_data.astr_table, [comp1, comp2]
    # )

    fitted_memb_probs = em.expectation(
            tabletool.build_data_dict_from_table(synth_data.table),
            [comp1, comp2]
    )

    assert np.allclose(true_memb_probs, fitted_memb_probs, atol=1e-10)
def test_convertSynthTableToCart():
    """
    Checks that current day measured cartesian values (with negligible
    measurement error) match the true current day cartesian values
    """
    AGE = 40.
    PARS = np.array([
        [0., 0., 0., 0., 0., 0., 10., 5., AGE],
    ])
    STARCOUNTS = [50] #, 30]
    COMPONENTS = SphereComponent
    MEASUREMENT_ERROR = 1e-10

    # Generate synthetic data
    synth_data = SynthData(pars=PARS, starcounts=STARCOUNTS,
                           Components=COMPONENTS,
                           measurement_error=MEASUREMENT_ERROR,
                           )
    synth_data.synthesise_everything()

    # Convert (inplace) astrometry to cartesian
    tabletool.convert_table_astro2cart(synth_data.table)

    # Check consistency between true current-day kinematics and measured
    # current-day kinematics (with negligible error)
    for dim in 'XYZUVW':
        dim_now = dim.lower() + '_now'
        assert np.allclose(synth_data.table[dim_now],
                           synth_data.table[dim])
def test_convertAstrTableToCart():
    """
    Using a historical table, confirm that the cartesian conversion yields
    the same results, by checking that the cartesian means and covariance
    matrices are identical.

    Gets historical cartesian data from building data from table cart cols.

    Gets updated cartesian data from building astro data from table cols,
    converting to cartesian (stored back into table) then building data
    from newly inserted table cart cols.
    """
    # hist_filename = '../data/paper1/historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    hist_table = Table.read(HIST_FILE_NAME)

    # curr_filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    curr_table = Table.read(CURR_FILE_NAME)
    # Drop stars that have gone through any binary checking
    # hist_table = Table(hist_table[100:300])
    # curr_table = Table(curr_table[100:300])

    # load in original means and covs
    orig_cart_data =\
        tabletool.build_data_dict_from_table(table=hist_table, cartesian=True,
                                             historical=True)

    tabletool.convert_table_astro2cart(table=curr_table, write_table=False)

    cart_data = tabletool.build_data_dict_from_table(curr_table,
                                                     cartesian=True)

    assert np.allclose(orig_cart_data['means'], cart_data['means'])
    assert np.allclose(hist_table['dX'], curr_table['X_error'])
    assert np.allclose(orig_cart_data['covs'], cart_data['covs'])
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3,:3] *= true_comp_dx**2
    true_comp_covmatrix[3:,3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(attributes={
        'mean':true_comp_mean,
        'covmatrix':true_comp_covmatrix,
        'age':true_comp_age,
    })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)

    p_lnos = p_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'])
    c_lnos = c_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'], nstars)

    assert np.allclose(p_lnos, c_lnos)
    assert np.isfinite(p_lnos).all()
    assert np.isfinite(c_lnos).all()
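# NOTE: the swigged C implementation exists for speed; the asserts above
# establish that the two agree numerically. Below is a minimal, hypothetical
# benchmark sketch (not from the original tests), assuming the module-level
# p_lno/c_lno imports and a star_data dict shaped as in the test above.
def benchmark_ln_overlaps(comp, star_data, nstars, repeats=100):
    import timeit
    py_time = timeit.timeit(
        lambda: p_lno(comp.get_covmatrix(), comp.get_mean(),
                      star_data['covs'], star_data['means']),
        number=repeats)
    c_time = timeit.timeit(
        lambda: c_lno(comp.get_covmatrix(), comp.get_mean(),
                      star_data['covs'], star_data['means'], nstars),
        number=repeats)
    print('python: {:.4f}s   c: {:.4f}s'.format(py_time, c_time))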
def run_fit_helper(
    true_comp,
    starcounts,
    measurement_error,
    burnin_step=None,
    run_name='default',
    trace_orbit_func=None,
):
    py_vers = sys.version[0]
    data_filename = 'temp_data/{}_compfitter_{}.fits'.format(py_vers, run_name)
    log_filename = 'logs/{}_compfitter_{}.log'.format(py_vers, run_name)
    plot_dir = 'temp_plots/{}_compfitter_{}'.format(py_vers, run_name)
    save_dir = 'temp_data/'
    logging.basicConfig(level=logging.INFO,
                        filename=log_filename,
                        filemode='w')
    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)
    res = gf.fit_comp(
        data=synth_data.table,
        plot_it=True,
        burnin_steps=burnin_step,
        plot_dir=plot_dir,
        save_dir=save_dir,
        trace_orbit_func=trace_orbit_func,
    )
    return res
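# A hypothetical invocation of run_fit_helper (a sketch only; the component
# parameters and star count are illustrative, and the 'temp_data/', 'logs/'
# and 'temp_plots/' directories are assumed to already exist):
def demo_run_fit_helper():
    demo_comp = SphereComponent(
        np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-5]))
    return run_fit_helper(demo_comp,
                          starcounts=100,
                          measurement_error=1e-10,
                          burnin_step=100,
                          run_name='demo')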
Example #6
def test_swigImplementation():
    """
    Compares the swigged c implementation against the python one in
    likelihood.py
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx**2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(
        attributes={
            'mean': true_comp_mean,
            'covmatrix': true_comp_covmatrix,
            'age': true_comp_age,
        })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)

    p_lnos = p_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'])
    c_lnos = c_lno(true_comp.get_covmatrix(), true_comp.get_mean(),
                   star_data['covs'], star_data['means'], nstars)

    assert np.allclose(p_lnos, c_lnos)
    assert np.isfinite(p_lnos).all()
    assert np.isfinite(c_lnos).all()
def run_fit_helper(true_comp, starcounts, measurement_error,
                   burnin_step=None,
                   run_name='default',
                   trace_orbit_func=None,
                   ):
    py_vers = sys.version[0]
    data_filename = 'temp_data/{}_compfitter_{}.fits'.format(py_vers, run_name)
    log_filename = 'logs/{}_compfitter_{}.log'.format(py_vers, run_name)
    plot_dir = 'temp_plots/{}_compfitter_{}'.format(py_vers, run_name)
    save_dir = 'temp_data/'
    logging.basicConfig(level=logging.INFO,
                        filename=log_filename,
                        filemode='w')
    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)
    res = gf.fit_comp(
            data=synth_data.table,
            plot_it=True,
            burnin_steps=burnin_step,
            plot_dir=plot_dir,
            save_dir=save_dir,
            trace_orbit_func=trace_orbit_func,
    )
    return res
def test_convertAstrTableToCart():
    """
    Using a historical table, confirm that the cartesian conversion yields
    the same results, by checking that the cartesian means and covariance
    matrices are identical.

    Gets historical cartesian data from building data from table cart cols.

    Gets updated cartesian data from building astro data from table cols,
    converting to cartesian (stored back into table) then building data
    from newly inserted table cart cols.
    """
    hist_filename = '../data/paper1/historical_beta_Pictoris_with_gaia_small_everything_final.fits'
    hist_table = Table.read(hist_filename)

    curr_filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    curr_table = Table.read(curr_filename)
    # Drop stars that have gone through any binary checking
    hist_table = Table(hist_table[100:300])
    curr_table = Table(curr_table[100:300])

    # load in original means and covs
    orig_cart_data =\
        tabletool.build_data_dict_from_table(table=hist_table, cartesian=True,
                                             historical=True)

    tabletool.convert_table_astro2cart(table=curr_table, write_table=False)

    cart_data = tabletool.build_data_dict_from_table(curr_table, cartesian=True)

    assert np.allclose(orig_cart_data['means'], cart_data['means'])
    assert np.allclose(hist_table['dX'], curr_table['X_error'])
    assert np.allclose(orig_cart_data['covs'], cart_data['covs'])
def test_convertSynthTableToCart():
    """
    Checks that current day measured cartesian values (with negligible
    measurement error) match the true current day cartesian values
    """
    AGE = 40.
    PARS = np.array([
        [0., 0., 0., 0., 0., 0., 10., 5., AGE],
    ])
    STARCOUNTS = [50]  #, 30]
    COMPONENTS = SphereComponent
    MEASUREMENT_ERROR = 1e-10

    # Generate synthetic data
    synth_data = SynthData(
        pars=PARS,
        starcounts=STARCOUNTS,
        Components=COMPONENTS,
        measurement_error=MEASUREMENT_ERROR,
    )
    synth_data.synthesise_everything()

    # Convert (inplace) astrometry to cartesian
    tabletool.convert_table_astro2cart(synth_data.table)

    # Check consistency between true current-day kinematics and measured
    # current-day kinematics (with negligible error)
    for dim in 'XYZUVW':
        dim_now = dim.lower() + '_now'
        assert np.allclose(synth_data.table[dim_now], synth_data.table[dim])
def test_pythonFuncs():
    """
    TODO: remove the requirement of an external file; store the data in the test itself?
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx ** 2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv ** 2
    true_comp_age = 1e-10
    true_comp = SphereComponent(attributes={
        'mean': true_comp_mean,
        'covmatrix': true_comp_covmatrix,
        'age': true_comp_age,
    })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    # star_data['means'] = star_data['means']
    # star_data['covs'] = star_data['covs']
    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()

    # Test overlap with true component
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'], star_data['means'])):
        co1s.append(co1(group_cov, group_mean, scov, smn))
        co2s.append(co2(group_cov, group_mean, scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    co3s = np.exp(p_lno(group_cov, group_mean,
                        star_data['covs'], star_data['means']))
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'], star_data['means'])):
        co1s.append(co1(star_data['covs'][15], star_data['means'][15],
                        scov, smn))
        co2s.append(co2(star_data['covs'][15], star_data['means'][15],
                        scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    lnos = p_lno(star_data['covs'][15], star_data['means'][15],
                 star_data['covs'], star_data['means'])
    co3s = np.exp(lnos)
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)
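# Why the log overlaps "retain the information": np.exp of a very negative
# log-overlap underflows to exactly 0. in double precision, while the log
# value stays finite and comparable. A self-contained numpy illustration
# (not from the original tests):
def demo_log_overlap_underflow():
    ln_overlaps = np.array([-10., -800., -1500.])
    overlaps = np.exp(ln_overlaps)
    print(overlaps)                          # [4.54e-05, 0., 0.]
    print(np.isfinite(ln_overlaps).all())    # True: the logs keep the ordering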
Example #11
def add_UVW_chronostar(tab):
    tabletool.convert_table_astro2cart(table=tab,
                                       main_colnames=None,
                                       error_colnames=None,
                                       corr_colnames=None,
                                       return_table=True)

    tab.write('ScoCen_box_result_with_kinematics.fits')
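# Hypothetical usage of add_UVW_chronostar, assuming an input table with the
# default Gaia-style astrometry columns (the input filename is illustrative):
def demo_add_UVW_chronostar():
    from astropy.table import Table
    tab = Table.read('ScoCen_box_result.fits')
    add_UVW_chronostar(tab)  # writes 'ScoCen_box_result_with_kinematics.fits'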
Example #12
def test_execution_simple_fit():
    """
    Don't test for correctness, but check that everything actually executes
    """
    run_name = 'quickdirty'
    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(
        PY_VERS, run_name)
    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)

    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV,  age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcount = 100

    background_density = 1e-9

    ncomps = sphere_comp_pars.shape[0]

    # true_memb_probs = np.zeros((starcount, ncomps))
    # true_memb_probs[:,0] = 1.

    synth_data = SynthData(
        pars=sphere_comp_pars,
        starcounts=[starcount],
        Components=SphereComponent,
        background_density=background_density,
    )
    synth_data.synthesise_everything()

    tabletool.convert_table_astro2cart(synth_data.table)
    background_count = len(synth_data.table) - starcount

    # insert background densities
    synth_data.table['background_log_overlap'] =\
        len(synth_data.table) * [np.log(background_density)]
    synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table, ncomps=ncomps,
                                 rdir=savedir, burnin=10, sampling_steps=10,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 use_background=True, ignore_stable_comps=False,
                                 max_em_iterations=200)
Example #13
def test_lnprob_func():
    """
    Generates two components and a synthetic data set for each. Confirms
    that each data set yields a larger lnprob for its own generating
    component than for the other.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }
    comps = {}
    data = {}

    for comp_name in comp_means.keys():
        comp = SphereComponent(attributes={
            'mean':comp_means[comp_name],
            'covmatrix':comp_covmatrix,
            'age':tiny_age
        })

        synth_data = SynthData(pars=[comp.get_pars()], starcounts=star_count,
                                measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(synth_data.table)
        comps[comp_name] = comp

    lnprob_comp1_data1 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp2_data1 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp1_data2 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp2'])
    lnprob_comp2_data2 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp2'])
    
    print(lnprob_comp1_data1)
    print(lnprob_comp2_data1)
    print(lnprob_comp1_data2)
    print(lnprob_comp2_data2)
    
    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 20%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=2e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=2e-1)
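# Note on the rtol checks above: np.isclose(a, b, rtol=2e-1) tests
# |a - b| <= atol + rtol * |b| (default atol 1e-8), i.e. agreement to within
# roughly 20% of |b|. A quick self-contained illustration:
def demo_rtol():
    assert np.isclose(100., 115., rtol=2e-1)        # 15% difference: close
    assert not np.isclose(100., 130., rtol=2e-1)    # 30% difference: not close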
def test_badColNames():
    """
    Check that columns have consistent (or absent) units across measurements
    and errors

    First, test that comparing a column in degrees to a column in mas/yr
    raises a UserWarning.
    Then test that comparing a column in degrees to a column without units
    raises no issue.
    """
    main_colnames, error_colnames, corr_colnames = \
        tabletool.get_colnames(cartesian=False)

    # main_colnames[5] = 'radial_velocity_best'
    # error_colnames[5] = 'radial_velocity_error_best'
    # corrupt ordering of column names
    corrupted_error_colnames = list(error_colnames)
    corrupted_error_colnames[0], corrupted_error_colnames[3] =\
        error_colnames[3], error_colnames[0]

    filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    table = Table.read(filename)

    # Only need a handful of rows
    table = Table(table[:10])

    # Catch when units are inconsistent
    try:
        tabletool.convert_table_astro2cart(
                table,
                main_colnames=main_colnames,
                error_colnames=corrupted_error_colnames,
                corr_colnames=corr_colnames
        )
    except Exception as e:
        assert type(e) == UserWarning

    # In the case where units have not been provided, then just leave it be
    try:
        error_colnames[0] = 'ra_dec_corr'
        tabletool.convert_table_astro2cart(table,
                                           main_colnames=main_colnames,
                                           error_colnames=error_colnames,
                                           corr_colnames=corr_colnames)

    except:
        assert False
def test_badColNames():
    """
    Check that columns have consistent (or absent) units across measurements
    and errors

    First, test that comparing a column in degrees to a column in mas/yr
    raises a UserWarning.
    Then test that comparing a column in degrees to a column without units
    raises no issue.
    """
    main_colnames, error_colnames, corr_colnames = \
        tabletool.get_colnames(cartesian=False)

    # main_colnames[5] = 'radial_velocity_best'
    # error_colnames[5] = 'radial_velocity_error_best'
    # corrupt ordering of column names
    corrupted_error_colnames = list(error_colnames)
    corrupted_error_colnames[0], corrupted_error_colnames[3] =\
        error_colnames[3], error_colnames[0]

    # filename = '../data/paper1/beta_Pictoris_with_gaia_small_everything_final.fits'
    table = Table.read(CURR_FILE_NAME)

    # Only need a handful of rows
    table = Table(table[:10])

    # Catch when units are inconsistent
    try:
        tabletool.convert_table_astro2cart(
            table,
            astr_main_colnames=main_colnames,
            astr_error_colnames=corrupted_error_colnames,
            astr_corr_colnames=corr_colnames)
    except Exception as e:
        assert type(e) == UserWarning

    # In the case where units have not been provided, then just leave it be
    try:
        error_colnames[0] = 'ra_dec_corr'
        tabletool.convert_table_astro2cart(table,
                                           astr_main_colnames=main_colnames,
                                           astr_error_colnames=error_colnames,
                                           astr_corr_colnames=corr_colnames)

    except:
        assert False
def test_lnprob_func():
    """
    Generates two components and a synthetic data set for each. Confirms
    that each data set yields a larger lnprob for its own generating
    component than for the other.
    """
    measurement_error = 1e-10
    star_count = 500
    tiny_age = 1e-10
    dim = 6
    comp_covmatrix = np.identity(dim)
    comp_means = {
        'comp1': np.zeros(dim),
        'comp2': 10 * np.ones(dim)
    }
    comps = {}
    data = {}

    for comp_name in comp_means.keys():
        comp = SphereComponent(attributes={
            'mean':comp_means[comp_name],
            'covmatrix':comp_covmatrix,
            'age':tiny_age
        })

        synth_data = SynthData(pars=[comp.get_pars()], starcounts=star_count,
                                measurement_error=measurement_error)
        synth_data.synthesise_everything()
        tabletool.convert_table_astro2cart(synth_data.table)
        data[comp_name] = tabletool.build_data_dict_from_table(synth_data.table)
        comps[comp_name] = comp

    lnprob_comp1_data1 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp2_data1 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp1'])
    lnprob_comp1_data2 = likelihood.lnprob_func(pars=comps['comp1'].get_pars(),
                                                data=data['comp2'])
    lnprob_comp2_data2 = likelihood.lnprob_func(pars=comps['comp2'].get_pars(),
                                                data=data['comp2'])
    assert lnprob_comp1_data1 > lnprob_comp2_data1
    assert lnprob_comp2_data2 > lnprob_comp1_data2

    # Check that the different realisations only differ by 10%
    assert np.isclose(lnprob_comp1_data1, lnprob_comp2_data2, rtol=1e-1)
    assert np.isclose(lnprob_comp1_data2, lnprob_comp2_data1, rtol=1e-1)
Example #17
def test_get_region():
    """
    Test whether get_region applies data cut successfully.

    Synthesise two data sets, one which made up of Component A, and the other
    made up of Component A & B. Then, check if applying a get_region cut on
    the combined data set, with the Component A set as reference, only returns
    the Component A stars.
    """
    data_a_filename = 'temp_data/test_get_region_A.fits'
    synth_dataset_a = synthdata.SynthData(pars=PARS[0],
                                          starcounts=STARCOUNTS[0])
    np.random.seed(0)
    synth_dataset_a.synthesise_everything(filename=data_a_filename,
                                          overwrite=True)
    tabletool.convert_table_astro2cart(synth_dataset_a.table,
                                       write_table=True,
                                       filename=data_a_filename)

    data_both_filename = 'temp_data/test_get_region_both.fits'
    synth_dataset_both = synthdata.SynthData(pars=PARS, starcounts=STARCOUNTS)
    np.random.seed(0)
    synth_dataset_both.synthesise_everything(filename=data_both_filename,
                                             overwrite=True)

    # Prepare .par file
    par_file = 'temp_data/test_get_region.par'

    with open(par_file, 'w') as fp:
        fp.write('par_log_file = temp_data/test_get_region_pars.log\n')
        fp.write('input_file = {}\n'.format(data_both_filename))

        fp.write('convert_astrometry = True\n')

        fp.write('apply_cart_cuts = True\n')
        fp.write('cut_on_region = True\n')
        fp.write('cut_ref_table = {}\n'.format(data_a_filename))

        # fp.write('output_file = {}\n'.format())
        fp.write('return_data_table = True\n')

    # Apply datatool to synthetically generated dataset
    data_table = datatool.prepare_data(par_file)

    assert len(data_table) == len(synth_dataset_a.table)
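# The .par file written above is plain 'key = value' lines. A minimal,
# hypothetical sketch of parsing such a file back into a dict (datatool's
# actual parser may differ, e.g. in comment and type handling):
def read_par_file_sketch(par_file):
    pars = {}
    with open(par_file) as fp:
        for line in fp:
            line = line.split('#')[0].strip()   # strip comments and whitespace
            if '=' in line:
                key, value = line.split('=', 1)
                pars[key.strip()] = value.strip()
    return pars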
Example #18
def add_missing_tims_stars_to_my_set():
    # My table
    d = Table.read('data_table_cartesian_with_bg_ols.fits')

    # Existing Tim's table
    usco = Table.read('usco_res/usco_run_subset.fit')
    ucl = Table.read('ucl_res/ucl_run_subset.fit')
    lcc = Table.read('lcc_res/lcc_run_subset.fit')
    tim = vstack([usco, ucl, lcc])
    tim_existing = unique(tim, keys='source_id')

    tim_missing = Table.read('missing_columns_for_tims_stars.fits')

    tim = join(tim_existing, tim_missing, keys='source_id')
    print(tim.colnames)

    tim.remove_columns([
        'c_VW', 'astrometric_primary_flag', 'c_XZ', 'c_XY', 'c_ZU', 'c_ZV',
        'c_ZW', 'c_XV', 'c_XW', 'c_XU', 'c_UW', 'c_UV', 'c_YU', 'c_YW', 'c_YV',
        'c_YZ', 'dZ', 'dX', 'dY', 'dV', 'dW', 'dU'
    ])

    both = vstack([d, tim])
    print(len(both))
    both = unique(both, keys='source_id')
    print(len(both))

    # This table is masked. Unmask:
    both = both.filled()

    tabletool.convert_table_astro2cart(table=both, return_table=True)

    # WRITE
    both.write('data_table_cartesian_including_tims_stars_with_bg_ols.fits',
               format='fits',
               overwrite=True)

    ct = set(tim.colnames)
    cd = set(d.colnames)
    print()
    print('Colnames in mine and not in Tims set', cd.difference(ct))
    print()
    print('Colnames in Tims and not in my set', ct.difference(cd))
Example #19
def test_maximisation_gradient_descent_with_multiprocessing_tech():
    """
    Added by MZ 2020-07-13

    Test if maximisation works when using gradient descent and multiprocessing.
    NOTE: this does not test whether maximisation returns appropriate results;
    it only tests that the code runs without errors. This is mainly to test
    multiprocessing.
    """
    
    
    age = 1e-5
    ass_pars1 = np.array([0, 0, 0, 0, 0, 0, 5., 2., age])
    comp1 = SphereComponent(ass_pars1)
    starcounts = [100,]
    synth_data = SynthData(pars=[ass_pars1,],
                           starcounts=starcounts)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    true_memb_probs = np.zeros((np.sum(starcounts), 1))
    true_memb_probs[:starcounts[0], 0] = 1.
    #~ true_memb_probs[starcounts[0]:, 1] = 1.
    
    ncomps = len(starcounts)
    
    noise = np.random.rand(ass_pars1.shape[0])*5
    
    all_init_pars = [ass_pars1 + noise]

    new_comps, all_samples, _, all_init_pos, success_mask =\
        expectmax.maximisation(synth_data.table, ncomps, 
                true_memb_probs, 100, 'iter00',
                all_init_pars,
                optimisation_method='Nelder-Mead',
                nprocess_ncomp=True,
                )
Example #20
def test_fit_one_comp_with_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while...
    """
    run_name = 'background'

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(
        PY_VERS, run_name)

    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV,  age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcount = 200

    background_density = 1e-9

    ncomps = sphere_comp_pars.shape[0]

    # true_memb_probs = np.zeros((starcount, ncomps))
    # true_memb_probs[:,0] = 1.

    synth_data = SynthData(
        pars=sphere_comp_pars,
        starcounts=[starcount],
        Components=SphereComponent,
        background_density=background_density,
    )
    synth_data.synthesise_everything()

    tabletool.convert_table_astro2cart(synth_data.table)
    background_count = len(synth_data.table) - starcount
    logging.info('Generated {} background stars'.format(background_count))

    # insert background densities
    synth_data.table['background_log_overlap'] =\
        len(synth_data.table) * [np.log(background_density)]
    synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table, ncomps=ncomps,
                                 rdir=savedir, burnin=500, sampling_steps=5000,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 use_background=True, ignore_stable_comps=False,
                                 max_em_iterations=200)

    # return best_comps, med_and_spans, memb_probs

    # Check parameters are close
    assert np.allclose(sphere_comp_pars, best_comps[0].get_pars(), atol=1.5)

    # Check most assoc members are correctly classified
    recovery_count_threshold = 0.95 * starcount
    recovery_count_actual = np.sum(memb_probs[:starcount, 0] > 0.5)
    assert recovery_count_threshold < recovery_count_actual

    # Check most background stars are correctly classified
    # Number of bg stars classified as members should be less than 5%
    # of all background stars
    contamination_count_threshold = 0.05 * len(memb_probs[starcount:])
    contamination_count_actual = np.sum(memb_probs[starcount:, 0] > 0.5)
    assert contamination_count_threshold > contamination_count_actual

    # Check reported membership probabilities are consistent with recovery
    # rate (within 5%)
    mean_membership_confidence = np.mean(memb_probs[:starcount, 0])
    assert np.isclose(recovery_count_actual / starcount,
                      mean_membership_confidence,
                      atol=0.05)
"""
Add very large RV errors for stars with no known RVs.
Convert to cartesian.
"""

import numpy as np
import sys
sys.path.insert(0, '..')
from chronostar import tabletool
from astropy.table import Table

datafile = '../data/ScoCen_box_result.fits'
d = tabletool.read(datafile)

# Set missing radial velocities (nan) to 0
d['radial_velocity'] = np.nan_to_num(d['radial_velocity'])

# Set missing radial velocity errors (nan) to a large value (1e+4)
d['radial_velocity_error'][np.isnan(d['radial_velocity_error'])] = 1e+4

print('Convert to cartesian')
tabletool.convert_table_astro2cart(table=d, return_table=True)

d.write(
    '/priv/mulga1/marusa/chronostar/data/ScoCen_box_result_15M_ready_for_bg_ols.fits'
)
print('Cartesian written.', len(d))
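# A hypothetical sanity check that could be run right after the
# substitutions above, before the cartesian conversion: no NaNs should
# remain in the RV columns, and the inflated errors should be present.
def check_rv_substitution(d):
    assert not np.any(np.isnan(d['radial_velocity']))
    assert not np.any(np.isnan(d['radial_velocity_error']))
    print('Max RV error:', np.max(d['radial_velocity_error']))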
    historical = False

    log_message('Data table has {} rows'.format(len(data_table)))

    # data_table['radial_velocity'] = data_table['radial_velocity_best']
    # data_table['radial_velocity_error'] = data_table['radial_velocity_error_best']
    #
    # By the end of this, data will be a astropy table
    # with cartesian data written in
    # columns in default way.
    if config.config['convert_to_cartesian']:
        print('Converting to cartesian')
        # Performs conversion in place (in memory) on `data_table`
        tabletool.convert_table_astro2cart(
            table=data_table,
            main_colnames=config.astro_colnames.get('main_colnames', None),
            error_colnames=config.astro_colnames.get('error_colnames', None),
            corr_colnames=config.astro_colnames.get('corr_colnames', None),
            return_table=True)

    # Calculate background overlaps, storing in data
    bg_lnol_colname = 'background_log_overlap'
    if config.config['include_background_distribution']:
        print("Calculating background overlaps")
        # Only calculate if missing
        if bg_lnol_colname not in data_table.colnames:
            log_message('Calculating background densities')
            # background_means = tabletool.build_data_dict_from_table(
            #         config.config['kernel_density_input_datafile'],
            #         only_means=True,
            # )
            # star_means = tabletool.build_data_dict_from_table(
Example #23
def test_pythonFuncs():
    """
    TODO: remove the requirement of an external file; store the data in the test itself?
    """
    true_comp_mean = np.zeros(6)
    true_comp_dx = 2.
    true_comp_dv = 2.
    true_comp_covmatrix = np.identity(6)
    true_comp_covmatrix[:3, :3] *= true_comp_dx**2
    true_comp_covmatrix[3:, 3:] *= true_comp_dv**2
    true_comp_age = 1e-10
    true_comp = SphereComponent(
        attributes={
            'mean': true_comp_mean,
            'covmatrix': true_comp_covmatrix,
            'age': true_comp_age,
        })
    nstars = 100
    synth_data = SynthData(pars=true_comp.get_pars(), starcounts=nstars)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)

    star_data = tabletool.build_data_dict_from_table(synth_data.table)
    # star_data['means'] = star_data['means']
    # star_data['covs'] = star_data['covs']
    group_mean = true_comp.get_mean()
    group_cov = true_comp.get_covmatrix()

    # Test overlap with true component
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'],
                                        star_data['means'])):
        co1s.append(co1(group_cov, group_mean, scov, smn))
        co2s.append(co2(group_cov, group_mean, scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    co3s = np.exp(
        p_lno(group_cov, group_mean, star_data['covs'], star_data['means']))
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)

    # Test overlap with neighbouring star (with the aim of testing
    # tiny overlap values). Note that most overlaps go to 0, but the
    # log overlaps retain the information
    co1s = []
    co2s = []
    for i, (scov, smn) in enumerate(zip(star_data['covs'],
                                        star_data['means'])):
        co1s.append(
            co1(star_data['covs'][15], star_data['means'][15], scov, smn))
        co2s.append(
            co2(star_data['covs'][15], star_data['means'][15], scov, smn))
    co1s = np.array(co1s)
    co2s = np.array(co2s)
    lnos = p_lno(star_data['covs'][15], star_data['means'][15],
                 star_data['covs'], star_data['means'])
    co3s = np.exp(lnos)
    assert np.allclose(co1s, co2s)
    assert np.allclose(co2s, co3s)
    assert np.allclose(co1s, co3s)
def test_fit_many_comps():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in integration unit_tests
    """

    run_name = 'stationary'
    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(PY_VERS,
                                                                 run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(PY_VERS,
                                                              run_name)

    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        #  X,  Y,  Z, U, V, W, dX, dV,  age,
        [-50,-50,-50, 0, 0, 0, 10.,  5, uniform_age],
        [ 50, 50, 50, 0, 0, 0, 10.,  5, uniform_age],
    ])
    starcounts = [200,200]
    ncomps = sphere_comp_pars.shape[0]

    # initialise z appropriately
    # start = 0
    # for i in range(ngroups):
    #     nstars_in_group = int(group_pars[i,-1])
    #     z[start:start+nstars_in_group,i] = 1.0
    #     start += nstars_in_group

    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    true_memb_probs[:200,0] = 1.
    true_memb_probs[200:,1] = 1.

    synth_data = SynthData(pars=sphere_comp_pars, starcounts=starcounts,
                           Components=SphereComponent,
                           )
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table,
                                 ncomps=ncomps,
                                 rdir=savedir,
                                 trace_orbit_func=dummy_trace_orbit_func, )

    # compare fit with input
    try:
        assert np.allclose(true_memb_probs, memb_probs)
    except AssertionError:
        # If not close, check if flipping component order fixes things
        memb_probs = memb_probs[:,::-1]
        best_comps = best_comps[::-1]
        assert np.allclose(true_memb_probs, memb_probs)
    for origin, best_comp in zip(origins, best_comps):
        assert (isinstance(origin, SphereComponent) and
                isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(),
                           best_comp.get_mean(),
                           atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(),
                           best_comp.get_age(),
                           atol=1.)
def test_fit_one_comp_with_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in integration unit_tests
    """
    run_name = 'background'
    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(PY_VERS,
                                                                 run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(PY_VERS,
                                                              run_name)

    logging.basicConfig(level=logging.INFO, filemode='w',
                        filename=log_filename)
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        # X, Y, Z, U, V, W, dX, dV,  age,
        [ 0, 0, 0, 0, 0, 0, 10.,  5, uniform_age],
    ])
    starcount = 100

    background_density = 1e-9

    ncomps = sphere_comp_pars.shape[0]

    # true_memb_probs = np.zeros((starcount, ncomps))
    # true_memb_probs[:,0] = 1.

    synth_data = SynthData(pars=sphere_comp_pars, starcounts=[starcount],
                           Components=SphereComponent,
                           background_density=background_density,
                           )
    synth_data.synthesise_everything()

    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)
    background_count = len(synth_data.table) - starcount

    # insert background densities
    synth_data.table['background_log_overlap'] =\
        len(synth_data.table) * [np.log(background_density)]

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table,
                                 ncomps=ncomps,
                                 rdir=savedir,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 use_background=True)

    # return best_comps, med_and_spans, memb_probs

    # Check parameters are close
    assert np.allclose(sphere_comp_pars, best_comps[0].get_pars(),
                       atol=1.)

    # Check most assoc members are correctly classified
    recovery_count_threshold = 0.95 * starcount
    recovery_count_actual = np.sum(np.round(memb_probs[:starcount, 0]))
    assert recovery_count_threshold < recovery_count_actual

    # Check most background stars are correctly classified
    contamination_count_threshold = 0.05 * len(memb_probs[starcount:])
    contamination_count_actual = np.sum(np.round(memb_probs[starcount:, 0]))
    assert contamination_count_threshold > contamination_count_actual

    # Check reported membership probabilities are consistent with recovery
    # rate (within 5%)
    mean_membership_confidence = np.mean(memb_probs[:starcount, 0])
    assert np.isclose(recovery_count_actual / starcount,
                      mean_membership_confidence, atol=0.05)
                                 comp_then=True,
                                 comp_now=True,
                                 comp_orbit=True)
plt.xlabel('Y')
plt.ylabel('V')
plt.savefig('../scripts/synthData_plot_of_yv.png')
plt.clf()

xu_pos = means[:, np.array([0, 3])]

print('means')
print(means)

data_filename = '../scripts/synthData_ellip.fits'
tt.convert_table_astro2cart(my_synth_data.table,
                            write_table=True,
                            filename=data_filename)
#  res = compfitter.fit_comp(
#          data=my_synth_data.table,
#          plot_it=True,
#          burnin_steps=burnin_step,
#          plot_dir=plot_dir,
#          save_dir=save_dir,
#          trace_orbit_func=trace_orbit_func,
#  )

# my_table = my_synth_data.table
# print(len(my_table))
# print(len(means))
# print(my_table.colnames)
# my_table['X'] = means[:,0]
Example #27
def run_fit_helper(true_comp,
                   starcounts,
                   measurement_error,
                   burnin_step=None,
                   run_name='default',
                   trace_orbit_func=None,
                   Component=EllipComponent,
                   init_pars=None):
    py_vers = sys.version[0]
    save_dir = 'temp_data/%s_compfitter_%s/' % (py_vers, run_name)
    data_filename = save_dir + 'synth_data.fits'
    plot_dir = save_dir
    print("---------", save_dir)

    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    log_filename = save_dir + 'log.log'

    logging.basicConfig(level=logging.INFO,
                        filename=log_filename,
                        filemode='w')

    synth_data = SynthData(pars=true_comp.get_pars(),
                           starcounts=starcounts,
                           measurement_error=measurement_error,
                           Components=Component)

    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    print("newPars ------------------------------ \n", init_pars)
    if init_pars is None:
        internal_pars = None
    else:
        internal_pars = Component.internalise(init_pars)

    res = cf.fit_comp(data=synth_data.table,
                      plot_it=True,
                      burnin_steps=burnin_step,
                      store_burnin_chains=True,
                      plot_dir=plot_dir,
                      save_dir=save_dir,
                      trace_orbit_func=trace_orbit_func,
                      optimisation_method='emcee',
                      Component=Component,
                      init_pars=internal_pars)

    comps_filename = save_dir + 'true_and_best_comp.py'
    best_comp = res[0]
    EllipComponent.store_raw_components(comps_filename, [true_comp, best_comp])

    star_pars = tabletool.build_data_dict_from_table(synth_data.table)
    plot_results(true_comp,
                 best_fit_comp=res[0],
                 star_pars=star_pars,
                 plt_dir=save_dir)

    return res
from chronostar import compfitter

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='temp_logs/groupfitter.log')
    save_dir = 'temp_data/'
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'stat_dumps/groupfitter.stat'
    # best_fit, chain, lnprob = \
    cProfile.run(
        "groupfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='temp_plots/',"
        "save_dir='temp_data/')",
        stat_file,
    )

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.1)
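# The profile is stored on disk ('stat_dumps/groupfitter.stat' above), so it
# can be re-inspected later without re-running the fit. A small follow-up
# sketch, sorting by per-call time instead of cumulative time:
def reinspect_profile(stat_file='stat_dumps/groupfitter.stat'):
    import pstats
    stat = pstats.Stats(stat_file)
    stat.sort_stats('tottime')
    stat.print_stats(10)    # show the top 10 entries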
Example #29
def test_fit_many_comps():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in integration unit_tests
    """

    run_name = 'stationary'

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(
        PY_VERS, run_name)

    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        #   X,  Y,  Z, U, V, W, dX, dV,  age,
        [-50, -50, -50, 0, 0, 0, 10., 5, uniform_age],
        [50, 50, 50, 0, 0, 0, 10., 5, uniform_age],
    ])
    starcounts = [20, 50]
    ncomps = sphere_comp_pars.shape[0]

    # initialise z appropriately
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i in range(ncomps):
        true_memb_probs[start:start + starcounts[i], i] = 1.0
        start += starcounts[i]

    # Initialise some random membership probabilities
    # Normalising such that each row sums to 1
    init_memb_probs = np.random.rand(np.sum(starcounts), ncomps)
    init_memb_probs = (init_memb_probs.T / init_memb_probs.sum(axis=1)).T

    synth_data = SynthData(
        pars=sphere_comp_pars,
        starcounts=starcounts,
        Components=SphereComponent,
    )
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table, ncomps=ncomps,
                                 rdir=savedir, init_memb_probs=init_memb_probs,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 ignore_stable_comps=False)

    perm = expectmax.get_best_permutation(memb_probs, true_memb_probs)

    logging.info('Best permutation is: {}'.format(perm))

    assert np.allclose(true_memb_probs, memb_probs[:, perm])

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(), best_comp.get_mean(), atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(), best_comp.get_age(), atol=1.)
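# expectmax.get_best_permutation aligns fitted components with the true
# ones, since component ordering is arbitrary. A minimal sketch of the
# underlying idea (brute force over permutations; the actual implementation
# may differ):
def best_permutation_sketch(memb_probs, true_memb_probs):
    import itertools
    ncomps = true_memb_probs.shape[1]
    # Pick the column ordering that places the most probability mass
    # on the true assignments
    return max(itertools.permutations(range(ncomps)),
               key=lambda perm: np.sum(true_memb_probs *
                                       memb_probs[:, list(perm)]))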
Example #30
from chronostar import compfitter

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, filename='compfitter.log')
    save_dir = ''
    group_savefile = save_dir + 'origins_stat.npy'
    xyzuvw_init_savefile = save_dir + 'xyzuvw_init_stat.npy'
    astro_savefile = save_dir + 'astro_table_stat.txt'
    xyzuvw_conv_savefile = save_dir + 'xyzuvw_conv_stat.fits'

    pars = np.array([0., 0., 0., 0., 0., 0., 5., 2., 1e-8])
    starcount = 100
    error_frac = 1.
    synth_data = SynthData(pars=pars, starcounts=starcount)
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table)
    data = tabletool.build_data_dict_from_table(synth_data.table)

    stat_file = 'compfitter.stat'
    # best_fit, chain, lnprob = \
    cProfile.run(
        "compfitter.fit_comp(data=data, plot_it=True,"
        "convergence_tol=2., burnin_steps=400, plot_dir='',"
        "save_dir='')",
        stat_file,
    )

    stat = pstats.Stats(stat_file)
    stat.sort_stats('cumtime')
    stat.print_stats(0.3)
Example #31
def test_fit_stability_mixed_comps():
    """
    Have a fit with some iterations that have a mix of stable and
    unstable comps.

    TODO: Maybe give 2 similar comps tiny age but overlapping origins
    """

    run_name = 'mixed_stability'

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_expectmax_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_expectmax_{}_data.fits'.format(
        PY_VERS, run_name)
    log_filename = 'temp_data/{}_expectmax_{}/log.log'.format(
        PY_VERS, run_name)

    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)

    shared_cd_mean = np.zeros(6)
    tiny_age = 0.1
    medium_age = 10.

    #    origin_1 = traceorbit.trace_cartesian_orbit(shared_cd_mean, times=-medium_age)
    #    origin_2 = traceorbit.trace_cartesian_orbit(shared_cd_mean, times=-2*medium_age)
    #
    #    cd_mean_3 = np.array([-200,200,0,0,50,0.])
    #    origin_3 = traceorbit.trace_cartesian_orbit(cd_mean_3, times=-tiny_age)
    #
    #    sphere_comp_pars = np.array([
    #        #   X,  Y,  Z, U, V, W, dX, dV,  age,
    #        np.hstack((origin_1, 10., 5., medium_age)),   # Next two comps share a current day origin
    #        np.hstack((origin_2, 10., 5., 2*medium_age)), #  so hopefully will need several iterations to\
    #                                                      #  disentangle
    #         np.hstack((origin_3, 10., 5., tiny_age)),     # a distinct comp that is stable quickly
    #     ])
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        #   X,  Y,  Z, U, V, W, dX, dV,  age,
        [50, 0, 0, 0, 50, 0, 10., 5,
         uniform_age],  # Very distant (and stable) comp
        [0, -20, 0, 0, -5, 0, 10., 5, uniform_age],  # Overlapping comp 1
        [0, 20, 0, 0, 5, 0, 10., 5, uniform_age],  # Overlapping comp 2
    ])
    starcounts = [50, 100, 200]
    ncomps = sphere_comp_pars.shape[0]

    # initialise z appropriately
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i in range(ncomps):
        true_memb_probs[start:start + starcounts[i], i] = 1.0
        start += starcounts[i]

    # Initialise some random membership probabilities
    #  which will serve as our starting guess
    init_memb_probs = np.random.rand(np.sum(starcounts), ncomps)
    # To aid a component in quickly becoming stable, initialise the memberships
    # correctly for stars belonging to this component
    init_memb_probs[:starcounts[0]] = 0.
    init_memb_probs[:starcounts[0], 0] = 1.
    init_memb_probs[starcounts[0]:, 0] = 0.

    # Normalising such that each row sums to 1
    init_memb_probs = (init_memb_probs.T / init_memb_probs.sum(axis=1)).T

    synth_data = SynthData(
        pars=sphere_comp_pars,
        starcounts=starcounts,
        Components=SphereComponent,
    )
    synth_data.synthesise_everything()
    tabletool.convert_table_astro2cart(synth_data.table,
                                       write_table=True,
                                       filename=data_filename)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]
    SphereComponent.store_raw_components(savedir + 'origins.npy', origins)

    best_comps, med_and_spans, memb_probs = \
        expectmax.fit_many_comps(data=synth_data.table, ncomps=ncomps,
                                 rdir=savedir, init_memb_probs=init_memb_probs,
                                 trace_orbit_func=dummy_trace_orbit_func,
                                 ignore_stable_comps=True)

    perm = expectmax.get_best_permutation(memb_probs, true_memb_probs)

    logging.info('Best permutation is: {}'.format(perm))

    # Calculate the membership difference, we divide by 2 since
    # incorrectly allocated stars are double counted
    total_diff = 0.5 * np.sum(np.abs(true_memb_probs - memb_probs[:, perm]))

    # Assert that the total membership discrepancy is less than 10% of stars
    assert total_diff < 0.1 * np.sum(starcounts)

    for origin, best_comp in zip(origins, np.array(best_comps)[perm, ]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(), best_comp.get_mean(), atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.)
        assert np.allclose(origin.get_age(), best_comp.get_age(), atol=1.)
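# Why the factor of 0.5 in total_diff above: a star fully assigned to the
# wrong component contributes 1 in its true column and 1 in the wrong
# column of |true - fitted|, i.e. 2 in total. A tiny illustration:
def demo_memb_double_counting():
    true_z = np.array([[1., 0.]])      # one star, truly in comp 0
    fitted_z = np.array([[0., 1.]])    # fitted entirely to comp 1
    assert 0.5 * np.sum(np.abs(true_z - fitted_z)) == 1.0   # one misallocated star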
Example #32
def test_convert_astrometry():
    """
    Use a synthetically generated set of astrometry; convert to cartesian
    both manually and via datatool

    In order to compare results, the datatool cartesian conversion
    will be stored in alternatively named columns
    """
    synth_table_filename = 'temp_data/test_convert_astrometry_data.fits'

    synth_dataset = synthdata.SynthData(pars=PARS, starcounts=STARCOUNTS)

    synth_dataset.synthesise_everything(filename=synth_table_filename,
                                        overwrite=True)

    tabletool.convert_table_astro2cart(synth_table_filename,
                                       write_table=True,
                                       filename=synth_table_filename)

    # Prepare a pars file
    par_file = 'temp_data/test_convert_astrometry.par'

    alt_cart_main_colnames = ['{}_alt'.format(dim) for dim in DIMS]
    alt_cart_error_colnames = ['{}_error_alt'.format(dim) for dim in DIMS]
    alt_cart_corr_colnames = []
    for i, colname1 in enumerate(DIMS):
        for colname2 in DIMS[i + 1:]:
            alt_cart_corr_colnames.append('{}_{}_corr_alt'.format(
                colname1, colname2))

    with open(par_file, 'w') as fp:
        fp.write('par_log_file = temp_data/test_convert_astrometry_pars.log\n')
        fp.write('input_file = {}\n'.format(synth_table_filename))

        fp.write('convert_astrometry = True\n')

        fp.write('{} = {}\n'.format('cart_main_colnames',
                                    alt_cart_main_colnames).replace("'", ''))
        fp.write('{} = {}\n'.format('cart_error_colnames',
                                    alt_cart_error_colnames).replace("'", ''))
        fp.write('{} = {}\n'.format('cart_corr_colnames',
                                    alt_cart_corr_colnames).replace("'", ''))
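        # The .replace("'", '') strips the list's repr quotes so each entry
        # renders as plain text, e.g. (assuming the six-dimension DIMS above):
        #   cart_main_colnames = [X_alt, Y_alt, Z_alt, U_alt, V_alt, W_alt]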

        fp.write('overwrite_datafile = True\n')
        fp.write('output_file = {}\n'.format(synth_table_filename))
        fp.write('return_data_table = True\n')

    # Apply datatool to synthetically generated dataset
    data_table = datatool.prepare_data(par_file)

    main_colnames, error_colnames, corr_colnames = tabletool.get_colnames(
        cartesian=True)

    for orig, alt in zip([main_colnames, error_colnames, corr_colnames], [
            alt_cart_main_colnames, alt_cart_error_colnames,
            alt_cart_corr_colnames
    ]):

        for orig_colname, alt_colname in zip(orig, alt):
            assert np.allclose(data_table[orig_colname],
                               data_table[alt_colname],
                               rtol=1e-5)
            print(
                np.max(
                    np.abs(data_table[alt_colname] -
                           data_table[orig_colname])))
def test_2comps_and_background():
    """
    Synthesise a file with negligible error, retrieve initial
    parameters

    Takes a while... maybe this belongs in the integration tests.

    The performance of this test is a bit tricky to calibrate. Since we skip
    any temporal evolution for speed reasons, we model two
    isotropic Gaussians. If these Gaussians are too far apart, NaiveFit
    will gravitate to one of them during the 1-component fit, and then
    struggle to discover the second Gaussian.

    If the Gaussians are too close, then both will be characterised by the
    1-component fit, and the BIC will decide that two Gaussian components
    are overkill.

    I think I've addressed this by giving both groups a
    large number of stars.
    """
    using_bg = True

    run_name = '2comps_and_background'

    logging.info(60 * '-')
    logging.info(15 * '-' + '{:^30}'.format('TEST: ' + run_name) + 15 * '-')
    logging.info(60 * '-')

    savedir = 'temp_data/{}_naive_{}/'.format(PY_VERS, run_name)
    mkpath(savedir)
    data_filename = savedir + '{}_naive_{}_data.fits'.format(PY_VERS, run_name)
    log_filename = 'temp_data/{}_naive_{}/log.log'.format(PY_VERS, run_name)

    logging.basicConfig(level=logging.INFO,
                        filemode='w',
                        filename=log_filename)

    ### INITIALISE SYNTHETIC DATA ###

    # DON'T CHANGE THE AGE, BECAUSE THIS TEST DOESN'T USE ANY ORBIT INTEGRATION!
    # Note: if peaks are too far apart, it will be difficult for
    # chronostar to identify the 2nd when moving from a 1-component
    # to a 2-component fit.
    uniform_age = 1e-10
    sphere_comp_pars = np.array([
        #  X,  Y, Z, U, V, W, dX, dV,  age,
        [0, 0, 0, 0, 0, 0, 10., 5, uniform_age],
        [30, 0, 0, 0, 5, 0, 10., 5, uniform_age],
    ])
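    # The components sit 30 pc apart in X (3 dX) and 5 km/s apart in V
    # (1 dV): close enough for a single-component fit to partially capture
    # both, yet separated enough for the BIC to favour two components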
    starcounts = [100, 150]
    ncomps = sphere_comp_pars.shape[0]
    nstars = np.sum(starcounts)

    background_density = 1e-9

    # initialise z appropriately
    true_memb_probs = np.zeros((np.sum(starcounts), ncomps))
    start = 0
    for i in range(ncomps):
        true_memb_probs[start:start + starcounts[i], i] = 1.0
        start += starcounts[i]
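    # e.g. with starcounts = [100, 150]: rows 0-99 get [1., 0.] and
    # rows 100-249 get [0., 1.]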

    try:
        # Check if the synth data has already been constructed
        data_dict = tabletool.build_data_dict_from_table(data_filename)
    except Exception:
        # Data not found (or unreadable), so synthesise it from scratch
        synth_data = SynthData(
            pars=sphere_comp_pars,
            starcounts=starcounts,
            Components=SphereComponent,
            background_density=background_density,
        )
        synth_data.synthesise_everything()

        tabletool.convert_table_astro2cart(synth_data.table,
                                           write_table=True,
                                           filename=data_filename)

        background_count = len(synth_data.table) - np.sum(starcounts)
        # insert background densities
        synth_data.table['background_log_overlap'] =\
            len(synth_data.table) * [np.log(background_density)]
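        # Every row (association and background stars alike) gets the same
        # constant background_log_overlap: np.log(1e-9) is roughly -20.72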

        synth_data.table.write(data_filename, overwrite=True)

    origins = [SphereComponent(pars) for pars in sphere_comp_pars]

    ### SET UP PARAMETER FILE ###
    fit_pars = {
        'results_dir': savedir,
        'data_table': data_filename,
        'trace_orbit_func': 'dummy_trace_orbit_func',
        'return_results': True,
        'par_log_file': savedir + 'fit_pars.log',
        'overwrite_prev_run': True,
        # 'nthreads':18,
        'nthreads': 3,
    }

    ### INITIALISE AND RUN A NAIVE FIT ###
    naivefit = NaiveFit(fit_pars=fit_pars)
    result, score = naivefit.run_fit()

    best_comps = result['comps']
    memb_probs = result['memb_probs']

    # Check membership has ncomps + 1 (bg) columns
    n_fitted_comps = memb_probs.shape[-1] - 1
    assert ncomps == n_fitted_comps

    ### CHECK RESULT ###
    # No guarantee of order, so check if result is permutated
    #  also we drop the bg memberships for permutation reasons
    perm = expectmax.get_best_permutation(memb_probs[:nstars, :ncomps],
                                          true_memb_probs)
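    # perm reorders the fitted components to best match the synthetic
    # origins; e.g. a result of (1, 0) would mean the fit recovered the
    # two components in swapped order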

    memb_probs = memb_probs[:nstars]

    logging.info('Best permutation is: {}'.format(perm))

    n_misclassified_stars = np.sum(
        np.abs(true_memb_probs - np.round(memb_probs[:, perm])))
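    # A star whose true row [1, 0] rounds to the other component ([0, 1])
    # contributes 2 to this sum, while one that rounds to the background
    # (both association columns ~0) contributes 1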

    # Check fewer than 15% of association stars are misclassified
    assert n_misclassified_stars / nstars * 100 < 15

    for origin, best_comp in zip(origins, np.array(best_comps)[perm]):
        assert (isinstance(origin, SphereComponent)
                and isinstance(best_comp, SphereComponent))
        o_pars = origin.get_pars()
        b_pars = best_comp.get_pars()

        logging.info("origin pars:   {}".format(o_pars))
        logging.info("best fit pars: {}".format(b_pars))
        assert np.allclose(origin.get_mean(), best_comp.get_mean(), atol=5.)
        assert np.allclose(origin.get_sphere_dx(),
                           best_comp.get_sphere_dx(),
                           atol=2.5)
        assert np.allclose(origin.get_sphere_dv(),
                           best_comp.get_sphere_dv(),
                           atol=2.5)
        assert np.allclose(origin.get_age(), best_comp.get_age(), atol=1.)

    log_message('Data table has {} rows'.format(len(data_table)))

    # data_table['radial_velocity'] = data_table['radial_velocity_best']
    # data_table['radial_velocity_error'] = data_table['radial_velocity_error_best']
    #
    # By the end of this, data_table will be an astropy Table with
    # cartesian data written into columns in the default way.
    if config.config['convert_to_cartesian']:
        print('Converting to cartesian')
        # Performs conversion in place (in memory) on `data_table`
        tabletool.convert_table_astro2cart(
                table=data_table,
                main_colnames=config.astro_colnames.get('main_colnames', None),
                error_colnames=config.astro_colnames.get('error_colnames', None),
                corr_colnames=config.astro_colnames.get('corr_colnames', None),
                return_table=True)


    # Calculate background overlaps, storing in data
    bg_lnol_colname = 'background_log_overlap'
    if config.config['include_background_distribution']:
        print("Calculating background overlaps")
        # Only calculate if missing
        if bg_lnol_colname not in data_table.colnames:
            log_message('Calculating background densities')
            # background_means = tabletool.build_data_dict_from_table(
            #         config.config['kernel_density_input_datafile'],
            #         only_means=True,
            # )