def load_results(self, teff, snr):
    """Load the best (minimum chi^2) row from each observation database.

    Globs the per-observation ``*_coadd_<mode>_chisqr_results.db`` files for
    the simulated star built from ``self.prefix``/``self.namebase``/teff/snr,
    pulls the single lowest ``self.chi2_val`` row from each, and stacks them.

    Parameters
    ----------
    teff : int
        Host effective temperature encoded in the simulated star name.
    snr : int
        Signal-to-noise ratio encoded in the simulated star name.

    Returns
    -------
    SnrDistribution
        Wraps the stacked DataFrame, tagged with ``snr`` and ``teff``.
    """
    starname = "{}{}{}N{}".format(self.prefix, self.namebase, teff, snr)
    directory = os.path.join(self.base, starname, self.mode)
    df_store = pd.DataFrame()
    # print(directory)
    dbs = glob.glob(
        os.path.join(directory, "*_coadd_{}_chisqr_results.db".format(self.mode)))
    for dbname in dbs:
        # print(dbname)
        # Raw string avoids the invalid "\d" escape-sequence warning.
        match = re.search(r"N\d{1,4}-(\d{1,5})_coadd_" + "{}".format(self.mode),
                          dbname, flags=0)
        # Guard: a non-matching filename previously raised AttributeError here.
        obsnum = match.group(1) if match else None
        try:
            table = load_sql_table(dbname, verbose=False, echo=False)
            # Keep only the single smallest-chi^2 row per observation.
            dbdf = pd.read_sql(
                sa.select(table.c).order_by(
                    table.c[self.chi2_val].asc()).limit(1),
                table.metadata.bind)
            dbdf["snr"] = snr  # Add SNR column
            dbdf["obsnum"] = obsnum  # Add Obsnum column
            # pd.concat replaces DataFrame.append (removed in pandas 2.0).
            df_store = pd.concat([dbdf, df_store], ignore_index=True)
        except Exception as e:
            print(e)
            print(
                f"Didn't get Database for teff={teff}-snr={snr}-obsnum={obsnum}"
            )
    # Coerce to be numeric columns
    c = df_store.columns[df_store.dtypes.eq(object)]
    df_store[c] = df_store[c].apply(pd.to_numeric, errors='coerce', axis=0)
    return SnrDistribution(df_store, snr=snr, teff=teff)
def test_simple_database_returns_correctly_from_sql_db(tmpdir):
    """Round-trip a small DataFrame through an sqlite file and read it back."""
    fname = tmpdir.join("test_db.db")
    x = np.linspace(1, 5, 20)
    y = x ** 2
    z = x + y
    frame = pd.DataFrame({"x": x, "y": y, "z": z})
    # Sanity-check the frame before it touches the database.
    for col, values in (("x", x), ("y", y), ("z", z)):
        assert np.all(frame[col].values == values)
    database_name = 'sqlite:///{0}'.format(fname)
    engine = sa.create_engine(database_name)
    frame.to_sql('test_table', engine, if_exists='append')
    db_table = load_sql_table(fname, name="test_table")
    # Select the x, y, z columns back out of the stored table.
    frame = pd.read_sql(
        sa.select([db_table.c["x"], db_table.c["y"], db_table.c["z"]]),
        db_table.metadata.bind)
    # The round-tripped values must match the originals exactly.
    for col, values in (("x", x), ("y", y), ("z", z)):
        assert np.all(frame[col].values == values)
def load_fixed_snr(teff, snr):
    """Load the minimum-chi^2 row from each fixed-SNR simulation database.

    Parameters
    ----------
    teff : int
        Companion/host temperature encoded in the ``NOISESCRIPT`` star name.
    snr : int
        Signal-to-noise ratio encoded in the star name.

    Returns
    -------
    pd.DataFrame
        One best row per observation, with ``snr`` and ``obsnum`` columns added.
    """
    base = "/home/jneal/Phd/Analysis/sims_variable_params_same_snr/analysis/"
    starname = "NOISESCRIPT{}N{}".format(teff, snr)
    directory = os.path.join(base, starname, "iam")
    df_store = pd.DataFrame()
    dbs = glob.glob(os.path.join(directory, "*_coadd_iam_chisqr_results.db"))
    print(len(dbs))
    for dbname in dbs:
        # Raw string avoids the invalid "\d" escape-sequence warning.
        match = re.search(r"N\d{1,4}-(\d{1,5})_coadd_iam", dbname, flags=0)
        # Guard: a non-matching filename previously raised AttributeError here.
        obsnum = match.group(1) if match else None
        try:
            table = load_sql_table(dbname, verbose=False, echo=False)
            # Keep only the single smallest-chi^2 row per observation.
            # NOTE: chi2_val is a module-level name here, not a parameter.
            dbdf = pd.read_sql(
                sa.select(table.c).order_by(table.c[chi2_val].asc()).limit(1),
                table.metadata.bind)
            dbdf["snr"] = snr  # Add SNR column
            dbdf["obsnum"] = obsnum  # Add Obsnum column
            # pd.concat replaces DataFrame.append (removed in pandas 2.0).
            df_store = pd.concat([dbdf, df_store], ignore_index=True)
        except Exception as e:
            print(e)
            print(
                f"Didn't get Database for teff={teff}-snr={snr}-obsnum={obsnum}"
            )
    # print("Results")
    # print("Host Temperature = 5200 K, Companion Temperature = {}".format(teff))
    # df_store["median_alpha"] = df_store.apply(lambda row: np.median([row.alpha_1, row.alpha_2, row.alpha_3, row.alpha_4]), axis=1)
    # print(df_store[["snr", "obsnum", "coadd_chi2", "teff_1", "teff_2", "median_alpha"]])
    return df_store
def main(database, echo=False, mode="parabola"):
    """Run one analysis/plotting mode against a single chi^2 sqlite database.

    Parameters
    ----------
    database : str
        Path to the sqlite chi^2 results database.
    echo : bool
        Passed to sqlalchemy's engine for SQL statement logging.
    mode : str
        One of "fixed_host_params", "param_limits", "parabola",
        "smallest_chi2", "contour", "contour_old", "test".

    Returns
    -------
    int
        0 on completion.

    Raises
    ------
    ValueError
        If the database does not contain exactly one table.
    """
    path, star, obsnum, chip = decompose_database_name(database)
    os.makedirs(os.path.join(path, "plots"), exist_ok=True)  # make dir for plots
    teff, logg, fe_h = closest_model_params(*get_host_params(star))
    params = {
        "path": path, "star": star, "obsnum": obsnum, "chip": chip,
        "teff": teff, "logg": logg, "fe_h": fe_h
    }
    sqlite_db = 'sqlite:///{}'.format(database)
    try:
        engine = sa.create_engine(sqlite_db, echo=echo)
        table_names = engine.table_names()
    except Exception as e:
        print("\nAccessing sqlite_db = {}\n".format(sqlite_db))
        print("cwd =", os.getcwd())
        raise e
    # Reuse table_names instead of querying the engine again.
    print("Table names in database =", table_names)
    if len(table_names) == 1:
        tb_name = table_names[0]
    else:
        # Fixed typo: "two many" -> "too many".
        raise ValueError("Database has too many tables {}".format(table_names))
    db_table = load_sql_table(database)
    print("Mode =", mode)
    if mode == "fixed_host_params":
        fix_host_parameters_reduced_gamma(engine, params, tb_name)
        fix_host_parameters(engine, params, tb_name)
    elif mode == "param_limits":
        get_column_limits(engine, params, tb_name)
    elif mode == "parabola":
        parabola_plots(db_table, params)
    elif mode == "smallest_chi2":
        smallest_chi2_values(engine, params, tb_name)
    elif mode == "contour":
        alpha_rv_contour(engine, params, tb_name)
    elif mode == "contour_old":
        # Was a second duplicate `mode == "contour"` branch and therefore
        # unreachable; exposed under its own mode name.
        alpha_rv_contour_old(engine, params, tb_name)
    elif mode == "test":
        test_figure(engine, params, tb_name)
    print("Done")
    return 0
def load_min_chi2(teff, noises):
    """Collect the minimum coadd_chi2 row for each noise level.

    Parameters
    ----------
    teff : int
        Temperature encoded in the ``NOISESCRIPT`` star name.
    noises : iterable of int
        SNR values to load (one database each, obsnum fixed at 1).

    Returns
    -------
    pd.DataFrame
        Best row per SNR, with ``snr`` and ``median_alpha`` columns added.
    """
    df_store = pd.DataFrame()
    for snr in noises:
        obsnum = 1  # only the first observation is inspected
        starname = "NOISESCRIPT{}N{}".format(teff, snr)
        directory = os.path.join(home, "analysis", starname, "iam")
        dbname = f"{starname}-{obsnum}_coadd_iam_chisqr_results.db"
        try:
            table = load_sql_table(os.path.join(directory, dbname),
                                   verbose=False, echo=False)
            chi2_val = "coadd_chi2"
            # Keep only the single smallest coadd_chi2 row.
            dbdf = pd.read_sql(
                sa.select(table.c).order_by(table.c[chi2_val].asc()).limit(1),
                table.metadata.bind)
            dbdf["snr"] = snr  # Add SNR column
            # pd.concat replaces DataFrame.append (removed in pandas 2.0).
            df_store = pd.concat([dbdf, df_store], ignore_index=True)
        except Exception as e:
            print(e)
            print(f"Didn't get Database for {teff}-{snr}")
    df_store["median_alpha"] = df_store.apply(
        lambda row: np.median([row.alpha_1, row.alpha_2, row.alpha_3, row.alpha_4]),
        axis=1)
    return df_store
def test_bhm_db_main(sim_config, tmpdir):
    """End-to-end check of bhm_db_main: write 4 fake per-chip csv chi^2
    result files, combine them into one coadd sqlite database, and verify
    the combined per-chip and coadd chi^2/npix columns."""
    simulators = sim_config
    simulators.paths["output_dir"] = str(tmpdir)
    # make directory !
    # Setup
    star = "test_star"
    star = star.upper()
    obsnum = "11"
    suffix = "_test"
    # Gen fake param file
    setup_bhm_dirs(star)
    num = 20
    # Standard values
    teff = np.linspace(3000, 5000, num)
    logg = np.linspace(0.5, 6, num)
    feh = np.linspace(-3, 1, num)
    gamma = np.linspace(-20, 20, num)
    print(tmpdir.join(star, "bhm"))
    # assert tmpdir.join(star, "bhm").check(dir=True)
    for chip in range(1, 5):
        # "TEST_STAR - 11_2_bhm_chisqr_results_test *.csv"
        fname = tmpdir.join(
            star, "bhm", "{0}-{1}_{2}_bhm_chisqr_results{3}.csv".format(
                star, obsnum, chip, suffix))
        print("fname", fname)
        # chi2 depends on the chip number so the per-chip columns are
        # distinguishable after the coadd step.
        chi2 = chip + gamma + teff / logg
        npix = (985 - chip) * np.ones_like(teff)
        df = pd.DataFrame({
            'teff_1': teff,
            'logg_1': logg,
            'feh_1': feh,
            'gamma': gamma,
            'chi2': chi2,
            "npix": npix
        })
        df.to_csv(fname)
        # database_name = 'sqlite:///{0}'.format(fname)
        # engine = sa.create_engine(database_name)
        # df.to_sql('test_table', engine, if_exists='append')
    expected_db_name = tmpdir.join(
        star, "bhm",
        "{0}-{1}_coadd_bhm_chisqr_results{2}.db".format(star, obsnum, suffix))
    # The combined database must not exist before bhm_db_main runs.
    assert expected_db_name.check(file=0)
    # make 4 databases to add together()
    res = bhm_db_main(star, obsnum, suffix, replace=False, verbose=True,
                      chunksize=5, move=False)
    assert res is None
    assert expected_db_name.check(file=1)
    db_table = load_sql_table(expected_db_name)
    assert isinstance(db_table, sa.Table)
    df = pd.read_sql(sa.select(db_table.c), db_table.metadata.bind)
    assert isinstance(df, pd.DataFrame)
    # Input parameter columns survive the coadd unchanged.
    assert np.allclose(df.teff_1.values, teff)
    assert np.allclose(df.logg_1.values, logg)
    assert np.allclose(df.feh_1.values, feh)
    assert np.allclose(df.gamma.values, gamma)
    assert len(df) == num
    # Per-chip chi2 is chip + x; the coadd is their sum: 1+2+3+4 = 10 plus 4x.
    x = gamma + teff / logg
    assert np.allclose(df.chi2_1, 1 + x)
    assert np.allclose(df.chi2_2, 2 + x)
    assert np.allclose(df.chi2_3, 3 + x)
    assert np.allclose(df.chi2_4, 4 + x)
    assert np.allclose(df.coadd_chi2, 10 + 4 * x)
    assert np.all(df.npix_1 == (985 - 1))
    assert np.all(df.npix_2 == (985 - 2))
    assert np.all(df.npix_3 == (985 - 3))
    assert np.all(df.npix_4 == (985 - 4))
def main(star, obsnum, suffix=None, echo=False, mode="parabola", verbose=False, npars=3):
    """Run an analysis/plotting mode on a star's coadd iam chi^2 database.

    Parameters
    ----------
    star : str
        Star name (upper-cased internally).
    obsnum : str or int
        Observation number of the database.
    suffix : str, optional
        Database filename suffix ("" when None).
    echo : bool
        SQL echo flag for sqlalchemy.
    mode : str
        Which analysis to run; "all" runs every analysis best-effort.
    verbose : bool
        Print progress information.
    npars : int
        Number of fitted parameters, stored in the params dict.

    Returns
    -------
    int
        0 on success, 1 if the selected mode raised.

    Raises
    ------
    IOError
        If the expected database file does not exist.
    """
    star = star.upper()
    suffix = "" if suffix is None else suffix
    database = os.path.join(
        simulators.paths["output_dir"], star, "iam",
        "{0}-{1}_coadd_iam_chisqr_results{2}.db".format(star, obsnum, suffix))
    if verbose:
        print("Database name ", database)
        print("Database exists", os.path.isfile(database))
    if not os.path.isfile(database):
        raise IOError("Database '{0}' does not exist.".format(database))
    path, dbstar, db_obsnum, chip = decompose_database_name(database)
    # Sanity check the decomposed name matches what was requested.
    assert dbstar == star, "{} == {}".format(dbstar, star)
    assert str(db_obsnum) == str(obsnum), "{} == {}".format(db_obsnum, obsnum)
    assert chip == "coadd", "{} == {}".format(chip, "coadd")
    os.makedirs(os.path.join(path, "plots"), exist_ok=True)  # make dir for plots
    teff, logg, fe_h = closest_model_params(*get_host_params(star))
    params = {
        "path": path, "star": star, "obsnum": obsnum, "chip": chip,
        "suffix": suffix, "teff": int(teff), "logg": float(logg),
        "fe_h": float(fe_h), "npars": npars
    }
    db_table = load_sql_table(database, verbose=verbose, echo=echo)
    # Put pixel counts in params
    params["npix"] = get_npix_values(db_table)
    if verbose:
        print("Mode =", mode)
    try:
        if mode == "fixed_host_params":
            try:
                fix_host_parameters_reduced_gamma(db_table, params)
                fix_host_parameters(db_table, params)
            except Exception as e:
                print(e)
        elif mode == "param_limits":
            get_column_limits(db_table, params)
        elif mode == "parabola":
            parabola_plots(db_table, params)
        elif mode == "smallest_chi2":
            smallest_chi2_values(db_table, params)
        elif mode == "contour":
            contours(db_table, params)
        elif mode == "test":
            test_figure(db_table, params)
        elif mode == "rvplot":
            rv_plot(db_table, params)
        elif mode == "chi2_parabola":
            chi2_parabola_plots(db_table, params)
            chi2_individual_parabola_plots(db_table, params)
        elif mode == "compare_spectra":
            compare_spectra(db_table, params)
        elif mode == "contrast":
            contrast_iam_results(db_table, params)
        elif mode == "all":
            _run_all_iam_analyses(db_table, params)
        else:
            warnings.warn("Incorrect Mode in iam analysis")
    except Exception as e:
        print(e)
        plt.close("all")
        return 1
    plt.close("all")
    print("Done")
    return 0


def _run_all_iam_analyses(db_table, params):
    """Run every iam analysis best-effort, closing figures after each step.

    Replaces ten copies of ``try: ...; except: pass`` with a single loop.
    ``except Exception`` (not bare ``except:``) so Ctrl-C still interrupts.
    """
    def _fixed_host(table, prm):
        # These two run together, matching the "fixed_host_params" mode.
        fix_host_parameters_reduced_gamma(table, prm)
        fix_host_parameters(table, prm)

    steps = (
        _fixed_host,
        get_column_limits,
        smallest_chi2_values,
        parabola_plots,
        test_figure,
        chi2_parabola_plots,
        chi2_individual_parabola_plots,
        compare_spectra,
        contours,
        contrast_iam_results,
    )
    for step in steps:
        try:
            step(db_table, params)
        except Exception:
            # Best-effort: one failing analysis must not abort the rest.
            pass
        plt.close("all")
def test_sql_table_with_no_table(tmpdir):
    """load_sql_table raises ValueError for a database with no tables."""
    empty_db = tmpdir.join("db_no_tables.db")
    with pytest.raises(ValueError):
        load_sql_table(empty_db, echo=False, verbose=False)
def test_sql_table_with_more_than_one_table(tmpdir):
    """load_sql_table should raise ValueError when the database holds
    more than one table.

    NOTE(review): intentional placeholder — the multi-table fixture
    database is never created, so the bare ``assert False`` fails this
    test until the setup is written.
    """
    db_name = tmpdir.join("db with many tables.db")
    assert False  # need to make the db
    with pytest.raises(ValueError):
        load_sql_table(db_name, echo=False, verbose=False)
def test_load_sql_table_with_invalid_table(db_name, invalid_name):
    """Requesting a table name absent from the database raises NameError."""
    with pytest.raises(NameError):
        load_sql_table(db_name,
                       name=invalid_name,
                       echo=False,
                       verbose=False)
def test_load_sql_table(db_name):
    """A valid table name loads and yields an sqlalchemy Table object."""
    loaded = load_sql_table(db_name, name="chi2_table", echo=False,
                            verbose=False)
    assert isinstance(loaded, sa.Table)
def db_table(db_name):
    """Load and return the "chi2_table" table from the test database."""
    return load_sql_table(db_name, name="chi2_table", echo=False,
                          verbose=False)
def test_iam_db_main_multiple_host_model(sim_config, tmpdir):
    """End-to-end check of iam_db_main over a grid of host models: write
    per-chip csv chi^2 files for every (teff_1, logg_1, feh_1) combination,
    combine into one coadd database, and verify the combined columns."""
    simulators = sim_config
    simulators.paths["output_dir"] = str(tmpdir)
    # Setup
    star = "test_star"
    star = star.upper()
    obsnum = "11"
    suffix = "_test"
    # Gen fake param file
    print("before dirs")
    list_files(str(tmpdir))
    setup_iam_dirs(star)
    print("after dirs")
    list_files(str(tmpdir))
    num = 20
    # Standard values
    # Host grid (teff x logg x feh = 4 * 3 * 2 combinations).
    teff = np.linspace(3000, 5000, 4)
    logg = np.linspace(3.5, 4.5, 3)
    feh = np.linspace(-0.5, 0.5, 2)
    # Companion/velocity parameters, one value per row (length num).
    teff2 = np.linspace(2300, 4300, num)
    logg2 = np.linspace(1.5, 5, num)
    feh2 = np.linspace(-2, 2, num)
    rv = np.linspace(-15, 15, num)
    gamma = np.linspace(-20, 20, num)
    import itertools
    for chip in range(1, 5):
        for t, l, f in itertools.product(teff, logg, feh):
            fname = tmpdir.join(
                star, "iam",
                "{0}-{1}_{2}_iam_chisqr_results{3}[{4}_{5}_{6}].csv".format(
                    star, obsnum, chip, suffix, t, l, f))
            # chi2 depends on chip and on both host and companion params so
            # the per-chip and per-host contributions are verifiable below.
            chi2 = chip + (f + gamma + t / l) * (feh2 + rv + teff2 / logg2)
            npix = (985 - chip) * np.ones_like(chi2)
            # print("chi2 shape", chi2.shape)
            # print("tshape", t.shape)
            # print("tgamma shape", gamma.shape)
            df = pd.DataFrame({'gamma': gamma, 'teff_2': teff2,
                               'logg_2': logg2, 'feh_2': feh2, "rv": rv,
                               'chi2': chi2, "npix": npix})
            df["teff_1"] = t
            df["logg_1"] = l
            df["feh_1"] = f
            df.to_csv(fname)
    print("after df.to_csv")
    list_files(str(tmpdir))
    expected_db_name = tmpdir.join(
        star, "iam",
        "{0}-{1}_coadd_iam_chisqr_results{2}.db".format(star, obsnum, suffix))
    # The combined database must not exist before iam_db_main runs.
    assert expected_db_name.check(file=0)
    # make 4 databases to add together()
    res = iam_db_main(star, obsnum, suffix, replace=False, verbose=False,
                      chunksize=5, move=False)
    # move=True does not test well.
    print("After iam db main")
    assert res is None
    assert expected_db_name.check(file=1)
    db_table = load_sql_table(expected_db_name)
    assert isinstance(db_table, sa.Table)
    df = pd.read_sql(
        sa.select(db_table.c), db_table.metadata.bind)
    print("df head", df.head())
    print("types", df.dtypes)
    assert isinstance(df, pd.DataFrame)
    # num rows per host combination.
    assert len(df) == num * (len(teff) * len(feh) * len(logg))
    # Per-chip chi2 is chip + x; the coadd is their sum: 1+2+3+4 = 10 plus 4x.
    x = (df.feh_1 + df.gamma + df.teff_1 / df.logg_1) * (
        df.feh_2 + df.rv + df.teff_2 / df.logg_2)
    assert np.allclose(df.chi2_1, 1 + x)
    assert np.allclose(df.chi2_2, 2 + x)
    assert np.allclose(df.chi2_3, 3 + x)
    assert np.allclose(df.chi2_4, 4 + x)
    assert np.allclose(df.coadd_chi2, 10 + 4 * x)
    assert np.all(df.npix_1 == (985 - 1))
    assert np.all(df.npix_2 == (985 - 2))
    assert np.all(df.npix_3 == (985 - 3))
    assert np.all(df.npix_4 == (985 - 4))
    # All grid values appear in the combined database.
    assert np.allclose(np.unique(df.teff_1.values), teff)
    assert np.allclose(np.unique(df.logg_1.values), logg)
    assert np.allclose(np.unique(df.feh_1.values), feh)
    assert np.allclose(np.unique(df.teff_2.values), teff2)
    assert np.allclose(np.unique(df.logg_2.values), logg2)
    assert np.allclose(np.unique(df.feh_2.values), feh2)
    assert np.allclose(np.unique(df.gamma.values), gamma)
    assert np.allclose(np.unique(df.rv.values), rv)
def test_iam_db_main_single_host_model(sim_config, tmpdir):
    """End-to-end check of iam_db_main with a single fixed host model:
    write 4 per-chip csv chi^2 files, combine into one coadd database,
    and verify the combined per-chip and coadd columns."""
    simulators = sim_config
    simulators.paths["output_dir"] = str(tmpdir)
    # Setup
    star = "test_star"
    star = star.upper()
    obsnum = "11"
    suffix = "_test"
    # Gen fake param file
    setup_iam_dirs(star)
    list_files(str(tmpdir))
    num = 20
    # Setting values
    # Single fixed host model (scalars, unlike the multiple-host test).
    teff = 3000
    logg = 4.5
    feh = 0.0
    teff2 = np.linspace(2300, 4300, num)
    logg2 = np.linspace(1.5, 5, num)
    feh2 = np.linspace(-2, 2, num)
    rv = np.linspace(-15, 15, num)
    gamma = np.linspace(-20, 20, num)
    for chip in range(1, 5):
        fname = tmpdir.join(
            star, "iam",
            "{0}-{1}_{2}_iam_chisqr_results{3}[{4}_{5}_{6}].csv".format(
                star, obsnum, chip, suffix, teff, logg, feh))
        chi2 = chip + (feh + gamma + teff / logg) * (feh2 + rv + teff2 / logg2)
        # NOTE(review): teff is a scalar here, so np.ones_like(teff) is a
        # 0-d array; the sibling tests use np.ones_like on an array (e.g.
        # chi2) — presumably that was intended here too. Confirm.
        npix = (985 - chip) * np.ones_like(teff)
        df = pd.DataFrame({'gamma': gamma, 'teff_2': teff2, 'logg_2': logg2,
                           'feh_2': feh2, "rv": rv, 'chi2': chi2,
                           "npix": npix})
        df.to_csv(fname)
    list_files(str(tmpdir))
    expected_db_name = tmpdir.join(
        star, "iam",
        "{0}-{1}_coadd_iam_chisqr_results{2}.db".format(star, obsnum, suffix))
    # The combined database must not exist before iam_db_main runs.
    assert expected_db_name.check(file=0)
    # make 4 databases to add together()
    res = iam_db_main(star, obsnum, suffix, replace=False, verbose=True,
                      chunksize=5, move=False)
    assert res is None
    assert expected_db_name.check(file=1)
    db_table = load_sql_table(expected_db_name)
    assert isinstance(db_table, sa.Table)
    df = pd.read_sql(
        sa.select(db_table.c), db_table.metadata.bind)
    assert isinstance(df, pd.DataFrame)
    # Host columns are constant; companion/velocity columns match inputs.
    assert np.all(df.teff_1.values == teff)
    assert np.all(df.logg_1.values == logg)
    assert np.all(df.feh_1.values == feh)
    assert np.allclose(df.teff_2.values, teff2)
    assert np.allclose(df.logg_2.values, logg2)
    assert np.allclose(df.feh_2.values, feh2)
    assert np.allclose(df.gamma.values, gamma)
    assert np.allclose(df.rv.values, rv)
    assert len(df) == num
    # Per-chip chi2 is chip + x; the coadd is their sum: 1+2+3+4 = 10 plus 4x.
    x = (feh + gamma + teff / logg) * (feh2 + rv + teff2 / logg2)
    assert np.allclose(df.chi2_1, 1 + x)
    assert np.allclose(df.chi2_2, 2 + x)
    assert np.allclose(df.chi2_3, 3 + x)
    assert np.allclose(df.chi2_4, 4 + x)
    assert np.allclose(df.coadd_chi2, 10 + 4 * x)
    assert np.all(df.npix_1 == (985 - 1))
    assert np.all(df.npix_2 == (985 - 2))
    assert np.all(df.npix_3 == (985 - 3))
    assert np.all(df.npix_4 == (985 - 4))