def find_percentile(my_dir, file_dir, filter1, filter2, filter3,
                    flux_filter1, flux_filter2, flux_filter3,
                    flux_filter1_err, flux_filter2_err, flux_filter3_err, z):
    """Evaluate ``kde3d`` at one observed flux triple for each source type.

    The dictionary for redshift ``z`` is located with ``file_name`` and read
    with ``load``; ``add_error`` is then applied once per filter with the
    matching flux error.  For each of ``'type_Ia'``, ``'type_Ibc'`` and
    ``'type_II'`` the three flux entries of that type are handed to ``kde3d``
    together with the observed point.

    Parameters
    ----------
    my_dir, file_dir, z
        Forwarded unchanged to ``file_name``.
    filter1, filter2, filter3
        Keys into ``mc_dict[<type>]['flux']``.
    flux_filter1, flux_filter2, flux_filter3
        Observed fluxes; packed into one array and passed to ``kde3d``.
    flux_filter1_err, flux_filter2_err, flux_filter3_err
        Passed to ``add_error`` for the corresponding filter.

    Returns
    -------
    numpy.ndarray
        The ``kde3d`` results in the order (Ia, Ibc, II), with all
        length-one axes removed by ``np.squeeze``.
    """
    observed = np.asarray((flux_filter1, flux_filter2, flux_filter3))

    simulated = load(file_name(my_dir, file_dir, z))
    band_errors = (
        (filter1, flux_filter1_err),
        (filter2, flux_filter2_err),
        (filter3, flux_filter3_err),
    )
    for band, band_err in band_errors:
        simulated = add_error(simulated, band, band_err)

    per_type = []
    for sn_type in ('type_Ia', 'type_Ibc', 'type_II'):
        type_flux = simulated[sn_type]['flux']
        per_type.append(
            kde3d(type_flux[filter1], type_flux[filter2], type_flux[filter3],
                  observed))

    return np.squeeze(np.asarray(per_type))
def get_data(my_dir, file_dir, filter1, filter2, filter3,
             flux_filter1_err, flux_filter2_err, flux_filter3_err, z):
    """Build a feature matrix and a label vector from the fluxes at ``z``.

    The dictionary for redshift ``z`` is located with ``file_name`` and read
    with ``load``; ``add_error`` is applied once per filter.  The fluxes of
    the three types are concatenated in the order Ia, Ibc, II, one column per
    filter, and each row is labelled by its position.

    The integer ``n`` is parsed from ``file_dir``: it is the text between the
    first character and the first underscore.  Rows ``[0, n)`` are labelled
    ``'Type Ia'``, rows ``[n, 2n)`` ``'Type Ibc'`` and rows ``[2n, 3n)``
    ``'Type II'``; the code therefore assumes every type contributes exactly
    ``n`` samples.

    Parameters
    ----------
    my_dir, file_dir, z
        Forwarded unchanged to ``file_name``.
    filter1, filter2, filter3
        Keys into ``mc_dict[<type>]['flux']``; also used as column names.
    flux_filter1_err, flux_filter2_err, flux_filter3_err
        Passed to ``add_error`` for the corresponding filter.

    Returns
    -------
    X : numpy.ndarray
        One row per source, one column per filter.
    Y : numpy.ndarray
        The type label string of each row.

    Raises
    ------
    IndexError
        If ``file_dir`` contains no underscore.
    ValueError
        If the text before the first underscore is not an integer, or if the
        number of rows is not ``3 * n``.
    """
    # NOTE(review): a second definition of get_data appears later in this
    # file; the later one wins at import time -- confirm which should stay.
    underscore_positions = [pos for pos, char in enumerate(file_dir)
                            if char == '_']
    n = int(file_dir[1:underscore_positions[0]])
    n_Ibc_start = n
    n_II_start = 2 * n

    mc_dict = load(file_name(my_dir, file_dir, z))
    band_errors = (
        (filter1, flux_filter1_err),
        (filter2, flux_filter2_err),
        (filter3, flux_filter3_err),
    )
    for band, band_err in band_errors:
        mc_dict = add_error(mc_dict, band, band_err)

    sn_types = ('type_Ia', 'type_Ibc', 'type_II')
    band_columns = [
        np.concatenate([mc_dict[sn_type]['flux'][band] for sn_type in sn_types])
        for band in (filter1, filter2, filter3)
    ]

    # Materialise the rows: on Python 3 a bare zip() is a one-shot iterator.
    rows = list(zip(*band_columns))
    all_sources = pd.DataFrame(rows, columns=[filter1, filter2, filter3],
                               index=range(3 * n))

    # ``.ix`` no longer exists in current pandas.  On this integer index it
    # was label-based, so ``.loc`` is the exact replacement.  Label slices
    # include both end points: row ``n_II_start`` is first marked Ibc and
    # then relabelled by the following statement.
    all_sources['Type'] = 'Type Ia'
    all_sources.loc[n_Ibc_start:n_II_start, 'Type'] = 'Type Ibc'
    all_sources.loc[n_II_start:, 'Type'] = 'Type II'

    all_features = all_sources.drop('Type', axis=1)
    X = copy.copy(all_features.values)
    Y = copy.copy(all_sources['Type'].values)
    return X, Y
def flux_fluxDiff_arrays(my_dir, file_dir, filter1, filter2, z):
    """Return masked ``filter1`` fluxes and masked flux differences per type.

    The dictionary for redshift ``z`` is located with ``file_name`` and read
    with ``load``.  For each of ``'type_Ia'``, ``'type_Ibc'`` and
    ``'type_II'`` the ``filter1`` flux and the difference
    ``flux[filter2] - flux[filter1]`` are each passed through
    ``mask(values, 98)``.

    Returns
    -------
    flux : list
        Three entries (Ia, Ibc, II): the result of ``mask`` on the
        ``filter1`` flux.
    diff : list
        Three entries (Ia, Ibc, II): the result of ``mask`` on the
        ``filter2 - filter1`` flux difference.
    """
    simulated = load(file_name(my_dir, file_dir, z))
    sn_types = ('type_Ia', 'type_Ibc', 'type_II')

    # All differences are formed before anything is masked, so the
    # subtraction always sees the unmasked inputs.
    raw_diff = [
        simulated[sn_type]['flux'][filter2] - simulated[sn_type]['flux'][filter1]
        for sn_type in sn_types
    ]
    raw_flux = [simulated[sn_type]['flux'][filter1] for sn_type in sn_types]

    flux = []
    diff = []
    for type_flux, type_diff in zip(raw_flux, raw_diff):
        flux.append(mask(type_flux, 98))
        diff.append(mask(type_diff, 98))
    return flux, diff
def get_data(my_dir, file_dir, filter1, filter2, filter3,
             flux_filter1_err, flux_filter2_err, flux_filter3_err, z):
    """Build a feature matrix and a label vector from the fluxes at ``z``.

    The dictionary for redshift ``z`` is located with ``file_name`` and read
    with ``load``; ``add_error`` is applied once per filter.  The fluxes of
    the three types are concatenated in the order Ia, Ibc, II, one column per
    filter, and each row is labelled by its position.

    The integer ``n`` is parsed from ``file_dir``: it is the text between the
    first character and the first underscore.  Rows ``[0, n)`` are labelled
    ``'Type Ia'``, rows ``[n, 2n)`` ``'Type Ibc'`` and rows ``[2n, 3n)``
    ``'Type II'``; the code therefore assumes every type contributes exactly
    ``n`` samples.

    Parameters
    ----------
    my_dir, file_dir, z
        Forwarded unchanged to ``file_name``.
    filter1, filter2, filter3
        Keys into ``mc_dict[<type>]['flux']``; also used as column names.
    flux_filter1_err, flux_filter2_err, flux_filter3_err
        Passed to ``add_error`` for the corresponding filter.

    Returns
    -------
    X : numpy.ndarray
        One row per source, one column per filter.
    Y : numpy.ndarray
        The type label string of each row.

    Raises
    ------
    IndexError
        If ``file_dir`` contains no underscore.
    ValueError
        If the text before the first underscore is not an integer, or if the
        number of rows is not ``3 * n``.
    """
    # NOTE(review): an earlier definition of get_data appears in this file;
    # this later one wins at import time -- confirm which should stay.
    underscore_positions = [pos for pos, char in enumerate(file_dir)
                            if char == '_']
    n = int(file_dir[1:underscore_positions[0]])
    n_Ibc_start = n
    n_II_start = 2 * n

    mc_dict = load(file_name(my_dir, file_dir, z))
    band_errors = (
        (filter1, flux_filter1_err),
        (filter2, flux_filter2_err),
        (filter3, flux_filter3_err),
    )
    for band, band_err in band_errors:
        mc_dict = add_error(mc_dict, band, band_err)

    sn_types = ('type_Ia', 'type_Ibc', 'type_II')
    band_columns = [
        np.concatenate([mc_dict[sn_type]['flux'][band] for sn_type in sn_types])
        for band in (filter1, filter2, filter3)
    ]

    # Materialise the rows: on Python 3 a bare zip() is a one-shot iterator.
    rows = list(zip(*band_columns))
    all_sources = pd.DataFrame(rows, columns=[filter1, filter2, filter3],
                               index=range(3 * n))

    # ``.ix`` no longer exists in current pandas.  On this integer index it
    # was label-based, so ``.loc`` is the exact replacement.  Label slices
    # include both end points: row ``n_II_start`` is first marked Ibc and
    # then relabelled by the following statement.
    all_sources['Type'] = 'Type Ia'
    all_sources.loc[n_Ibc_start:n_II_start, 'Type'] = 'Type Ibc'
    all_sources.loc[n_II_start:, 'Type'] = 'Type II'

    all_features = all_sources.drop('Type', axis=1)
    X = copy.copy(all_features.values)
    Y = copy.copy(all_sources['Type'].values)
    return X, Y
def flux_fluxDiff_arrays(my_dir, file_dir, filter1, filter2, z):
    """Return masked ``filter1`` fluxes and masked flux differences per type.

    The dictionary for redshift ``z`` is located with ``file_name`` and read
    with ``load``.  For each of ``'type_Ia'``, ``'type_Ibc'`` and
    ``'type_II'`` the ``filter1`` flux and the difference
    ``flux[filter2] - flux[filter1]`` are each passed through
    ``mask(values, 98)``.

    Returns
    -------
    flux : list
        Three entries (Ia, Ibc, II): the result of ``mask`` on the
        ``filter1`` flux.
    diff : list
        Three entries (Ia, Ibc, II): the result of ``mask`` on the
        ``filter2 - filter1`` flux difference.
    """
    # NOTE(review): an earlier definition of flux_fluxDiff_arrays appears in
    # this file; this later one wins at import time -- confirm which to keep.
    simulated = load(file_name(my_dir, file_dir, z))
    flux_by_type = [
        simulated[sn_type]['flux']
        for sn_type in ('type_Ia', 'type_Ibc', 'type_II')
    ]

    # Differences are taken from the unmasked fluxes, before any masking.
    diff = [bands[filter2] - bands[filter1] for bands in flux_by_type]
    flux = [bands[filter1] for bands in flux_by_type]

    for position, (type_flux, type_diff) in enumerate(zip(flux, diff)):
        flux[position] = mask(type_flux, 98)
        diff[position] = mask(type_diff, 98)
    return flux, diff
def combined2D(final_pdf, my_dir, file_dir, filter1, filter2, flux_filter1,
               flux_filter2, flux_filter1_err, flux_filter2_err, outdir,
               file_with_RF_and_SF_arrays, photoz_plot_name,
               random_forest_plot_name, survival_function_plot_name,
               final_pdf_plot_name, photo_z_type=None, photo_z_file=None,
               photo_z_redshift_file=None, mu=None, sigma=None):
    """Plot the curves selected by ``final_pdf`` and, if combined, their product.

    The ``'rf'``, ``'sf'`` and ``'rf_z'`` entries are read with ``load`` from
    ``file_with_RF_and_SF_arrays``; when that file does not exist,
    ``save_arrays`` is called first and the path it returns is loaded
    instead.  ``outdir`` is created if it is missing.

    Parameters
    ----------
    final_pdf : str
        One of ``'RF+SF+photoz'``, ``'RF+SF'``, ``'RF+photoz'``,
        ``'SF+photoz'``, ``'RF'``, ``'SF'`` or ``'photoz'``.  Any other value
        produces no plot.
    my_dir, file_dir, filter1, filter2, flux_filter1, flux_filter2,
    flux_filter1_err, flux_filter2_err
        Forwarded to ``save_arrays`` and ``iterator``.
    outdir
        Output directory, passed to ``plot`` and ``subplot``.
    photoz_plot_name, random_forest_plot_name, survival_function_plot_name
        ``outname`` of the individual plots.
    final_pdf_plot_name
        Passed to ``subplot`` for the combined figure.
    photo_z_type, photo_z_file, photo_z_redshift_file, mu, sigma
        Forwarded to ``iterator('photo_z', ...)``.  ``photo_z_type == 'file'``
        selects the "from file" plot title; any other value formats ``mu``
        and ``sigma`` into the title, so both must then be numbers.
        ``photo_z_type=False`` means no photo-z is available.

    Raises
    ------
    ValueError
        If ``final_pdf`` includes ``'photoz'`` while ``photo_z_type`` is
        ``False``.

    Notes
    -----
    The photo-z curve is only computed when ``final_pdf`` uses it, so the
    ``'RF'``, ``'SF'`` and ``'RF+SF'`` modes work with the default
    ``mu=None, sigma=None``.
    """
    # NOTE(review): a second definition of combined2D appears later in this
    # file; the later one wins at import time -- confirm which should stay.
    known_modes = ('RF+SF+photoz', 'RF+SF', 'RF+photoz', 'SF+photoz',
                   'RF', 'SF', 'photoz')

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Named ``arrays_path`` so the module-level ``file_name`` helper is not
    # shadowed inside this function.
    if not os.path.isfile(file_with_RF_and_SF_arrays):
        arrays_path = save_arrays(my_dir, file_dir, filter1, filter2,
                                  flux_filter1, flux_filter2,
                                  flux_filter1_err, flux_filter2_err, outdir)
    else:
        arrays_path = file_with_RF_and_SF_arrays
    my_dict = load(arrays_path)
    rf = np.asarray(my_dict['rf'])
    sf = np.asarray(my_dict['sf'])
    z = my_dict['rf_z']

    if final_pdf not in known_modes:
        return

    components = final_pdf.split('+')
    use_rf = 'RF' in components
    use_sf = 'SF' in components
    use_photoz = 'photoz' in components

    photo_z = None
    title_photoz = None
    if use_photoz:
        if photo_z_type is False:
            raise ValueError(
                "final_pdf=%r needs a photo-z curve but photo_z_type is False"
                % (final_pdf,))
        photo_z, _ = iterator('photo_z', my_dir, file_dir, filter1, filter2,
                              flux_filter1, flux_filter2, flux_filter1_err,
                              flux_filter2_err, photo_z_type=photo_z_type,
                              photo_z_file=photo_z_file,
                              photo_z_redshift_file=photo_z_redshift_file,
                              mu=mu, sigma=sigma)
        if photo_z_type == 'file':
            title_photoz = 'Galaxy Photo-z from file'
        else:
            title_photoz = ("Galaxy Photo-z: Gaussian( mu=%.2f, sigma=%.2f)"
                            % (mu, sigma))

    plt.clf()
    if use_rf:
        plot(z, rf, 'PDF', 'Random Forest', outdir,
             outname=random_forest_plot_name)
    if use_sf:
        plot(z, sf, '1 - CDF', 'Survival Function', outdir,
             outname=survival_function_plot_name)
    if use_photoz:
        plot(z, photo_z, 'PDF', title_photoz, outdir,
             outname=photoz_plot_name)

    if len(components) == 1:
        # Single-curve modes have no combined figure.
        return

    if use_rf and use_sf:
        product = np.multiply(rf, sf)
    elif use_rf:
        product = rf
    else:
        product = sf
    if use_photoz:
        # Multiply on the transpose and transpose back, so ``photo_z``
        # broadcasts against the first axis of the rf/sf arrays.
        product = np.multiply(product.T, photo_z).T

    # Panels are always ordered RF, SF, photo-z, product; unused panels get
    # an empty title and empty data.
    title = ['Random Forest' if use_rf else '',
             'Survival Function' if use_sf else '',
             'Photo-z' if use_photoz else '']
    x = [z if use_rf else [],
         z if use_sf else [],
         z if use_photoz else [],
         z]
    y = [rf if use_rf else [],
         sf if use_sf else [],
         photo_z if use_photoz else [],
         product]

    if use_photoz:
        subplot(x, y, title, outdir, final_pdf_plot_name, title_photoz)
    else:
        subplot(x, y, title, outdir, final_pdf_plot_name)
def combined2D(final_pdf, my_dir, file_dir, filter1, filter2, flux_filter1,
               flux_filter2, flux_filter1_err, flux_filter2_err, outdir,
               file_with_RF_and_SF_arrays, photoz_plot_name,
               random_forest_plot_name, survival_function_plot_name,
               final_pdf_plot_name, photo_z_type=None, photo_z_file=None,
               photo_z_redshift_file=None, mu=None, sigma=None):
    """Plot the curves selected by ``final_pdf`` and, if combined, their product.

    The ``'rf'``, ``'sf'`` and ``'rf_z'`` entries are read with ``load`` from
    ``file_with_RF_and_SF_arrays``; when that file does not exist,
    ``save_arrays`` is called first and the path it returns is loaded
    instead.  ``outdir`` is created if it is missing.

    Parameters
    ----------
    final_pdf : str
        One of ``'RF+SF+photoz'``, ``'RF+SF'``, ``'RF+photoz'``,
        ``'SF+photoz'``, ``'RF'``, ``'SF'`` or ``'photoz'``.  Any other value
        produces no plot.
    my_dir, file_dir, filter1, filter2, flux_filter1, flux_filter2,
    flux_filter1_err, flux_filter2_err
        Forwarded to ``save_arrays`` and ``iterator``.
    outdir
        Output directory, passed to ``plot`` and ``subplot``.
    photoz_plot_name, random_forest_plot_name, survival_function_plot_name
        ``outname`` of the individual plots.
    final_pdf_plot_name
        Passed to ``subplot`` for the combined figure.
    photo_z_type, photo_z_file, photo_z_redshift_file, mu, sigma
        Forwarded to ``iterator('photo_z', ...)``.  ``photo_z_type == 'file'``
        selects the "from file" plot title; any other value formats ``mu``
        and ``sigma`` into the title, so both must then be numbers.
        ``photo_z_type=False`` means no photo-z is available.

    Raises
    ------
    ValueError
        If ``final_pdf`` includes ``'photoz'`` while ``photo_z_type`` is
        ``False``.

    Notes
    -----
    The photo-z curve is only computed when ``final_pdf`` uses it, so the
    ``'RF'``, ``'SF'`` and ``'RF+SF'`` modes work with the default
    ``mu=None, sigma=None``.
    """
    # NOTE(review): an earlier definition of combined2D appears in this file;
    # this later one wins at import time -- confirm which should stay.
    known_modes = ('RF+SF+photoz', 'RF+SF', 'RF+photoz', 'SF+photoz',
                   'RF', 'SF', 'photoz')

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Named ``arrays_path`` so the module-level ``file_name`` helper is not
    # shadowed inside this function.
    if not os.path.isfile(file_with_RF_and_SF_arrays):
        arrays_path = save_arrays(my_dir, file_dir, filter1, filter2,
                                  flux_filter1, flux_filter2,
                                  flux_filter1_err, flux_filter2_err, outdir)
    else:
        arrays_path = file_with_RF_and_SF_arrays
    my_dict = load(arrays_path)
    rf = np.asarray(my_dict['rf'])
    sf = np.asarray(my_dict['sf'])
    z = my_dict['rf_z']

    if final_pdf not in known_modes:
        return

    components = final_pdf.split('+')
    use_rf = 'RF' in components
    use_sf = 'SF' in components
    use_photoz = 'photoz' in components

    photo_z = None
    title_photoz = None
    if use_photoz:
        if photo_z_type is False:
            raise ValueError(
                "final_pdf=%r needs a photo-z curve but photo_z_type is False"
                % (final_pdf,))
        photo_z, _ = iterator('photo_z', my_dir, file_dir, filter1, filter2,
                              flux_filter1, flux_filter2, flux_filter1_err,
                              flux_filter2_err, photo_z_type=photo_z_type,
                              photo_z_file=photo_z_file,
                              photo_z_redshift_file=photo_z_redshift_file,
                              mu=mu, sigma=sigma)
        if photo_z_type == 'file':
            title_photoz = 'Galaxy Photo-z from file'
        else:
            title_photoz = ("Galaxy Photo-z: Gaussian( mu=%.2f, sigma=%.2f)"
                            % (mu, sigma))

    plt.clf()
    if use_rf:
        plot(z, rf, 'PDF', 'Random Forest', outdir,
             outname=random_forest_plot_name)
    if use_sf:
        plot(z, sf, '1 - CDF', 'Survival Function', outdir,
             outname=survival_function_plot_name)
    if use_photoz:
        plot(z, photo_z, 'PDF', title_photoz, outdir,
             outname=photoz_plot_name)

    if len(components) == 1:
        # Single-curve modes have no combined figure.
        return

    if use_rf and use_sf:
        product = np.multiply(rf, sf)
    elif use_rf:
        product = rf
    else:
        product = sf
    if use_photoz:
        # Multiply on the transpose and transpose back, so ``photo_z``
        # broadcasts against the first axis of the rf/sf arrays.
        product = np.multiply(product.T, photo_z).T

    # Panels are always ordered RF, SF, photo-z, product; unused panels get
    # an empty title and empty data.
    title = ['Random Forest' if use_rf else '',
             'Survival Function' if use_sf else '',
             'Photo-z' if use_photoz else '']
    x = [z if use_rf else [],
         z if use_sf else [],
         z if use_photoz else [],
         z]
    y = [rf if use_rf else [],
         sf if use_sf else [],
         photo_z if use_photoz else [],
         product]

    if use_photoz:
        subplot(x, y, title, outdir, final_pdf_plot_name, title_photoz)
    else:
        subplot(x, y, title, outdir, final_pdf_plot_name)