Example #1
0
def find_percentile(my_dir, file_dir, filter1, filter2, filter3, flux_filter1,
                    flux_filter2, flux_filter3, flux_filter1_err,
                    flux_filter2_err, flux_filter3_err, z):
    """Evaluate ``kde3d`` for one three-filter flux point against each type.

    The dictionary stored in the file named by
    ``file_name(my_dir, file_dir, z)`` is loaded and passed through
    ``add_error`` once per filter (with that filter's flux error).  For each
    of the keys ``'type_Ia'``, ``'type_Ibc'`` and ``'type_II'`` the three
    per-filter ``'flux'`` entries are handed to ``kde3d`` together with the
    measured point ``(flux_filter1, flux_filter2, flux_filter3)``.

    Returns:
        The ``kde3d`` results in the order Ia, Ibc, II, collected into a
        numpy array with all length-one axes squeezed out.
    """
    observed = np.asarray((flux_filter1, flux_filter2, flux_filter3))

    mc_dict = load(file_name(my_dir, file_dir, z))
    filter_errors = (
        (filter1, flux_filter1_err),
        (filter2, flux_filter2_err),
        (filter3, flux_filter3_err),
    )
    for band, band_err in filter_errors:
        mc_dict = add_error(mc_dict, band, band_err)

    per_type = []
    for sn_type in ('type_Ia', 'type_Ibc', 'type_II'):
        fluxes = mc_dict[sn_type]['flux']
        per_type.append(kde3d(fluxes[filter1], fluxes[filter2],
                              fluxes[filter3], observed))

    # Wrapping in a one-element list before squeezing mirrors the
    # append-then-squeeze of a single result row.
    return np.squeeze([np.asarray(per_type)])
Example #2
0
def get_data(my_dir, file_dir, filter1, filter2, filter3, flux_filter1_err,
             flux_filter2_err, flux_filter3_err, z):
    """Build a feature matrix and a label vector from the loaded fluxes.

    The dictionary stored in the file named by
    ``file_name(my_dir, file_dir, z)`` is loaded and passed through
    ``add_error`` once per filter.  For every filter the ``'flux'`` arrays of
    ``'type_Ia'``, ``'type_Ibc'`` and ``'type_II'`` are concatenated in that
    order, giving one column per filter.

    The per-type sample count ``n`` is parsed from ``file_dir``: it is the
    integer between the first character and the first underscore.  Rows
    ``[0, n)`` are labelled ``'Type Ia'``, ``[n, 2n)`` ``'Type Ibc'`` and
    the rest ``'Type II'``.
    NOTE(review): this assumes each type contributes exactly ``n`` rows;
    only the total of ``3 * n`` is enforced (by the DataFrame index).

    Returns:
        X: array of the three flux columns, one row per source.
        Y: array of the matching type labels.

    Raises:
        IndexError: if ``file_dir`` contains no underscore.
        ValueError: if the text before the first underscore is not an
            integer (from ``int``).
    """
    underscores = [i for i, ch in enumerate(file_dir) if ch == '_']
    n = int(file_dir[1:underscores[0]])
    n_Ibc_start = n
    n_II_start = 2 * n

    mc_dict = load(file_name(my_dir, file_dir, z))
    mc_dict = add_error(mc_dict, filter1, flux_filter1_err)
    mc_dict = add_error(mc_dict, filter2, flux_filter2_err)
    mc_dict = add_error(mc_dict, filter3, flux_filter3_err)

    sn_types = ('type_Ia', 'type_Ibc', 'type_II')
    columns = [
        np.concatenate([mc_dict[sn_type]['flux'][band]
                        for sn_type in sn_types])
        for band in (filter1, filter2, filter3)
    ]

    # zip() is a one-shot iterator on Python 3; materialise the rows so the
    # DataFrame constructor always receives a concrete sequence.
    rows = list(zip(*columns))
    all_sources = pd.DataFrame(rows, columns=[filter1, filter2, filter3],
                               index=range(3 * n))

    all_sources['Type'] = 'Type Ia'
    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement.  Label slices include the end label, so row n_II_start is
    # first marked 'Type Ibc' and then overwritten by the next assignment.
    all_sources.loc[n_Ibc_start:n_II_start, 'Type'] = 'Type Ibc'
    all_sources.loc[n_II_start:, 'Type'] = 'Type II'

    all_features = all_sources.drop('Type', axis=1)

    # Return independent copies so callers cannot alias the frame's buffers.
    X = all_features.values.copy()
    Y = all_sources['Type'].values.copy()

    return X, Y
Example #3
0
def flux_fluxDiff_arrays(my_dir, file_dir, filter1, filter2, z):
    """Return masked ``filter1`` fluxes and ``filter2 - filter1`` differences.

    The dictionary stored in the file named by
    ``file_name(my_dir, file_dir, z)`` is loaded.  For each of
    ``'type_Ia'``, ``'type_Ibc'`` and ``'type_II'`` the ``filter1`` flux and
    the difference ``flux[filter2] - flux[filter1]`` are each passed through
    ``mask(..., 98)``.

    Returns:
        (flux, diff): two three-element lists ordered Ia, Ibc, II.
    """
    mc_dict = load(file_name(my_dir, file_dir, z))
    sn_types = ('type_Ia', 'type_Ibc', 'type_II')

    # All differences are formed from the unmasked fluxes before any masking.
    raw_diff = [mc_dict[sn_type]['flux'][filter2] -
                mc_dict[sn_type]['flux'][filter1]
                for sn_type in sn_types]
    raw_flux = [mc_dict[sn_type]['flux'][filter1] for sn_type in sn_types]

    flux = []
    diff = []
    for type_flux, type_diff in zip(raw_flux, raw_diff):
        flux.append(mask(type_flux, 98))
        diff.append(mask(type_diff, 98))

    return flux, diff
Example #4
0
def get_data(my_dir, file_dir, filter1, filter2, filter3, flux_filter1_err,
             flux_filter2_err, flux_filter3_err, z):
    """Assemble per-source flux features and type labels.

    Loads the dictionary stored in the file named by
    ``file_name(my_dir, file_dir, z)``, runs it through ``add_error`` for
    each of the three filters, and concatenates the ``'flux'`` arrays of
    ``'type_Ia'``, ``'type_Ibc'`` and ``'type_II'`` (in that order) into one
    column per filter.

    ``n`` is read from ``file_dir`` as the integer between its first
    character and its first underscore.  Rows ``[0, n)`` get the label
    ``'Type Ia'``, rows ``[n, 2n)`` ``'Type Ibc'`` and the remaining rows
    ``'Type II'``.
    NOTE(review): the labelling presumes every type has exactly ``n``
    entries; only the total row count ``3 * n`` is checked, via the
    DataFrame index.

    Returns:
        X: array holding the three flux columns, one row per source.
        Y: array holding the type label of each row.

    Raises:
        IndexError: if ``file_dir`` has no underscore.
        ValueError: if the characters before the first underscore do not
            parse as an integer.
    """
    underscore_positions = [pos for pos, char in enumerate(file_dir)
                            if char == '_']
    n = int(file_dir[1:underscore_positions[0]])
    n_Ibc_start = n
    n_II_start = 2 * n

    mc_dict = load(file_name(my_dir, file_dir, z))
    mc_dict = add_error(mc_dict, filter1, flux_filter1_err)
    mc_dict = add_error(mc_dict, filter2, flux_filter2_err)
    mc_dict = add_error(mc_dict, filter3, flux_filter3_err)

    def stack_types(band):
        # One column: Ia fluxes, then Ibc, then II for the given filter.
        return np.concatenate((mc_dict['type_Ia']['flux'][band],
                               mc_dict['type_Ibc']['flux'][band],
                               mc_dict['type_II']['flux'][band]))

    all_filter1 = stack_types(filter1)
    all_filter2 = stack_types(filter2)
    all_filter3 = stack_types(filter3)

    # On Python 3 zip() yields a lazy iterator; build the row list up front
    # so the DataFrame constructor gets a concrete sequence.
    rows = list(zip(all_filter1, all_filter2, all_filter3))
    all_sources = pd.DataFrame(rows,
                               columns=[filter1, filter2, filter3],
                               index=range(3 * n))

    all_sources['Type'] = 'Type Ia'
    # .ix no longer exists in pandas >= 1.0; .loc performs the same
    # label-based, end-inclusive slicing on this integer index.  The shared
    # boundary row n_II_start ends up as 'Type II' via the second assignment.
    all_sources.loc[n_Ibc_start:n_II_start, 'Type'] = 'Type Ibc'
    all_sources.loc[n_II_start:, 'Type'] = 'Type II'

    all_features = all_sources.drop('Type', axis=1)

    # Hand back copies that do not share memory with the DataFrame.
    X = all_features.values.copy()
    Y = all_sources['Type'].values.copy()

    return X, Y
Example #5
0
def flux_fluxDiff_arrays(my_dir, file_dir, filter1, filter2, z):
    """Collect masked fluxes and flux differences for the three types.

    After loading the dictionary stored in the file named by
    ``file_name(my_dir, file_dir, z)``, the ``filter1`` flux and the
    difference ``flux[filter2] - flux[filter1]`` are computed for
    ``'type_Ia'``, ``'type_Ibc'`` and ``'type_II'`` and each is passed
    through ``mask(..., 98)``.

    Returns:
        (flux, diff): two lists of three masked results, ordered Ia, Ibc, II.
    """
    mc_dict = load(file_name(my_dir, file_dir, z))

    per_type_flux = [mc_dict[key]['flux']
                     for key in ('type_Ia', 'type_Ibc', 'type_II')]

    # Differences are taken on the unmasked data, before any mask() call.
    unmasked_diff = [bands[filter2] - bands[filter1]
                     for bands in per_type_flux]
    unmasked_flux = [bands[filter1] for bands in per_type_flux]

    # Tuple elements evaluate left to right, so for every type the flux is
    # masked before its difference.
    masked_pairs = [(mask(one_flux, 98), mask(one_diff, 98))
                    for one_flux, one_diff in zip(unmasked_flux,
                                                  unmasked_diff)]

    flux = [pair[0] for pair in masked_pairs]
    diff = [pair[1] for pair in masked_pairs]

    return flux, diff
Example #6
0
def combined2D(final_pdf, my_dir, file_dir, filter1, filter2, flux_filter1,
               flux_filter2, flux_filter1_err, flux_filter2_err, outdir,
               file_with_RF_and_SF_arrays, photoz_plot_name,
               random_forest_plot_name, survival_function_plot_name,
               final_pdf_plot_name, photo_z_type=None, photo_z_file=None,
               photo_z_redshift_file=None, mu=None, sigma=None):
    """Plot the requested RF / SF / photo-z curves and their product.

    ``outdir`` is created when missing.  The ``'rf'``, ``'sf'`` and
    ``'rf_z'`` entries are read with ``load`` from
    ``file_with_RF_and_SF_arrays`` when that file exists; otherwise
    ``save_arrays`` is called and the file it returns is loaded instead.
    Unless ``photo_z_type`` is exactly ``False``, a photo-z curve is obtained
    from ``iterator('photo_z', ...)``.

    ``final_pdf`` selects what is drawn: ``'RF'``, ``'SF'``, ``'photoz'``,
    ``'RF+SF'``, ``'RF+photoz'``, ``'SF+photoz'`` or ``'RF+SF+photoz'``.
    Every named component is drawn with ``plot``; when two or more are named
    their element-wise product is computed and all curves are passed to
    ``subplot``.  Any other value draws nothing.

    NOTE(review): with the default ``photo_z_type=None`` the photo-z step
    still runs and the Gaussian title is formatted from ``mu`` and ``sigma``,
    which raises ``TypeError`` when they are ``None`` -- confirm whether
    ``None`` was meant to disable photo-z.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    if os.path.isfile(file_with_RF_and_SF_arrays):
        arrays_path = file_with_RF_and_SF_arrays
    else:
        arrays_path = save_arrays(my_dir, file_dir, filter1, filter2,
                                  flux_filter1, flux_filter2,
                                  flux_filter1_err, flux_filter2_err, outdir)
    my_dict = load(arrays_path)

    rf, sf, z = my_dict['rf'], my_dict['sf'], my_dict['rf_z']
    rf = np.asarray(rf)
    sf = np.asarray(sf)

    if photo_z_type is not False:
        photo_z, _ = iterator('photo_z', my_dir, file_dir, filter1, filter2,
                              flux_filter1, flux_filter2, flux_filter1_err,
                              flux_filter2_err, photo_z_type=photo_z_type,
                              photo_z_file=photo_z_file,
                              photo_z_redshift_file=photo_z_redshift_file,
                              mu=mu, sigma=sigma)
        title_photoz = (
            'Galaxy Photo-z from file' if photo_z_type == 'file' else
            "Galaxy Photo-z: Gaussian( mu=%.2f, sigma=%.2f)" % (mu, sigma))

    # Which of the three components does the requested mode contain?
    with_rf = final_pdf in ('RF+SF+photoz', 'RF+SF', 'RF+photoz', 'RF')
    with_sf = final_pdf in ('RF+SF+photoz', 'RF+SF', 'SF+photoz', 'SF')
    with_photoz = final_pdf in ('RF+SF+photoz', 'RF+photoz', 'SF+photoz',
                                'photoz')
    if not (with_rf or with_sf or with_photoz):
        # Unrecognised mode: nothing is cleared or drawn.
        return

    plt.clf()
    if with_rf:
        plot(z, rf, 'PDF', 'Random Forest', outdir,
             outname=random_forest_plot_name)
    if with_sf:
        plot(z, sf, '1 - CDF', 'Survival Function', outdir,
             outname=survival_function_plot_name)
    if with_photoz:
        plot(z, photo_z, 'PDF', title_photoz, outdir, outname=photoz_plot_name)

    if with_rf + with_sf + with_photoz < 2:
        # Single-component modes only draw their own curve.
        return

    title = ['Random Forest' if with_rf else '',
             'Survival Function' if with_sf else '',
             'Photo-z' if with_photoz else '']

    if with_rf and with_sf and with_photoz:
        product = np.multiply(np.multiply(sf, rf).T, photo_z).T
    elif with_rf and with_sf:
        product = np.multiply(rf, sf)
    elif with_rf:
        product = np.multiply(rf.T, photo_z).T
    else:
        product = np.multiply(sf.T, photo_z).T

    # Absent components are represented by empty lists in both axes.
    x = [z if with_rf else [],
         z if with_sf else [],
         z if with_photoz else [],
         z]
    y = [rf if with_rf else [],
         sf if with_sf else [],
         photo_z if with_photoz else [],
         product]

    if with_photoz:
        subplot(x, y, title, outdir, final_pdf_plot_name, title_photoz)
    else:
        subplot(x, y, title, outdir, final_pdf_plot_name)
Example #7
0
def combined2D(final_pdf,
               my_dir,
               file_dir,
               filter1,
               filter2,
               flux_filter1,
               flux_filter2,
               flux_filter1_err,
               flux_filter2_err,
               outdir,
               file_with_RF_and_SF_arrays,
               photoz_plot_name,
               random_forest_plot_name,
               survival_function_plot_name,
               final_pdf_plot_name,
               photo_z_type=None,
               photo_z_file=None,
               photo_z_redshift_file=None,
               mu=None,
               sigma=None):
    """Draw the curves named by ``final_pdf`` and, for combinations, their product.

    Creates ``outdir`` if needed, then obtains the ``'rf'``, ``'sf'`` and
    ``'rf_z'`` entries by calling ``load`` on ``file_with_RF_and_SF_arrays``
    (when it is an existing file) or on the path returned by ``save_arrays``.
    A photo-z curve is requested from ``iterator('photo_z', ...)`` unless
    ``photo_z_type`` is exactly ``False``.

    Recognised ``final_pdf`` values are ``'RF+SF+photoz'``, ``'RF+SF'``,
    ``'RF+photoz'``, ``'SF+photoz'``, ``'RF'``, ``'SF'`` and ``'photoz'``.
    Each listed component is drawn through ``plot``; combinations also
    compute the element-wise product and pass everything to ``subplot``.
    Other values result in no plotting at all.

    NOTE(review): because the check is ``is not False``, the default
    ``photo_z_type=None`` still triggers the photo-z step, and formatting
    the Gaussian title with ``mu``/``sigma`` left at ``None`` raises
    ``TypeError`` -- verify the intended default.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    have_arrays = os.path.isfile(file_with_RF_and_SF_arrays)
    if have_arrays:
        my_dict = load(file_with_RF_and_SF_arrays)
    else:
        saved_path = save_arrays(
            my_dir, file_dir, filter1, filter2, flux_filter1,
            flux_filter2, flux_filter1_err, flux_filter2_err, outdir)
        my_dict = load(saved_path)

    rf = my_dict['rf']
    sf = my_dict['sf']
    z = my_dict['rf_z']
    rf, sf = np.asarray(rf), np.asarray(sf)

    if photo_z_type is not False:
        photo_z, _unused_z = iterator(
            'photo_z', my_dir, file_dir, filter1, filter2,
            flux_filter1, flux_filter2, flux_filter1_err, flux_filter2_err,
            photo_z_type=photo_z_type,
            photo_z_file=photo_z_file,
            photo_z_redshift_file=photo_z_redshift_file,
            mu=mu, sigma=sigma)
        if photo_z_type != 'file':
            title_photoz = ("Galaxy Photo-z: Gaussian( mu=%.2f, sigma=%.2f)"
                            % (mu, sigma))
        else:
            title_photoz = 'Galaxy Photo-z from file'

    # mode -> (show RF, show SF, show photo-z)
    layouts = (
        ('RF+SF+photoz', (True, True, True)),
        ('RF+SF', (True, True, False)),
        ('RF+photoz', (True, False, True)),
        ('SF+photoz', (False, True, True)),
        ('RF', (True, False, False)),
        ('SF', (False, True, False)),
        ('photoz', (False, False, True)),
    )
    selected = None
    for mode, flags in layouts:
        if final_pdf == mode:
            selected = flags
            break
    if selected is None:
        # Unknown mode: leave the current figure untouched.
        return
    show_rf, show_sf, show_photoz = selected

    plt.clf()
    if show_rf:
        plot(z, rf, 'PDF', 'Random Forest', outdir,
             outname=random_forest_plot_name)
    if show_sf:
        plot(z, sf, '1 - CDF', 'Survival Function', outdir,
             outname=survival_function_plot_name)
    if show_photoz:
        plot(z, photo_z, 'PDF', title_photoz, outdir, outname=photoz_plot_name)

    if sum(selected) < 2:
        # A single component has no product panel.
        return

    title = [
        'Random Forest' if show_rf else '',
        'Survival Function' if show_sf else '',
        'Photo-z' if show_photoz else '',
    ]

    if show_photoz:
        if show_rf and show_sf:
            base = np.multiply(sf, rf)
        elif show_rf:
            base = rf
        else:
            base = sf
        # Transpose so photo_z broadcasts along the last axis of base.T,
        # then transpose back.
        product = np.multiply(base.T, photo_z).T
    else:
        product = np.multiply(rf, sf)

    # Panels for components that were not requested get empty data.
    x = [z if show_rf else [], z if show_sf else [],
         z if show_photoz else [], z]
    y = [rf if show_rf else [], sf if show_sf else [],
         photo_z if show_photoz else [], product]

    if show_photoz:
        subplot(x, y, title, outdir, final_pdf_plot_name, title_photoz)
    else:
        subplot(x, y, title, outdir, final_pdf_plot_name)