Example #1
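# Assumed module-level imports/helpers, not shown in this listing:
#   os, pickle, numpy as np, a configured `logger`, and project helpers such as
#   greenText, source_index_name_to_pdb_index, prune_index_distribution,
#   vi (variability-index simulation module), and plot_distributions.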
def detection_efficiency_for_field(field, ccds=range(12), config=dict(), overwrite=False, indices=["eta","sigma_mu","j","k", "delta_chi_squared"], plot=True):
    """ Run a detection efficiency simulation for a PTF field """   
    
    # Get configuration variables or defaults
    min_number_of_good_observations = config.get("min_number_of_good_observations", 100)
    number_of_fpr_light_curves = config.get("number_of_fpr_light_curves", 10)
    number_of_fpr_simulations_per_light_curve = config.get("number_of_fpr_simulations_per_light_curve", 10)
    number_of_microlensing_light_curves = config.get("number_of_microlensing_light_curves", 10)
    number_of_microlensing_simulations_per_light_curve = config.get("number_of_microlensing_simulations_per_light_curve", 10)
    
    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(field.id, number_of_microlensing_light_curves, number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "new_detection_efficiency", file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "new_detection_efficiency", file_base.format(ext="pdf"))
    
    if not os.path.exists(os.path.dirname(pickle_filename)):
        os.makedirs(os.path.dirname(pickle_filename))
    
    if not os.path.exists(os.path.dirname(plot_filename)):
        os.makedirs(os.path.dirname(plot_filename))
    
    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")
    
    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data...".format(pickle_filename))
        
        # Initialize my PDB statistic dictionary
        # I use a dictionary here because after doing some sub-selection the index arrays may
        #   have different lengths.
        pdb_statistics = dict()
        for index in indices:
            pdb_statistics[index] = np.array([])
            
        for ccd in field.ccds.values():
            if ccd.id not in ccds: continue
            
            logger.info(greenText("Starting with CCD {}".format(ccd.id)))
            chip = ccd.read()
            
            logger.info("Getting variability statistics from photometric database")
            source_ids = []
            pdb_statistics_array = []
            for source in chip.sources.where("(ngoodobs > {})".format(min_number_of_good_observations)):
                pdb_statistics_array.append(tuple([source_index_name_to_pdb_index(source,index) for index in indices]))
                source_ids.append(source["matchedSourceID"])
            pdb_statistics_array = np.array(pdb_statistics_array, dtype=[(index,float) for index in indices])
            
            logger.debug("Selected {} statistics".format(len(pdb_statistics_array)))
            
            # I use a dictionary here because after doing some sub-selection the index arrays may
            #   have different lengths.
            for index in indices:
                this_index_array = pdb_statistics_array[index]
                
                # This is where I need to define the selection distributions for each index.
                pdb_statistics[index] = np.append(pdb_statistics[index], prune_index_distribution(index, this_index_array))
            
            # Randomize the order of source_ids to prune through
            np.random.shuffle(source_ids)
            
            logger.info("Simulating light curves for false positive rate calculation")
            # Keep track of how many light curves we've used, break after we reach the specified number
            light_curve_count = 0
            for source_id in source_ids:
                light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations: 
                    logger.debug("\tRejected source {}".format(source_id))
                    continue
                    
                logger.debug("\tSelected source {}".format(source_id))
                these_indices = vi.simulate_light_curves_compute_indices(light_curve, num_simulated=number_of_fpr_simulations_per_light_curve, indices=indices)
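                # Stack this source's simulated indices onto the running array;
                # the NameError on the first pass simply seeds the array.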
                try:
                    simulated_light_curve_statistics = np.hstack((simulated_light_curve_statistics, these_indices))
                except NameError:
                    simulated_light_curve_statistics = these_indices
                    
                light_curve_count += 1
                
                if light_curve_count >= number_of_fpr_light_curves:
                    break
                        
            logger.info("Starting microlensing event simulations")
            # Keep track of how many light curves we've used, break after we reach the specified number
            light_curve_count = 0            
            for source_id in source_ids:
                light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations: 
                    logger.debug("\tRejected source {}".format(source_id))
                    continue
                
                logger.debug("\tSelected source {}".format(source_id))
                one_light_curve_statistics = vi.simulate_events_compute_indices(light_curve, events_per_light_curve=number_of_microlensing_simulations_per_light_curve, indices=indices)
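                # Same accumulation pattern as above: NameError seeds the array on the first source.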
                try:
                    simulated_microlensing_statistics = np.hstack((simulated_microlensing_statistics, one_light_curve_statistics))
                except NameError:
                    simulated_microlensing_statistics = one_light_curve_statistics

                light_curve_count += 1                
                if light_curve_count >= number_of_microlensing_light_curves:
                    break
            
            ccd.close()
        
        logger.info("Starting false positive rate calculation to get Nsigmas")
        # Now determine the N in N-sigma by computing the false positive rate and getting it to be ~0.01 (1%) for each index
        selection_criteria = {}
        for index in indices:
            logger.debug("\tIndex: {}".format(index))
            # Get the mean and standard deviation of the 'vanilla' distributions to select with
            mu,sigma = np.mean(pdb_statistics[index]), np.std(pdb_statistics[index])
            logger.debug("\t mu={}, sigma={}".format(mu, sigma))
            
            # Get the simulated statistics for this index
            these_statistics = np.log10(simulated_light_curve_statistics[index])
            
            # Start by selecting with Nsigma = 0
            Nsigma = 0.
            
            # Nsteps is the number of steps this routine has to take to converge -- just used for diagnostics
            Nsteps = 0
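            # Random-walk Nsigma until the false positive rate lands in [0.008, 0.012]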
            while True:
                fpr = np.sum((these_statistics > (mu + Nsigma*sigma)) | (these_statistics < (mu - Nsigma*sigma))) / float(len(these_statistics))
                logger.debug("Step: {}, FPR: {}".format(Nsteps, fpr))
                
                # WARNING: If you don't use enough simulations, this may never converge!
                if fpr > 0.012: 
                    Nsigma += np.random.uniform(0., 0.05)
                elif fpr < 0.008:
                    Nsigma -= np.random.uniform(0., 0.05)
                else:
                    break
                
                Nsteps += 1
                
                if Nsteps > 1000:
                    logger.warning("{} didn't converge!".format(index))
                    break
                
            logger.info("{} -- Final Num. steps: {}, Final FPR: {}".format(index, Nsteps, fpr))
            logger.info("{} -- Final Nsigma={}, Nsigma*sigma={}".format(index, Nsigma, Nsigma*sigma))
            
            selection_criteria[index] = dict()
            selection_criteria[index]["upper"] = mu + Nsigma*sigma
            selection_criteria[index]["lower"] = mu - Nsigma*sigma
        
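        # Cache the results so subsequent runs can skip the expensive simulations above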
        with open(pickle_filename, "wb") as f:
            pickle.dump((simulated_microlensing_statistics, selection_criteria), f)
        
    with open(pickle_filename, "rb") as f:
        simulated_microlensing_statistics, selection_criteria = pickle.load(f)
    
    # Now compute the detection efficiency of each index using the selection criteria from the false positive rate simulation
    selected_distributions = {}
    detection_efficiencies = {}
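    # A simulated event is "detected" if its index value falls outside the
    # [lower, upper] band derived from the false positive rate simulation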
    for index in indices:
        this_index_values = np.log10(simulated_microlensing_statistics[index])
        
        """
        if index == "eta":
            selection = this_index_values > 0
            this_index_values = np.log10(this_index_values[selection])
        elif index == "sigma_mu":
            selection = np.ones_like(this_index_values).astype(bool)
            this_index_values = np.log10(np.fabs(this_index_values))
        elif index == "j":
            selection = this_index_values > 0
            this_index_values = np.log10(this_index_values[selection])
        elif index == "k":
            selection = np.ones_like(this_index_values).astype(bool)
            this_index_values = np.log10(this_index_values)
        elif index == "delta_chi_squared":
            selection = this_index_values > 0
            this_index_values = np.log10(this_index_values[selection])
        """
        
        selected_ml_statistics = simulated_microlensing_statistics[(this_index_values > selection_criteria[index]["upper"]) | (this_index_values < selection_criteria[index]["lower"])]
        selected_distributions[index] = selected_ml_statistics
        
        total_detection_efficiency = len(selected_ml_statistics) / float(len(simulated_microlensing_statistics[index]))
        print "{}, eff={}".format(index, total_detection_efficiency)
        detection_efficiencies[index] = total_detection_efficiency
    
    if plot:
        plot_distributions(selected_distributions, simulated_microlensing_statistics, detection_efficiencies, params=["tE", "u0", "m"], filename=plot_filename, indices=indices)
    
    return simulated_microlensing_statistics, selected_distributions
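
A minimal usage sketch (the CCD list and config value here are hypothetical; the field id and `pdb.Field` constructor are borrowed from Example #2):

    field = pdb.Field(100018, "R")
    ml_stats, selected = detection_efficiency_for_field(field, ccds=[0, 1],
                                                        config={"number_of_fpr_light_curves": 5},
                                                        overwrite=False, plot=False)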
Example #2
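# Assumed module-level imports/helpers, not shown in this listing:
#   os, pickle, numpy as np, matplotlib.pyplot as plt, matplotlib.cm as cm,
#   a configured `logger`, and project modules pdb, pa, vi, pu, pg.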
def variability_indices_distributions(field_id=100018, overwrite=False):
    field = pdb.Field(field_id, "R")

    indices = ["eta", "j", "delta_chi_squared", "sigma_mu", "k"]
    number_of_microlensing_light_curves = 1000
    number_of_microlensing_simulations_per_light_curve = 100
    min_number_of_good_observations = 100

    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(field.id, number_of_microlensing_light_curves, number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "var_indices", file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "var_indices", file_base.format(ext="pdf"))

    if not os.path.exists(os.path.dirname(pickle_filename)):
        os.makedirs(os.path.dirname(pickle_filename))

    if not os.path.exists(os.path.dirname(plot_filename)):
        os.makedirs(os.path.dirname(plot_filename))

    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")

    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data...".format(pickle_filename))

        # Initialize my PDB statistic dictionary
        # I use a dictionary here because after doing some sub-selection the index arrays may
        #   have different lengths.
        pdb_statistics = dict()
        for index in indices:
            pdb_statistics[index] = np.array([])

        for ccd in field.ccds.values():
            print "Starting with CCD {}".format(ccd.id)
            chip = ccd.read()

            pdb_statistics_array = []

            logger.info("Starting microlensing event simulations")
            # Keep track of how many light curves we've used, break after we reach the specified number
            light_curve_count = 0
            for source in chip.sources.where("(ngoodobs > {})".format(min_number_of_good_observations)):
                source_id = source["matchedSourceID"]

                light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations:
                    continue

                # Add the pre-simulation statistics to an array
                lc_var_indices = pa.compute_variability_indices(light_curve, indices, return_tuple=True)
                pdb_statistics_array.append(lc_var_indices)

                one_light_curve_statistics = vi.simulate_events_compute_indices(light_curve, events_per_light_curve=number_of_microlensing_simulations_per_light_curve, indices=indices)
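                # NameError on the first source seeds the running array of simulated statistics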
                try:
                    simulated_microlensing_statistics = np.hstack((simulated_microlensing_statistics, one_light_curve_statistics))
                except NameError:
                    simulated_microlensing_statistics = one_light_curve_statistics

                light_curve_count += 1
                if light_curve_count >= number_of_microlensing_light_curves:
                    break

            pdb_statistics_array = np.array(pdb_statistics_array, dtype=[(index,float) for index in indices])

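            # Append this CCD's statistics to the field-wide array (NameError seeds it on the first CCD)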
            try:
                all_pdb_statistics_array = np.hstack((all_pdb_statistics_array, pdb_statistics_array))
            except NameError:
                all_pdb_statistics_array = pdb_statistics_array

            ccd.close()

        with open(pickle_filename, "wb") as f:
            pickle.dump((all_pdb_statistics_array, simulated_microlensing_statistics), f)

    with open(pickle_filename, "rb") as f:
        all_pdb_statistics_array, simulated_microlensing_statistics = pickle.load(f)

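    # Hard-coded selection thresholds in log10 units (presumably from a previous
    # false-positive-rate run); they are exponentiated (10**) before plotting below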
    selection_criteria = {
        "eta": 0.16167735855516213,
        "delta_chi_squared": 1.162994709319348,
        "j": 1.601729135628142
    }

    index_pairs = [("eta", "delta_chi_squared"), ("eta", "j"), ("delta_chi_squared", "j")]

    nbins = 100
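    # For each pair of indices, compare the 2D (log-log) distribution of simulated
    # microlensing events (right panel) against the raw PDB sources (left panel)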
    for x_index, y_index in index_pairs:
        fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,7.5))

        # Variable data
        x = simulated_microlensing_statistics[x_index]
        y = simulated_microlensing_statistics[y_index]

        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

        xbins_pos = np.logspace(np.log10(pos_x.min()), np.log10(pos_x.max()), nbins)
        ybins_pos = np.logspace(np.log10(pos_y.min()), np.log10(pos_y.max()), nbins)

        H_pos, xedges_pos, yedges_pos = np.histogram2d(pos_x, pos_y, bins=[xbins_pos, ybins_pos])

        # Non-variable data
        x = all_pdb_statistics_array[x_index]
        y = all_pdb_statistics_array[y_index]

        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

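        # Re-use the bin edges from the microlensing histogram so both panels share identical binning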
        H_pos_boring, xedges_pos, yedges_pos = np.histogram2d(pos_x, pos_y, bins=[xedges_pos, yedges_pos])

        ax1 = axes[1]
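        # Plot log-counts; empty bins are mapped to 0 so log10 is never taken of zero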
        ax1.pcolormesh(xedges_pos, yedges_pos, np.where(H_pos > 0, np.log10(H_pos), 0.).T, cmap=cm.Blues)
        ax1.set_xscale("log")
        ax1.set_yscale("log")
        ax1.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax1.set_ylim(yedges_pos[0], yedges_pos[-1])

        ax1.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax1.axhline(10.**selection_criteria[y_index], color='r', linestyle='--')
        ax1.axvline(10.**selection_criteria[x_index], color='r', linestyle='--')

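        # Shade the selected region: eta is a lower cut (variables have small eta),
        # while delta_chi_squared is an upper cut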
        if x_index == "eta":
            ax1.fill_between([xedges_pos[0], 10.**selection_criteria[x_index]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax1.fill_between([10.**selection_criteria[x_index], xedges_pos[-1]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)

        ax2 = axes[0]
        ax2.pcolormesh(xedges_pos, yedges_pos, np.where(H_pos_boring > 0, np.log10(H_pos_boring), 0.).T, cmap=cm.Blues)
        ax2.set_xscale("log")
        ax2.set_yscale("log")
        ax2.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax2.set_ylim(yedges_pos[0], yedges_pos[-1])

        ax2.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax2.set_ylabel(pu.index_to_label(y_index), fontsize=28)
        ax2.axhline(10.**selection_criteria[y_index], color='r', linestyle='--')
        ax2.axvline(10.**selection_criteria[x_index], color='r', linestyle='--')

        if x_index == "eta":
            ax2.fill_between([xedges_pos[0], 10.**selection_criteria[x_index]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax2.fill_between([10.**selection_criteria[x_index], xedges_pos[-1]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)

        for ax in fig.axes:
            for ticklabel in ax.get_xticklabels()+ax.get_yticklabels():
                ticklabel.set_fontsize(18)

        fig.savefig(os.path.join(pg.plots_path, "paper_figures", "{}_vs_{}.pdf".format(x_index, y_index)), bbox_inches="tight")
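
A minimal usage sketch (the default arguments come straight from the signature):

    variability_indices_distributions(field_id=100018, overwrite=False)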