def _collect_simulated_statistics(ccd, source_ids, min_number_of_good_observations,
                                  max_light_curves, simulate):
    """ Run `simulate` on light curves from `ccd` until enough have been used.

        Parameters
        ----------
        ccd : object
            Must provide `light_curve(source_id, barebones=True, clean=True)`.
        source_ids : iterable
            Source IDs to try, in order.
        min_number_of_good_observations : int
            Light curves with fewer `mjd` points than this are rejected.
        max_light_curves : int
            Stop once this many light curves have been accepted.
        simulate : callable
            Called with each accepted light curve; its return value is kept.

        Returns
        -------
        list
            One entry (whatever `simulate` returned) per accepted light curve.
    """
    chunks = []
    for source_id in source_ids:
        light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
        if len(light_curve.mjd) < min_number_of_good_observations:
            logger.debug("\tRejected source {}".format(source_id))
            continue
        logger.debug("\tSelected source {}".format(source_id))

        chunks.append(simulate(light_curve))
        if len(chunks) >= max_light_curves:
            break

    return chunks


def _find_nsigma_for_target_fpr(index, mu, sigma, these_statistics,
                                fpr_bounds=(0.008, 0.012), max_steps=1000):
    """ Random-walk Nsigma until the false positive rate lands inside `fpr_bounds`.

        The false positive rate is the fraction of `these_statistics` that fall
        outside the interval [mu - Nsigma*sigma, mu + Nsigma*sigma].

        Parameters
        ----------
        index : str
            Name of the variability index (used only in the warning message).
        mu, sigma : float
            Center and width unit of the selection interval.
        these_statistics : numpy.ndarray
            Simulated values to which the selection is applied.
        fpr_bounds : tuple
            (lower, upper) acceptable false positive rate.
        max_steps : int
            Give up (with a warning) after more than this many steps.

        Returns
        -------
        (Nsigma, Nsteps, fpr) : tuple
            Final multiplier, number of steps taken and the last computed rate.
    """
    lower_fpr, upper_fpr = fpr_bounds

    # Start by selecting with Nsigma = 0
    Nsigma = 0.
    # Nsteps is only used for diagnostics and for the give-up condition
    Nsteps = 0
    while True:
        outside = (these_statistics > (mu + Nsigma*sigma)) | (these_statistics < (mu - Nsigma*sigma))
        fpr = np.sum(outside) / float(len(these_statistics))
        logger.debug("Step: {}, FPR: {}".format(Nsteps, fpr))

        # WARNING: If you don't use enough simulations, this may never converge!
        if fpr > upper_fpr:
            Nsigma += np.random.uniform(0., 0.05)
        elif fpr < lower_fpr:
            Nsigma -= np.random.uniform(0., 0.05)
        else:
            break

        Nsteps += 1
        if Nsteps > max_steps:
            logger.warn("{} didn't converge!".format(index))
            break

    return Nsigma, Nsteps, fpr


def detection_efficiency_for_field(field, ccds=range(12), config=None, overwrite=False,
                                   indices=None, plot=True):
    """ Run a detection efficiency simulation for a PTF field

        The expensive part (reading the CCDs, simulating light curves and
        deriving the selection criteria) is cached in a pickle file under
        data/new_detection_efficiency; it is regenerated when the file is
        missing or when `overwrite` is True.

        Parameters
        ----------
        field : object
            Must provide `id` and `ccds` (a mapping whose values are CCD objects).
        ccds : iterable of int
            Only CCDs whose `id` is in this collection are used.
        config : dict, optional
            Overrides for the simulation settings. Recognized keys (defaults):
            "min_number_of_good_observations" (100),
            "number_of_fpr_light_curves" (10),
            "number_of_fpr_simulations_per_light_curve" (10),
            "number_of_microlensing_light_curves" (10),
            "number_of_microlensing_simulations_per_light_curve" (10).
        overwrite : bool
            Delete an existing cache file and regenerate the data.
        indices : list of str, optional
            Variability index names. Defaults to
            ["eta", "sigma_mu", "j", "k", "delta_chi_squared"].
        plot : bool
            If True, call `plot_distributions` and write to the plot filename.

        Returns
        -------
        (simulated_microlensing_statistics, selected_distributions) : tuple
            The simulated microlensing statistics and, per index, the subset
            of them that passes that index's selection.

        Raises
        ------
        ValueError
            If the data has to be generated but no light curve passed the
            selection, so there is nothing to compute statistics from.
    """
    # None instead of mutable defaults, so calls never share (or leak changes
    # into) the same dict / list object.
    if config is None:
        config = {}
    if indices is None:
        indices = ["eta", "sigma_mu", "j", "k", "delta_chi_squared"]

    # Get configuration variables or defaults
    min_number_of_good_observations = config.get("min_number_of_good_observations", 100)
    number_of_fpr_light_curves = config.get("number_of_fpr_light_curves", 10)
    number_of_fpr_simulations_per_light_curve = config.get("number_of_fpr_simulations_per_light_curve", 10)
    number_of_microlensing_light_curves = config.get("number_of_microlensing_light_curves", 10)
    number_of_microlensing_simulations_per_light_curve = config.get("number_of_microlensing_simulations_per_light_curve", 10)

    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(
        field.id,
        number_of_microlensing_light_curves,
        number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "new_detection_efficiency", file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "new_detection_efficiency", file_base.format(ext="pdf"))

    # makedirs (not mkdir) because the output directories are nested and the
    # parent ("data" / "plots") may not exist yet.
    for filename in (pickle_filename, plot_filename):
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")

    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data...".format(pickle_filename))

        def simulate_vanilla(light_curve):
            return vi.simulate_light_curves_compute_indices(
                light_curve,
                num_simulated=number_of_fpr_simulations_per_light_curve,
                indices=indices)

        def simulate_events(light_curve):
            return vi.simulate_events_compute_indices(
                light_curve,
                events_per_light_curve=number_of_microlensing_simulations_per_light_curve,
                indices=indices)

        # I use a dictionary here because after doing some sub-selection the
        # index arrays may have different lengths.
        pdb_statistics = dict((index, np.array([])) for index in indices)

        # Per-light-curve results are collected in lists and stacked once at
        # the end, instead of re-copying a growing array on every iteration.
        fpr_chunks = []
        microlensing_chunks = []

        for ccd in field.ccds.values():
            if ccd.id not in ccds:
                continue

            logger.info(greenText("Starting with CCD {}".format(ccd.id)))
            chip = ccd.read()

            logger.info("Getting variability statistics from photometric database")
            source_ids = []
            pdb_statistics_array = []
            for source in chip.sources.where("(ngoodobs > {})".format(min_number_of_good_observations)):
                pdb_statistics_array.append(
                    tuple([source_index_name_to_pdb_index(source, index) for index in indices]))
                source_ids.append(source["matchedSourceID"])

            pdb_statistics_array = np.array(pdb_statistics_array,
                                            dtype=[(index, float) for index in indices])
            logger.debug("Selected {} statistics".format(len(pdb_statistics_array)))

            # prune_index_distribution defines the selection distribution for
            # each index; the pruned arrays may differ in length per index.
            for index in indices:
                pdb_statistics[index] = np.append(
                    pdb_statistics[index],
                    prune_index_distribution(index, pdb_statistics_array[index]))

            # Randomize the order of source_ids to prune through
            np.random.shuffle(source_ids)

            logger.info("Simulating light curves for false positive rate calculation")
            fpr_chunks.extend(_collect_simulated_statistics(
                ccd, source_ids, min_number_of_good_observations,
                number_of_fpr_light_curves, simulate_vanilla))

            logger.info("Starting microlensing event simulations")
            microlensing_chunks.extend(_collect_simulated_statistics(
                ccd, source_ids, min_number_of_good_observations,
                number_of_microlensing_light_curves, simulate_events))

            ccd.close()

        if not fpr_chunks or not microlensing_chunks:
            raise ValueError("No light curves with at least {} good observations were found "
                             "for field {}; cannot compute a detection efficiency."
                             .format(min_number_of_good_observations, field.id))

        simulated_light_curve_statistics = np.hstack(fpr_chunks)
        simulated_microlensing_statistics = np.hstack(microlensing_chunks)

        logger.info("Starting false positive rate calculation to get Nsigmas")
        # Now determine the N in N-sigma by computing the false positive rate
        # and getting it to be ~0.01 (1%) for each index
        selection_criteria = {}
        for index in indices:
            logger.debug("\tIndex: {}".format(index))

            # Get the mean and standard deviation of the 'vanilla' distributions to select with
            mu, sigma = np.mean(pdb_statistics[index]), np.std(pdb_statistics[index])
            logger.debug("\t mu={}, sigma={}".format(mu, sigma))

            # The selection is applied to log10 of the simulated statistics
            these_statistics = np.log10(simulated_light_curve_statistics[index])

            Nsigma, Nsteps, fpr = _find_nsigma_for_target_fpr(index, mu, sigma, these_statistics)

            logger.info("{} -- Final Num. steps: {}, Final FPR: {}".format(index, Nsteps, fpr))
            logger.info("{} -- Final Nsigma={}, Nsigma*sigma={}".format(index, Nsigma, Nsigma*sigma))

            selection_criteria[index] = {
                "upper": mu + Nsigma*sigma,
                "lower": mu - Nsigma*sigma,
            }

        # Binary mode + context manager: pickle data is bytes, and the handle
        # is closed even if dumping fails.
        with open(pickle_filename, "wb") as f:
            pickle.dump((simulated_microlensing_statistics, selection_criteria), f)

    # NOTE: this is a cache file written by this function; never point it at
    # a pickle from an untrusted source.
    with open(pickle_filename, "rb") as f:
        (simulated_microlensing_statistics, selection_criteria) = pickle.load(f)

    # Now compute the detection efficiency of each index using the selection
    # criteria from the false positive rate simulation
    selected_distributions = {}
    detection_efficiencies = {}
    for index in indices:
        this_index_values = np.log10(simulated_microlensing_statistics[index])

        is_selected = ((this_index_values > selection_criteria[index]["upper"]) |
                       (this_index_values < selection_criteria[index]["lower"]))
        selected_ml_statistics = simulated_microlensing_statistics[is_selected]
        selected_distributions[index] = selected_ml_statistics

        total_detection_efficiency = (len(selected_ml_statistics) /
                                      float(len(simulated_microlensing_statistics[index])))
        # Single-argument print(...) behaves the same under Python 2 and 3
        print("{}, eff={}".format(index, total_detection_efficiency))
        detection_efficiencies[index] = total_detection_efficiency

    if plot:
        plot_distributions(selected_distributions,
                           simulated_microlensing_statistics,
                           detection_efficiencies,
                           params=["tE", "u0", "m"],
                           filename=plot_filename,
                           indices=indices)

    return simulated_microlensing_statistics, selected_distributions
def _draw_index_pair_panel(ax, xedges, yedges, counts, x_index, y_index,
                           selection_criteria, label_y_axis):
    """ Draw one log-log 2D histogram panel with the selection cuts overlaid.

        Parameters
        ----------
        ax : matplotlib axes
            Axes to draw into.
        xedges, yedges : numpy.ndarray
            Bin edges along x and y, as returned by `np.histogram2d`.
        counts : numpy.ndarray
            2D histogram counts; log10(counts) is shown, empty bins as 0.
        x_index, y_index : str
            Index names; used for the axis labels and to look up the cuts.
        selection_criteria : dict
            Maps index name to log10 of the cut value (cuts are drawn at 10**value).
        label_y_axis : bool
            Whether to put a label on the y axis of this panel.
    """
    # log10 of empty bins is -inf and is replaced by 0 below; silence the
    # divide-by-zero warning that computing it would otherwise emit.
    with np.errstate(divide="ignore"):
        log_counts = np.where(counts > 0, np.log10(counts), 0.)

    ax.pcolormesh(xedges, yedges, log_counts.T, cmap=cm.Blues)
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlim(xedges[0], xedges[-1])
    ax.set_ylim(yedges[0], yedges[-1])
    ax.set_xlabel(pu.index_to_label(x_index), fontsize=28)
    if label_y_axis:
        ax.set_ylabel(pu.index_to_label(y_index), fontsize=28)

    x_cut = 10.**selection_criteria[x_index]
    y_cut = 10.**selection_criteria[y_index]
    ax.axhline(y_cut, color='r', linestyle='--')
    ax.axvline(x_cut, color='r', linestyle='--')

    # Shade the region above the y cut: left of the x cut when x is eta,
    # right of the x cut when x is delta_chi_squared.
    if x_index == "eta":
        ax.fill_between([xedges[0], x_cut], y_cut, yedges[-1], facecolor='red', alpha=0.1)
    elif x_index == "delta_chi_squared":
        ax.fill_between([x_cut, xedges[-1]], y_cut, yedges[-1], facecolor='red', alpha=0.1)


def variability_indices_distributions(field_id=100018, overwrite=False):
    """ Plot 2D distributions of variability indices for one field.

        For every CCD of the field, the variability indices of the selected
        light curves are computed and microlensing events are simulated on
        them. Both sets of statistics are cached in a pickle file under
        data/var_indices. For each index pair a two-panel figure is saved as
        "<x_index>_vs_<y_index>.pdf" in the "paper_figures" directory under
        `pg.plots_path`: the left panel shows the pre-simulation statistics,
        the right panel the simulated microlensing statistics.

        Parameters
        ----------
        field_id : int
            ID passed to `pdb.Field` (with filter "R").
        overwrite : bool
            Delete an existing cache file and regenerate the data.

        Raises
        ------
        ValueError
            If the data has to be generated but no light curve was selected.
    """
    field = pdb.Field(field_id, "R")

    indices = ["eta", "j", "delta_chi_squared", "sigma_mu", "k"]
    number_of_microlensing_light_curves = 1000
    number_of_microlensing_simulations_per_light_curve = 100
    min_number_of_good_observations = 100

    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(
        field.id,
        number_of_microlensing_light_curves,
        number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "var_indices", file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "var_indices", file_base.format(ext="pdf"))

    # makedirs (not mkdir) because the output directories are nested and the
    # parent ("data" / "plots") may not exist yet.
    for filename in (pickle_filename, plot_filename):
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)

    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")

    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data...".format(pickle_filename))

        # Results are collected in lists and stacked once at the end, instead
        # of re-copying a growing array for every light curve.
        pdb_chunks = []
        microlensing_chunks = []

        for ccd in field.ccds.values():
            # Single-argument print(...) behaves the same under Python 2 and 3
            print("Starting with CCD {}".format(ccd.id))
            chip = ccd.read()

            pdb_statistics_array = []

            logger.info("Starting microlensing event simulations")
            # Keep track of how many light curves we've used, break after we
            # reach the specified number
            light_curve_count = 0
            for source in chip.sources.where("(ngoodobs > {})".format(min_number_of_good_observations)):
                source_id = source["matchedSourceID"]

                light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations:
                    continue

                # Add the pre-simulation statistics to an array
                pdb_statistics_array.append(
                    pa.compute_variability_indices(light_curve, indices, return_tuple=True))

                microlensing_chunks.append(vi.simulate_events_compute_indices(
                    light_curve,
                    events_per_light_curve=number_of_microlensing_simulations_per_light_curve,
                    indices=indices))

                light_curve_count += 1
                if light_curve_count >= number_of_microlensing_light_curves:
                    break

            pdb_chunks.append(np.array(pdb_statistics_array,
                                       dtype=[(index, float) for index in indices]))

            ccd.close()

        if not microlensing_chunks:
            raise ValueError("No light curves with at least {} good observations were found "
                             "for field {}; nothing to plot."
                             .format(min_number_of_good_observations, field.id))

        all_pdb_statistics_array = np.hstack(pdb_chunks)
        simulated_microlensing_statistics = np.hstack(microlensing_chunks)

        # Binary mode + context manager: pickle data is bytes, and the handle
        # is closed even if dumping fails.
        with open(pickle_filename, "wb") as f:
            pickle.dump((all_pdb_statistics_array, simulated_microlensing_statistics), f)

    # NOTE: this is a cache file written by this function; never point it at
    # a pickle from an untrusted source.
    with open(pickle_filename, "rb") as f:
        all_pdb_statistics_array, simulated_microlensing_statistics = pickle.load(f)

    # Hard-coded cuts; they are drawn at 10**value, i.e. they are stored as
    # log10 of the index value.
    selection_criteria = {
        "eta" : 0.16167735855516213,
        "delta_chi_squared" : 1.162994709319348,
        "j" : 1.601729135628142
    }

    index_pairs = [("eta", "delta_chi_squared"), ("eta", "j"), ("delta_chi_squared", "j")]

    nbins = 100

    # Make sure the output directory exists before the first savefig
    figure_directory = os.path.join(pg.plots_path, "paper_figures")
    if not os.path.exists(figure_directory):
        os.makedirs(figure_directory)

    for x_index, y_index in index_pairs:
        fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15, 7.5))

        # Variable data: only strictly positive pairs can go on log axes
        x = simulated_microlensing_statistics[x_index]
        y = simulated_microlensing_statistics[y_index]
        positive = (x > 0) & (y > 0)
        pos_x, pos_y = x[positive], y[positive]

        # Log-spaced bins spanning the simulated data; reused for both panels
        xbins_pos = np.logspace(np.log10(pos_x.min()), np.log10(pos_x.max()), nbins)
        ybins_pos = np.logspace(np.log10(pos_y.min()), np.log10(pos_y.max()), nbins)
        H_pos, xedges_pos, yedges_pos = np.histogram2d(pos_x, pos_y, bins=[xbins_pos, ybins_pos])

        # Non-variable data, histogrammed on the same bin edges
        x = all_pdb_statistics_array[x_index]
        y = all_pdb_statistics_array[y_index]
        positive = (x > 0) & (y > 0)
        H_pos_boring, xedges_pos, yedges_pos = np.histogram2d(
            x[positive], y[positive], bins=[xedges_pos, yedges_pos])

        # Right panel: simulated microlensing events (y axis shared, no label)
        _draw_index_pair_panel(axes[1], xedges_pos, yedges_pos, H_pos,
                               x_index, y_index, selection_criteria, label_y_axis=False)

        # Left panel: pre-simulation statistics
        _draw_index_pair_panel(axes[0], xedges_pos, yedges_pos, H_pos_boring,
                               x_index, y_index, selection_criteria, label_y_axis=True)

        for ax in fig.axes:
            for ticklabel in ax.get_xticklabels() + ax.get_yticklabels():
                ticklabel.set_fontsize(18)

        fig.savefig(os.path.join(figure_directory, "{}_vs_{}.pdf".format(x_index, y_index)),
                    bbox_inches="tight")