# Assemble the feature matrix for all good sources straight from the PDB
# columns. sigma_mu is stored as two columns, so take their ratio.
all_data = np.zeros((len(sourceIDs), Nfeatures))
for ii, f in enumerate(features):
    if f == "sigma_mu":
        pdb_f1, pdb_f2 = pdb_index_name[f]
        all_data[:, ii] = goodSources[pdb_f1] / goodSources[pdb_f2]
    else:
        pdb_f = pdb_index_name[f]
        all_data[:, ii] = goodSources[pdb_f]

np.save("/home/aprice-whelan/tmp/all_data.npy", all_data)

# Draw a random subset of sources (with replacement) to simulate events on
random_sourceIDs = sourceIDs[np.random.randint(len(sourceIDs),
                                               size=Nsources)]

# For each selected source, inject Ntrials microlensing events and
# recompute the variability indices to build the training set
training_data = np.zeros((Nsources, Ntrials, Nfeatures))
for ii, sourceID in enumerate(random_sourceIDs):
    d = sourceData.readWhere("matchedSourceID == {0}".format(sourceID))
    mjd = d["mjd"]
    mag = d["mag"]
    err = d["magErr"]

    for trial in range(Ntrials):
        lc = SimulatedLightCurve(mjd=mjd, mag=mag, error=err)
        lc.add_microlensing_event()
        stats = compute_variability_indices(lc, indices=features)
        training_data[ii, trial, :] = np.array([stats[x] for x in features])

np.save("/home/aprice-whelan/tmp/training_data.npy", training_data)

chip.close()
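# add_microlensing_event() above injects a point-source point-lens event
# into each light curve. A minimal sketch of what such an injection might
# look like, assuming the standard Paczynski magnification
# A(u) = (u**2 + 2) / (u * sqrt(u**2 + 4)); the parameter names (u0, t0,
# tE) mirror the calls above, but this is an illustration, not the actual
# SimulatedLightCurve implementation:
import numpy as np

def paczynski_magnification(t, u0, t0, tE):
    """Magnification of a point-source point-lens microlensing event."""
    u = np.sqrt(u0**2 + ((t - t0) / tE)**2)
    return (u**2 + 2.) / (u * np.sqrt(u**2 + 4.))

def inject_microlensing_event(mjd, mag, u0=0.5, t0=None, tE=20.):
    """Return a copy of `mag` with a microlensing brightening injected."""
    if t0 is None:
        t0 = np.median(mjd)  # put the peak near the middle of the baseline
    A = paczynski_magnification(np.asarray(mjd), u0, t0, tE)
    return np.asarray(mag) - 2.5*np.log10(A)  # brighter = smaller magnitude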
def select_candidates(field, selection_criteria, num_fit_attempts=10):
    """ Select candidates from a field given the log10(selection criteria)
        from mongodb.

        The current selection scheme is to first select on eta, then to
        sanity-check with delta chi-squared by making sure it's positive
        and > 10.
    """

    eta_cut = 10**selection_criteria

    light_curves = []
    for ccd in field.ccds.values():
        logger.info("Starting with CCD {}".format(ccd.id))
        chip = ccd.read()

        # Pre-select sources with enough good observations and a plausible
        # von Neumann ratio before touching any light curves
        cdtn = ("(ngoodobs > {}) & (vonNeumannRatio > 0.0) & "
                "(vonNeumannRatio < {}) & ((ngoodobs/nobs) > 0.5)")
        cdtn = cdtn.format(min_number_of_good_observations, eta_cut)
        source_ids = chip.sources.readWhere(cdtn, field="matchedSourceID")
        logger.info("\tSelected {} pre-candidates from PDB"
                    .format(len(source_ids)))

        for source_id in source_ids:
            # APW: TODO -- this is still the biggest time hog! It turns
            # out it's still faster than reading the whole thing into
            # memory, though!
            light_curve = ccd.light_curve(source_id, barebones=True,
                                          clean=True)

            # If the light curve is missing or doesn't have enough clean
            # observations, skip it
            if light_curve is None or \
                    len(light_curve) < min_number_of_good_observations:
                continue

            # Compute the variability indices for the cleaned light curve
            ind_names = ["eta", "delta_chi_squared", "j", "k", "sigma_mu"]
            try:
                indices = pa.compute_variability_indices(light_curve,
                                                         indices=ind_names)
            except ValueError:
                logger.warning("Failed to compute variability indices for "
                               "light curve! {0}".format(light_curve))
                continue

            light_curve.indices = indices
            light_curve.tags = []
            light_curve.features = {}

            if light_curve.sdss_type() == "galaxy":
                light_curve.tags.append("galaxy")
                continue

            # The object is not a galaxy (or has no SDSS data): try to get
            # the SDSS colors to see if it passes the Richards et al. QSO
            # color cut
            sdss_colors = light_curve.sdss_colors("psf")
            qso_status = richards_qso(sdss_colors)
            if sdss_colors is not None and qso_status:
                light_curve.tags.append("qso")

            candidate_status = pa.iscandidate(light_curve,
                                              lower_eta_cut=eta_cut)

            if candidate_status == "candidate" and \
                    "qso" not in light_curve.tags:
                light_curve.tags.append("candidate")
                light_curves.append(light_curve)
                continue

            if candidate_status == "subcandidate" and \
                    light_curve.indices["eta"] < eta_cut and not qso_status:
                # Try to do a period analysis with AoV
                try:
                    peak_period = light_curve.features["aov_period"]
                    peak_power = light_curve.features["aov_power"]
                except KeyError:
                    try:
                        fp = pa.findPeaks_aov(light_curve.mjd.copy(),
                                              light_curve.mag.copy(),
                                              light_curve.error.copy(),
                                              3, 1.,
                                              2.*light_curve.baseline,
                                              1., 0.1, 20)
                    except ZeroDivisionError:
                        continue

                    peak_period = fp["peak_period"][0]
                    peak_power = max(fp["peak_period"])

                    light_curve.features["aov_period"] = peak_period
                    light_curve.features["aov_power"] = peak_power

                # A strong periodic signal shorter than the baseline means
                # a variable star, not a microlensing candidate
                if peak_period < 2.*light_curve.baseline:
                    if peak_power > 25.:
                        light_curve.tags.append("variable star")

                        if "subcandidate" in light_curve.tags:
                            light_curve.tags.pop(
                                light_curve.tags.index("subcandidate"))

                        if light_curve not in light_curves:
                            light_curves.append(light_curve)

        ccd.close()

    return light_curves
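# The eta_cut above, like the vonNeumannRatio column in the pre-selection
# condition, refers to the von Neumann ratio. A minimal sketch assuming the
# standard definition (mean squared successive difference over the sample
# variance); the actual pa.compute_variability_indices implementation may
# differ in detail:
import numpy as np

def von_neumann_eta(mag):
    """Von Neumann ratio eta. Smoothly varying light curves (e.g. ongoing
    microlensing events) have strongly correlated successive points and
    hence small eta, which is why the selection cuts at eta < eta_cut."""
    mag = np.asarray(mag, dtype=float)
    delta_sq = np.sum((mag[1:] - mag[:-1])**2) / (len(mag) - 1)
    return delta_sq / np.var(mag, ddof=1)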
def test_iscandidate(plot=False):
    """ Use test light curves to test selection:
        - Periodic
        - Bad data
        - Various simulated events
        - Flat light curve
        - Transients (SN, Nova, etc.)
    """

    np.random.seed(10)
    logger.setLevel(logging.DEBUG)

    from ptf.lightcurve import SimulatedLightCurve
    import ptf.db.mongodb as mongo

    db = mongo.PTFConnection()

    index_names = ["eta", "delta_chi_squared", "j", "k", "sigma_mu"]

    def field_eta_cut(field_id):
        """Fetch the log10(eta) selection criterion for a field from
        mongodb and return the linear-scale cut."""
        doc = db.fields.find_one({"_id": field_id},
                                 {"selection_criteria": 1})
        return 10**doc["selection_criteria"]["eta"]

    logger.info("---------------------------------------------------")
    logger.info(greenText("Periodic light curves"))
    logger.info("---------------------------------------------------")

    # Periodic light curves: at most subcandidates, never full candidates
    periodics = [(4588, 7, 13227), (4588, 2, 15432), (4588, 9, 17195),
                 (2562, 10, 28317), (4721, 8, 11979), (4162, 2, 14360)]
    for field_id, ccd_id, source_id in periodics:
        periodic_light_curve = pdb.get_light_curve(field_id, ccd_id,
                                                   source_id, clean=True)
        periodic_light_curve.indices = pa.compute_variability_indices(
            periodic_light_curve, indices=index_names)
        assert pa.iscandidate(
            periodic_light_curve,
            lower_eta_cut=field_eta_cut(field_id)) in ["subcandidate", False]
        if plot:
            plot_lc(periodic_light_curve)

    logger.info("---------------------------------------------------")
    logger.info(greenText("Bad light curves"))
    logger.info("---------------------------------------------------")

    # Bad data: these should be rejected outright
    bads = [(3756, 0, 14281), (1983, 10, 1580)]
    for field_id, ccd_id, source_id in bads:
        bad_light_curve = pdb.get_light_curve(field_id, ccd_id, source_id,
                                              clean=True)
        bad_light_curve.indices = pa.compute_variability_indices(
            bad_light_curve, indices=index_names)
        assert not pa.iscandidate(bad_light_curve,
                                  lower_eta_cut=field_eta_cut(field_id))
        if plot:
            plot_lc(bad_light_curve)

    logger.info("---------------------------------------------------")
    logger.info(greenText("Simulated light curves"))
    logger.info("---------------------------------------------------")

    # Simulated light curves: flat curves should fail the selection, but
    # the same curves with an injected microlensing event should pass
    for field_id, mjd in [(4721, periodic_light_curve.mjd)]:
        for err in [0.01, 0.05, 0.1]:
            logger.debug("field: {0}, err: {1}".format(field_id, err))
            light_curve = SimulatedLightCurve(mjd=mjd, mag=15, error=[err])
            light_curve.indices = pa.compute_variability_indices(
                light_curve, indices=index_names)
            assert not pa.iscandidate(light_curve,
                                      lower_eta_cut=field_eta_cut(field_id))

            light_curve.add_microlensing_event(
                u0=np.random.uniform(0.2, 0.8),
                t0=light_curve.mjd[int(len(light_curve)/2)],
                tE=light_curve.baseline/8.)
            light_curve.indices = pa.compute_variability_indices(
                light_curve, indices=index_names)
            if plot:
                plt.clf()
                light_curve.plot()
                plt.savefig("plots/tests/{0}_{1}.png".format(field_id, err))
            assert pa.iscandidate(light_curve,
                                  lower_eta_cut=field_eta_cut(field_id))

    logger.info("---------------------------------------------------")
    logger.info(greenText("Transient light curves"))
    logger.info("---------------------------------------------------")

    # Transients (SNe, novae): these should be flagged
    transients = [(4564, 0, 4703), (4914, 6, 9673), (100041, 1, 4855),
                  (100082, 5, 7447), (4721, 8, 3208), (4445, 7, 11458),
                  (100003, 6, 10741), (100001, 10, 5466), (4789, 6, 11457),
                  (2263, 0, 3214), (4077, 8, 15293), (4330, 10, 6648),
                  (4913, 7, 13436), (100090, 7, 2070), (4338, 2, 10330),
                  (5171, 0, 885)]
    for field_id, ccd_id, source_id in transients:
        transient_light_curve = pdb.get_light_curve(field_id, ccd_id,
                                                    source_id, clean=True)
        logger.debug(transient_light_curve)
        transient_light_curve.indices = pa.compute_variability_indices(
            transient_light_curve, indices=index_names)
        assert pa.iscandidate(transient_light_curve,
                              lower_eta_cut=field_eta_cut(field_id))
        if plot:
            plot_lc(transient_light_curve)
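# plot_lc is assumed to be a small diagnostic helper defined elsewhere in
# this module; a hypothetical minimal sketch, using only the light-curve
# attributes (mjd, mag, error) that the tests above already rely on:
import matplotlib.pyplot as plt

def plot_lc(light_curve, filename=None):
    """Plot a light curve with error bars, magnitudes increasing downward."""
    plt.clf()
    plt.errorbar(light_curve.mjd, light_curve.mag, light_curve.error,
                 marker=".", linestyle="none", ecolor="#aaaaaa")
    plt.gca().invert_yaxis()  # brighter is up
    plt.xlabel("MJD")
    plt.ylabel("R [mag]")
    if filename is not None:
        plt.savefig(filename)
    else:
        plt.show()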
def variability_indices_distributions(field_id=100018, overwrite=False):
    field = pdb.Field(field_id, "R")
    indices = ["eta", "j", "delta_chi_squared", "sigma_mu", "k"]
    number_of_microlensing_light_curves = 1000
    number_of_microlensing_simulations_per_light_curve = 100
    min_number_of_good_observations = 100

    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(
        field.id, number_of_microlensing_light_curves,
        number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "var_indices",
                                   file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "var_indices",
                                 file_base.format(ext="pdf"))

    if not os.path.exists(os.path.dirname(pickle_filename)):
        os.mkdir(os.path.dirname(pickle_filename))
    if not os.path.exists(os.path.dirname(plot_filename)):
        os.mkdir(os.path.dirname(plot_filename))

    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")

    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data..."
                    .format(pickle_filename))

        # Initialize the PDB statistics dictionary. A dictionary is used
        # here because after doing some sub-selection the index arrays may
        # have different lengths.
        pdb_statistics = dict()
        for index in indices:
            pdb_statistics[index] = np.array([])

        for ccd in field.ccds.values():
            logger.info("Starting with CCD {}".format(ccd.id))
            chip = ccd.read()

            pdb_statistics_array = []

            logger.info("Starting microlensing event simulations")
            # Keep track of how many light curves we've used; break after
            # we reach the specified number
            light_curve_count = 0
            cdtn = "(ngoodobs > {})".format(min_number_of_good_observations)
            for source in chip.sources.where(cdtn):
                source_id = source["matchedSourceID"]

                light_curve = ccd.light_curve(source_id, barebones=True,
                                              clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations:
                    continue

                # Add the pre-simulation statistics to an array
                lc_var_indices = pa.compute_variability_indices(
                    light_curve, indices, return_tuple=True)
                pdb_statistics_array.append(lc_var_indices)

                one_light_curve_statistics = \
                    vi.simulate_events_compute_indices(
                        light_curve,
                        events_per_light_curve=number_of_microlensing_simulations_per_light_curve,
                        indices=indices)
                try:
                    simulated_microlensing_statistics = np.hstack(
                        (simulated_microlensing_statistics,
                         one_light_curve_statistics))
                except NameError:
                    simulated_microlensing_statistics = \
                        one_light_curve_statistics

                light_curve_count += 1
                if light_curve_count >= number_of_microlensing_light_curves:
                    break

            pdb_statistics_array = np.array(
                pdb_statistics_array,
                dtype=[(index, float) for index in indices])

            try:
                all_pdb_statistics_array = np.hstack(
                    (all_pdb_statistics_array, pdb_statistics_array))
            except NameError:
                all_pdb_statistics_array = pdb_statistics_array

            ccd.close()

        f = open(pickle_filename, "wb")
        pickle.dump((all_pdb_statistics_array,
                     simulated_microlensing_statistics), f)
        f.close()

    f = open(pickle_filename, "rb")
    all_pdb_statistics_array, simulated_microlensing_statistics = \
        pickle.load(f)
    f.close()

    # log10 cuts on each index
    selection_criteria = {"eta": 0.16167735855516213,
                          "delta_chi_squared": 1.162994709319348,
                          "j": 1.601729135628142}

    index_pairs = [("eta", "delta_chi_squared"), ("eta", "j"),
                   ("delta_chi_squared", "j")]

    nbins = 100
    for x_index, y_index in index_pairs:
        fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15, 7.5))

        # Variable (simulated microlensing) data: keep only points where
        # both indices are positive so they survive the log binning
        x = simulated_microlensing_statistics[x_index]
        y = simulated_microlensing_statistics[y_index]
        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

        xbins_pos = np.logspace(np.log10(pos_x.min()),
                                np.log10(pos_x.max()), nbins)
        ybins_pos = np.logspace(np.log10(pos_y.min()),
                                np.log10(pos_y.max()), nbins)

        H_pos, xedges_pos, yedges_pos = np.histogram2d(
            pos_x, pos_y, bins=[xbins_pos, ybins_pos])

        # Non-variable (field) data, binned on the same edges
        x = all_pdb_statistics_array[x_index]
        y = all_pdb_statistics_array[y_index]
        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

        H_pos_boring, xedges_pos, yedges_pos = np.histogram2d(
            pos_x, pos_y, bins=[xedges_pos, yedges_pos])

        # Right panel: simulated microlensing events
        ax1 = axes[1]
        ax1.pcolormesh(xedges_pos, yedges_pos,
                       np.where(H_pos > 0, np.log10(H_pos), 0.).T,
                       cmap=cm.Blues)
        ax1.set_xscale("log")
        ax1.set_yscale("log")
        ax1.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax1.set_ylim(yedges_pos[0], yedges_pos[-1])
        ax1.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax1.axhline(10.**selection_criteria[y_index], color='r',
                    linestyle='--')
        ax1.axvline(10.**selection_criteria[x_index], color='r',
                    linestyle='--')

        if x_index == "eta":
            ax1.fill_between([xedges_pos[0],
                              10.**selection_criteria[x_index]],
                             10.**selection_criteria[y_index],
                             yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax1.fill_between([10.**selection_criteria[x_index],
                              xedges_pos[-1]],
                             10.**selection_criteria[y_index],
                             yedges_pos[-1], facecolor='red', alpha=0.1)

        # Left panel: all PDB (non-variable) sources
        ax2 = axes[0]
        ax2.pcolormesh(xedges_pos, yedges_pos,
                       np.where(H_pos_boring > 0,
                                np.log10(H_pos_boring), 0.).T,
                       cmap=cm.Blues)
        ax2.set_xscale("log")
        ax2.set_yscale("log")
        ax2.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax2.set_ylim(yedges_pos[0], yedges_pos[-1])
        ax2.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax2.set_ylabel(pu.index_to_label(y_index), fontsize=28)
        ax2.axhline(10.**selection_criteria[y_index], color='r',
                    linestyle='--')
        ax2.axvline(10.**selection_criteria[x_index], color='r',
                    linestyle='--')

        if x_index == "eta":
            ax2.fill_between([xedges_pos[0],
                              10.**selection_criteria[x_index]],
                             10.**selection_criteria[y_index],
                             yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax2.fill_between([10.**selection_criteria[x_index],
                              xedges_pos[-1]],
                             10.**selection_criteria[y_index],
                             yedges_pos[-1], facecolor='red', alpha=0.1)

        for ax in fig.axes:
            for ticklabel in ax.get_xticklabels() + ax.get_yticklabels():
                ticklabel.set_fontsize(18)

        fig.savefig(os.path.join(pg.plots_path, "paper_figures",
                                 "{}_vs_{}.pdf".format(x_index, y_index)),
                    bbox_inches="tight")
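# A typical invocation, assuming the "data/var_indices" cache directory
# described above (overwrite=True regenerates the cached pickle before
# re-making the figures):
#
#     variability_indices_distributions(field_id=100018, overwrite=True)
#
# Note that the hard-coded selection_criteria values are log10 cuts, which
# is why the plotting code exponentiates them (10.**selection_criteria[...])
# before drawing the threshold lines and shaded selection regions.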