Example #1
import numpy as np

# Build the full feature matrix: one row per source, one column per feature
all_data = np.zeros((len(sourceIDs), Nfeatures))

for ii, f in enumerate(features):
    if f == "sigma_mu":
        # sigma_mu is stored as the ratio of two photometric database columns
        pdb_f1, pdb_f2 = pdb_index_name[f]
        all_data[:, ii] = goodSources[pdb_f1] / goodSources[pdb_f2]
    else:
        pdb_f = pdb_index_name[f]
        all_data[:, ii] = goodSources[pdb_f]

np.save("/home/aprice-whelan/tmp/all_data.npy", all_data)

# Draw Nsources source IDs at random (np.random.randint samples with replacement)
random_sourceIDs = sourceIDs[np.random.randint(len(sourceIDs), size=Nsources)]

# For each randomly selected source, simulate Ntrials microlensing events on
# its light curve and store the resulting variability indices
training_data = np.zeros((Nsources, Ntrials, Nfeatures))
for ii, sourceID in enumerate(random_sourceIDs):
    d = sourceData.readWhere("matchedSourceID == {0}".format(sourceID))
    mjd = d["mjd"]
    mag = d["mag"]
    err = d["magErr"]

    for trial in range(Ntrials):
        # Inject a simulated microlensing event into a copy of the light curve
        lc = SimulatedLightCurve(mjd=mjd, mag=mag, error=err)
        lc.add_microlensing_event()
        stats = compute_variability_indices(lc, indices=features)
        training_data[ii, trial, :] = np.array([stats[x] for x in features])

np.save("/home/aprice-whelan/tmp/training_data.npy", training_data)

chip.close()
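The snippet above assumes several names defined earlier in the script (`features`, `pdb_index_name`, `goodSources`, `sourceData`, and the various `N*` counts); table handles like `goodSources` and `sourceData` come from the photometric database files. A minimal sketch of the scalar setup, where the counts and all column names other than `vonNeumannRatio` are illustrative assumptions rather than the original definitions:

# Hypothetical setup -- the original script defines these elsewhere
features = ["eta", "delta_chi_squared", "j", "k", "sigma_mu"]
Nfeatures = len(features)
Nsources = 1000  # light curves to draw for the training set (assumed)
Ntrials = 100    # simulated microlensing events per light curve (assumed)

# Map each feature to its photometric-database column; "sigma_mu" maps to a
# (numerator, denominator) pair of columns, as the loop above expects
pdb_index_name = {
    "eta": "vonNeumannRatio",
    "delta_chi_squared": "chiSQ",            # assumed column name
    "j": "stetsonJ",                          # assumed column name
    "k": "stetsonK",                          # assumed column name
    "sigma_mu": ("magRMS", "referenceMag"),   # assumed column pair
}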
Example #2
def select_candidates(field, selection_criteria, num_fit_attempts=10):
    """ Select candidates from a field given the log10(selection criteria) from mongodb.

        The current selection scheme is to first select on eta, then to
        sanity check with delta chi-squared by making sure it's positive 
        and >10.

    """

    # The criteria are stored as log10 values, so convert to a linear cut
    eta_cut = 10**selection_criteria

    light_curves = []
    for ccd in field.ccds.values():
        logger.info("Starting with CCD {}".format(ccd.id))
        chip = ccd.read()
        cdtn = ("(ngoodobs > {}) & (vonNeumannRatio > 0.0) & "
                "(vonNeumannRatio < {}) & ((ngoodobs/nobs) > 0.5)")
        cdtn = cdtn.format(min_number_of_good_observations, eta_cut)
        source_ids = chip.sources.readWhere(cdtn, field="matchedSourceID")

        logger.info("\tSelected {} pre-candidates from PDB"\
                    .format(len(source_ids)))
        for source_id in source_ids:
            # APW: TODO -- this is still the biggest time hog!!! It turns 
            #   out it's still faster than reading the whole thing into 
            #   memory, though!
            light_curve = ccd.light_curve(source_id, barebones=True, 
                                          clean=True)

            # Skip the source if there is no light curve or it doesn't
            # have enough clean observations
            if light_curve is None or \
                    len(light_curve) < min_number_of_good_observations:
                continue

            # Compute the variability indices for the cleaned light curve
            try:
                ind_names = ["eta", "delta_chi_squared", "j", "k", "sigma_mu"]
                indices = pa.compute_variability_indices(light_curve, 
                                                         indices=ind_names)
            except ValueError:
                logger.warning("Failed to compute variability indices for "
                               "light curve! {0}".format(light_curve))
                return False

            light_curve.indices = indices
            light_curve.tags = []
            light_curve.features = {}

            if light_curve.sdss_type() == "galaxy":
                light_curve.tags.append("galaxy")
                continue

            # If the object is not a Galaxy or has no SDSS data, try to get 
            #    the SDSS colors to see if it passes the Richards et al. 
            #    QSO color cut.
            sdss_colors = light_curve.sdss_colors("psf")
            qso_status = richards_qso(sdss_colors)
            if sdss_colors is not None and qso_status:
                light_curve.tags.append("qso")
            
            candidate_status = pa.iscandidate(light_curve, 
                                              lower_eta_cut=eta_cut)

            if candidate_status == "candidate" and \
                "qso" not in light_curve.tags:
                light_curve.tags.append("candidate")
                light_curves.append(light_curve)
                continue

            if candidate_status == "subcandidate" and \
                light_curve.indices["eta"] < eta_cut and not qso_status:
                # Try to do period analysis with AOV
                try:
                    peak_period = light_curve.features["aov_period"]
                    peak_power = light_curve.features["aov_power"]
                except KeyError:
                    try:
                        fp = pa.findPeaks_aov(light_curve.mjd.copy(),
                                              light_curve.mag.copy(), 
                                              light_curve.error.copy(), 
                                              3, 1., 2.*light_curve.baseline, 
                                              1., 0.1, 20)
                    except ZeroDivisionError:
                        continue

                    peak_period = fp["peak_period"][0]
                    peak_power = max(fp["peak_period"])

                    light_curve.features["aov_period"] = peak_period
                    light_curve.features["aov_power"] = peak_power

                # Tag as a variable star if the AOV peak power is significant
                # and the period is shorter than twice the baseline
                if peak_period < 2.*light_curve.baseline and peak_power > 25.:
                    light_curve.tags.append("variable star")

                    if "subcandidate" in light_curve.tags:
                        light_curve.tags.remove("subcandidate")

                    if light_curve not in light_curves:
                        light_curves.append(light_curve)

        ccd.close()

    return light_curves
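A sketch of how `select_candidates` might be driven, reusing the mongodb selection-criteria lookup that appears in the test below; the module path for `pdb`, the field ID, and the output handling are illustrative assumptions:

# Hedged usage sketch -- assumes the same pdb/mongo setup as the other examples
import ptf.photometricdatabase as pdb  # assumed module path
import ptf.db.mongodb as mongo

db = mongo.PTFConnection()
field = pdb.Field(100018, "R")  # field ID is illustrative

# Selection criteria are stored in mongodb as log10 values
log_eta_cut = db.fields.find_one({"_id": field.id},
                                 {"selection_criteria": 1})["selection_criteria"]["eta"]

candidates = select_candidates(field, log_eta_cut)
for lc in candidates:
    print(lc.tags, lc.indices["eta"])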
Example #3
def test_iscandidate(plot=False):
    """ Use test light curves to test selection:
        - Periodic
        - Bad data
        - Various simulated events
        - Flat light curve
        - Transients (SN, Nova, etc.)
    """

    np.random.seed(10)

    logger.setLevel(logging.DEBUG)
    from ptf.lightcurve import SimulatedLightCurve
    import ptf.db.mongodb as mongo

    db = mongo.PTFConnection()

    logger.info("---------------------------------------------------")
    logger.info(greenText("Periodic light curves"))
    logger.info("---------------------------------------------------")

    # Periodic light curves
    periodics = [(4588, 7, 13227), (4588, 2, 15432), (4588, 9, 17195),
                 (2562, 10, 28317), (4721, 8, 11979), (4162, 2, 14360)]

    for field_id, ccd_id, source_id in periodics:
        periodic_light_curve = pdb.get_light_curve(field_id, ccd_id, source_id, clean=True)
        periodic_light_curve.indices = pa.compute_variability_indices(periodic_light_curve, indices=["eta", "delta_chi_squared", "j", "k", "sigma_mu"])
        eta_cut = 10**db.fields.find_one({"_id": field_id}, {"selection_criteria": 1})["selection_criteria"]["eta"]
        assert pa.iscandidate(periodic_light_curve, lower_eta_cut=eta_cut) in ["subcandidate", False]
        if plot: plot_lc(periodic_light_curve)

    logger.info("---------------------------------------------------")
    logger.info(greenText("Bad light curves"))
    logger.info("---------------------------------------------------")

    # Bad data
    bads = [(3756, 0, 14281), (1983, 10, 1580)]

    for field_id, ccd_id, source_id in bads:
        bad_light_curve = pdb.get_light_curve(field_id, ccd_id, source_id, clean=True)
        bad_light_curve.indices = pa.compute_variability_indices(bad_light_curve, indices=["eta", "delta_chi_squared", "j", "k", "sigma_mu"])
        eta_cut = 10**db.fields.find_one({"_id": field_id}, {"selection_criteria": 1})["selection_criteria"]["eta"]
        assert not pa.iscandidate(bad_light_curve, lower_eta_cut=eta_cut)
        if plot: plot_lc(bad_light_curve)

    logger.info("---------------------------------------------------")
    logger.info(greenText("Simulated light curves"))
    logger.info("---------------------------------------------------")

    # Simulated light curves
    for field_id, mjd in [(4721, periodic_light_curve.mjd)]:
        for err in [0.01, 0.05, 0.1]:
            logger.debug("field: {0}, err: {1}".format(field_id, err))
            eta_cut = 10**db.fields.find_one({"_id": field_id}, {"selection_criteria": 1})["selection_criteria"]["eta"]

            # A flat light curve with pure noise should not be selected
            light_curve = SimulatedLightCurve(mjd=mjd, mag=15, error=[err])
            light_curve.indices = pa.compute_variability_indices(light_curve, indices=["eta", "delta_chi_squared", "j", "k", "sigma_mu"])
            assert not pa.iscandidate(light_curve, lower_eta_cut=eta_cut)

            # ...but it should be once a microlensing event is injected
            light_curve.add_microlensing_event(u0=np.random.uniform(0.2, 0.8),
                                               t0=light_curve.mjd[int(len(light_curve)/2)],
                                               tE=light_curve.baseline/8.)
            light_curve.indices = pa.compute_variability_indices(light_curve, indices=["eta", "delta_chi_squared", "j", "k", "sigma_mu"])
            if plot:
                plt.clf()
                light_curve.plot()
                plt.savefig("plots/tests/{0}_{1}.png".format(field_id, err))
            assert pa.iscandidate(light_curve, lower_eta_cut=eta_cut)

    logger.info("---------------------------------------------------")
    logger.info(greenText("Transient light curves"))
    logger.info("---------------------------------------------------")

    # Transients (SN, Novae)
    transients = [(4564, 0, 4703), (4914, 6, 9673), (100041, 1, 4855), (100082, 5, 7447),
                  (4721, 8, 3208), (4445, 7, 11458), (100003, 6, 10741), (100001, 10, 5466),
                  (4789, 6, 11457), (2263, 0, 3214), (4077, 8, 15293), (4330, 10, 6648),
                  (4913, 7, 13436), (100090, 7, 2070), (4338, 2, 10330), (5171, 0, 885)]

    for field_id, ccd_id, source_id in transients:
        transient_light_curve = pdb.get_light_curve(field_id, ccd_id, source_id, clean=True)
        logger.debug(transient_light_curve)
        transient_light_curve.indices = pa.compute_variability_indices(transient_light_curve, indices=["eta", "delta_chi_squared", "j", "k", "sigma_mu"])
        eta_cut = 10**db.fields.find_one({"_id": field_id}, {"selection_criteria": 1})["selection_criteria"]["eta"]
        assert pa.iscandidate(transient_light_curve, lower_eta_cut=eta_cut)
        if plot: plot_lc(transient_light_curve)
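The same eta-cut lookup appears in every loop above; if the pattern spreads further, a small helper like the following (hypothetical, not part of the original module) could centralize it:

def field_eta_cut(db, field_id):
    """ Return the linear-scale eta cut for a field, given the
        log10 value stored in mongodb.
    """
    doc = db.fields.find_one({"_id": field_id}, {"selection_criteria": 1})
    return 10**doc["selection_criteria"]["eta"]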
Example #4
def variability_indices_distributions(field_id=100018, overwrite=False):
    field = pdb.Field(field_id, "R")

    indices = ["eta", "j", "delta_chi_squared", "sigma_mu", "k"]
    number_of_microlensing_light_curves = 1000
    number_of_microlensing_simulations_per_light_curve = 100
    min_number_of_good_observations = 100

    # Convenience variables for filenames
    file_base = "field{:06d}_Nperccd{}_Nevents{}".format(field.id, number_of_microlensing_light_curves, number_of_microlensing_simulations_per_light_curve) + ".{ext}"
    pickle_filename = os.path.join("data", "var_indices", file_base.format(ext="pickle"))
    plot_filename = os.path.join("plots", "var_indices", file_base.format(ext="pdf"))

    if not os.path.exists(os.path.dirname(pickle_filename)):
        os.makedirs(os.path.dirname(pickle_filename))

    if not os.path.exists(os.path.dirname(plot_filename)):
        os.makedirs(os.path.dirname(plot_filename))

    if os.path.exists(pickle_filename) and overwrite:
        logger.debug("Data file exists, but you want to overwrite it!")
        os.remove(pickle_filename)
        logger.debug("Data file deleted...")

    # If the cache pickle file doesn't exist, generate the data
    if not os.path.exists(pickle_filename):
        logger.info("Data file {} not found. Generating data...".format(pickle_filename))

        # Initialize my PDB statistic dictionary
        # I use a dictionary here because after doing some sub-selection the index arrays may
        #   have different lengths.
        pdb_statistics = dict()
        for index in indices:
            pdb_statistics[index] = np.array([])

        for ccd in field.ccds.values():
            print "Starting with CCD {}".format(ccd.id)
            chip = ccd.read()

            pdb_statistics_array = []

            logger.info("Starting microlensing event simulations")
            # Keep track of how many light curves we've used, break after we reach the specified number
            light_curve_count = 0
            for source in chip.sources.where("(ngoodobs > {})".format(min_number_of_good_observations)):
                source_id = source["matchedSourceID"]

                light_curve = ccd.light_curve(source_id, barebones=True, clean=True)
                if len(light_curve.mjd) < min_number_of_good_observations:
                    continue

                # Add the pre-simulation statistics to an array
                lc_var_indices = pa.compute_variability_indices(light_curve, indices, return_tuple=True)
                pdb_statistics_array.append(lc_var_indices)

                one_light_curve_statistics = vi.simulate_events_compute_indices(light_curve, events_per_light_curve=number_of_microlensing_simulations_per_light_curve, indices=indices)
                # Grow the array of simulated statistics; on the first light
                # curve the array doesn't exist yet, hence the NameError catch
                try:
                    simulated_microlensing_statistics = np.hstack((simulated_microlensing_statistics, one_light_curve_statistics))
                except NameError:
                    simulated_microlensing_statistics = one_light_curve_statistics

                light_curve_count += 1
                if light_curve_count >= number_of_microlensing_light_curves:
                    break

            pdb_statistics_array = np.array(pdb_statistics_array, dtype=[(index,float) for index in indices])

            try:
                all_pdb_statistics_array = np.hstack((all_pdb_statistics_array, pdb_statistics_array))
            except NameError:
                all_pdb_statistics_array = pdb_statistics_array

            ccd.close()

        f = open(pickle_filename, "w")
        pickle.dump((all_pdb_statistics_array, simulated_microlensing_statistics), f)
        f.close()

    f = open(pickle_filename, "r")
    all_pdb_statistics_array, simulated_microlensing_statistics = pickle.load(f)
    f.close()

    selection_criteria = {
        "eta": 0.16167735855516213,
        "delta_chi_squared": 1.162994709319348,
        "j": 1.601729135628142
    }

    index_pairs = [("eta", "delta_chi_squared"), ("eta", "j"), ("delta_chi_squared", "j")]

    nbins = 100
    for x_index, y_index in index_pairs:
        fig, axes = plt.subplots(1, 2, sharey=True, figsize=(15,7.5))

        # Variable data
        x = simulated_microlensing_statistics[x_index]
        y = simulated_microlensing_statistics[y_index]

        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

        xbins_pos = np.logspace(np.log10(pos_x.min()), np.log10(pos_x.max()), nbins)
        ybins_pos = np.logspace(np.log10(pos_y.min()), np.log10(pos_y.max()), nbins)

        H_pos, xedges_pos, yedges_pos = np.histogram2d(pos_x, pos_y, bins=[xbins_pos, ybins_pos])

        # Non-variable data
        x = all_pdb_statistics_array[x_index]
        y = all_pdb_statistics_array[y_index]

        pos_x = x[(x > 0) & (y > 0)]
        pos_y = y[(x > 0) & (y > 0)]

        H_pos_boring, xedges_pos, yedges_pos = np.histogram2d(pos_x, pos_y, bins=[xedges_pos, yedges_pos])

        ax1 = axes[1]
        ax1.pcolormesh(xedges_pos, yedges_pos, np.where(H_pos > 0, np.log10(H_pos), 0.).T, cmap=cm.Blues)
        ax1.set_xscale("log")
        ax1.set_yscale("log")
        ax1.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax1.set_ylim(yedges_pos[0], yedges_pos[-1])

        ax1.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax1.axhline(10.**selection_criteria[y_index], color='r', linestyle='--')
        ax1.axvline(10.**selection_criteria[x_index], color='r', linestyle='--')

        if x_index == "eta":
            ax1.fill_between([xedges_pos[0], 10.**selection_criteria[x_index]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax1.fill_between([10.**selection_criteria[x_index], xedges_pos[-1]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)

        ax2 = axes[0]
        ax2.pcolormesh(xedges_pos, yedges_pos, np.where(H_pos_boring > 0, np.log10(H_pos_boring), 0.).T, cmap=cm.Blues)
        ax2.set_xscale("log")
        ax2.set_yscale("log")
        ax2.set_xlim(xedges_pos[0], xedges_pos[-1])
        ax2.set_ylim(yedges_pos[0], yedges_pos[-1])

        ax2.set_xlabel(pu.index_to_label(x_index), fontsize=28)
        ax2.set_ylabel(pu.index_to_label(y_index), fontsize=28)
        ax2.axhline(10.**selection_criteria[y_index], color='r', linestyle='--')
        ax2.axvline(10.**selection_criteria[x_index], color='r', linestyle='--')

        if x_index == "eta":
            ax2.fill_between([xedges_pos[0], 10.**selection_criteria[x_index]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)
        elif x_index == "delta_chi_squared":
            ax2.fill_between([10.**selection_criteria[x_index], xedges_pos[-1]], 10.**selection_criteria[y_index], yedges_pos[-1], facecolor='red', alpha=0.1)

        for ax in fig.axes:
            for ticklabel in ax.get_xticklabels()+ax.get_yticklabels():
                ticklabel.set_fontsize(18)

        fig.savefig(os.path.join(pg.plots_path, "paper_figures", "{}_vs_{}.pdf".format(x_index, y_index)), bbox_inches="tight")
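A sketch of how this function might be invoked from a driver script; the field ID matches the default above, and `overwrite=True` forces the cached pickle to be regenerated:

# Hedged usage sketch
if __name__ == "__main__":
    # Regenerate the cached statistics and remake the three index-pair figures
    variability_indices_distributions(field_id=100018, overwrite=True)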