import json

import numpy as np


def fit_line(df, channel):
    """Fit a slope relating 'sub_flux' to 'hdr_flux' for sources below the channel cutoff."""
    cutoff = get_cutoff(channel)
    idx = (df['sub_flux'] > 0) & (df['sub_flux'] < cutoff)
    X = df['hdr_flux'][idx]
    y = df['sub_flux'][idx]
    # model = sm.OLS(y, X)
    # result = model.fit()
    # slope = result.params[0]
    slope = ordinary_least_squares(y, X)
    return slope
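
# `ordinary_least_squares` is not defined in this module. Judging from the
# commented-out statsmodels call above (sm.OLS with no constant added to X),
# it is assumed to be a small project helper that fits a line through the
# origin and returns its slope. A minimal sketch under that assumption:
def ordinary_least_squares(y, X):
    """Return the slope of a no-intercept least-squares fit of y on X."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # with a single regressor and no intercept, OLS reduces to
    # slope = sum(X * y) / sum(X * X)
    return np.dot(X, y) / np.dot(X, X)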

def combine_hdr_catalogs(catalog_filepaths_tuple):
    """
    Takes a tuple containing the filepaths to the long and short exposure
    single-channel catalogs for a given region and channel. The result is a
    single catalog containing the union of all sources in both the short and
    long exposure catalogs, with the short exposure measurements used for the
    brighter sources and the long exposure measurements used for the fainter
    sources. The cutoff between the two is determined by the parameter
    'hdr_cutoff' in the metadata file and should be set to the saturation
    limit of the long exposure data (there are actually 2 parameters, one per
    channel: hdr_cutoff_ch1, hdr_cutoff_ch2).
    """
    # read in the data
    long_file, short_file = catalog_filepaths_tuple
    work_dir = "/".join(short_file.split("/")[:-1])
    with open(work_dir + "/metadata.json") as f:
        meta = json.load(f)
    header = "id ra dec flux unc n_obs"
    names = header.split()
    long_cat = np.recfromtxt(long_file, names=names)
    short_cat = np.recfromtxt(short_file, names=names)

    # fit a line to short ~ long using sources below the cutoffs
    idx_s = short_cat.flux < meta["short_cutoff"]
    idx_l = long_cat.flux < meta["long_cutoff"]
    short_flux = short_cat.flux[idx_s]
    long_flux = long_cat.flux[idx_l]
    short_ra, short_dec = short_cat.ra[idx_s], short_cat.dec[idx_s]
    long_ra, long_dec = long_cat.ra[idx_l], long_cat.dec[idx_l]
    idx1, idx2, ds = spherematch(short_ra, short_dec, long_ra, long_dec,
                                 tolerance=1 / 3600.0)
    y = short_flux[idx1]
    X = long_flux[idx2]
    slope = ordinary_least_squares(y, X)

    # divide the short flux/unc by the slope so that it agrees with the long flux
    print("region {} correction value: {}".format(meta["name"], slope))
    short_cat.flux /= slope
    short_cat.unc /= slope

    # take everything brighter than the cutoff from short and everything fainter from long
    idx_faint = long_cat.flux < meta["long_cutoff"]
    idx_bright = short_cat.flux > meta["long_cutoff"]
    ls, ss = long_cat[idx_faint], short_cat[idx_bright]

    # before concatenating the long and short subsets, check for any duplicates
    # (if they exist they should tend to have flux very close to the cutoff)
    idx_s, idx_l, ds = spherematch(ss.ra, ss.dec, ls.ra, ls.dec,
                                   tolerance=1 / 3600.0)
    dup_ids = []
    for idx in idx_l:
        if (ls.flux[idx] > 0.9 * meta["long_cutoff"]) & (ls.flux[idx] < meta["long_cutoff"]):
            dup_ids.append(ls.id[idx])
    # now use the ids of the duplicates to delete them from the long dataset
    for idx in dup_ids:
        ls = ls[ls.id != idx]
    data = np.concatenate([ls, ss])

    # eliminate sources with negative flux
    good = data["flux"] >= 0
    data = data[good]

    # apply a global signal-to-noise clip using the 'sigma_clip' value from the metadata
    snr = data["flux"] / data["unc"]
    good = snr >= meta["sigma_clip"]
    data = data[good]

    # sort by RA, renumber the ids, and write to disk
    data = data[names]
    idx = np.argsort(data["ra"])
    data = data[idx]
    data["id"] = np.arange(1, data.shape[0] + 1)
    fmt = ["%i"] + ["%0.8f"] * 2 + ["%.4e"] * 2 + ["%i"]
    out_name = "_".join([meta["name"], meta["channel"], "combined_hdr_catalog.txt"])
    out_path = "/".join(["/".join(work_dir.split("/")[:-1]), out_name])
    np.savetxt(out_path, data, fmt=fmt, header=header)
    print("created file: " + out_path)
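
# Like `ordinary_least_squares`, `spherematch` is assumed to be a project
# helper (or an import from an astronomy package) that cross-matches two
# RA/Dec lists. The hypothetical stand-in below nearest-neighbour matches
# list 1 against list 2 within `tolerance` degrees and returns the matched
# index arrays plus the angular separations in degrees; the real helper may
# differ in signature or return values.
def spherematch(ra1, dec1, ra2, dec2, tolerance=1 / 3600.0):
    """Nearest-neighbour match of two RA/Dec lists within `tolerance` degrees."""
    from scipy.spatial import cKDTree

    def unit_vectors(ra, dec):
        ra, dec = np.radians(ra), np.radians(dec)
        return np.column_stack([np.cos(dec) * np.cos(ra),
                                np.cos(dec) * np.sin(ra),
                                np.sin(dec)])

    xyz1 = unit_vectors(np.asarray(ra1, dtype=float), np.asarray(dec1, dtype=float))
    xyz2 = unit_vectors(np.asarray(ra2, dtype=float), np.asarray(dec2, dtype=float))
    # convert the angular tolerance to a chord length on the unit sphere
    max_chord = 2.0 * np.sin(np.radians(tolerance) / 2.0)
    dist, nearest = cKDTree(xyz2).query(xyz1, distance_upper_bound=max_chord)
    idx1 = np.flatnonzero(np.isfinite(dist))             # sources in list 1 with a match
    idx2 = nearest[idx1]                                  # their counterparts in list 2
    ds = np.degrees(2.0 * np.arcsin(dist[idx1] / 2.0))    # chord length -> angle
    return idx1, idx2, ds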