def read_light_curves_from_sql_database(data_release, fname, field_in='%', model_in='%', batch_size=100,
                                        offset=0, sort=True, passbands=('g', 'r'), known_redshift=True):
    """ Read light curves of SNANA simulations from an SQL database and save the processed light curves to HDF5. """
    print(fname)

    extrasql = ''
    # Example extra SQL filters (left disabled):
    # extrasql = "AND (objid LIKE '%00' OR objid LIKE '%50' OR sim_type_index IN (51,61,62,63,64,84,90,91,93))"
    # extrasql = "AND sim_redshift_host < 0.5 AND sim_peakmag_r < 23"

    getter = GetData(data_release)
    result = getter.get_lcs_data(columns=['objid', 'ptrobs_min', 'ptrobs_max', 'sim_peakmag_r',
                                          'sim_redshift_host', 'mwebv', 'sim_dlmu', 'peakmjd', 'ra', 'decl',
                                          'hostgal_photoz', 'hostgal_photoz_err'],
                                 field=field_in, model=model_in, snid='%', limit=batch_size, offset=offset,
                                 shuffle=False, sort=sort, extrasql=extrasql)

    store = pd.HDFStore(fname)
    for head, phot in result:
        objid, ptrobs_min, ptrobs_max, peakmag, redshift, mwebv, dlmu, peakmjd, ra, dec, photoz, photozerr = head
        field, model, base, snid = objid.split('_')

        lc = getter.convert_pandas_lc_to_recarray_lc(phot, passbands=passbands)
        inputlightcurve = InputLightCurve(lc['mjd'], lc['flux'], lc['dflux'], lc['pb'], lc['photflag'],
                                          ra, dec, objid, redshift, mwebv,
                                          known_redshift=known_redshift,
                                          training_set_parameters={'class_number': int(model), 'peakmjd': peakmjd})
        savepd = inputlightcurve.preprocess_light_curve()
        store.append(objid, savepd)

    store.close()
    print("saved %s" % fname)
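# Example call (a hedged sketch, not part of the original code): the data release name,
# output HDF5 path and survey field below are placeholders and depend on how GetData
# indexes your SQL database.
# read_light_curves_from_sql_database('ZTF_20180727', 'data/saved_lc_ZTF_20180727.hdf5',
#                                     field_in='MSIP', model_in='%', batch_size=100, offset=0)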
def read_fits_file(args):
    head_file, phot_file, passbands, known_redshift, calculate_t0 = args
    getter = GetData()
    light_curves = {}

    header_HDU = afits.open(head_file)
    header_data = header_HDU[1].data
    for i, head in enumerate(header_data):
        class_num = head['SIM_TYPE_INDEX']
        snid = head['SNID']
        objid = '{}_{}'.format(class_num, snid)
        ptrobs_min = head['PTROBS_MIN']
        ptrobs_max = head['PTROBS_MAX']
        redshift = head['SIM_REDSHIFT_HOST']
        peakmjd = head['PEAKMJD']
        mwebv = head['MWEBV']
        ra = head['RA']
        if 'DEC' in header_data.names:
            dec = head['DEC']
        else:
            dec = head['DECL']
        print(i, len(header_data))
        try:
            phot_data = getter.get_light_curve_array(phot_file, ptrobs_min, ptrobs_max)
            lc = getter.convert_pandas_lc_to_recarray_lc(phot_data, passbands=passbands)
            inputlightcurve = InputLightCurve(lc['mjd'], lc['flux'], lc['dflux'], lc['pb'], lc['photflag'],
                                              ra, dec, objid, redshift, mwebv,
                                              known_redshift=known_redshift,
                                              training_set_parameters={'class_number': class_num,
                                                                       'peakmjd': peakmjd},
                                              calculate_t0=calculate_t0)
            light_curves[objid] = inputlightcurve.preprocess_light_curve()
        except IndexError as e:
            print("No detections:", e)  # TODO: maybe do better error checking in future
        except AttributeError as e:
            print("phot_data is NoneType", e)  # TODO: maybe fix this later - rare case
        except ValueError as e:
            print("MCMC error while fitting t0", e)
        except Exception as e:
            print("Unspecified error", e, "Ignoring light curve", objid)

    return light_curves
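# A minimal sketch (not part of the original code) of fanning read_fits_file out over
# several HEAD/PHOT file pairs with a multiprocessing pool. The helper name, file lists
# and pool size are assumptions supplied by the caller; read_fits_file takes a single
# packed args tuple, which is what makes pool.map straightforward here.
def read_fits_files_in_parallel(head_files, phot_files, passbands=('g', 'r'), known_redshift=True,
                                calculate_t0=True, nprocesses=4):
    import multiprocessing as mp

    # One packed argument tuple per HEAD/PHOT pair, matching read_fits_file's expected input.
    args_list = [(head_file, phot_file, passbands, known_redshift, calculate_t0)
                 for head_file, phot_file in zip(head_files, phot_files)]

    light_curves = {}
    with mp.Pool(processes=nprocesses) as pool:
        for partial_result in pool.map(read_fits_file, args_list):
            light_curves.update(partial_result)
    return light_curves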
def get_real_ztf_training_data(class_name, data_dir='data/real_ZTF_data_from_osc',
                               save_dir='data/saved_light_curves/', pbs=('g', 'r'), known_redshift=True,
                               nprocesses=1, redo=False, calculate_t0=True):
    """ Get data from saved real ZTF data with names and types from the Open Supernova Catalog. """

    save_lc_filepath = os.path.join(save_dir, f"lc_classnum_{class_name}.pickle")

    if os.path.exists(save_lc_filepath) and not redo:
        with open(save_lc_filepath, "rb") as fp:  # Unpickling
            light_curves = pickle.load(fp)
    else:
        light_curves = {}
        data_filepath = os.path.join(data_dir, f"ZTF_data_{class_name}_osc-6-May-2020.pickle")
        with open(data_filepath, "rb") as fp:
            mjds, passbands, mags, magerrs, photflags, zeropoints, dc_mags, dc_magerrs, magnrs, \
                sigmagnrs, isdiffposs, ras, decs, objids, redshifts, mwebvs = pickle.load(fp)

        for i, objid in enumerate(objids):
            if known_redshift and (redshifts[i] is None or np.isnan(redshifts[i])):
                print(f"Skipping {objid} because redshift is unknown and known_redshift model is selected")
                continue

            # Convert magnitudes to fluxes and map the numeric ZTF filter IDs to passband names
            flux = 10. ** (-0.4 * (mags[i] - zeropoints[i]))
            fluxerr = np.abs(flux * magerrs[i] * (np.log(10.) / 2.5))

            passbands[i] = np.where((passbands[i] == 1) | (passbands[i] == '1'), 'g', passbands[i])
            passbands[i] = np.where((passbands[i] == 2) | (passbands[i] == '2'), 'r', passbands[i])

            # Mark the first detection (photflag 4096) with the trigger flag 6144
            mjd_first_detection = min(mjds[i][photflags[i] == 4096])
            photflags[i][np.where(mjds[i] == mjd_first_detection)] = 6144

            # Remove i-band points, non-detections after the first detection, and NaN fluxes
            deleteindexes = np.where(((passbands[i] == 3) | (passbands[i] == '3')) |
                                     ((mjds[i] > mjd_first_detection) & (photflags[i] == 0)) |
                                     (np.isnan(flux)))
            if deleteindexes[0].size > 0:
                print("Deleting indexes {} at mjd {} and passband {}".format(
                    deleteindexes, mjds[i][deleteindexes], passbands[i][deleteindexes]))
            mjd, passband, flux, fluxerr, zeropoint, photflag = delete_indexes(
                deleteindexes, mjds[i], passbands[i], flux, fluxerr, zeropoints[i], photflags[i])

            peakmjd = mjd[np.argmax(flux)]
            inputlightcurve = InputLightCurve(mjd, flux, fluxerr, passband, photflag, ras[i], decs[i],
                                              objid, redshifts[i], mwebvs[i],
                                              known_redshift=known_redshift,
                                              training_set_parameters={'class_number': class_name,
                                                                       'peakmjd': peakmjd},
                                              calculate_t0=calculate_t0)
            light_curves[objid] = inputlightcurve.preprocess_light_curve()

        with open(save_lc_filepath, "wb") as fp:
            pickle.dump(light_curves, fp)

    return light_curves
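# Example call (a hedged sketch): the class name and directories are placeholders; the
# function expects the pickled OSC-crossmatched ZTF files described above to already
# exist in data_dir.
# light_curves = get_real_ztf_training_data('SNIa', data_dir='data/real_ZTF_data_from_osc',
#                                           save_dir='data/saved_light_curves/',
#                                           known_redshift=True, redo=False)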
def read_light_curves_from_snana_fits_files(save_fname, head_files, phot_files, passbands=('g', 'r'),
                                            known_redshift=True):
    """ Save light curves from SNANA HEAD and PHOT FITS files.

    Parameters
    ----------
    save_fname : str
        Filename of the HDF5 file to save to.
    head_files : list of str
        Paths of the SNANA HEAD FITS files.
    phot_files : list of str
        Paths of the SNANA PHOT FITS files, in the same order as head_files.
    passbands : tuple
        Passband filters.
    known_redshift : bool
        Whether to use redshift during training.
    """
    getter = GetData()
    store = pd.HDFStore(save_fname)

    for fileidx, headfilepath in enumerate(head_files):
        print(fileidx, headfilepath)
        # Check that the PHOT file corresponds to the HEAD file
        assert phot_files[fileidx].split('_')[-2] == head_files[fileidx].split('_')[-2]

        header_HDU = afits.open(head_files[fileidx])
        header_data = header_HDU[1].data
        for i, head in enumerate(header_data):
            model_num = head['SIM_TYPE_INDEX']
            snid = head['SNID']
            objid = 'field_{}_base_{}'.format(model_num, snid)
            ptrobs_min = head['PTROBS_MIN']
            ptrobs_max = head['PTROBS_MAX']
            peakmag_g = head['SIM_PEAKMAG_g']
            peakmag_r = head['SIM_PEAKMAG_r']
            redshift = head['SIM_REDSHIFT_HOST']
            dlmu = head['SIM_DLMU']
            peakmjd = head['PEAKMJD']
            mwebv = head['MWEBV']
            mwebv_err = head['MWEBV_ERR']
            ra = head['RA']
            if 'DEC' in header_data.names:
                dec = head['DEC']
            else:
                dec = head['DECL']
            photoz = head['HOSTGAL_PHOTOZ']
            photozerr = head['HOSTGAL_PHOTOZ_ERR']
            print(i, len(header_data))

            phot_data = getter.get_light_curve_array(phot_files[fileidx], ptrobs_min, ptrobs_max)
            lc = getter.convert_pandas_lc_to_recarray_lc(phot_data, passbands=passbands)
            inputlightcurve = InputLightCurve(lc['mjd'], lc['flux'], lc['dflux'], lc['pb'], lc['photflag'],
                                              ra, dec, objid, redshift, mwebv,
                                              known_redshift=known_redshift,
                                              training_set_parameters={'class_number': int(model_num),
                                                                       'peakmjd': peakmjd})
            # TODO: work out why some light curves fail mcmc
            try:
                savepd = inputlightcurve.preprocess_light_curve()
            except Exception as e:
                print("Failed on object", objid, e)
                continue
            store.append(objid, savepd)

    store.close()
    print("saved %s" % save_fname)
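# A minimal sketch (hypothetical helper, not part of the original code) for collecting
# matching SNANA HEAD/PHOT FITS file pairs before calling
# read_light_curves_from_snana_fits_files. It assumes the usual SNANA naming convention
# where every *_HEAD.FITS(.gz) file has a *_PHOT.FITS(.gz) partner in the same directory.
def find_snana_fits_files(dir_name):
    import glob
    import os

    head_files = sorted(glob.glob(os.path.join(dir_name, '*HEAD.FITS*')))
    phot_files = sorted(glob.glob(os.path.join(dir_name, '*PHOT.FITS*')))
    assert len(head_files) == len(phot_files), "Mismatched number of HEAD and PHOT files"
    return head_files, phot_files

# Example usage (paths are placeholders):
# head_files, phot_files = find_snana_fits_files('data/ZTF_20190512/')
# read_light_curves_from_snana_fits_files('data/saved_lc_ZTF_20190512.hdf5', head_files, phot_files)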
def get_custom_data(class_num, data_dir, save_dir, passbands, known_redshift, nprocesses, redo):
    """ Get data from custom data files. You will need to write this function yourself, using the following skeleton as a guide.

    Parameters
    ----------
    class_num : int
        Class number. E.g. SNIa is 1. See helpers.py for lookup table.
        E.g. class_num = 1
    data_dir : str
        Directory where data is stored.
        E.g. data_dir='data/ZTF_20190512/'
    save_dir : str
        Directory to save processed data.
        E.g. save_dir='data/saved_light_curves/'
    passbands : tuple
        Passbands to use.
        E.g. passbands=('g', 'r')
    known_redshift : bool
        Whether to correct the light curves for cosmological time dilation using redshift.
    nprocesses : int or None
        Number of processes to use.
    redo : bool
        Whether to redo reading the data and saving the processed data.

    Returns
    -------
    light_curves : dict of astropy.table.Table objects
        E.g. light_curves['objid1'] =
            passband     time        flux    fluxErr photflag
                str1  float32     float32    float32    int32
            -------- -------- ----------- ---------- --------
                   g -46.8942  -48.926975  42.277767        0
                   g -43.9352  -105.35379   72.97575        0
                   g -35.9161  -46.264206     99.9172       0
                   g -28.9377  -28.978344  42.417065        0
                   g -25.9787  109.886566   46.03949        0
                   g -15.0399    -80.2485   80.38155        0
                   g -12.0218    93.51743  113.21529        0
                   g  -6.9585   248.88364 108.606865        0
                   g  -4.0411   341.41498  47.765404        0
                   g      0.0    501.7441   45.37485     6144
                 ...      ...         ...        ...      ...
                   r  40.9147   194.32494  57.836903     4096
                   r  59.9162    67.59185   45.66463     4096
                   r  62.8976    80.85155  44.356197     4096
                   r  65.8974   28.174305   44.75049     4096
                   r  71.8966  -18.790287 108.049774     4096
                   r  74.9297  -3.1707647  125.15057     4096
                   r  77.9341 -11.0205965 125.784676     4096
                   r  80.8576   129.65466   69.99305     4096
                   r  88.8922  -14.259436  52.917866     4096
                   r 103.8734   27.178356 115.537704     4096
    """

    # If the data has already been processed, load it. Otherwise read it and save it.
    save_lc_filepath = os.path.join(save_dir, f"lc_classnum_{class_num}.pickle")

    if os.path.exists(save_lc_filepath) and not redo:
        with open(save_lc_filepath, "rb") as fp:  # Unpickling
            light_curves = pickle.load(fp)
    else:
        light_curves = {}

        # Read in data from data_dir and get the mjd, flux, fluxerr, passband, photflag as 1D numpy arrays for
        # each light curve. Get the ra, dec, objid, redshift, mwebv, model_num, peakmjd as floats or strings.
        # Set whether you'd like to train a model with a known redshift or not. Set known_redshift as a boolean.

        # Enter your own data-reading code here that gets the mjds, fluxes, fluxerrs, passbands, photflags,
        # ras, decs, objids, redshifts, mwebvs, model_nums, peakmjds for all the light curves from the data_dir.

        # Once you have the required data information for each light curve, pass it into InputLightCurve with
        # something like the following code:
        for i, objid in enumerate(objids):
            inputlightcurve = InputLightCurve(mjds[i], fluxes[i], fluxerrs[i], passbands[i], photflags[i],
                                              ras[i], decs[i], objids[i], redshifts[i], mwebvs[i],
                                              known_redshift=known_redshift,
                                              training_set_parameters={'class_number': int(class_num),
                                                                       'peakmjd': peakmjds[i]})
            light_curves[objid] = inputlightcurve.preprocess_light_curve()

        # If you think that reading the data is too slow, you may want to replace the for loop above with
        # multiprocessing. See the example function in get_training_data.py if you need help doing this.

        # Next, we save it:
        with open(save_lc_filepath, "wb") as fp:  # Pickling
            pickle.dump(light_curves, fp)

    return light_curves
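# Example call (a hedged sketch): class number, directories and passbands are placeholders.
# get_custom_data is only a skeleton until the data-reading code marked above is filled in
# for your own files.
# light_curves = get_custom_data(class_num=1, data_dir='data/ZTF_20190512/',
#                                save_dir='data/saved_light_curves/', passbands=('g', 'r'),
#                                known_redshift=True, nprocesses=1, redo=False)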