def fetch_and_shift_spectra(n_spectra,
                            outfile,
                            primtarget=TARGET_GALAXY,
                            zlim=(0, 0.7),
                            loglam_start=3.5,
                            loglam_end=3.9,
                            Nlam=1000):
    """Download spectra and resample them onto a common rest-frame grid.

    Queries CAS for spectra matching ``primtarget`` and ``zlim``, downloads
    each one, shifts it to the rest frame, rebins it onto a shared
    logarithmic wavelength grid and saves everything to ``outfile`` with
    ``np.savez``.

    Spectra that cannot be downloaded (``HTTPError``) or whose rebinned flux
    is identically zero are skipped.  Skipped spectra do not occupy a row in
    the saved arrays.

    Parameters
    ----------
    n_spectra : int
        Number of spectra requested from the query.
    outfile : str or file-like
        Destination handed to ``np.savez``.
    primtarget : int, optional
        Target-selection flag forwarded to ``query_plate_mjd_fiber``.
    zlim : sequence of two floats, optional
        Lower and upper redshift limits forwarded to the query.
    loglam_start, loglam_end : float, optional
        Logarithmic wavelength limits of the output grid (presumably log10
        of Angstroms, following the SDSS convention -- confirm).
    Nlam : int, optional
        Number of wavelength bins in the output grid.

    Returns
    -------
    None
        The result is written to ``outfile`` with the keys ``spectra``,
        ``mask``, ``coeff0``, ``coeff1``, ``spec_cln``, ``lineindex_cln``,
        ``log_NII_Ha``, ``log_OIII_Hb``, ``z`` and ``zerr``.
    """
    # First query for the list of spectra to download
    plate, mjd, fiber = query_plate_mjd_fiber(n_spectra, primtarget,
                                              zlim[0], zlim[1])

    # Size the buffers from what the query actually returned, so a result
    # list longer than the request cannot index past the end of the arrays.
    n_spectra = len(plate)

    # Set up arrays to hold information gathered from the spectra
    spec_cln = np.zeros(n_spectra, dtype=np.int32)
    lineindex_cln = np.zeros(n_spectra, dtype=np.int32)

    log_NII_Ha = np.zeros(n_spectra, dtype=np.float32)
    log_OIII_Hb = np.zeros(n_spectra, dtype=np.float32)

    z = np.zeros(n_spectra, dtype=np.float32)
    zerr = np.zeros(n_spectra, dtype=np.float32)

    spectra = np.zeros((n_spectra, Nlam), dtype=np.float32)
    # The ``np.bool`` alias was removed from NumPy; the builtin is equivalent.
    mask = np.zeros((n_spectra, Nlam), dtype=bool)

    # Calculate new wavelength coefficients
    new_coeff0 = loglam_start
    new_coeff1 = (loglam_end - loglam_start) / Nlam

    # Now download all the needed spectra, and resample to a common
    # wavelength bin.
    num_skipped = 0
    # N is the next free output row.  It only advances when a spectrum is
    # kept, so skipped spectra leave no all-zero gap rows behind.
    N = 0

    for i, (plate_i, mjd_i, fiber_i) in enumerate(zip(plate, mjd, fiber)):
        sys.stdout.write(' %i / %i spectra\r' % (i + 1, n_spectra))
        sys.stdout.flush()

        try:
            spec = fetch_sdss_spectrum(plate_i, mjd_i, fiber_i)
        except HTTPError:
            num_skipped += 1
            print("%i, %i, %i not found" % (plate_i, mjd_i, fiber_i))
            continue

        spec_rebin = spec.restframe().rebin(new_coeff0, new_coeff1, Nlam)

        if np.all(spec_rebin.spectrum == 0):
            num_skipped += 1
            print("%i, %i, %i is all zero" % (plate_i, mjd_i, fiber_i))
            continue

        spec_cln[N] = spec.spec_cln

        lineindex_cln[N], (log_NII_Ha[N], log_OIII_Hb[N])\
            = spec.lineratio_index()

        z[N] = spec.z
        zerr[N] = spec.zerr

        spectra[N] = spec_rebin.spectrum
        # Arguments are passed through unchanged; see ``compute_mask`` for
        # their meaning.
        mask[N] = spec_rebin.compute_mask(0.5, 5)

        N += 1
    sys.stdout.write('\n')

    print("   %i spectra skipped" % num_skipped)
    print("   %i spectra processed" % N)
    print("saving to %s" % outfile)

    np.savez(outfile,
             spectra=spectra[:N],
             mask=mask[:N],
             coeff0=new_coeff0,
             coeff1=new_coeff1,
             spec_cln=spec_cln[:N],
             lineindex_cln=lineindex_cln[:N],
             log_NII_Ha=log_NII_Ha[:N],
             log_OIII_Hb=log_OIII_Hb[:N],
             z=z[:N],
             zerr=zerr[:N])
import sys
import numpy as np
import matplotlib.pyplot as plt
from astroML.datasets import fetch_sdss_spectrum
from astroML.datasets.tools import query_plate_mjd_fiber, TARGET_GALAXY_RED
from specanalysis import SpecMeanAggregator

# Selection handed to the query: target flag and (lower, upper) redshift
# limits.
primtarget=TARGET_GALAXY_RED
zlim=(0.2, 0.6)

# Look up the (plate, mjd, fiber) identifiers of the matching spectra.
# NOTE(review): the first argument (100) is presumably the number of spectra
# requested -- confirm against query_plate_mjd_fiber.
plate, mjd, fiber = query_plate_mjd_fiber(100, primtarget, zlim[0], zlim[1])

agg = SpecMeanAggregator()
lam = agg.lam
zdist = []  # redshift (spec.z) of every spectrum fetched below

for plate_n, mjd_n, fiber_n in zip(plate, mjd, fiber):
    # The trailing '\r' returns the cursor to the start of the line so each
    # identifier overwrites the previous one instead of scrolling.
    sys.stdout.write("{0}.{1}.{2} \r".format(plate_n, mjd_n, fiber_n))
    sys.stdout.flush()
    # NOTE(review): a failed download is not caught here, so any exception
    # raised by fetch_sdss_spectrum aborts the script.
    spec = fetch_sdss_spectrum(plate_n, mjd_n, fiber_n)
    zdist.append(spec.z)
    agg.add_spec(spec)

# Finish the in-place progress line.
sys.stdout.write('\n')

# NOTE: this rebinds `spec`, which held the last fetched spectrum, to the
# first value returned by the aggregator (presumably the mean spectrum, with
# `dspec` its uncertainty -- confirm against SpecMeanAggregator.reduce).
spec, dspec = agg.reduce()
def fetch_and_shift_spectra(n_spectra,
                            outfile,
                            primtarget=TARGET_GALAXY,
                            zlim=(0, 0.7),
                            loglam_start=3.5,
                            loglam_end=3.9,
                            Nlam=1000):
    """Download spectra and resample them onto a common rest-frame grid.

    Queries CAS for spectra matching ``primtarget`` and ``zlim``, downloads
    each one, shifts it to the rest frame, rebins it onto a shared
    logarithmic wavelength grid and saves everything to ``outfile`` with
    ``np.savez``.

    Spectra that cannot be downloaded (``HTTPError``) or whose rebinned flux
    is identically zero are skipped.  Skipped spectra do not occupy a row in
    the saved arrays, and every input spectrum is visited exactly once.

    Parameters
    ----------
    n_spectra : int
        Number of spectra requested from the query.
    outfile : str or file-like
        Destination handed to ``np.savez``.
    primtarget : int, optional
        Target-selection flag forwarded to ``query_plate_mjd_fiber``.
    zlim : sequence of two floats, optional
        Lower and upper redshift limits forwarded to the query.
    loglam_start, loglam_end : float, optional
        Logarithmic wavelength limits of the output grid (presumably log10
        of Angstroms, following the SDSS convention -- confirm).
    Nlam : int, optional
        Number of wavelength bins in the output grid.

    Returns
    -------
    None
        The result is written to ``outfile`` with the keys ``spectra``,
        ``mask``, ``coeff0``, ``coeff1``, ``spec_cln``, ``lineindex_cln``,
        ``log_NII_Ha``, ``log_OIII_Hb``, ``z`` and ``zerr``.
    """
    # First query for the list of spectra to download
    plate, mjd, fiber = query_plate_mjd_fiber(n_spectra, primtarget,
                                              zlim[0], zlim[1])

    # Size the buffers from what the query actually returned, so a result
    # list longer than the request cannot index past the end of the arrays.
    n_spectra = len(plate)

    # Set up arrays to hold information gathered from the spectra
    spec_cln = np.zeros(n_spectra, dtype=np.int32)
    lineindex_cln = np.zeros(n_spectra, dtype=np.int32)

    log_NII_Ha = np.zeros(n_spectra, dtype=np.float32)
    log_OIII_Hb = np.zeros(n_spectra, dtype=np.float32)

    z = np.zeros(n_spectra, dtype=np.float32)
    zerr = np.zeros(n_spectra, dtype=np.float32)

    spectra = np.zeros((n_spectra, Nlam), dtype=np.float32)
    # The ``np.bool`` alias was removed from NumPy; the builtin is equivalent.
    mask = np.zeros((n_spectra, Nlam), dtype=bool)

    # Calculate new wavelength coefficients
    new_coeff0 = loglam_start
    new_coeff1 = (loglam_end - loglam_start) / Nlam

    # Now download all the needed spectra, and resample to a common
    # wavelength bin.
    num_skipped = 0
    # The input position (i) and the output row (N) are tracked separately.
    # A single shared counter either re-fetched the same all-zero spectrum
    # forever (when it was not advanced on skip) or left empty rows in the
    # output (when it was).
    N = 0

    for i, (plate_i, mjd_i, fiber_i) in enumerate(zip(plate, mjd, fiber)):
        sys.stdout.write(' %i / %i spectra\r' % (i + 1, n_spectra))
        sys.stdout.flush()

        try:
            spec = fetch_sdss_spectrum(plate_i, mjd_i, fiber_i)
        except HTTPError:
            num_skipped += 1
            print("%i, %i, %i not found" % (plate_i, mjd_i, fiber_i))
            continue

        spec_rebin = spec.restframe().rebin(new_coeff0, new_coeff1, Nlam)

        if np.all(spec_rebin.spectrum == 0):
            num_skipped += 1
            print("%i, %i, %i is all zero" % (plate_i, mjd_i, fiber_i))
            continue

        spec_cln[N] = spec.spec_cln

        lineindex_cln[N], (log_NII_Ha[N], log_OIII_Hb[N])\
            = spec.lineratio_index()

        z[N] = spec.z
        zerr[N] = spec.zerr

        spectra[N] = spec_rebin.spectrum
        # Arguments are passed through unchanged; see ``compute_mask`` for
        # their meaning.
        mask[N] = spec_rebin.compute_mask(0.5, 5)

        N += 1
    sys.stdout.write('\n')

    print("   %i spectra skipped" % num_skipped)
    print("   %i spectra processed" % N)
    print("saving to %s" % outfile)

    np.savez(outfile,
             spectra=spectra[:N],
             mask=mask[:N],
             coeff0=new_coeff0,
             coeff1=new_coeff1,
             spec_cln=spec_cln[:N],
             lineindex_cln=lineindex_cln[:N],
             log_NII_Ha=log_NII_Ha[:N],
             log_OIII_Hb=log_OIII_Hb[:N],
             z=z[:N],
             zerr=zerr[:N])
def fetch_and_shift_spectra(n_spectra,
                            outfile,
                            primtarget=TARGET_GALAXY,
                            zlim=(0, 0.7),
                            loglam_start=3.5,
                            loglam_end=3.9,
                            Nlam=1000,
                            data_home='/epyc/users/sportill/specAE/cache'):
    """Download spectra and resample them onto a common rest-frame grid.

    Queries CAS for spectra matching ``primtarget`` and ``zlim``, downloads
    each one, shifts it to the rest frame, rebins it onto a shared
    logarithmic wavelength grid and saves everything to ``outfile`` with
    ``np.savez``.

    A spectrum is skipped, and takes no row in the output, when it cannot be
    downloaded (``HTTPError``), when its own ``z`` lies outside ``zlim``, or
    when its rebinned flux is identically zero.

    Parameters
    ----------
    n_spectra : int
        Number of spectra requested from the query.
    outfile : str or file-like
        Destination handed to ``np.savez``.
    primtarget : int, optional
        Target-selection flag forwarded to ``query_plate_mjd_fiber``.
    zlim : sequence of two floats, optional
        Lower and upper redshift limits, used both in the query and to
        re-check each downloaded spectrum.
    loglam_start, loglam_end : float, optional
        Logarithmic wavelength limits of the output grid (presumably log10
        of Angstroms, following the SDSS convention -- confirm).
    Nlam : int, optional
        Number of wavelength bins in the output grid.
    data_home : str, optional
        Value forwarded as ``data_home`` to ``fetch_sdss_spectrum``.  The
        default is the path that used to be hard-coded in the call, so
        existing callers see no change.

    Returns
    -------
    None
        The result is written to ``outfile`` with the keys ``spectra``,
        ``mask``, ``spec_err``, ``coeff0``, ``coeff1``, ``spec_cln``,
        ``lineindex_cln``, ``log_NII_Ha``, ``log_OIII_Hb``, ``z``, ``zerr``,
        ``plate``, ``mjd`` and ``fiber``.

    Raises
    ------
    AssertionError
        If the computed mask for a kept spectrum has no unmasked pixel.
    """
    # First query for the list of spectra to download
    plate, mjd, fiber = query_plate_mjd_fiber(n_spectra, primtarget,
                                              zlim[0], zlim[1])

    # Size the buffers from what the query actually returned, so a result
    # list longer than the request cannot index past the end of the arrays.
    n_spectra = len(plate)

    # Set up arrays to hold information gathered from the spectra
    spec_cln = np.zeros(n_spectra, dtype=np.int32)
    lineindex_cln = np.zeros(n_spectra, dtype=np.int32)

    log_NII_Ha = np.zeros(n_spectra, dtype=np.float32)
    log_OIII_Hb = np.zeros(n_spectra, dtype=np.float32)

    z = np.zeros(n_spectra, dtype=np.float32)
    zerr = np.zeros(n_spectra, dtype=np.float32)

    spectra = np.zeros((n_spectra, Nlam), dtype=np.float32)
    # The ``np.bool`` alias was removed from NumPy; the builtin is equivalent.
    mask = np.zeros((n_spectra, Nlam), dtype=bool)
    specerr = np.zeros((n_spectra, Nlam), dtype=np.float32)

    # also save plate, mjd, fiber to allow reference to SDSS data
    plates = np.zeros(n_spectra, dtype=np.int32)
    mjds = np.zeros(n_spectra, dtype=np.int32)
    fibers = np.zeros(n_spectra, dtype=np.int32)

    # Calculate new wavelength coefficients
    new_coeff0 = loglam_start
    new_coeff1 = (loglam_end - loglam_start) / Nlam

    # Now download all the needed spectra, and resample to a common
    # wavelength bin.
    num_skipped = 0
    # N is the next free output row.  It only advances when a spectrum is
    # kept, so skipped spectra do not create gaps in the arrays.
    N = 0

    for i, (plate_i, mjd_i, fiber_i) in enumerate(zip(plate, mjd, fiber)):
        sys.stdout.write(' %i / %i spectra\r' % (i + 1, n_spectra))
        sys.stdout.flush()

        try:
            spec = fetch_sdss_spectrum(plate_i, mjd_i, fiber_i,
                                       data_home=data_home)
        except HTTPError:
            num_skipped += 1
            print("%i, %i, %i not found" % (plate_i, mjd_i, fiber_i))
            continue

        spec_rebin = spec.restframe().rebin(new_coeff0, new_coeff1, Nlam)

        # Re-check the redshift on the downloaded spectrum itself.
        if spec.z < zlim[0] or spec.z > zlim[1]:
            num_skipped += 1
            print("%i, %i, %i outside redshift range"
                  % (plate_i, mjd_i, fiber_i))
            continue

        if np.all(spec_rebin.spectrum == 0):
            num_skipped += 1
            print("%i, %i, %i is all zero" % (plate_i, mjd_i, fiber_i))
            continue

        # NOTE: a filter that skipped spectra with spec_cln outside 2..3
        # ("not a galaxy spectrum") used to be sketched here and is
        # intentionally disabled.

        spec_cln[N] = spec.spec_cln

        lineindex_cln[N], (log_NII_Ha[N], log_OIII_Hb[N])\
            = spec.lineratio_index()

        z[N] = spec.z
        zerr[N] = spec.zerr

        spectra[N] = spec_rebin.spectrum
        # Arguments are passed through unchanged; see ``compute_mask`` for
        # their meaning.
        mask[N] = spec_rebin.compute_mask(0.5, 5)
        # Sanity check: at least one pixel must be left unmasked.  Kept as an
        # assert (AssertionError) for compatibility; it is stripped under -O.
        assert (mask[N] == 0).any()

        specerr[N] = spec_rebin.error

        plates[N] = plate_i
        mjds[N] = mjd_i
        fibers[N] = fiber_i

        N += 1
    sys.stdout.write('\n')

    print("   %i spectra skipped" % num_skipped)
    print("   %i spectra processed" % N)
    print("saving to %s" % outfile)

    np.savez(outfile,
             spectra=spectra[:N],
             mask=mask[:N],
             spec_err=specerr[:N],
             coeff0=new_coeff0,
             coeff1=new_coeff1,
             spec_cln=spec_cln[:N],
             lineindex_cln=lineindex_cln[:N],
             log_NII_Ha=log_NII_Ha[:N],
             log_OIII_Hb=log_OIII_Hb[:N],
             z=z[:N],
             zerr=zerr[:N],
             plate=plates[:N],
             mjd=mjds[:N],
             fiber=fibers[:N])