def get_snr(wav, flux):
    """Return the derived signal-to-noise ratio of a spectrum.

    Parameters
    ----------
    wav
        Wavelength values; they are multiplied by ``u.AA`` (Angstrom).
    flux
        Flux values; they are multiplied by ``u.erg`` and divided by
        ``u.cm * u.cm * u.s * u.AA``.

    Returns
    -------
    Whatever ``snr_derived`` returns for the assembled ``Spectrum1D``.
    """
    spectrum = Spectrum1D(
        spectral_axis=wav * u.AA,
        flux=flux * u.erg / (u.cm * u.cm * u.s * u.AA),
    )
    return snr_derived(spectrum)
def snr_spec(flux, wl, n):
    """Add synthetic noise to a spectrum and report its signal-to-noise ratio.

    The noise added to the samples is a random permutation of
    ``0 .. len(wl) - 1`` scaled by ``n / len(wl)``, so every noise value lies
    in ``[0, n)`` and each value is used exactly once.  The same noise array is
    attached to the spectrum as its standard-deviation uncertainty.

    The S/N measured by ``snr`` over the 3070-3090 Angstrom region is printed
    as ``'SNR: <value>'``.

    Parameters
    ----------
    flux
        Indexable sequence of flux values, at least ``len(wl)`` long.
    wl
        Wavelength values; they are multiplied by ``u.AA`` (Angstrom).
    n
        Noise amplitude (upper bound of the added noise).
        NOTE(review): an earlier comment in this function said
        "0.042 = snr 50"; presumably n = 0.042 yields an S/N of about 50 for
        the data it was tuned on -- confirm before relying on it.

    Returns
    -------
    list
        The noisy flux values, ``flux[i] + noise[i]`` for every sample.
    """
    n_points = len(wl)
    # Sampling the whole range without replacement yields a permutation.
    noise = n * np.asarray(random.sample(range(n_points), n_points)) / n_points
    unc = StdDevUncertainty(noise)

    fluxn = [flux[i] + noise[i] for i in range(n_points)]

    spec1d = Spectrum1D(spectral_axis=wl * u.AA,
                        flux=fluxn * u.Jy,
                        uncertainty=unc)
    region = SpectralRegion(3070 * u.AA, 3090 * u.AA)
    print('SNR: ' + str(snr(spec1d, region)))
    return fluxn
def Load_Files(file_1, file_2, N_sample, objts, classification=False):
    """Build a feature set and target vector from two FITS files.

    ``file_1`` is the catalogue with human-expert classifications (HDU 1 must
    provide PLATE, MJD, FIBERID, CLASS_PERSON, Z_VI, Z_CONF_PERSON,
    BAL_FLAG_VI and BI_CIV).  ``file_2`` holds the spectra: HDU 1 provides
    PLATE, MJD and FIBERID for every spectrum and HDU 0 the flux array (one
    row per spectrum).  Rows of both files are matched through the string
    ``'PLATE-MJD-FIBERID'``.

    CLASS_PERSON codes used here: star = 1, galaxy = 4, QSO = 3, QSO_BAL = 30.

    Parameters
    ----------
    file_1, file_2
        Paths handed to ``fits.open`` (opened with ``mode='denywrite'`` and
        closed again before returning).
    N_sample
        Total number of objects to draw.  It is split evenly (integer
        division) when several classes are sampled; outputs are sized by the
        number of objects actually drawn.
    objts
        Class name or sequence of class names, used only when
        ``classification`` is not ``True``:
        ``objts[0] == 'QSO'`` draws QSOs, ``objts[0] == 'QSO_BAL'`` draws BAL
        QSOs, any other two-element sequence draws half of each.  Only objects
        with ``Z_CONF_PERSON == 3`` are eligible.
    classification
        When equal to ``True``, return normalised spectra and class labels
        instead of spectral parameters and redshifts.

    Returns
    -------
    (parameters, y) when ``classification`` is not ``True``
        ``parameters`` is a DataFrame with columns Lines_Number, FHWM, EW,
        Mean, STDV, Spectrum_Flux and SNR (one row per object); ``y`` is the
        float array of ``Z_VI`` values.
    (X, y) when ``classification`` is ``True``
        ``X`` holds the first 443 flux bins of each spectrum, centred and
        scaled per row with ``np.ma`` routines; ``y`` is a float array with
        labels star = 0, galaxy = 1, QSO = 2, QSO_BAL = 3.

    Raises
    ------
    ValueError
        If ``objts`` selects none of the supported sampling modes.
    """
    print('INFO:')
    with fits.open(file_1, mode='denywrite') as hdul:
        data = hdul[1].data  # catalogue with human-expert classifications
        print('INFO:')
        with fits.open(file_2, mode='denywrite') as hdul_2:
            # Everything is computed while both files are still open; the
            # returned objects are freshly built arrays/frames.
            return _build_dataset(data, hdul_2[1].data, hdul_2[0].data,
                                  N_sample, objts, classification)


def _build_dataset(data, data2, spectra, N_sample, objts, classification):
    """Match catalogue rows to spectra, sample objects and build the outputs."""
    n_bins = 443  # number of leading flux bins used from every spectrum
    objts = np.atleast_1d(objts)  # also accepts a single class name

    _print_class_counts(data['CLASS_PERSON'])

    # Objects present in both files, with their row index in each of them.
    common_ids, idx1, idx2 = np.intersect1d(_spectrum_ids(data),
                                            _spectrum_ids(data2),
                                            return_indices=True)
    print('INFO:')
    print('I find {} objects with spectra from DR12 \n'.format(
        len(common_ids)))

    # Maps a catalogue row index to the row of its spectrum in file 2.
    spectrum_row = pd.Series(idx2, index=idx1)

    catalogue = pd.DataFrame({
        'CLASS_PERSON': np.array(data['CLASS_PERSON'], dtype=float),
        'Z_VI': np.array(data['Z_VI'], dtype=float),
        'Z_CONF_PERSON': np.array(data['Z_CONF_PERSON'], dtype=float),
        'BAL_FLAG_VI': np.array(data['BAL_FLAG_VI'], dtype=float),
        'BI_CIV': np.array(data['BI_CIV'], dtype=float),
    })
    obj = catalogue.loc[idx1]

    # Equality (not identity/truthiness) is kept on purpose so that existing
    # callers passing e.g. 1 or a string behave exactly as before.
    if classification != True:  # noqa: E712
        sample_objects = _redshift_sample(obj, objts, N_sample)
        rows = spectrum_row.loc[np.array(sample_objects.index)].values
        parameters = _spectral_parameters(spectra, rows, n_bins)
        y = np.array(sample_objects['Z_VI'], dtype=float)
        return parameters, y

    quarter = int(N_sample / 4)
    sample_objects = pd.concat(
        [_sample_class(obj, code, quarter) for code in (1, 4, 3, 30)])
    rows = spectrum_row.loc[np.array(sample_objects.index)].values
    X = _normalised_flux(spectra, rows, n_bins)
    y = sample_objects['CLASS_PERSON']
    y = y.replace([1, 4, 3, 30], [0, 1, 2, 3]).values
    return X, np.array(y, dtype=float)


def _print_class_counts(class_person):
    """Print how many catalogue rows carry each CLASS_PERSON code."""
    print('INFO: There is available')
    print('-->Star:', np.count_nonzero(class_person == 1))
    print('-->Galaxy:', np.count_nonzero(class_person == 4))
    print('-->QSO:', np.count_nonzero(class_person == 3))
    print('-->QSO BAL:', np.count_nonzero(class_person == 30))
    # Rows whose code is none of the four known classes.
    unknown = np.count_nonzero(~np.isin(class_person, (1, 3, 4, 30)))
    print('-->NN: {}\n'.format(unknown))


def _spectrum_ids(table):
    """Return the 'PLATE-MJD-FIBERID' string of every row of *table*."""
    frame = pd.DataFrame({
        'PLATE': table['PLATE'],
        'MJD': table['MJD'],
        'FIBERID': table['FIBERID'],
    })
    mjd_fiber = frame['MJD'].astype(str).str.cat(
        frame['FIBERID'].astype(str), sep="-")
    return frame['PLATE'].astype(str).str.cat(mjd_fiber, sep="-").values


def _sample_class(obj, class_code, n, confident_redshift=False):
    """Draw *n* rows of one CLASS_PERSON code with the fixed seed 5."""
    subset = obj.loc[obj['CLASS_PERSON'] == class_code]
    if confident_redshift:
        subset = subset.loc[subset['Z_CONF_PERSON'] == 3]
    # 'weights' is kept: it changes which rows a given seed selects.
    return subset.sample(n=n, weights='CLASS_PERSON', random_state=5)


def _redshift_sample(obj, objts, N_sample):
    """Pick the objects used for the redshift (non-classification) data set."""
    # NOTE(review): because the first element is tested before the length,
    # ['QSO', 'QSO_BAL'] selects QSOs only; the half/half branch is reached
    # only when objts[0] is neither name.  Kept as is to preserve results.
    if objts[0] == 'QSO':
        return _sample_class(obj, 3, int(N_sample), True)
    if objts[0] == 'QSO_BAL':
        return _sample_class(obj, 30, int(N_sample), True)
    if len(objts) == 2:
        half = int(N_sample / 2)
        return pd.concat([
            _sample_class(obj, 3, half, True),
            _sample_class(obj, 30, half, True),
        ])
    raise ValueError(
        "objts must start with 'QSO' or 'QSO_BAL', or contain two entries; "
        "got {!r}".format(objts.tolist()))


def _spectral_parameters(spectra, rows, n_bins):
    """Measure seven summary quantities on each selected spectrum."""
    kernel_size = 5
    flux_threshold = 1.1
    x = np.linspace(3600, 10500, n_bins)  # wavelength grid, same for all rows

    # Columns: number of emission lines, FWHM, equivalent width, mean,
    # standard deviation, line flux and derived S/N.
    parameters = np.zeros((len(rows), 7))
    for j, k in enumerate(rows):
        spectrum = Spectrum1D(flux=spectra[k, :n_bins] * u.Jy,
                              spectral_axis=x * u.AA)

        # Continuum fit, normalisation and Gaussian smoothing.
        g1_fit = fit_generic_continuum(spectrum)
        spectrum_smooth = gaussian_smooth(spectrum / g1_fit(x * u.AA),
                                          kernel_size)

        lines = find_lines_derivative(spectrum_smooth,
                                      flux_threshold=flux_threshold)
        emission = lines[lines['line_type'] == 'emission']

        parameters[j, 0] = emission['line_center_index'].shape[0]
        parameters[j, 1] = fwhm(spectrum_smooth).value
        parameters[j, 2] = equivalent_width(spectrum_smooth).value
        parameters[j, 3] = np.mean(spectrum_smooth.flux)
        parameters[j, 4] = np.std(spectrum_smooth.flux)
        parameters[j, 5] = line_flux(spectrum_smooth).value
        parameters[j, 6] = snr_derived(spectrum_smooth).value

    return pd.DataFrame(parameters, columns=[
        'Lines_Number', 'FHWM', 'EW', 'Mean', 'STDV', 'Spectrum_Flux', 'SNR',
    ])


def _normalised_flux(spectra, rows, n_bins):
    """Return the selected spectra centred and scaled row by row."""
    X = np.asarray(spectra[rows, :n_bins], dtype=float)
    # np.ma routines are kept so the result type matches the previous code.
    mean_flx = np.ma.average(X, axis=1)
    ll = (X - mean_flx.reshape(-1, 1))**2
    aveflux = np.ma.average(ll, axis=1)
    sflux = np.sqrt(aveflux)
    return (X - mean_flx.reshape(-1, 1)) / sflux.reshape(-1, 1)