def make_datasets(sightlines, kernel=kernel, REST_RANGE=REST_RANGE, v=best_v['all'], output='MOCK_spectra/processed/datasets.npy', validate=True):
    """
    Generate a training set or validation set for DESI and save it to disk.

    Parameters
    ----------
    sightlines : list of dla_cnn.data_model.Sightline
        The sightlines; they should already be preprocessed.
    kernel : int
        Window size forwarded to ``label_sightline`` and
        ``split_sightline_into_samples``.
    REST_RANGE : sequence
        Rest-frame wavelength range forwarded to the same helpers.
    v :
        Velocity parameter forwarded to ``split_sightline_into_samples``
        (defaults to the module-level ``best_v['all']``).
    output : str
        Path where the resulting dict is written via ``np.save``.
    validate : bool
        If True, build the validation set: every sample of each sightline
        plus the true DLA wavelengths and column densities. If False,
        build the training set from a 50/50 positive/negative sample
        selection per sightline.

    Returns
    -------
    dict
        Keyed by ``sightline.id``. Each entry holds 'FLUX' and the three
        labels ('labels_classifier', 'labels_offset', 'col_density');
        validation entries additionally hold 'wavelength_dlas' and
        'coldensity_dlas'.
    """
    dataset = {}
    for sightline in sightlines:
        wavelength_dlas = [dla.central_wavelength for dla in sightline.dlas]
        coldensity_dlas = [dla.col_density for dla in sightline.dlas]
        label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE)
        data_split = split_sightline_into_samples(
            sightline, REST_RANGE=REST_RANGE, kernel=kernel, v=v)
        if validate:
            # Keep every sample of the sightline, plus the ground-truth
            # DLA data needed to score predictions later.
            flux = np.vstack([data_split[0]])
            labels_classifier = np.hstack([data_split[1]])
            labels_offset = np.hstack([data_split[2]])
            col_density = np.hstack([data_split[3]])
            dataset[sightline.id] = {
                'FLUX': flux,
                'labels_classifier': labels_classifier,
                'labels_offset': labels_offset,
                'col_density': col_density,
                'wavelength_dlas': wavelength_dlas,
                'coldensity_dlas': coldensity_dlas
            }
        else:
            sample_masks = select_samples_50p_pos_neg(sightline, kernel=kernel)
            # len() works for both list and ndarray returns; a bare
            # `!= []` comparison would be elementwise on an ndarray.
            if len(sample_masks) > 0:
                flux = np.vstack([data_split[0][m] for m in sample_masks])
                labels_classifier = np.hstack(
                    [data_split[1][m] for m in sample_masks])
                labels_offset = np.hstack(
                    [data_split[2][m] for m in sample_masks])
                col_density = np.hstack(
                    [data_split[3][m] for m in sample_masks])
                dataset[sightline.id] = {
                    'FLUX': flux,
                    'labels_classifier': labels_classifier,
                    'labels_offset': labels_offset,
                    'col_density': col_density
                }
    np.save(output, dataset)
    return dataset
def is_lyb(self, peakix):
    """
    Return True if the given peak index (from ``prediction.peaks_ixs``)
    looks like the Lyman-beta imprint of another DLA in the same set of
    predicted peaks.

    :param peakix: index (into the sample arrays) of the peak to test;
        must be one of ``self.prediction.peaks_ixs``.
    :return: bool — True means Ly-b, False means Ly-a.
    """
    assert self.prediction is not None and peakix in self.prediction.peaks_ixs
    samples = split_sightline_into_samples(self)
    lam_analyse = samples[5]
    # If this peak were a Ly-b line, its Ly-a counterpart would sit at this
    # longer wavelength (Ly-b 1025.722 A scaled up to Ly-a 1215.67 A).
    lambda_higher = lam_analyse[peakix] / (1025.722 / 1215.67)
    # Distance of every detected peak from that candidate Ly-a position,
    # in the spectrum reference frame.
    separations = np.abs(lam_analyse[self.prediction.peaks_ixs] - lambda_higher)
    nearest_ix = np.argmin(separations)
    # Column densities of the nearest (candidate Ly-a) peak and of this peak.
    _, potential_lya_nhi, _, _ = self.prediction.get_coldensity_for_peak(
        self.prediction.peaks_ixs[nearest_ix])
    _, potential_lyb_nhi, _, _ = self.prediction.get_coldensity_for_peak(peakix)
    # Validations: the nearest peak must be close enough to match (<= 15),
    # and a genuine Ly-b should report a column density at least 0.3 dex
    # below its Ly-a counterpart.
    close_enough = separations[nearest_ix] <= 15
    weaker_than_lya = potential_lyb_nhi < potential_lya_nhi - 0.3
    return close_enough and weaker_than_lya
def make_smoothdatasets(sightlines, validate=True):
    """
    Generate a smoothed training set or validation set for DESI.

    Parameters
    ----------
    sightlines : list of dla_cnn.data_model.Sightline
        The sightlines; they should already be preprocessed.
    validate : bool
        If True, build the validation set: every sample of each sightline
        plus wavelengths and the true DLA data. If False, build the
        training set from a 50/50 positive/negative sample selection.

    Returns
    -------
    dict
        Keyed by ``sightline.id``. Each entry holds the smoothed
        'FLUXMATRIX' and the three labels; validation entries additionally
        hold 'lam', 'wavelength_dlas' and 'coldensity_dlas'.
    """
    dataset = {}
    for sightline in sightlines:
        wavelength_dlas = [dla.central_wavelength for dla in sightline.dlas]
        coldensity_dlas = [dla.col_density for dla in sightline.dlas]
        label_sightline(sightline)
        data_split = split_sightline_into_samples(sightline)
        if validate:
            flux = np.vstack([data_split[0]])
            labels_classifier = np.hstack([data_split[1]])
            labels_offset = np.hstack([data_split[2]])
            col_density = np.hstack([data_split[3]])
            lam = np.vstack([data_split[4]])
            flux_matrix = smooth_flux(flux)
            dataset[sightline.id] = {
                'FLUXMATRIX': flux_matrix,
                'lam': lam,
                'labels_classifier': labels_classifier,
                'labels_offset': labels_offset,
                'col_density': col_density,
                'wavelength_dlas': wavelength_dlas,
                'coldensity_dlas': coldensity_dlas
            }
        else:
            sample_masks = select_samples_50p_pos_neg(sightline)
            # len() works for both list and ndarray returns; a bare
            # `!= []` comparison would be elementwise on an ndarray.
            if len(sample_masks) > 0:
                flux = np.vstack([data_split[0][m] for m in sample_masks])
                labels_classifier = np.hstack(
                    [data_split[1][m] for m in sample_masks])
                labels_offset = np.hstack(
                    [data_split[2][m] for m in sample_masks])
                col_density = np.hstack(
                    [data_split[3][m] for m in sample_masks])
                flux_matrix = smooth_flux(flux)
                dataset[sightline.id] = {
                    'FLUXMATRIX': flux_matrix,
                    'labels_classifier': labels_classifier,
                    'labels_offset': labels_offset,
                    'col_density': col_density
                }
    return dataset
def analyze_pred(sightline, pred, conf, offset, coldensity, PEAK_THRESH):
    """
    Attach a Prediction to a sightline, find its peaks, and return an
    absorber catalog for that sightline.

    Parameters
    ----------
    sightline : dla_cnn.data_model.Sightline
        The sightline being analyzed; its ``prediction`` attribute is set
        as a side effect.
    pred : sequence
        Per-sample classification output (0 means "no DLA").
    conf : sequence
        Per-sample classification confidence.
    offset : sequence
        Per-sample localization offsets; mutated in place (zeroed where
        ``pred`` is 0).
    coldensity : sequence
        Per-sample column-density predictions.
    PEAK_THRESH :
        Peak-detection threshold forwarded to ``compute_peaks``.

    Returns
    -------
    astropy.table.Table
        One row per detected peak: coordinates, redshifts, NHI, confidence
        and absorber type ('DLA', 'LYB' or 'SUBDLA').
    """
    # Zero the offsets wherever classification says "no DLA", so stray
    # offsets cannot bias the offset-histogram peak finding.
    for i, p in enumerate(pred):
        if p == 0:
            offset[i] = 0
    sightline.prediction = Prediction(loc_pred=pred, loc_conf=conf,
                                      offsets=offset, density_data=coldensity)
    compute_peaks(sightline, PEAK_THRESH)
    sightline.prediction.smoothed_loc_conf()
    data_split = split_sightline_into_samples(sightline)
    lam_analyse = data_split[5]
    # Absorber catalog for this sightline, one row per detected peak.
    dla_tbl = Table(
        names=('TARGET_RA', 'TARGET_DEC', 'ZQSO', 'Z', 'TARGETID', 'S/N',
               'DLAID', 'NHI', 'DLA_CONFIDENCE', 'NHI_STD', 'ABSORBER_TYPE'),
        dtype=('float', 'float', 'float', 'float', 'int', 'float', 'str',
               'float', 'float', 'float', 'str'),
        meta={'EXTNAME': 'DLACAT'})
    for jj, peak in enumerate(sightline.prediction.peaks_ixs):
        peak_lam_spectrum = lam_analyse[peak]
        # Absorber redshift from the observed Ly-a wavelength (1215.67 A).
        z_dla = float(peak_lam_spectrum) / 1215.67 - 1
        _, mean_col_density_prediction, std_col_density_prediction, _ = \
            sightline.prediction.get_coldensity_for_peak(peak)
        # NHI >= 20.3 marks a DLA; below that, distinguish a Ly-b imprint
        # of another DLA from a genuine sub-DLA.
        if mean_col_density_prediction >= 20.3:
            absorber_type = "DLA"
        elif sightline.is_lyb(peak):
            absorber_type = "LYB"
        else:
            absorber_type = "SUBDLA"
        dla_tbl.add_row((sightline.ra, sightline.dec, sightline.z_qso,
                         float(z_dla), sightline.id, sightline.s2n,
                         str(sightline.id) + '00' + str(jj),
                         float(mean_col_density_prediction),
                         min(1.0, float(sightline.prediction.offset_conv_sum[peak])),
                         float(std_col_density_prediction),
                         absorber_type))
    return dla_tbl