Example #1
0
def make_datasets(sightlines,
                  kernel=kernel,
                  REST_RANGE=REST_RANGE,
                  v=best_v['all'],
                  output='MOCK_spectra/processed/datasets.npy',
                  validate=True):
    """
    Generate a training set or a validation set for DESI.

    Parameters
    ----------
    sightlines : list of dla_cnn.data_model.Sightline
        The sightlines; they should already be preprocessed.
    kernel :
        Kernel (window) size forwarded to ``label_sightline`` and
        ``split_sightline_into_samples``; defaults to the module-level value.
    REST_RANGE :
        Rest-frame wavelength range forwarded to the helpers.
    v :
        Velocity parameter forwarded to ``split_sightline_into_samples``;
        defaults to ``best_v['all']``.
    output : str
        Path where the resulting dict is written with ``np.save``.
    validate : bool
        If True, build the validation set (all samples plus each sightline's
        true DLA wavelengths/column densities); if False, build the balanced
        training set (only the 50/50 positive/negative sample selection).

    Returns
    -------
    dict
        Keyed by ``sightline.id``. Training entries contain flux and three
        labels; validation entries additionally contain the DLA metadata.
    """
    dataset = {}
    for sightline in sightlines:
        wavelength_dlas = [dla.central_wavelength for dla in sightline.dlas]
        coldensity_dlas = [dla.col_density for dla in sightline.dlas]
        label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE)
        data_split = split_sightline_into_samples(sightline,
                                                  REST_RANGE=REST_RANGE,
                                                  kernel=kernel,
                                                  v=v)
        if validate:
            flux = np.vstack([data_split[0]])
            labels_classifier = np.hstack([data_split[1]])
            labels_offset = np.hstack([data_split[2]])
            col_density = np.hstack([data_split[3]])
            dataset[sightline.id] = {
                'FLUX': flux,
                'labels_classifier': labels_classifier,
                'labels_offset': labels_offset,
                'col_density': col_density,
                'wavelength_dlas': wavelength_dlas,
                'coldensity_dlas': coldensity_dlas
            }
        else:
            sample_masks = select_samples_50p_pos_neg(sightline, kernel=kernel)
            # BUGFIX: previously the record was stored even when sample_masks
            # was empty, which raised NameError on the first sightline or
            # silently reused arrays left over from the previous sightline.
            # Store only when samples exist (matches make_smoothdatasets).
            # A length check is used instead of "!= []" so the test is also
            # safe if the helper returns a numpy array.
            if len(sample_masks) != 0:
                flux = np.vstack([data_split[0][m] for m in sample_masks])
                labels_classifier = np.hstack(
                    [data_split[1][m] for m in sample_masks])
                labels_offset = np.hstack(
                    [data_split[2][m] for m in sample_masks])
                col_density = np.hstack(
                    [data_split[3][m] for m in sample_masks])
                dataset[sightline.id] = {
                    'FLUX': flux,
                    'labels_classifier': labels_classifier,
                    'labels_offset': labels_offset,
                    'col_density': col_density
                }
    np.save(output, dataset)
    return dataset
Example #2
0
    def is_lyb(self, peakix):
        """
        Return True if the peak at ``peakix`` (taken from ``peaks_ixs``) looks
        like the Lyman-beta line of another DLA in the predicted peak set.

        :param peakix: index (into the analysed spectrum) of the candidate peak
        :return: bool -- True means Ly-b, False means Ly-a
        """
        assert self.prediction is not None and peakix in self.prediction.peaks_ixs

        lam_analyse = split_sightline_into_samples(self)[5]
        # Wavelength at which the parent DLA's Ly-a would sit if this peak
        # were its Ly-b (ratio of the two rest wavelengths, kept in the
        # original division form for bit-identical results).
        lambda_higher = (lam_analyse[peakix]) / (1025.722 / 1215.67)

        # Distance (in the spectrum reference frame) of every predicted peak
        # from that hypothetical Ly-a position; pick the closest one.
        peaks = self.prediction.peaks_ixs
        peak_difference_spectrum = np.abs(lam_analyse[peaks] - lambda_higher)
        nearest_peak_ix = np.argmin(peak_difference_spectrum)

        # Column densities of the candidate parent (Ly-a) and of this peak.
        _, potential_lya_nhi, _, _ = \
            self.prediction.get_coldensity_for_peak(peaks[nearest_peak_ix])
        _, potential_lyb_nhi, _, _ = \
            self.prediction.get_coldensity_for_peak(peakix)

        # Validations: the nearest peak must be close enough to match, and a
        # genuine Ly-b should be at least 0.3 dex weaker than its parent DLA.
        close_enough = peak_difference_spectrum[nearest_peak_ix] <= 15
        sufficiently_weaker = potential_lyb_nhi < potential_lya_nhi - 0.3

        return close_enough and sufficiently_weaker
Example #3
0
def make_smoothdatasets(sightlines, validate=True):
    """
    Generate a smoothed training set or validation set for DESI.

    Parameters
    ----------
    sightlines : list of dla_cnn.data_model.Sightline
        The sightlines; they should already be preprocessed.
    validate : bool
        If True, build the validation set (smoothed flux, lam, three labels
        and the sightline's DLA metadata); if False, build the balanced
        training set (smoothed flux and three labels only).

    Returns
    -------
    dict
        Keyed by ``sightline.id``, one record per sightline.
    """
    dataset = {}
    for sl in sightlines:
        # True DLA properties, recorded only in the validation set.
        dla_wavelengths = [dla.central_wavelength for dla in sl.dlas]
        dla_coldensities = [dla.col_density for dla in sl.dlas]
        label_sightline(sl)
        parts = split_sightline_into_samples(sl)
        if validate:
            dataset[sl.id] = {
                'FLUXMATRIX': smooth_flux(np.vstack([parts[0]])),
                'lam': np.vstack([parts[4]]),
                'labels_classifier': np.hstack([parts[1]]),
                'labels_offset': np.hstack([parts[2]]),
                'col_density': np.hstack([parts[3]]),
                'wavelength_dlas': dla_wavelengths,
                'coldensity_dlas': dla_coldensities
            }
        else:
            masks = select_samples_50p_pos_neg(sl)
            # Skip sightlines that yielded no balanced sample selection.
            if masks != []:
                selected_flux = np.vstack([parts[0][m] for m in masks])
                dataset[sl.id] = {
                    'FLUXMATRIX': smooth_flux(selected_flux),
                    'labels_classifier': np.hstack(
                        [parts[1][m] for m in masks]),
                    'labels_offset': np.hstack(
                        [parts[2][m] for m in masks]),
                    'col_density': np.hstack(
                        [parts[3][m] for m in masks])
                }
    return dataset
Example #4
0
def analyze_pred(sightline, pred, conf, offset, coldensity, PEAK_THRESH):
    """
    Attach a Prediction to the sightline, locate peaks, and build an
    absorber catalog table for this sightline.

    Parameters: per-pixel classifier output ``pred``, its confidence ``conf``,
    localization ``offset``, column densities ``coldensity``, and the peak
    threshold ``PEAK_THRESH``. Returns an astropy Table (EXTNAME 'DLACAT')
    with one row per detected peak.
    """
    # Zero the offsets wherever the classifier predicted no DLA, so they do
    # not bias the offset-histogram peak finding.
    for idx, p in enumerate(pred):
        if p == 0:
            offset[idx] = 0
    sightline.prediction = Prediction(loc_pred=pred, loc_conf=conf,
                                      offsets=offset, density_data=coldensity)
    compute_peaks(sightline, PEAK_THRESH)
    sightline.prediction.smoothed_loc_conf()
    lam_analyse = split_sightline_into_samples(sightline)[5]

    # Absorber catalog for this sightline, one row per detected peak.
    dla_tbl = Table(
        names=('TARGET_RA', 'TARGET_DEC', 'ZQSO', 'Z', 'TARGETID', 'S/N',
               'DLAID', 'NHI', 'DLA_CONFIDENCE', 'NHI_STD', 'ABSORBER_TYPE'),
        dtype=('float', 'float', 'float', 'float', 'int', 'float', 'str',
               'float', 'float', 'float', 'str'),
        meta={'EXTNAME': 'DLACAT'})
    for jj, peak in enumerate(sightline.prediction.peaks_ixs):
        peak_lam_spectrum = lam_analyse[peak]
        z_dla = float(peak_lam_spectrum) / 1215.67 - 1
        # Rest-frame wavelength of the peak (kept from the original; unused
        # downstream).
        peak_lam_rest = float(peak_lam_spectrum) / (1 + sightline.z_qso)
        _, mean_col_density_prediction, std_col_density_prediction, bias_correction = \
            sightline.prediction.get_coldensity_for_peak(peak)

        # Classify: NHI >= 20.3 is a DLA; otherwise check whether this peak is
        # the Ly-b of a stronger absorber before calling it a sub-DLA.
        if mean_col_density_prediction >= 20.3:
            absorber_type = "DLA"
        elif sightline.is_lyb(peak):
            absorber_type = "LYB"
        else:
            absorber_type = "SUBDLA"

        dla_tbl.add_row((sightline.ra, sightline.dec, sightline.z_qso,
                         float(z_dla), sightline.id, sightline.s2n,
                         str(sightline.id) + '00' + str(jj),
                         float(mean_col_density_prediction),
                         min(1.0, float(sightline.prediction.offset_conv_sum[peak])),
                         float(std_col_density_prediction),
                         absorber_type))

    return dla_tbl