Exemple #1
0
    def is_lyb(self, peakix):
        """
        Tell whether a predicted peak is the Ly-beta line of another predicted absorber.

        The wavelength at peakix is divided by 1025.722/1215.67 (Ly-beta over
        Ly-alpha rest wavelength) to get the wavelength at which the matching
        Ly-alpha absorber would sit. The predicted peak closest to that
        wavelength is the candidate Ly-alpha absorber.

        :param peakix: an index taken from self.prediction.peaks_ixs
        :return: truthy when the candidate is within 15 (in the units of lam) of
                 the expected position AND the column density at peakix is more
                 than 0.3 below the candidate's; falsy otherwise
        """
        prediction = self.prediction
        assert prediction is not None and peakix in prediction.peaks_ixs

        lam, _, in_range = get_lam_data(self.loglam, self.z_qso)

        # Keep only pixels that have a full half-kernel (200 px) on both sides.
        half_kernel = 200
        range_pixels = np.nonzero(in_range)[0]
        last_allowed = len(lam) - half_kernel - 1
        keep = (range_pixels >= half_kernel) & (range_pixels <= last_allowed)
        lam_analysed = lam[in_range][keep]

        # Where the Ly-alpha line would be if peakix were its Ly-beta line.
        lyb_over_lya = 1025.722 / 1215.67
        expected_lya_lam = lam_analysed[peakix] / lyb_over_lya

        # Distance of every predicted peak from that expected position,
        # and the position (within peaks_ixs) of the closest one.
        separations = np.abs(lam_analysed[prediction.peaks_ixs] - expected_lya_lam)
        closest = np.argmin(separations)

        # Column densities of the candidate Ly-alpha peak and of peakix itself.
        _, candidate_lya_nhi, _, _ = \
            prediction.get_coldensity_for_peak(prediction.peaks_ixs[closest])
        _, candidate_lyb_nhi, _, _ = \
            prediction.get_coldensity_for_peak(peakix)

        # The candidate must be close enough to match, and the Ly-beta line
        # must be weaker than the Ly-alpha line by more than 0.3.
        close_enough = separations[closest] <= 15
        weaker_than_lya = candidate_lyb_nhi < candidate_lya_nhi - 0.3

        # True -> Ly-beta, False -> not Ly-beta.
        return close_enough and weaker_than_lya
Exemple #2
0
def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel, v=best_v['all']):
    """
    Split the sightline into a series of snippets, each with length kernel

    The sightline is padded with pad_sightline before the snippets are cut, so
    no pixel of the analysed range is dropped (unlike the variant that cuts
    off the edges).
    NOTE(review): this relies on pad_sightline adding at least int(kernel/2)
    pixels on each side -- confirm against its implementation.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
    REST_RANGE: list
        Forwarded to get_lam_data.
    kernel: int, optional
        Snippet length in pixels; each snippet covers pixels
        [c - int(kernel/2), c + int(kernel/2)) around its centre c.
    v:
        Forwarded unchanged to pad_sightline.

    Returns
    -------
    fluxes_matrix: np.ndarray
        One row per analysed pixel, holding the padded flux window around it.
    classification, offsets, column_density:
        sightline.classification, sightline.offsets and
        sightline.column_density, returned as stored on the sightline.
    lam_matrix: np.ndarray
        Same layout as fluxes_matrix, holding padded wavelengths.
    input_lam, input_flux: np.ndarray
        Padded wavelength / flux values at the analysed pixels themselves.

    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso, REST_RANGE)
    kernelrangepx = int(kernel/2) #200

    # Pad the sightline; pixel_num_left is the shift from an index in the
    # unpadded arrays to the same pixel in the padded arrays.
    flux_padded, lam_padded, pixel_num_left = pad_sightline(
        sightline, lam, lam_rest, ix_dla_range, kernelrangepx, v=v)

    # Indices (in the padded arrays) of every pixel inside the analysed range.
    centres = np.nonzero(ix_dla_range)[0] + pixel_num_left

    # np.vstack needs a real sequence: recent NumPy releases raise TypeError
    # when handed a map/generator, so the windows are built as lists.
    fluxes_matrix = np.vstack(
        [flux_padded[c - kernelrangepx:c + kernelrangepx] for c in centres])
    lam_matrix = np.vstack(
        [lam_padded[c - kernelrangepx:c + kernelrangepx] for c in centres])

    # The wavelength and flux arrays that the snippets are centred on.
    input_lam = lam_padded[centres]
    input_flux = flux_padded[centres]

    return fluxes_matrix, sightline.classification, sightline.offsets, sightline.column_density, lam_matrix, input_lam, input_flux
def analyze_pred(sightline, pred, conf, offset, coldensity, PEAK_THRESH):
    """
    Convert per-pixel model output for one sightline into a list of absorber records.

    Side effects: entries of `offset` are zeroed in place wherever `pred` is 0,
    and sightline.prediction is replaced by a new Prediction built from the
    arguments.

    Parameters
    ----------
    sightline:
        Object providing loglam, z_qso and is_lyb(); receives the prediction.
    pred, conf, offset, coldensity:
        Per-pixel outputs, passed to Prediction as loc_pred, loc_conf,
        offsets and density_data respectively.
    PEAK_THRESH:
        Forwarded to compute_peaks.

    Returns
    -------
    list of dict
        One dict per entry of sightline.prediction.peaks_ixs with the keys
        'rest', 'spectrum', 'z_dla', 'dla_confidence', 'column_density',
        'std_column_density', 'column_density_bias_adjust' and 'type'
        ("DLA", "LYB" or "SUBDLA").
    """
    # An offset carries no meaning where nothing was detected.
    for ix, flag in enumerate(pred):
        if flag == 0:
            offset[ix] = 0

    sightline.prediction = Prediction(loc_pred=pred,
                                      loc_conf=conf,
                                      offsets=offset,
                                      density_data=coldensity)

    # Peak finding on the fresh prediction
    # (presumably fills sightline.prediction.peaks_ixs -- see compute_peaks).
    compute_peaks(sightline, PEAK_THRESH)
    sightline.prediction.smoothed_loc_conf()

    lam, lam_rest, in_range = get_lam_data(sightline.loglam, sightline.z_qso)

    # Restrict to pixels with a full half-kernel (200 px) on both sides, so
    # peak indices address the same pixel set the model was fed.
    half_kernel = 200
    range_pixels = np.nonzero(in_range)[0]
    keep = (range_pixels >= half_kernel) & (
        range_pixels <= (len(lam) - half_kernel - 1))
    lam_obs_kept = lam[in_range][keep]
    lam_rest_kept = lam_rest[in_range][keep]

    absorbers = []
    for peak in sightline.prediction.peaks_ixs:
        rest_wave = lam_rest_kept[peak]
        obs_wave = lam_obs_kept[peak]
        z_dla = float(obs_wave) / 1215.67 - 1
        _, nhi_mean, nhi_std, nhi_bias = \
            sightline.prediction.get_coldensity_for_peak(peak)

        # Strong absorbers are DLAs outright; weaker ones are either the
        # Ly-beta line of another absorber or a sub-DLA.
        if nhi_mean >= 20.3:
            kind = "DLA"
        elif sightline.is_lyb(peak):
            kind = "LYB"
        else:
            kind = "SUBDLA"

        absorbers.append({
            'rest': float(rest_wave),
            'spectrum': float(obs_wave),
            'z_dla': float(z_dla),
            # confidence is capped at 1.0
            'dla_confidence': min(1.0, float(sightline.prediction.offset_conv_sum[peak])),
            'column_density': float(nhi_mean),
            'std_column_density': float(nhi_std),
            'column_density_bias_adjust': float(nhi_bias),
            'type': kind,
        })
    return absorbers
def select_samples_50p_pos_neg(sightline, kernel=kernel):
    """
    Draw a random set of positive (DLA) sample indices for a sightline.

    Pixels closer than int(kernel / 2) to either end of the spectrum are
    discarded first. Among the remaining labels (1=DLA; 0=Not; -1=not
    analyzed) at most min(#positive, #negative) positive indices are kept.

    NOTE: despite the function name, only the positive indices are returned.
    The negative count is used solely to cap how many positives are selected.

    Parameters
    ----------
    sightline:
        Object providing loglam, z_qso and a classification array.
    kernel: int, optional
        Snippet length in pixels; half of it defines the discarded edges.

    Returns
    -------
    idx: np.ndarray
        Randomly ordered positive indices into the edge-trimmed
        classification array.
        NOTE(review): np.hstack fails on an empty selection, so a sightline
        with no selectable positives is expected to raise -- confirm callers
        handle that.

    """
    lam, _, in_range = get_lam_data(sightline.loglam, sightline.z_qso)

    # Drop pixels that lack a full half-kernel on either side.
    half_kernel = int(kernel / 2)
    range_pixels = np.nonzero(in_range)[0]
    last_allowed = len(lam) - half_kernel - 1
    keep = (range_pixels >= half_kernel) & (range_pixels <= last_allowed)
    labels = sightline.classification[keep]

    # The rarer of the two classes caps how many indices are drawn.
    positives = np.sum(labels == 1, dtype=np.float64)
    negatives = np.sum(labels == 0, dtype=np.float64)
    quota = int(min(positives, negatives))

    # Shuffle all positions, then keep the first `quota` positive ones.
    shuffled = np.random.permutation(len(labels))
    is_positive = labels[shuffled] == 1
    chosen_positive = shuffled[is_positive][:quota]

    return np.hstack(chosen_positive)
def split_sightline_into_samples(sightline,
                                 REST_RANGE=REST_RANGE,
                                 kernel=kernel):
    """
    Split the sightline into a series of snippets, each with length kernel

    Only pixels of the analysed range that have a full half-kernel of
    spectrum on both sides are used as snippet centres; the per-pixel labels
    are trimmed with the same mask so every returned array lines up row by
    row.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
    REST_RANGE: list
        Forwarded to get_lam_data.
    kernel: int, optional
        Snippet length in pixels; each snippet covers pixels
        [c - int(kernel/2), c + int(kernel/2)) around its centre c.

    Returns
    -------
    fluxes_matrix: np.ndarray
        One row per retained pixel, holding the flux window around it.
    classification, offsets, column_density: np.ndarray
        The sightline's label arrays restricted to the retained pixels.
    lam_matrix: np.ndarray
        Same layout as fluxes_matrix, holding wavelengths.

    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam,
                                               sightline.z_qso, REST_RANGE)
    kernelrangepx = int(kernel / 2)  #200

    # Consider boundaries: keep pixels with a full half-kernel on both sides.
    range_pixels = np.nonzero(ix_dla_range)[0]
    cut = (range_pixels >= kernelrangepx) & (
        range_pixels <= (len(lam) - kernelrangepx - 1))
    centres = range_pixels[cut]

    # np.vstack needs a real sequence: recent NumPy releases raise TypeError
    # when handed a map/generator, so the windows are built as lists.
    fluxes_matrix = np.vstack([
        sightline.flux[c - kernelrangepx:c + kernelrangepx] for c in centres
    ])
    lam_matrix = np.vstack(
        [lam[c - kernelrangepx:c + kernelrangepx] for c in centres])

    return fluxes_matrix, sightline.classification[cut], sightline.offsets[
        cut], sightline.column_density[cut], lam_matrix
Exemple #6
0
def draw_sightline(sightline, pred, pred_abs):
    """
    Plot a sightline's flux with ground-truth and predicted absorbers, then show the figure.

    Ground-truth absorbers (sightline.dlas) are drawn in blue and predicted
    absorbers (pred_abs) in red: each gets a profile from get_dla, a vertical
    line at its central wavelength and two text labels (wavelength and
    column density). A yellow vertical line is drawn at
    (1 + z_qso) * 1215.67.

    Parameters
    ----------
    sightline:
        Object providing loglam, z_qso, flux, dlas, id and s2n.
    pred:
        Not used by this function; kept for interface compatibility.
    pred_abs: iterable of dict
        Predicted absorbers; the keys 'spectrum' (central wavelength) and
        'column_density' are read.

    Returns
    -------
    None. The figure is displayed with plt.show().
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam,
                                               sightline.z_qso)
    lam_analyse = lam[ix_dla_range]
    flux_analyse = sightline.flux[ix_dla_range]
    # Highest flux value; used as the vertical anchor for the text labels.
    ab = np.max(flux_analyse)
    matrix_lam = np.array(lam_analyse)
    matrix_flux = np.array(flux_analyse)

    # Raw strings: '\m' and '\A' are not valid Python escape sequences and
    # trigger a warning (a future error) in normal string literals. The
    # runtime text is unchanged.
    nhi_label = r'log${N_{\mathregular{HI}}}$'

    # Ground-truth absorbers: profile, central wavelength, column density.
    lya = []
    wvcen = []
    col_density = []
    for dla in sightline.dlas:
        zabs = (dla.central_wavelength) / 1215.67 - 1
        NHI = dla.col_density
        lya.append(get_dla(zabs, NHI, matrix_lam, matrix_flux, wvoff=60.))
        wvcen.append(dla.central_wavelength)
        col_density.append(NHI)

    # Predicted absorbers: same three quantities.
    lya_preds = []
    central_wave = []
    col_d = []
    for pred_ab in pred_abs:
        z = pred_ab['spectrum'] / 1215.67 - 1
        nhi = pred_ab['column_density']
        lya_preds.append(get_dla(z, nhi, matrix_lam, matrix_flux, wvoff=60.))
        central_wave.append(pred_ab['spectrum'])
        col_d.append(nhi)

    plt.rcParams['figure.figsize'] = (12.0, 6.0)

    plt.plot(lam_analyse, flux_analyse, color='black')

    for profile_lam, profile_flux in ((p[0], p[1]) for p in lya):
        plt.plot(profile_lam, profile_flux, color='blue', label='real dla')
    for profile_lam, profile_flux in ((p[0], p[1]) for p in lya_preds):
        plt.plot(profile_lam, profile_flux, color='red', label='pred dla')
    plt.legend(bbox_to_anchor=(0.88, 1.02, 10, 20),
               loc=3,
               ncol=1,
               mode=None,
               borderaxespad=0,
               fontsize=18)

    # Marker at (1 + z_qso) * 1215.67.
    plt.axvline(x=(sightline.z_qso + 1) * 1215.67,
                ls="-",
                c='yellow',
                linewidth=3)
    plt.xlim([3800, 1250 * (1 + sightline.z_qso)])

    # Ground-truth markers and labels.
    for wave, nhi in zip(wvcen, col_density):
        plt.axvline(x=wave, ls="-", c="blue", linewidth=2)
        plt.text(wave + 5,
                 ab - 1,
                 'GT:' + '%.2f' % wave,
                 fontsize=18,
                 color='blue')
        plt.text(wave + 5,
                 ab,
                 nhi_label + '=%.2f' % nhi,
                 fontsize=18,
                 color='blue')

    # Prediction markers and labels.
    # NOTE(review): the predicted wavelength is also labelled 'GT:' -- this
    # looks like a copy/paste leftover; confirm the intended label text.
    for wave, nhi in zip(central_wave, col_d):
        plt.axvline(x=wave, ls="-", c="red", linewidth=2)
        plt.text(wave + 10,
                 ab - 3,
                 'GT:' + '%.2f' % wave,
                 fontsize=18,
                 color='red')
        plt.text(wave + 10,
                 ab - 2,
                 nhi_label + '=%.2f' % nhi,
                 fontsize=18,
                 color='red')

    plt.ylabel('Relative Flux', fontsize=20)
    plt.xlabel('Wavelength' + '[' + r'$\AA$' + ']', fontsize=20)
    plt.title('spec-%s snr-%s' % (sightline.id, sightline.s2n),
              fontdict=None,
              loc='center',
              pad='20',
              fontsize=30,
              color='blue')

    plt.show()
Exemple #7
0
def _clipped_window(centre, half_width, size):
    """Return the slice [centre - half_width, centre + half_width] clipped to [0, size)."""
    start = min(max(centre - half_width, 0), size)
    stop = min(max(centre + half_width + 1, 0), size)
    return slice(start, stop)


def label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE, pos_sample_kernel_percent=0.3):
    """
    Add labels to input sightline based on the DLAs along that sightline

    Only DLAs whose central wavelength is >= 3700 and whose rest-frame
    wavelength, central_wavelength / (1 + z_qso), lies strictly between 912
    and 1220 are labelled. All label windows are clipped to the array
    bounds, so a DLA close to either end of the analysed range is still
    labelled and never writes through a wrapped (negative) index.

    The resulting arrays are also stored on the sightline as
    sightline.classification, sightline.offsets and sightline.column_density.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
    kernel: int
        Snippet length in pixels.
    REST_RANGE: list
        Forwarded to get_lam_data.
    pos_sample_kernel_percent: float
        Fraction of the kernel counted as a positive sample around a DLA;
        the half-width used is int(kernel * pos_sample_kernel_percent / 2).

    Returns
    -------
    classification: np.ndarray
        is 1 / 0 / -1 for DLA/nonDLA/border
    offsets_array: np.ndarray
        offset (in pixels) from each labelled pixel to its DLA centre,
        0 where no DLA is within the positive window
    column_density: np.ndarray
        column density of the DLA each labelled pixel belongs to, 0 elsewhere

    """
    lam, _, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso, REST_RANGE)
    samplerangepx = int(kernel*pos_sample_kernel_percent/2) #60
    n_pix = int(np.sum(ix_dla_range))
    lam_in_range = lam[ix_dla_range]

    # Index (within the analysed range) and column density of each usable DLA.
    ix_dlas = []
    coldensity_dlas = []
    for dla in sightline.dlas:
        rest_wavelength = dla.central_wavelength / (1 + sightline.z_qso)
        if 912 < rest_wavelength < 1220 and dla.central_wavelength >= 3700:
            ix_dlas.append(np.abs(lam_in_range - dla.central_wavelength).argmin())
            coldensity_dlas.append(dla.col_density)    # column densities matching ix_dlas

    # CLASSIFICATION (1 = positive sample, 0 = negative sample, -1 = border sample not used)
    # Start with all samples zero
    classification = np.zeros(n_pix, dtype=np.float32)
    # Border regions are written for ALL DLAs first, so that the positive
    # windows written afterwards always take precedence.
    for ix_dla in ix_dlas:
        classification[_clipped_window(ix_dla, samplerangepx*2, n_pix)] = -1
        # Mark out Ly-B areas
        # (get_lyb_index presumably returns the pixel index of this DLA's
        #  Ly-beta line -- confirm against Sightline).
        lyb_ix = sightline.get_lyb_index(ix_dla)
        classification[_clipped_window(lyb_ix, samplerangepx, n_pix)] = -1
    # Custom data markers are currently not applied.
    # overlay samples that are positive
    for ix_dla in ix_dlas:
        classification[_clipped_window(ix_dla, samplerangepx, n_pix)] = 1

    # OFFSETS & COLUMN DENSITY
    offsets_array = np.full(n_pix, np.nan, dtype=np.float32)     # Start all NaN markers
    column_density = np.full(n_pix, np.nan, dtype=np.float32)
    # Work from each DLA outward, one pixel of distance at a time, and only
    # fill entries that are still NaN: a pixel between two nearby DLAs keeps
    # the values of whichever DLA reached it first (i.e. the closer one).
    for i in range(samplerangepx + 1):
        for ix_dla, nhi in zip(ix_dlas, coldensity_dlas):
            for ix, offset in ((ix_dla + i, -i), (ix_dla - i, i)):
                if not 0 <= ix < n_pix:
                    # outside the analysed range: nothing to label
                    continue
                if np.isnan(offsets_array[ix]):
                    offsets_array[ix] = offset
                if np.isnan(column_density[ix]):
                    column_density[ix] = nhi
    # Pixels never reached by a DLA become 0.
    offsets_array = np.nan_to_num(offsets_array)
    column_density = np.nan_to_num(column_density)

    # Append these to the Sightline
    sightline.classification = classification
    sightline.offsets = offsets_array
    sightline.column_density = column_density

    # classification is 1 / 0 / -1 for DLA/nonDLA/border
    # offsets_array is offset
    return classification, offsets_array, column_density