def is_lyb(self, peakix):
    """
    Decide whether a predicted peak is the Ly-beta counterpart of another
    predicted peak on this sightline.

    The wavelength at ``peakix`` is divided by (1025.722 / 1215.67) to get the
    wavelength at which the matching Ly-alpha absorber would sit; the predicted
    peak closest to that wavelength is then compared against ``peakix``.

    :param peakix: a peak index taken from ``self.prediction.peaks_ixs``
    :return: boolean-like, truthy when the closest peak lies within 15
        wavelength units of the expected position AND the column density
        predicted at ``peakix`` is more than 0.3 below that of the closest peak
    """
    prediction = self.prediction
    assert prediction is not None and peakix in prediction.peaks_ixs

    lam, lam_rest, ix_dla_range = get_lam_data(self.loglam, self.z_qso)

    # Keep only pixels that have a full half-window (200 px) on both sides.
    half_window_px = 200
    pixel_ixs = np.nonzero(ix_dla_range)[0]
    has_left_margin = pixel_ixs >= half_window_px
    has_right_margin = pixel_ixs <= (len(lam) - half_window_px - 1)
    lam_analyse = lam[ix_dla_range][has_left_margin & has_right_margin]

    # Wavelength where the Ly-alpha line belonging to this peak would be found.
    expected_lya_lam = (lam_analyse[peakix]) / (1025.722/1215.67)

    # Distance of every predicted peak from that expected wavelength, and the
    # position (within peaks_ixs) of the closest one.
    separation = np.abs(lam_analyse[prediction.peaks_ixs] - expected_lya_lam)
    closest = np.argmin(separation)

    # Column densities of the candidate Ly-alpha peak and of the peak under test.
    _, lya_candidate_nhi, _, _ = prediction.get_coldensity_for_peak(
        prediction.peaks_ixs[closest])
    _, lyb_candidate_nhi, _, _ = prediction.get_coldensity_for_peak(peakix)

    # Validation 1: the closest peak must be near enough to count as a match.
    close_enough = separation[closest] <= 15
    # Validation 2: the Ly-beta candidate must be at least 0.3 weaker.
    weaker_than_lya = lyb_candidate_nhi < lya_candidate_nhi - 0.3

    # Same result as ``close_enough and weaker_than_lya``.
    if not close_enough:
        return close_enough
    return weaker_than_lya
def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel, v=best_v['all']):
    """
    Split the sightline into a series of snippets, each with length kernel.

    The sightline is padded first (via ``pad_sightline``) so that every pixel
    selected by ``get_lam_data`` gets a window, instead of dropping the pixels
    that sit closer than half a kernel to either edge.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
        Must already carry ``classification``, ``offsets`` and
        ``column_density`` (they are returned unchanged).
    REST_RANGE: list
        Forwarded to ``get_lam_data``.
    kernel: int, optional
        Window length in pixels; each window spans ``int(kernel / 2)`` pixels
        on either side of its centre.
    v:
        Forwarded to ``pad_sightline`` as its ``v`` keyword.

    Returns
    -------
    fluxes_matrix: np.ndarray
        One row per selected pixel, holding the padded flux window around it.
    classification, offsets, column_density:
        The corresponding attributes of ``sightline``, untouched.
    lam_matrix: np.ndarray
        Wavelength windows laid out like ``fluxes_matrix``.
    input_lam, input_flux: np.ndarray
        Padded wavelength / flux evaluated at the window centres.
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso, REST_RANGE)
    kernelrangepx = int(kernel / 2)

    # Pad the sightline so that windows near the edges are still full length.
    flux_padded, lam_padded, pixel_num_left = pad_sightline(
        sightline, lam, lam_rest, ix_dla_range, kernelrangepx, v=v)

    # Window centres expressed in padded-array coordinates.
    centres = np.nonzero(ix_dla_range)[0] + pixel_num_left

    # np.vstack needs a real sequence: a bare ``map`` iterator is deprecated
    # since NumPy 1.16 and rejected by recent releases, hence the lists.
    fluxes_matrix = np.vstack(
        [flux_padded[c - kernelrangepx:c + kernelrangepx] for c in centres])
    lam_matrix = np.vstack(
        [lam_padded[c - kernelrangepx:c + kernelrangepx] for c in centres])

    # The wavelength and flux values at the centre of each window.
    input_lam = lam_padded[centres]
    input_flux = flux_padded[centres]

    return (fluxes_matrix, sightline.classification, sightline.offsets,
            sightline.column_density, lam_matrix, input_lam, input_flux)
def analyze_pred(sightline, pred, conf, offset, coldensity, PEAK_THRESH):
    """
    Turn per-pixel model outputs into a list of absorber descriptions.

    Parameters
    ----------
    sightline:
        Sightline object; its ``prediction`` attribute is overwritten here.
    pred, conf, offset, coldensity:
        Per-pixel outputs passed to ``Prediction`` as ``loc_pred``,
        ``loc_conf``, ``offsets`` and ``density_data``.
        NOTE: ``offset`` is modified in place (see below).
    PEAK_THRESH:
        Forwarded to ``compute_peaks``.

    Returns
    -------
    list of dict
        One dict per entry of ``sightline.prediction.peaks_ixs`` with keys
        'rest', 'spectrum', 'z_dla', 'dla_confidence', 'column_density',
        'std_column_density', 'column_density_bias_adjust' and 'type'
        ("DLA", "LYB" or "SUBDLA").
    """
    # An offset carries no meaning where the pixel was classified negative,
    # so zero it. This writes into the caller's ``offset`` object.
    for i, label in enumerate(pred):
        if label == 0:
            offset[i] = 0

    sightline.prediction = Prediction(loc_pred=pred, loc_conf=conf,
                                      offsets=offset, density_data=coldensity)
    compute_peaks(sightline, PEAK_THRESH)
    sightline.prediction.smoothed_loc_conf()

    # Rebuild the wavelength arrays of the pixels that were fed to the model:
    # those with a full half-window (200 px) available on both sides.
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso)
    kernelrangepx = 200
    pixel_ixs = np.nonzero(ix_dla_range)[0]
    cut = (pixel_ixs >= kernelrangepx) & (pixel_ixs <= (len(lam) - kernelrangepx - 1))
    lam_analyse = lam[ix_dla_range][cut]
    # Hoisted out of the peak loop: it does not depend on the peak.
    lam_rest_analyse = lam_rest[ix_dla_range][cut]

    dla_sub_lyb = []
    for peak in sightline.prediction.peaks_ixs:
        peak_lam_rest = lam_rest_analyse[peak]
        peak_lam_spectrum = lam_analyse[peak]
        z_dla = float(peak_lam_spectrum) / 1215.67 - 1

        (_, mean_col_density_prediction, std_col_density_prediction,
         bias_correction) = sightline.prediction.get_coldensity_for_peak(peak)

        # 20.3 is the column-density threshold for a DLA; weaker peaks are
        # only tested for being Ly-beta when they fall below it.
        if mean_col_density_prediction >= 20.3:
            absorber_type = "DLA"
        elif sightline.is_lyb(peak):
            absorber_type = "LYB"
        else:
            absorber_type = "SUBDLA"

        dla_sub_lyb.append({
            'rest': float(peak_lam_rest),
            'spectrum': float(peak_lam_spectrum),
            'z_dla': float(z_dla),
            # Confidence is capped at 1.0.
            'dla_confidence': min(1.0, float(sightline.prediction.offset_conv_sum[peak])),
            'column_density': float(mean_col_density_prediction),
            'std_column_density': float(std_col_density_prediction),
            'column_density_bias_adjust': float(bias_correction),
            'type': absorber_type,
        })
    return dla_sub_lyb
def select_samples_50p_pos_neg(sightline, kernel=kernel):
    """
    Pick a random set of positive-sample indices for a sightline.

    The number of indices is ``min(#positive, #negative)`` among the pixels
    that keep a full half-kernel on both sides, i.e. the size a 50/50 split
    would use. Only the POSITIVE indices are returned: the variant that also
    returned the matching negative indices was disabled in the code this
    function was derived from, and that behaviour is preserved here.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
        Must carry ``classification`` (1=DLA; 0=Not; -1=not analyzed).
    kernel: int, optional
        Window length in pixels; ``int(kernel / 2)`` is the edge margin.

    Returns
    -------
    idx: np.ndarray
        Randomly ordered indices (into the edge-trimmed classification array)
        of positive samples. Empty when the sightline has no positive or no
        negative samples.
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso)
    kernelrangepx = int(kernel / 2)

    # Drop pixels that are too close to either edge to own a full window.
    pixel_ixs = np.nonzero(ix_dla_range)[0]
    cut = (pixel_ixs >= kernelrangepx) & (pixel_ixs <= (len(lam) - kernelrangepx - 1))
    newclassification = sightline.classification[cut]

    num_pos = int(np.sum(newclassification == 1))
    num_neg = int(np.sum(newclassification == 0))
    n_samples = min(num_pos, num_neg)

    r = np.random.permutation(len(newclassification))
    pos_ixs = r[newclassification[r] == 1][:n_samples]

    # Returned as-is: np.hstack() on an empty index array raises ValueError,
    # and on a non-empty 1-D array it only reproduces the same values.
    return pos_ixs
def split_sightline_into_samples(sightline, REST_RANGE=REST_RANGE, kernel=kernel):
    """
    Split the sightline into a series of snippets, each with length kernel.

    Pixels closer than half a kernel to either end of the spectrum are
    dropped, so every snippet is a full-length window of the unpadded data.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
        Must already carry ``classification``, ``offsets`` and
        ``column_density``.
    REST_RANGE: list
        Forwarded to ``get_lam_data``.
    kernel: int, optional
        Window length in pixels; each window spans ``int(kernel / 2)`` pixels
        on either side of its centre.

    Returns
    -------
    fluxes_matrix: np.ndarray
        One row per retained pixel, holding the flux window around it.
    classification, offsets, column_density: np.ndarray
        The sightline labels restricted to the retained pixels.
    lam_matrix: np.ndarray
        Wavelength windows laid out like ``fluxes_matrix``.
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso, REST_RANGE)
    kernelrangepx = int(kernel / 2)

    # Boundary handling: keep only pixels with a full half-window on each side.
    pixel_ixs = np.nonzero(ix_dla_range)[0]
    cut = (pixel_ixs >= kernelrangepx) & (pixel_ixs <= (len(lam) - kernelrangepx - 1))
    centres = pixel_ixs[cut]

    # np.vstack needs a real sequence: a bare ``map`` iterator is deprecated
    # since NumPy 1.16 and rejected by recent releases, hence the lists.
    flux = sightline.flux
    fluxes_matrix = np.vstack(
        [flux[c - kernelrangepx:c + kernelrangepx] for c in centres])
    lam_matrix = np.vstack(
        [lam[c - kernelrangepx:c + kernelrangepx] for c in centres])

    return (fluxes_matrix, sightline.classification[cut], sightline.offsets[cut],
            sightline.column_density[cut], lam_matrix)
def draw_sightline(sightline, pred, pred_abs):
    """
    Plot a sightline with its catalogued DLAs (blue) and predicted absorbers
    (red), then show the figure.

    Parameters
    ----------
    sightline:
        Sightline object providing ``loglam``, ``z_qso``, ``flux``, ``dlas``,
        ``id`` and ``s2n``.
    pred:
        Unused; kept so existing callers keep working.
    pred_abs: iterable of dict
        Predicted absorbers; each entry must provide 'spectrum' (observed
        wavelength) and 'column_density'.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso)
    lam_analyse = lam[ix_dla_range]
    flux_analyse = sightline.flux[ix_dla_range]
    ab = np.max(flux_analyse)  # vertical anchor for the text annotations
    matrix_lam = np.array(lam_analyse)
    matrix_flux = np.array(flux_analyse)

    # Absorption profiles of the catalogued DLAs.
    lya = []
    wvcen = []
    col_density = []
    for dla in sightline.dlas:
        zabs = dla.central_wavelength / 1215.67 - 1
        NHI = dla.col_density
        lya.append(get_dla(zabs, NHI, matrix_lam, matrix_flux, wvoff=60.))
        wvcen.append(dla.central_wavelength)
        col_density.append(NHI)

    # Absorption profiles of the predicted absorbers.
    lya_preds = []
    central_wave = []
    col_d = []
    for pred_ab in pred_abs:
        z = pred_ab['spectrum'] / 1215.67 - 1
        nhi = pred_ab['column_density']
        lya_preds.append(get_dla(z, nhi, matrix_lam, matrix_flux, wvoff=60.))
        central_wave.append(pred_ab['spectrum'])
        col_d.append(nhi)

    plt.rcParams['figure.figsize'] = (12.0, 6.0)
    plt.plot(lam_analyse, flux_analyse, color='black')
    for lyaabs in lya:
        plt.plot(lyaabs[0], lyaabs[1], color='blue', label='real dla')
    for lya_pred in lya_preds:
        plt.plot(lya_pred[0], lya_pred[1], color='red', label='pred dla')
    plt.legend(bbox_to_anchor=(0.88, 1.02, 10, 20), loc=3, ncol=1, mode=None,
               borderaxespad=0, fontsize=18)

    # Mark the quasar's Ly-alpha emission wavelength.
    plt.axvline(x=(sightline.z_qso + 1) * 1215.67, ls="-", c='yellow', linewidth=3)
    plt.xlim([3800, 1250 * (1 + sightline.z_qso)])

    # Mathtext snippets are raw strings: '\m' and '\A' are not valid escape
    # sequences and trigger a warning in ordinary string literals.
    nhi_label = r'log${N_{\mathregular{HI}}}$'

    for centre, nhi in zip(wvcen, col_density):
        plt.axvline(x=centre, ls="-", c="blue", linewidth=2)
        plt.text(centre + 5, ab - 1, 'GT:' + '%.2f' % centre,
                 fontsize=18, color='blue')
        plt.text(centre + 5, ab, nhi_label + '=%.2f' % nhi,
                 fontsize=18, color='blue')

    for centre, nhi in zip(central_wave, col_d):
        plt.axvline(x=centre, ls="-", c="red", linewidth=2)
        # NOTE(review): predicted absorbers are also tagged 'GT:' - possibly a
        # copy-paste from the ground-truth loop; confirm the intended label.
        plt.text(centre + 10, ab - 3, 'GT:' + '%.2f' % centre,
                 fontsize=18, color='red')
        plt.text(centre + 10, ab - 2, nhi_label + '=%.2f' % nhi,
                 fontsize=18, color='red')

    plt.ylabel('Relative Flux', fontsize=20)
    plt.xlabel('Wavelength' + '[' + r'$\AA$' + ']', fontsize=20)
    plt.title('spec-%s snr-%s' % (sightline.id, sightline.s2n), fontdict=None,
              loc='center', pad='20', fontsize=30, color='blue')
    plt.show()
def _clipped_window(centre, half_width):
    """Slice covering centre +/- half_width whose bounds never go negative."""
    # A negative slice bound would wrap around to the end of the array; an
    # upper bound past the end is already truncated by slicing itself.
    return slice(max(centre - half_width, 0), max(centre + half_width + 1, 0))


def _fill_if_unset(values, index, value):
    """Write value at index only if index is in bounds and still NaN there."""
    if 0 <= index < len(values) and np.isnan(values[index]):
        values[index] = value


def label_sightline(sightline, kernel=kernel, REST_RANGE=REST_RANGE, pos_sample_kernel_percent=0.3):
    """
    Add labels to input sightline based on the DLAs along that sightline.

    Only DLAs whose rest-frame wavelength (relative to the quasar) lies
    strictly between 912 and 1220 and whose observed central wavelength is at
    least 3700 are labelled.

    Parameters
    ----------
    sightline: dla_cnn.data_model.Sightline
        Receives the ``classification``, ``offsets`` and ``column_density``
        attributes as a side effect.
    kernel: int
    REST_RANGE: list
        Forwarded to ``get_lam_data``.
    pos_sample_kernel_percent: float
        Fraction of the kernel counted as positive around a DLA; the half
        width in pixels is ``int(kernel * pos_sample_kernel_percent / 2)``.

    Returns
    -------
    classification: np.ndarray
        1 / 0 / -1 for DLA / non-DLA / border (not used).
    offsets_array: np.ndarray
        Signed pixel offset to the DLA centre (positive left of the centre,
        negative right of it); 0 where no DLA is nearby.
    column_density: np.ndarray
        Column density of the nearby DLA; 0 where no DLA is nearby.
    """
    lam, lam_rest, ix_dla_range = get_lam_data(sightline.loglam, sightline.z_qso, REST_RANGE)
    samplerangepx = int(kernel*pos_sample_kernel_percent/2)
    lam_in_range = lam[ix_dla_range]
    n_pix = int(np.sum(ix_dla_range))

    # Pixel index (within the selected range) and column density per usable DLA.
    ix_dlas = []
    coldensity_dlas = []
    for dla in sightline.dlas:
        rest_wavelength = dla.central_wavelength / (1 + sightline.z_qso)
        if 912 < rest_wavelength < 1220 and dla.central_wavelength >= 3700:
            ix_dlas.append(np.abs(lam_in_range - dla.central_wavelength).argmin())
            coldensity_dlas.append(dla.col_density)

    # CLASSIFICATION (1 = positive sample, 0 = negative sample,
    # -1 = border sample, not used). Start with all samples zero.
    classification = np.zeros(n_pix, dtype=np.float32)

    # Border regions are written for ALL DLAs before any positive region, so
    # a positive window is never erased by a neighbouring DLA's border.
    for ix_dla in ix_dlas:
        classification[_clipped_window(ix_dla, samplerangepx * 2)] = -1
        # Mark out the Ly-B area of this DLA as well.
        lyb_ix = sightline.get_lyb_index(ix_dla)
        classification[_clipped_window(lyb_ix, samplerangepx)] = -1

    # Custom data markers are not applied here (that step is disabled).

    # Overlay the positive samples.
    for ix_dla in ix_dlas:
        classification[_clipped_window(ix_dla, samplerangepx)] = 1

    # OFFSETS & COLUMN DENSITY: start from NaN markers meaning "not set yet".
    offsets_array = np.full([n_pix], np.nan, dtype=np.float32)
    column_density = np.full([n_pix], np.nan, dtype=np.float32)

    # Work outward from every DLA one pixel at a time, so that where two DLAs
    # are close each pixel keeps the value of the nearer one. Writes that
    # would land outside the array are skipped instead of wrapping around.
    for i in range(samplerangepx + 1):
        for ix_dla, coldensity in zip(ix_dlas, coldensity_dlas):
            _fill_if_unset(offsets_array, ix_dla + i, -i)
            _fill_if_unset(offsets_array, ix_dla - i, i)
            _fill_if_unset(column_density, ix_dla + i, coldensity)
            _fill_if_unset(column_density, ix_dla - i, coldensity)

    # Pixels never touched by a DLA end up as 0.
    offsets_array = np.nan_to_num(offsets_array)
    column_density = np.nan_to_num(column_density)

    # Append these to the Sightline.
    sightline.classification = classification
    sightline.offsets = offsets_array
    sightline.column_density = column_density

    return classification, offsets_array, column_density