def data(clustering1, clustering2, survey_data, path_1, path_2, plts):
    """Load two clustering outputs and join them to the survey catalogue.

    Parameters
    ----------
    clustering1, clustering2 : str
        File names of the two clustering result tables.
    survey_data : Table
        Survey catalogue; joined to the clustering tables on ``object_id``.
    path_1, path_2 : str
        Directories containing ``clustering1`` and ``clustering2``.
    plts
        Unused; kept so existing callers keep working.

    Returns
    -------
    tuple
        ``(clust_labels, clust1_data, clust2_data)``: the two clustering
        tables joined to each other, and each one joined to ``survey_data``.
    """
    # Local import keeps this block self-contained.
    import os

    # os.path.join replaces the hard-coded '\\' separator, which only
    # produced a valid path on Windows.
    clust1 = Table.read(os.path.join(path_1, clustering1),
                        format='ascii.commented_header', guess=False)
    clust2 = Table.read(os.path.join(path_2, clustering2),
                        format='ascii.commented_header', guess=False)

    clust1_data = join(clust1, survey_data, 'object_id')
    clust2_data = join(clust2, survey_data, 'object_id')
    clust_labels = join(clust1, clust2, 'object_id')

    return (clust_labels, clust1_data, clust2_data)
def plotting(dimensions, model_name, f_path, s_path):
    """Draw the 2D and/or 3D cluster plots listed in the plot-definition files.

    For each row of ``plots.txt`` (when ``'2d'`` is in ``dimensions``) and of
    ``plots_3d.txt`` (when ``'3d'`` is in ``dimensions``), the matching
    clustering output is loaded with ``load_files``, joined to the survey
    catalogue on ``object_id`` and passed to ``make_2d_plots`` or
    ``make_3d_plots``.

    Parameters
    ----------
    dimensions
        Tested with ``'2d' in dimensions`` and ``'3d' in dimensions``.
    model_name : str
        File name of the model colours table (inside the hard-coded
        ``model_colours`` directory).
    f_path
        Forwarded unchanged to ``load_files``.
    s_path : str
        Sub-directory of the hard-coded project directory used as save path.

    Returns
    -------
    tuple
        Always the empty tuple.
    """
    read_opts = dict(format='ascii.commented_header', guess=False)

    model_file = 'C:\\Users\\Alex\\Documents\\GitHub\\m83_clustering\\model_colours\\{}'.format(model_name)
    survey_data = Table.read('data_v3.txt', **read_opts)
    save_path = 'C:\\Users\\Alex\\Documents\\GitHub\\m83_clustering\\{}\\'.format(s_path)
    model = Table.read(model_file, **read_opts)

    if '2d' in dimensions:
        # Each row names four bands plus the clustering run to plot.
        plots = Table.read('plots.txt', **read_opts)
        for row_idx in range(len(plots)):
            waves = plots[row_idx]
            bands = [waves[0], waves[1], waves[2], waves[3]]
            colours = '{}-{}_{}-{}'.format(*bands)
            mod_filt_2d = np.vstack([model[band] for band in bands])
            n_clusters = plots['n_clust'][row_idx]
            algorithm = plots['clustering'][row_idx]

            # ID file and cluster centres for this combination.
            id_file_2d, centers_2d = load_files(n_clusters, algorithm, waves,
                                                dimensions, colours, f_path)
            cluster_data_2d = join(id_file_2d, survey_data, 'object_id')
            colour1_2d, colour2_2d, wave_2, wave_4 = load_2d_cluster_data(
                cluster_data_2d, waves)
            make_2d_plots(colour1_2d, colour2_2d, waves, n_clusters,
                          algorithm, cluster_data_2d, centers_2d, save_path,
                          wave_2, wave_4, mod_filt_2d)

    if '3d' in dimensions:
        # Same procedure, with six bands per row.
        plots_3d = Table.read('plots_3d.txt', **read_opts)
        for row_idx in range(len(plots_3d)):
            waves = plots_3d[row_idx]
            bands = [waves[0], waves[1], waves[2],
                     waves[3], waves[4], waves[5]]
            colours = '{}-{}_{}-{}_{}-{}'.format(*bands)
            mod_filt_3d = np.vstack([model[band] for band in bands])
            n_clusters = plots_3d['n_clust'][row_idx]
            algorithm = plots_3d['clustering'][row_idx]

            id_file_3d, centers_3d = load_files(n_clusters, algorithm, waves,
                                                dimensions, colours, f_path)
            cluster_data_3d = join(id_file_3d, survey_data, 'object_id')
            (colour1, colour2, colour3,
             base, wave_base1, wave_base2) = load_3d_cluster_data(
                cluster_data_3d, waves)
            make_3d_plots(colour1, colour2, colour3, waves, n_clusters,
                          algorithm, cluster_data_3d, centers_3d, save_path,
                          base, wave_base1, wave_base2, mod_filt_3d)

    return ()
def doall(write_fits=False, write_latex=False, make_mega_table=True):
    """Read the line lists, convert them, and optionally export or merge them.

    Parameters
    ----------
    write_fits : bool
        Write the atomic, PAH strength, PAH EQW and normalised EQW tables to
        FITS files.
    write_latex : bool
        Write the same four tables as LaTeX rows.
    make_mega_table : bool
        Join the info table (``table1.fits``), EQW, PAH and atomic tables.

    Returns
    -------
    Table or None
        The joined table when ``make_mega_table`` is true, otherwise ``None``.
    """
    # Read in the data.
    tab_eqw = get_linelists('eqw_filenames.dat', suffix='EQW2.dat',
                            skipr=0, wave_lab=pah_wave_lab)
    tab_pah = get_linelists('PAHfilenames.dat', suffix='PAH.dat',
                            skipr=1, wave_lab=pah_wave_lab)
    tab_atm = get_linelists('Atomiclines_fnames', suffix='_ato_Line.dat',
                            skipr=1, wave_lab=atomic_wave_lab)

    # Apply the conversion factor, compute PAH complexes, fix up upper
    # limits/missing data, and add eqw/flx to names.
    # 35.26 * 1e6 goes from W/m^2/sr to 1e-15 W/m^2, assuming a 1500 arcsec^2
    # extraction area.
    cf = 35.26 * 1e6
    fwms = 1.0e-15 * u.W / (u.m * u.m)
    tab_atm_new = convert_linelist(tab_atm, conv_factor=cf, complex_list={},
                                   add_upper_lim=True, colunit=fwms,
                                   sn_limit=master_sn)
    tab_pah_new = convert_linelist(tab_pah, conv_factor=cf,
                                   complex_list=pah_complex_list,
                                   add_upper_lim=False, colunit=fwms,
                                   sn_limit=master_sn, suffix='flx')
    tab_eqw_new = convert_linelist(tab_eqw, conv_factor=1.0,
                                   complex_list=pah_complex_list,
                                   add_upper_lim=False, colunit=u.micron,
                                   sn_limit=master_sn, suffix='eqw')

    # Normalise each PAH feature by the average over all objects.
    tab_eqw_norm = norm_pah(tab_eqw_new, unc_wt=True, startcol=2)

    # Add identifiers to the tables.
    tab_atm_new = add_pub_id(tab_atm_new, "id_map")
    tab_atm_new.rename_column('Filename', 'Filename_atm')
    tab_eqw_new = add_pub_id(tab_eqw_new, "id_map")
    tab_pah_new = add_pub_id(tab_pah_new, "id_map")
    tab_eqw_norm = add_pub_id(tab_eqw_norm, "id_map")

    if write_fits:
        # The unit conversion has to be undone because FITS can't deal with it.
        wms = u.W / (u.m * u.m)
        tab_atm_new2 = convert_linelist(tab_atm_new, conv_factor=1e-15,
                                        complex_list={}, add_upper_lim=False,
                                        colunit=wms, sn_limit=-1.0, startcol=3)
        tab_atm_new2.write('m31_atomic.fits', format='fits', overwrite=True)
        tab_pah_new2 = convert_linelist(tab_pah_new, conv_factor=1e-15,
                                        complex_list={}, add_upper_lim=False,
                                        colunit=wms, sn_limit=0.001, startcol=3)
        tab_pah_new2.write('m31_pah_str.fits', format='fits', overwrite=True)
        tab_eqw_new.write('m31_pah_eqw.fits', format='fits', overwrite=True)
        tab_eqw_norm.write('m31_pah_eqw_norm.fits', format='fits',
                           overwrite=True)

    if write_latex:
        make_latex_table_rows(tab_atm_new, col_list=atm_cols,
                              outfile='m31_atomic_new.tex')
        make_latex_table_rows(tab_eqw_new, col_list=pah_cols,
                              outfile='m31_pah_eqw_new.tex',
                              col_sfx='eqw', col_sfx_start=1)
        make_latex_table_rows(tab_pah_new, col_list=pah_cols,
                              outfile='m31_pah_str_new.tex',
                              col_sfx='flx', col_sfx_start=1)
        make_latex_table_rows(tab_eqw_norm, col_list=pah_cols,
                              outfile='m31_pah_norm_new.tex',
                              col_sfx='eqw_norm', col_sfx_start=1)

    # UNTESTED (per the original author).
    if not make_mega_table:
        return None

    if not write_fits:
        # The converted tables were not built above, so build them here.
        wms = u.W / (u.m * u.m)
        tab_atm_new2 = convert_linelist(tab_atm_new, conv_factor=1e-15,
                                        complex_list={}, add_upper_lim=False,
                                        colunit=wms, sn_limit=-1.0, startcol=3)
        tab_pah_new2 = convert_linelist(tab_pah_new, conv_factor=1e-15,
                                        complex_list={}, add_upper_lim=False,
                                        colunit=wms, sn_limit=0.001, startcol=3)

    # Normalised PAH values are deliberately left out of the big table.
    infotab = Table.read('table1.fits')  # table with RAs & decs, etc
    big_tab1 = join(infotab, tab_eqw_new, keys=['Pub_ID'])
    big_tab2 = join(tab_pah_new2, tab_atm_new2, keys=['ID', 'Pub_ID'])
    big_tab = join(big_tab1, big_tab2, keys=['ID', 'Pub_ID'])
    return big_tab
def process_comparison_data():
    """Build the Gordon and Engelbracht comparison tables and write them out.

    Each data set is passed through ``add_rhi``, joined with its companion
    table and written in ``ascii.commented_header`` format.
    """
    header_fmt = 'ascii.commented_header'

    # Gordon data: add RHI, then join the atomic and EQW files on ID.
    gordon_rhi = add_rhi('gordon_atomic.dat')
    gordon_eqw = Table.read('GordonEQW', format=header_fmt)
    join(gordon_eqw, gordon_rhi, keys='ID').write('gordon_m101.dat',
                                                  format=header_fmt)

    # Engelbracht data: add RHI, then join with the EQW/oxygen table.
    engelbracht_rhi = add_rhi('englbrt.dat')
    engelbracht_met = Table.read('englbrt_eqw_oxy', format=header_fmt)
    engelbracht_full = join(engelbracht_rhi, engelbracht_met,
                            keys=['ID', 'PAH8eqw', 'PAH8eqw_unc'])
    engelbracht_full.write('englbrt_sb.dat', format=header_fmt)
    return
def initial_catalog(leda, twomass, edd, kknearby):
    """Outer-join the four input catalogues on their ``pgc`` column.

    Parameters
    ----------
    leda, twomass, edd, kknearby : Table
        Catalogues that each carry a ``pgc`` column.

    Returns
    -------
    Table
        Outer join of all four catalogues. Columns that clash between inputs
        are disambiguated with the ``table_names`` given to each join.
    """
    # The two small catalogues go first because both have "dist" columns.
    edd_kk = table.join(edd, kknearby, keys=['pgc'],
                        table_names=['edd', 'kk'], join_type='outer')

    # The right-hand side is labelled "kk" because the only column shared
    # with LEDA is 'm21' from the KK catalogue.
    with_leda = table.join(leda, edd_kk, keys=['pgc'],
                           table_names=['leda', 'kk'], join_type='outer')

    # The left-hand side is labelled "eddkk" because the columns shared with
    # 2MASS all come from either EDD or KK.
    return table.join(with_leda, twomass, keys=['pgc'],
                      table_names=['eddkk', '2mass'], join_type='outer')
def match_ids(reference_data,match_data,reference_column='id',match_column='id'):
    '''
    ---Find the matches between 2 sets of IDs---

    Inputs:
    -------
    reference_data: usually the catalogue we wish to match to (eg. galaxies in
    GZ).

    match_data: usually a subsidiary dataset, eg. detections in ALFALFA, WISE,
    ...

    reference_column, match_column: names of the columns that contain the IDs
    (eg. DR7 ids).

    Outputs:
    --------
    ids: 3 column catalogue of 'reference_index', 'id' and 'match_index',
    containing one row per ID present in both inputs.
    '''
    reference_indices = np.arange(len(reference_data))
    match_indices = np.arange(len(match_data))

    # Build each table column by column. Stacking indices and IDs into one 2D
    # array forced a common dtype: string IDs turned the indices into strings
    # and unsigned 64-bit IDs were promoted to float, losing precision.
    reference_table = Table([reference_indices,
                             reference_data[reference_column]],
                            names=('reference_index','id'))
    match_table = Table([match_indices, match_data[match_column]],
                        names=('match_index','id'))

    ids = join(reference_table, match_table, keys='id')

    print('{} galaxies in the reference catalogue'.format(len(reference_data)))
    print('{} galaxies in the match catalogue'.format(len(match_data)))
    print('---> {} matches in total'.format(len(ids)))

    return ids
def load_gain_data(gfile,obsDb):
    """Load the gain arrays from ``gfile`` and attach observation metadata.

    The arrays loaded by ``np.load`` become a masked table; its ``files``
    column is renamed to ``fileName`` and the table is joined to the
    ``fileName``, ``filter`` and ``utDate`` columns of ``obsDb``. Zero-valued
    gain entries are masked.

    Returns
    -------
    Table
        The joined, masked gain table.
    """
    gain_tab = Table(dict(np.load(gfile)), masked=True)
    gain_tab.rename_column('files', 'fileName')

    obs_cols = obsDb['fileName', 'filter', 'utDate']
    gain_tab = join(gain_tab, obs_cols, 'fileName')

    # A gain of exactly zero is masked out in each gain column.
    for colname in ('gains', 'rawAmpGain', 'rawCcdGain'):
        is_zero = gain_tab[colname] == 0
        gain_tab[colname][is_zero] = np.ma.masked
    return gain_tab
def fit_dlogM_mw(tab, sfrsd_tab, mltype='ring', mlb='i'):
    """Fit the IFU mass difference against attenuation and specific SFR density.

    ``tab`` and ``sfrsd_tab`` are joined on ``plateifu``. Two columns are
    added to the joined table: ``dlogmass_lw`` (log mass in the IFU minus its
    flux-weighted counterpart) and ``log_ssfrsd``. An ordinary least squares
    model of ``dlogmass_lw`` on ``mean_atten_mwtd``, ``std_atten_mwtd`` and
    ``log_ssfrsd`` plus a constant is then fitted to the rows not flagged as
    AGN.

    Parameters
    ----------
    tab, sfrsd_tab : Table
        Input tables sharing a ``plateifu`` column.
    mltype : str
        Selects the ``outer_mass_<mltype>`` column.
    mlb : str
        Band key looked up in ``totalmass.StellarMass.bands_ixs``.

    Returns
    -------
    The fitted OLS results object.
    """
    merged = t.join(tab, sfrsd_tab, 'plateifu')
    agn_flag = m.mask_from_maskbits(merged['mngtarg3'], [1, 2, 3, 4])

    band_ix = totalmass.StellarMass.bands_ixs[mlb]
    sun_absmag = totalmass.StellarMass.absmag_sun[band_ix]

    # Log mass in the IFU: direct, and flux-weighted M/L plus log luminosity.
    logmass_direct = merged['mass_in_ifu'].to(u.dex(u.Msun))
    log_lum = merged['ifu_absmag'][:, band_ix].to(
        u.dex(m.bandpass_sol_l_unit),
        totalmass.bandpass_flux_to_solarunits(sun_absmag))
    logmass_fluxwt = merged['ml_fluxwt'] + log_lum
    merged['dlogmass_lw'] = logmass_direct - logmass_fluxwt

    # Attenuation-corrected SFR surface density over the total mass.
    ha_corr = np.exp(merged['mean_atten_mwtd'] * (6563 / 5500)**-1.3)
    sfr_density = merged['sigma_sfr'] * ha_corr * u.Msun / u.yr / u.pc**2
    total_mass = merged['mass_in_ifu'] + merged['outer_mass_{}'.format(mltype)]
    specific_sfrsd = sfr_density / total_mass
    merged['log_ssfrsd'] = specific_sfrsd.to(u.dex(specific_sfrsd.unit))

    # Non-finite values become NaN so that missing='drop' discards them.
    not_finite = ~np.isfinite(merged['log_ssfrsd'])
    merged['log_ssfrsd'][not_finite] = np.nan * merged['log_ssfrsd'].unit

    regressors = t.Table(
        merged['mean_atten_mwtd', 'std_atten_mwtd', 'log_ssfrsd'])
    exog = sm_add_constant(regressors[~agn_flag].to_pandas(), prepend=False)
    endog = np.array(merged['dlogmass_lw'][~agn_flag])

    model = OLS(endog=endog, exog=exog, hasconst=True, missing='drop')
    return model.fit()
def var_frac(line, k):
    """Print, per cluster, the fraction of objects with variability measurements.

    An object counts as variable when its two BI values differ (the original
    note attributes these to Filiz Ak et al. 2014). Clusters are reported in
    order of increasing mean ``Vmin``.

    Parameters
    ----------
    line : str
        Prefix of the cluster file name under ``./clusters/3features/``.
    k : int
        Number of clusters; labels ``0 .. k-1`` are examined.
    """
    data = Table.read('myBALCat_xtra.csv')
    c = Table.read("./clusters/3features/" + line + str(k) + "clstrs.fits")
    t = join(data, c, keys='SDSSName')

    clstrs_ls = []
    for o in range(k):
        members = t['label'] == o  # computed once per cluster
        clstrs_ls.append([o,
                          len(t[members]),
                          mean(t['Vmin'][members]),
                          mean(t['Vmax'][members]),
                          mean(t['EW'][members])])

    # Order the clusters by mean Vmin.
    oc = sorted(clstrs_ls, key=itemgetter(2))

    is_variable = abs(t['BI1'] - t['BI2']) > 0
    for x in oc:
        in_cluster = t['label'] == x[0]
        n_all = len(t[in_cluster])
        n_var = len(t[in_cluster & is_variable])
        # One parenthesised argument prints identically on Python 2 and 3.
        print("N= " + str(n_all) + ", " +
              "N(var)= " + str(n_var) + "= " +
              str(n_var * 100. / n_all) + "%")

    return
def filtered_selection(self,obsDb,ii):
    """Choose which of the frames ``ii`` to keep, preferring low sky levels.

    Frames are rejected when they belong to the hard-coded bad fields or file,
    when their ``skyRms**2/skyMean`` is sigma-clipped, or when ``skyMean`` is
    not below ``self.maxCounts``. If fewer than ``self.minNImg`` frames
    survive, only the bad-field and clipping cuts are applied. Otherwise the
    lowest-sky frames are kept, at most one per run of consecutive frames with
    the same ``objName`` and at most ``self.maxNImg`` in total.

    Raises
    ------
    ValueError
        If the join with ``self.sky`` yields fewer rows than ``ii``.

    Returns
    -------
    Boolean mask over ``ii``.
    """
    # filter out bright sky values
    sky = join(obsDb[ii], self.sky, 'frameIndex')
    if len(sky) < len(ii):
        raise ValueError('missing files!')

    bad_field = np.in1d(obsDb['objName'][ii],
                        ['rm10', 'rm11', 'rm12', 'rm13'])
    bad_field |= obsDb['fileName'][ii] == 'bokrm.20150405.0059'

    sky_var = sky['skyRms']**2 / sky['skyMean']
    bad_rms = sigma_clip(sky_var, sigma=3.0, iters=2).mask

    keep = (sky['skyMean'] < self.maxCounts) & ~bad_rms & ~bad_field
    if keep.sum() < self.minNImg:
        return ~bad_rms & ~bad_field

    # Number the runs of consecutive frames that share an object name.
    group_id = 0
    groups = [group_id]
    for pos in range(1, len(ii)):
        if obsDb['objName'][ii[pos]] != obsDb['objName'][ii[pos - 1]]:
            group_id += 1
        groups.append(group_id)
    groups = np.array(groups)

    # Start from nothing kept, then re-enable the selected frames.
    candidates = np.where(keep)[0]
    keep[candidates] = False

    # Lowest-sky candidates, up to twice the final quota.
    faintest = candidates[
        sky['skyMean'][candidates].argsort()[:self.maxNImg * 2]]
    # First occurrence of each group among those.
    _, first_in_group = np.unique(groups[faintest], return_index=True)
    one_per_group = faintest[first_in_group]
    # Of those, the lowest-sky frames up to the quota.
    chosen = one_per_group[
        sky['skyMean'][one_per_group].argsort()[:self.maxNImg]]
    keep[chosen] = True
    return keep
def get_fits_catalog(args, index_table):
    """Make a catalog of parametric-fit information for the galaxies.

    Columns are identical to the COSMOS Real Galaxy catalog. One FITS file is
    written per filter in ``args.filter_names``, with rows ordered to follow
    ``index_table`` sorted by ``ORDER``.

    Parameters
    ----------
    args
        Namespace providing ``seg_list_file``, ``filter_names``,
        ``main_path``, ``out_dir``, ``fits_file_name`` and
        ``file_filter_name``.
    index_table : Table
        Index table with ``SEG_ID``, ``NUMBER``, ``ORDER`` and ``IDENT``
        columns. It is sorted in place by ``ORDER``.
    """
    print("Creating fits catalog")
    # NOTE(review): dtype='S2' yields bytes on Python 3, which would break the
    # path concatenation below -- confirm before running on Python 3.
    all_seg_ids = np.loadtxt(args.seg_list_file, delimiter=" ", dtype='S2')
    for f, filt in enumerate(args.filter_names):
        final_table = fits_table()
        for seg_id in all_seg_ids:
            file_name = (args.main_path + seg_id + '/' + filt +
                         '_with_pstamp.fits')
            seg_cat = Table.read(file_name, format='fits')
            q, = np.where(index_table['SEG_ID'] == seg_id)
            indx_seg = index_table[q]
            temp = join(seg_cat, indx_seg, keys='NUMBER')
            temp.rename_column('MAG_AUTO', 'mag_auto')
            temp.rename_column('FLUX_RADIUS', 'flux_radius')
            col = Column(temp['stamp_flux'], name='flux')
            temp.add_column(col)
            final_table = vstack([final_table, temp], join_type='inner')
        path = args.main_path + args.out_dir
        index_table.sort('ORDER')
        # Position in final_table of each IDENT, in index_table order.
        ord_indx = [np.where(i_t == final_table['IDENT'])[0][0]
                    for i_t in index_table['IDENT']]
        file_name = args.fits_file_name.replace('filter',
                                                args.file_filter_name[f])
        # Joining with a space reproduces the output of the old
        # two-argument print statement on both Python 2 and 3.
        print(" ".join(["Savings fits file at ", path + file_name]))
        final_table[ord_indx].write(path + file_name, format='fits',
                                    overwrite=True)
def GenerateRegions(refresh=False,release='all'):
    """Aggregate observations per region and derive each region's VLSR window.

    Parameters
    ----------
    refresh : bool
        When true, ``updateLogs`` and ``updateCatalog`` are called first.
    release : str
        Forwarded to ``updateLogs`` and ``updateCatalog``.

    Returns
    -------
    Table
        Per-region mean values with two extra columns: ``VAVG`` (midpoint of
        the minimum and maximum ``VLSR``) and ``VRANGE`` (their difference).
    """
    if refresh:
        updateLogs(release=release)
        updateCatalog(release=release)
    obs = Table.read('ObservationLog.csv')
    cat = Table.read('RegionCatalog.csv')

    # Drop rows with an empty BoxName. The indices are collected first and
    # removed in one call: removing rows while iterating shifted the later
    # rows and skipped some, which is why the old loop had to run twice (and
    # could still miss longer runs of empty rows).
    empty_rows = [idx for idx, row in enumerate(cat) if not row['BoxName']]
    if empty_rows:
        cat.remove_rows(empty_rows)

    obs.rename_column('Source','BoxName')
    joincat = join(obs,cat,keys='BoxName')
    groupcat = joincat.group_by('Region name')
    min_values = groupcat.groups.aggregate(np.min)
    max_values = groupcat.groups.aggregate(np.max)
    mean_values = groupcat.groups.aggregate(np.mean)
    vavg = 0.5*(min_values['VLSR'] + max_values['VLSR'])
    vrange = max_values['VLSR'] - min_values['VLSR']
    mean_values['VAVG'] = vavg
    mean_values['VRANGE'] = vrange

    return(mean_values)
def frac(line, k):
    """Print the percentage of objects in each cluster with ``BIO_AlIII`` > 0.

    Clusters are reported in order of increasing mean ``Vmin``. To report a
    different quantity (e.g. ``BIO_SiIV``), change ``q`` below.

    Parameters
    ----------
    line : str
        Prefix of the cluster file name under ``./clusters/3features/``.
    k : int
        Number of clusters; labels ``0 .. k-1`` are examined.
    """
    data = Table.read('myBALCat_xtra.csv')
    c = Table.read("./clusters/3features/" + line + str(k) + "clstrs.fits")
    t = join(data, c, keys='SDSSName')

    q = 'BIO_AlIII'  # change to 'BIO_SiIV' to report SiIV instead

    clstrs_ls = []
    for o in range(k):
        members = t['label'] == o  # computed once per cluster
        clstrs_ls.append([o,
                          len(t[members]),
                          mean(t['Vmin'][members]),
                          mean(t['Vmax'][members]),
                          mean(t['EW'][members])])

    # Order the clusters by mean Vmin.
    oc = sorted(clstrs_ls, key=itemgetter(2))

    has_q = t[q] > 0
    for x in oc:
        in_cluster = t['label'] == x[0]
        n_all = len(t[in_cluster])
        n_q = len(t[in_cluster & has_q])
        # One parenthesised argument prints identically on Python 2 and 3.
        print("N= " + str(n_all) + ", N(" + q + ")= " + str(n_q) +
              " = " + str(n_q * 100. / n_all) + "%")

    return
def print_efficiency_stats(truth, mtl_initial, zcat):
    """Print, per target class, the fraction of targets recovered in ``zcat``.

    For LRG, ELG and QSO targets, the number of rows of ``zcat`` (joined to
    the initial MTL and truth tables on ``TARGETID``) with the expected
    ``SPECTYPE`` is divided by the number of initial targets with the matching
    ``TRUESPECTYPE``.

    Parameters
    ----------
    truth, mtl_initial, zcat : Table
        Tables sharing a ``TARGETID`` column.
    """
    print('Overall efficiency')
    initial = join(mtl_initial, truth, keys='TARGETID')
    observed = join(zcat, initial, keys='TARGETID')

    # (target class, spectral type expected for it)
    expected_types = [('LRG', 'GALAXY'), ('ELG', 'GALAXY'), ('QSO', 'QSO')]

    for true_type, zcat_type in expected_types:
        target_bit = desi_mask.mask(true_type)

        in_initial = (initial['DESI_TARGET'] & target_bit) != 0
        in_initial = in_initial & (initial['TRUESPECTYPE'] == zcat_type)

        in_final = (observed['DESI_TARGET'] & target_bit) != 0
        in_final = in_final & (observed['SPECTYPE'] == zcat_type)

        n_t = 1.0 * len(observed['TARGETID'][in_final])
        n_i = 1.0 * len(initial['TARGETID'][in_initial])
        print("\t {} fraction : {}".format(true_type, n_t / n_i))
    return
def main():
    """Add ecliptic-plane and solar separation columns to observation metadata.

    The metadata table is read, joined with the solar ephemeris table,
    extended with ``ECLIPTIC_PLANE_SEP`` (ecliptic latitude of the boresight)
    and ``ECLIPTIC_PLANE_SOLAR_SEP`` (absolute ecliptic longitude difference
    between boresight and Sun, folded into 0-180), and written back to the
    location it was read from.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Add in solar and ecliptic data to observational metadata'
    )
    parser.add_argument(
        '--metadata_file_path', type=str, default=None, metavar='PATH',
        help='Metadata file path to work from, if not ''.'''
    )
    parser.add_argument(
        '--metadata_path', type=str, default='.', metavar='PATH',
        help='Metadata path to work from, if not ''.'''
    )
    parser.add_argument(
        '--input', type=str, default='FITS', metavar='OUTPUT',
        help='Output format, either of FITS or CSV, defaults to FITS.'
    )
    parser.add_argument(
        '--solar_metadata', type=str, default='solar_ephemeris.csv',
        help='Solar metadata file (from parse_ephemeris.py)'
    )
    args = parser.parse_args()

    if args.metadata_file_path is None:
        if args.input == 'CSV':
            obs_md_table = astab.Table.read(
                os.path.join(args.metadata_path, "annotated_metadata.csv"),
                format="ascii.csv")
        elif args.input == 'FITS':
            obs_md_table = astab.Table.read(
                os.path.join(args.metadata_path, "annotated_metadata.fits"),
                format="fits")
        else:
            # Previously this fell through and failed later with a NameError.
            parser.error("--input must be 'FITS' or 'CSV', got {!r}".format(
                args.input))
    else:
        # Use astab.Table like the other reads; a bare ``Table`` is not a
        # name this function otherwise relies on.
        obs_md_table = astab.Table.read(args.metadata_file_path)

    solar_md_table = astab.Table.read(args.solar_metadata, format="ascii.csv")
    solar_md_table.rename_column('UTC', 'EPHEM_DATE')

    obs_md_table = astab.join(
        obs_md_table, solar_md_table['EPHEM_DATE', 'RA_ABS', 'DEC_ABS'])

    # Change distance to... e.g. 1 Mpc
    boresight_ra_dec = ascoord.SkyCoord(
        ra=obs_md_table['RA'], dec=obs_md_table['DEC'], distance=1.0,
        unit=('deg', 'deg', 'AU'), frame='fk5')
    boresight_ecliptic = boresight_ra_dec.transform_to(
        'heliocentrictrueecliptic')

    solar_ra_dec = ascoord.SkyCoord(
        ra=obs_md_table['RA_ABS'], dec=obs_md_table['DEC_ABS'], distance=1.0,
        unit=('deg', 'deg', 'AU'), frame='icrs')
    solar_ecliptic = solar_ra_dec.transform_to('heliocentrictrueecliptic')

    obs_md_table.add_column(astab.Column(
        boresight_ecliptic.lat, dtype=float, name="ECLIPTIC_PLANE_SEP"))

    # Longitude difference folded into [0, 180].
    belp = np.mod(boresight_ecliptic.lon.value + 360.0, 360.0)
    selp = np.mod(solar_ecliptic.lon.value + 360.0, 360.0)
    lon_diff = np.abs(belp - selp)
    lon_diff[lon_diff > 180] -= 360
    lon_diff = np.abs(lon_diff)
    obs_md_table.add_column(astab.Column(
        lon_diff, dtype=float, name="ECLIPTIC_PLANE_SOLAR_SEP"))

    # The solar RA/Dec were only needed to compute the separations.
    obs_md_table.remove_columns(['RA_ABS', 'DEC_ABS'])

    if args.metadata_file_path is None:
        if args.input == 'CSV':
            # overwrite=True: this is the file the table was read from.
            obs_md_table.write(
                os.path.join(args.metadata_path, "annotated_metadata.csv"),
                format="ascii.csv", overwrite=True)
        elif args.input == 'FITS':
            obs_md_table.write(
                os.path.join(args.metadata_path, "annotated_metadata.fits"),
                format="fits", overwrite=True)
    else:
        obs_md_table.write(args.metadata_file_path, overwrite=True)
def compare_mtot_pca_nsa(tab, jhu_mpa, mltype='ring', mlb='i', cb1='g', cb2='r'):
    """Plot the log ratio of PCA mass to NSA and JHU-MPA masses versus colour.

    ``tab`` and ``jhu_mpa`` are joined on ``plateifu``. The scatter of
    ``log10(M_PCA / M_catalog)`` against the ``cb1 - cb2`` colour is drawn for
    both catalogues, together with a smoothed trend wherever the smoother
    weight is at least 20% of the maximum NSA weight. The figure is saved as
    ``lib_diags/dMasses.png`` under ``basedir``.

    Parameters
    ----------
    tab, jhu_mpa : Table
        Input tables sharing a ``plateifu`` column.
    mltype : str
        Selects the ``outer_mass_<mltype>`` column.
    mlb, cb1, cb2 : str
        Band keys looked up in ``totalmass.StellarMass.bands_ixs``.
    """
    joined = t.join(tab, jhu_mpa, 'plateifu')

    band_ixs = totalmass.StellarMass.bands_ixs
    mlb_ix = band_ixs[mlb]
    cb1_ix = band_ixs[cb1]
    cb2_ix = band_ixs[cb2]
    absmag_sun_mlb = totalmass.StellarMass.absmag_sun[mlb_ix]

    colour = joined['nsa_absmag'][:, cb1_ix] - joined['nsa_absmag'][:, cb2_ix]

    mass_pca = joined['mass_in_ifu'] + joined['outer_mass_{}'.format(mltype)]

    # Catalogue masses, converted from little-h units using cosmo.H0.
    nsa_h = 1.
    mass_nsa = (joined['nsa_elpetro_mass'] * u.Msun
                * (nsa_h * u.littleh)**-2).to(u.Msun, u.with_H0(cosmo.H0))

    jhumpa_h = 1. / .7
    chabrier_to_kroupa_dex = .05
    mass_jhumpa = (10.**(joined['LOG_MSTAR'] + chabrier_to_kroupa_dex)
                   * u.Msun
                   * (jhumpa_h * u.littleh)**-2.).to(u.Msun,
                                                     u.with_H0(cosmo.H0))

    grid = np.linspace(colour.min(), colour.max(), 100).value

    trend_nsa, weight_nsa = smooth(
        x=colour.value, y=np.log10(mass_pca / mass_nsa).value,
        xgrid=grid, bw=.01)
    print(trend_nsa)
    print(weight_nsa)

    trend_jhumpa, weight_jhumpa = smooth(
        x=colour.value, y=np.log10(mass_pca / mass_jhumpa).value,
        xgrid=grid, bw=.01)
    print(trend_jhumpa)
    print(weight_jhumpa)

    # Both trends use the threshold derived from the NSA weights.
    weight_floor = .2 * weight_nsa.max()
    show_nsa = (weight_nsa >= weight_floor)
    show_jhumpa = (weight_jhumpa >= weight_floor)

    fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=300)

    ax.scatter(colour, np.log10(mass_pca / mass_nsa),
               s=2., edgecolor='None', c='C0', label='NSA')
    ax.plot(grid[show_nsa], trend_nsa[show_nsa],
            linewidth=0.5, c='k', linestyle='-')

    ax.scatter(colour, np.log10(mass_pca / mass_jhumpa),
               s=2., edgecolor='None', c='C1', label='JHU-MPA')
    ax.plot(grid[show_jhumpa], trend_jhumpa[show_jhumpa],
            linewidth=0.5, c='k', linestyle='--')

    ax.set_ylim([-.2, .5])
    ax.set_xlim([-.1, 1.])

    ax.legend(loc='best', prop={'size': 'xx-small'})
    ax.tick_params(labelsize='xx-small')
    ax.set_xlabel(r'${}-{}$'.format(cb1, cb2), size='x-small')
    ax.set_ylabel(r'$\log \frac{M^*_{\rm PCA}}{M^*_{\rm catalog}}$',
                  size='x-small')

    fig.tight_layout()
    fig.subplots_adjust(top=.95, left=.21, right=.97)
    fig.savefig(os.path.join(basedir, 'lib_diags/', 'dMasses.png'),
                dpi=fig.dpi)
def process_photons(self, photons, verbose=0):
    """Process the photons and merge the result with the input table.

    After ``add_colpos`` and ``_process_photons_in_c`` have run, the processed
    table is joined with the input table on ``tag``. Where column names clash,
    the input table's copy gets the suffix ``_beforemirror``. Photons flagged
    ``unreflected`` or ``mirror_vblocked`` get probability 0.

    Returns
    -------
    Table
        The joined photon table.
    """
    self.add_colpos(photons)
    processed = self._process_photons_in_c(photons, verbose)

    photons = join(
        processed,
        photons,
        keys='tag',
        uniq_col_name='{col_name}{table_name}',
        table_names=['', '_beforemirror'],
    )

    lost = photons['unreflected'] | photons['mirror_vblocked']
    photons['probability'][lost] = 0
    return photons
def join_by_id(table1, table2, columnid1, columnid2, join_type="inner",
               conflict_suffixes=("_A", "_B"), idproc=None,
               additional_keys=None):
    '''Joins two tables based on columns with different names.

    Table1 and table2 are the tables to be joined together. The column names
    that should be joined are the two columnids. Columnid1 will be the column
    name for the returned table. In case of conflicts, the conflict suffixes
    will be appended to the keys with conflicts. To merge conflicts instead of
    keeping them separate, add the column name to additional_keys.

    If the entries in the columns to be merged should be processed a certain
    way, the function that does the processing should be given in idproc. For
    no processing, "None" should be passed instead.

    The input tables are modified temporarily during the join and restored
    before returning, also when an exception is raised.

    Raises ValueError if the column names differ and columnid1 already exists
    in table2.
    '''
    # None stands in for "no extra keys" to avoid a shared mutable default.
    extra_keys = [] if additional_keys is None else list(additional_keys)

    # Process the columns if need be.
    if idproc is not None:
        # Keep the original data under random names so it is not lost while
        # the processed IDs take over the original column names.
        origcol1 = table1[columnid1]
        origcol2 = table2[columnid2]
        randomcol1 = generate_random_string(10)
        randomcol2 = generate_random_string(10)
        table1.rename_column(columnid1, randomcol1)
        table2.rename_column(columnid2, randomcol2)
        table1[columnid1] = idproc(origcol1)
        table2[columnid2] = idproc(origcol2)

    # Tracks whether the temporary alias column was added, so the cleanup
    # never deletes a column that was already in table2.
    added_alias = False
    try:
        # If columnid1 = columnid2, then we can go straight to a join. If not,
        # then columnid2 needs to be available as columnid1. If
        # table2[columnid1] already exists, that is an error.
        if columnid1 != columnid2:
            if columnid1 in table2.colnames:
                raise ValueError(
                    "Column {0} already exists in second table.".format(
                        columnid1))
            table2[columnid1] = table2[columnid2]
            added_alias = True

        newtable = join(
            table1, table2, keys=[columnid1] + extra_keys,
            join_type=join_type, table_names=list(conflict_suffixes),
            uniq_col_name="{col_name}{table_name}")

        # Only reached when the join succeeded, so a failing join surfaces
        # its own exception instead of a NameError from the cleanup.
        if idproc is not None:
            del newtable[randomcol1]
            del newtable[randomcol2]
    finally:
        # Restore the input tables.
        if added_alias:
            del table2[columnid1]
        if idproc is not None:
            del table1[columnid1]
            del table2[columnid2]
            table1.rename_column(randomcol1, columnid1)
            table2.rename_column(randomcol2, columnid2)

    return newtable
def matchwithcosmos(tab_file, match_file, nocosmos=False):
    """Match the table in ``tab_file`` against COSMOS within 1 arcsec.

    The input table is first joined to the table in the module-level
    ``y1_dfull`` file on ``COADD_OBJECTS_ID``. Matched rows, together with the
    COSMOS photo-z, NUMBER, coordinates and match error, are written to
    ``<tab_file stem>_cosmos.fits``. With ``nocosmos`` set, the unmatched rows
    are also written to ``<tab_file stem>_no_cosmos.fits``.

    NOTE(review): the unconditional ``exit()`` after the diagnostic scatter
    plot stops the process, so nothing below it currently runs. It looks like
    a debugging leftover -- confirm and remove it to re-enable the matching.

    Parameters
    ----------
    tab_file : str
        Path of the input table.
    match_file : str
        Path of the file used to store the HTM matches.
    nocosmos : bool
        Also write the rows without a COSMOS match.
    """
    print("Matching {} with {} within 1 arcsec...".format(tab_file,
                                                          cosmos_file))
    ntab = Table.read(tab_file)
    y1tab = Table.read(y1_dfull)
    ntab.rename_column('id', 'COADD_OBJECTS_ID')
    tab = join(ntab, y1tab, keys='COADD_OBJECTS_ID')

    # Diagnostics.
    print(ntab['COADD_OBJECTS_ID'][:10])
    print(len(tab))
    print(y1tab['COADD_OBJECTS_ID'][:10])
    plt.scatter(tab['RA'], tab['DEC'])
    plt.show()
    exit()

    cosmos = Table.read(cosmos_file)

    h = esutil.htm.HTM(10)
    h.match(tab['RA'], tab['DEC'],
            cosmos['ALPHA_J2000'], cosmos['DELTA_J2000'],
            radius=1./3600,
            file=match_file)
    m = h.read(match_file)

    # list() keeps this working on Python 3, where zip() is lazy.
    data_m, cosmos_m, merr = np.array(list(zip(*m)))

    print(" {} COSMOS/Y1 matches".format(len(cosmos_m)))
    print(" No duplicates in matched list?:{}".format(
        len(data_m) == len(set(data_m))))

    if nocosmos == True:
        no_cosmos = list(set(range(0, len(tab))) - set(data_m))
        new_data = tab[no_cosmos]
        print("made new table without cosmos matches")
        new_data.write(os.path.splitext(tab_file)[0] + '_no_cosmos.fits')
        del new_data
        print(' wrote new table')

    data_m_int = [int(g) for g in data_m]
    cosmos_m_int = [int(c) for c in cosmos_m]

    new_cosmos = tab[data_m_int]
    del tab
    print(' deleted old table')

    new_cosmos.add_column(Column(name='photoz',
                                 data=cosmos['PHOTOZ'][cosmos_m_int]))
    new_cosmos.add_column(Column(name='NUMBER',
                                 data=cosmos['NUMBER'][cosmos_m_int]))
    new_cosmos.add_column(Column(name='ALPHA_J2000',
                                 data=cosmos['ALPHA_J2000'][cosmos_m_int]))
    # Fixed copy-paste error: this column was filled from ALPHA_J2000.
    new_cosmos.add_column(Column(name='DELTA_J2000',
                                 data=cosmos['DELTA_J2000'][cosmos_m_int]))
    new_cosmos.add_column(Column(name='match_err', data=merr))
    print(' made new cosmos table')

    new_cosmos.write(os.path.splitext(tab_file)[0] + '_cosmos.fits')
def truth_plus_mag_by_orig():
    """
    Return the truth table inner joined to the magnified table by
    original balrog index.

    Both tables are read from the module-level ``truth_file`` and
    ``mag_file`` paths. No explicit key is given, so the join uses the
    columns the two tables have in common.
    """
    bmag = Table.read(mag_file)
    bal = Table.read(truth_file)
    # One parenthesised argument prints identically on Python 2 and 3.
    print("joining truth table to magnified table by original index...")
    return join(bal, bmag)
def sim_plus_mag_by_orig():
    """
    Return the sim table outer joined to the magnified table by balrog
    index of the original object.

    Both tables are read from the module-level ``sim_file`` and ``mag_file``
    paths. No explicit key is given, so the join uses the columns the two
    tables have in common.
    """
    sim = Table.read(sim_file)
    bmag = Table.read(mag_file)
    # One parenthesised argument prints identically on Python 2 and 3.
    print("joining sim table to magnified table by original index...")
    return join(bmag, sim, join_type='outer')
def __call__(self,obsDb,ii):
    """Refine the parent selector's mask using the sky level of each frame.

    Frames whose ``skyMean`` is not below ``self.maxCounts`` are dropped. If
    more than ``self.maxNImg`` frames remain, only the ``self.maxNImg`` with
    the lowest ``skyMean`` are kept.

    Returns
    -------
    Boolean mask over ``ii``.
    """
    keep = super(SkyFlatSelector, self).__call__(obsDb, ii)

    frame_ids = obsDb['frameIndex',][ii]
    sky = join(frame_ids, self.skyDat, 'frameIndex')
    # The joined rows must line up one-to-one with the selected frames.
    assert np.all(sky['frameIndex'] == obsDb['frameIndex'][ii])

    keep &= sky['skyMean'] < self.maxCounts

    if keep.sum() > self.maxNImg:
        kept = np.where(keep)[0]
        by_sky = kept[sky['skyMean'][kept].argsort()]
        # Everything past the quota, in order of increasing sky, is dropped.
        keep[by_sky[self.maxNImg:]] = False
    return keep
def test_join(table_types):
    """
    Check joining of tables that contain mixin columns.

    Column "i" holds row indices and serves as the proxy for what each mixin
    column should contain after the join.
    """
    left = table_types.Table()
    left['a'] = table_types.Column(['a', 'b', 'b', 'c'])
    left['i'] = table_types.Column([0, 1, 2, 3])
    for name, col in MIXIN_COLS.items():
        left[name] = col

    right = table_types.Table(left)
    right['a'] = ['b', 'c', 'a', 'd']

    # Distinct descriptions show which side each output column came from.
    for name in MIXIN_COLS:
        left[name].info.description = name
        right[name].info.description = name + '2'

    for how in ('inner', 'left'):
        joined = join(left, right, keys='a', join_type=how)
        rows_left = joined['i_1']
        rows_right = joined['i_2']
        for name, col in MIXIN_COLS.items():
            out_left = name + '_1'
            out_right = name + '_2'
            assert_table_name_col_equal(joined, out_left, col[rows_left])
            assert_table_name_col_equal(joined, out_right, col[rows_right])
            assert joined[out_left].info.description == name
            assert joined[out_right].info.description == name + '2'

    # Join types that need masking are rejected.
    for how in ('outer', 'right'):
        with pytest.raises(NotImplementedError) as exc:
            joined = join(left, right, keys='a', join_type=how)
        assert 'join requires masking column' in str(exc.value)

    # This mixin is rejected as a key column.
    with pytest.raises(ValueError) as exc:
        joined = join(left, right, keys=['a', 'skycoord'])
    assert 'not allowed as a key column' in str(exc.value)

    # Join does work for a mixin which is a subclass of np.ndarray
    joined = join(left, right, keys=['quantity'])
    assert np.all(joined['a_1'] == left['a'])
def truth_plus_mag_by_mag():
    """
    Return the truth table inner joined to the magnified table by
    balrog index of magnified match.

    The truth table's ``BALROG_INDEX`` column is renamed to
    ``BALROG_INDEX_MAG<mu>`` (``mu`` is a module-level value) before the
    join. No explicit key is given, so the join uses the columns the two
    tables have in common.
    """
    bal = Table.read(truth_file)
    bal.rename_column('BALROG_INDEX', 'BALROG_INDEX_MAG' + str(mu))
    bmag = Table.read(mag_file)
    # One parenthesised argument prints identically on Python 2 and 3.
    print("joining truth table to magnified table by magnified index...")
    return join(bal, bmag)
def sim_plus_mag_by_mag():
    """
    Return the sim table outer joined to the magnified table by balrog
    index of the magnified match.

    The sim table's ``BALROG_INDEX`` column is renamed to
    ``BALROG_INDEX_MAG<mu>`` (``mu`` is a module-level value) before the
    join. No explicit key is given, so the join uses the columns the two
    tables have in common.
    """
    sim = Table.read(sim_file)
    sim.rename_column('BALROG_INDEX', 'BALROG_INDEX_MAG' + str(mu))
    bmag = Table.read(mag_file)
    # One parenthesised argument prints identically on Python 2 and 3.
    print("joining sim table to magnified table by magnified index...")
    return join(bmag, sim, join_type='outer')
def load_cache(tables, names, cachefile='cache.fits'):
    """Return the cached table, building and saving it first if necessary.

    When ``cachefile`` does not exist, the tables at the module-level
    ``tab_ext_filename`` and ``tab_lnl_filename`` paths are joined, passed
    through ``load_source_rows`` with ``names``, and written to ``cachefile``.

    Parameters
    ----------
    tables
        Not used by this function.
    names
        Forwarded to ``load_source_rows``.
    cachefile : str
        Path of the cache file.

    Returns
    -------
    Table
    """
    if not os.path.isfile(cachefile):
        merged = join(Table.read(tab_ext_filename),
                      Table.read(tab_lnl_filename))
        merged = load_source_rows(merged, names)
        merged.write(cachefile)
        return merged

    return Table.read(cachefile)
def make_mtl(targets, zcat=None, trim=True):
    '''
    Adds NUMOBS_MORE, PRIORITY, and GRAYLAYER columns to a targets table

    Args:
        targets : Table with columns TARGETID, DESI_TARGET

    Optional:
        zcat : redshift catalog table with columns TARGETID, NUMOBS, Z, ZWARN
        trim: if True (default), don't include targets that don't need
            any more observations.  If False, include every input target.

    Returns:
        MTL Table with targets columns plus
          * NUMOBS_MORE - number of additional observations requested
          * PRIORITY - target priority (larger number = higher priority)
          * GRAYLAYER - can this be observed during gray time?

    TODO: Check if input targets is ever altered (it shouldn't...)

    NOTE(review): with a zcat, NUMOBS_MORE is computed on the outer-joined
    table and copied onto the targets by position; this presumably relies on
    the join keeping the targets' row order and count -- confirm.
    '''
    n = len(targets)
    targets = Table(targets)

    if zcat is None:
        # No redshift catalog: every target is treated as unobserved.
        ztargets = targets.copy()
        ztargets['NUMOBS'] = np.zeros(n, dtype=np.int32)
        ztargets['Z'] = -1 * np.ones(n, dtype=np.float32)
        ztargets['ZWARN'] = -1 * np.ones(n, dtype=np.int32)
    else:
        ztargets = join(targets, zcat, keys='TARGETID', join_type='outer')
        if ztargets.masked:
            # Rows with a masked NUMOBS count as zero observations.
            never_observed = ztargets['NUMOBS'].mask
            ztargets['NUMOBS'][never_observed] = 0

    still_needed = calc_numobs(ztargets) - ztargets['NUMOBS']
    ztargets['NUMOBS_MORE'] = np.maximum(0, still_needed)

    mtl = targets.copy()
    mtl['NUMOBS_MORE'] = ztargets['NUMOBS_MORE']
    mtl['PRIORITY'] = calc_priority(ztargets)

    #- ELGs can be observed during gray time
    is_elg = (mtl['DESI_TARGET'] & desi_mask.ELG) != 0
    gray = np.zeros(n, dtype='i4')
    gray[is_elg] = 1
    mtl['GRAYLAYER'] = gray

    if not trim:
        return mtl

    needs_more = mtl['NUMOBS_MORE'] > 0
    return mtl[needs_more]
def join_starcheck_telem(fids_starcheck, fids_telem):
    """
    Combine the per-obsid starcheck and telemetry fid tables into one table.

    Each input is a dict of tables keyed by obsid. The tables of each dict are
    stacked in sorted key order, the two stacks are joined on ``obsid`` and
    ``slot``, and only rows with ``aoacyan`` above -3276 are returned.
    """
    def _stack(tables_by_obsid):
        # One table per structure, stacked in sorted obsid order.
        ordered = [tables_by_obsid[obsid] for obsid in sorted(tables_by_obsid)]
        return table.vstack(ordered)

    stacked_starcheck = _stack(fids_starcheck)
    stacked_telem = _stack(fids_telem)

    # Join on obsid and slot columns into a single table
    joined = table.join(stacked_starcheck, stacked_telem,
                        keys=['obsid', 'slot'])

    # Reject unacquired fids
    acquired = joined['aoacyan'] > -3276
    return joined[acquired]
def update_db_with_badsky(maxSkyCounts=40000):
    """Flag object frames with high sky counts as bad in the observations DB.

    The ``good`` column of ``config/sdssrm-bok.fits`` is updated in place for
    rows with ``imType == 'object'``: a frame stays good only if its
    ``skyMean`` (from ``data/bokrm_skyadu.fits``) is below ``maxSkyCounts``.
    The rejected frames are listed in ``data/badskylist.txt``.

    Parameters
    ----------
    maxSkyCounts : number
        Sky-mean threshold; frames at or above it are flagged as bad.
    """
    from astropy.io import fits
    rmObsDbFile = os.path.join('config','sdssrm-bok.fits')
    # The context manager closes the file even if the join or the alignment
    # check below raises; previously the handle leaked on that path.
    with fits.open(rmObsDbFile,mode='update') as f:
        skyDat = Table.read(os.path.join('data','bokrm_skyadu.fits'))
        ii = np.where(f[1].data['imType']=='object')[0]
        m = join(Table(f[1].data[ii])['frameIndex',],skyDat,'frameIndex')
        # The joined rows must line up one-to-one with the object frames.
        assert np.all(f[1].data['frameIndex'][ii]==m['frameIndex'])
        good = m['skyMean'] < maxSkyCounts
        f[1].data['good'][ii] &= good
        f.flush()
    m['frameIndex','utDate','fileName','filter','skyMean'][~good].write(
        'data/badskylist.txt',format='ascii',overwrite=True)
def _load_or_fetch(cache_file, query):
    """Return the pickled rows in ``cache_file``, or run ``query`` and cache them."""
    # NOTE: pickle.load runs arbitrary code from the file; only use cache
    # files written by this script.
    if os.path.exists(cache_file):
        # Pickles are binary data; text mode fails on Python 3.
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    rows = dbh.fetchall(query)
    with open(cache_file, 'wb') as f:
        pickle.dump(rows, f)
    return rows


def main():
    """Build the bad-acquisition table and produce the sausage/starcheck lists.

    Acquisition and guide statistics are read from local pickle caches
    (``acq.pkl`` and ``gui.pkl``) when present, otherwise fetched from the
    database and cached. Observations listed in ``bad_obsids.bad_obsids`` are
    excluded from the acquisition data.
    """
    # Read in acquisition statistics from a pkl if present or get from the
    # database
    acq = _load_or_fetch('acq.pkl', """select * from acq_stats_data""")

    # Exclude bad obsids
    acq_mask = np.zeros(len(acq), dtype=bool)
    for obsid in bad_obsids.bad_obsids:
        acq_mask[acq['obsid'] == obsid] = True
    acq = acq[~acq_mask]

    # Use table grouping operations to make a table of agasc ids and their
    # failures.  This is used by the sausage list and the starcheck list
    long_acqs = Table(acq[['agasc_id', 'obc_id', 'tstart']])
    # Per star: number of attempts (all rows) ...
    all_acqs = long_acqs[['agasc_id', 'tstart']].group_by(
        'agasc_id').groups.aggregate(np.size)
    # ... and number of failures (rows with obc_id 'NOID').
    acqs_noid = long_acqs[long_acqs['obc_id'] == 'NOID']
    bacqs = acqs_noid.group_by('agasc_id').groups.aggregate(np.size)
    bad_acqs = join(all_acqs, bacqs, keys='agasc_id')[
        ['agasc_id', 'tstart_1', 'tstart_2']]
    bad_acqs.rename_column('tstart_1', 'attempts')
    bad_acqs.rename_column('tstart_2', 'failures')

    # Read in guide statistics from a pkl if present or get from the
    # database
    gui = _load_or_fetch(
        'gui.pkl', "select * from trak_stats_data where type != 'FID'")

    # The definition of a "bad" guide star is different in the sausage and
    # starcheck lists, so pass the raw guide list to each
    sausage_list(gui, bad_acqs)
    starcheck_gui_list(gui)
    starcheck_acq_list(bad_acqs)
def function10():
    """
    Plot the seeing measured from the nebulae against the one measured
    from the stars.
    """
    pn_table = ascii.read('/media/congiu/Data/Dati/PHANGS/PN_selection/'
                          + 'psf_planetary.txt')
    o3_table = ascii.read(
        '/media/congiu/Data/Dati/PHANGS/star_fit/reliable_pointing_o3.txt')
    merged = join(pn_table, o3_table, keys='pointing')

    # After the join, "_1" columns come from the PN table and "_2" columns
    # from the [OIII] star-fit table.
    star_fwhm = merged['fwhm_2']
    pn_fwhm = merged['fwhm_1']
    pn_err = merged['fwhm_err_1']

    lsq = fitting.LinearLSQFitter()
    line = models.Linear1D(1, 1)

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    for row in merged:
        ax.errorbar(row['fwhm_2'], row['fwhm_1'], yerr=row['fwhm_err_1'],
                    label=row['pointing'], ls='', marker='o')

    fit = lsq(line, star_fwhm, pn_fwhm, weights=1 / pn_err)

    # NOTE(review): the normalisation uses the number of lines produced by
    # Column.pformat() (header lines included), not the number of rows --
    # confirm this is intended.
    n_lines = len(star_fwhm.pformat())
    print(n_lines)
    predicted = fit(star_fwhm)
    chi2 = np.sum((pn_fwhm - predicted)**2 / predicted) / n_lines

    plt.plot(star_fwhm, predicted, label='fit')
    ax.set_xlabel('FWHM at 5007A')
    ax.set_ylabel('PN FWHM')
    ax.set_xlim([0.5, 1.2])
    ax.set_ylim([0.5, 1.2])
    ax.plot([0.4, 1.2], [0.4, 1.2], label='bisector')

    # Shrink the axes to leave room for the legend on the right.
    frame = ax.get_position()
    ax.set_position([frame.x0, frame.y0, frame.width * 0.8, frame.height])
    plt.plot([], [], ls='', label='chi2 = {:0.2f}'.format(chi2))
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_title('PN FWHM (exponnorm)')
    plt.show()
def merge_photom_tables(new_tbl, old_file, tol=1 * units.arcsec, debug=False):
    """
    Merge photometry tables

    Sources in ``new_tbl`` are sky-matched to the old table.  If every new
    source has a match within ``tol``, the new rows take the RA/DEC of their
    matched old rows (``new_tbl`` is modified in place) and the tables are
    left-joined.  If none match, the tables are stacked.  Masked entries in
    the result are filled with -999.

    Args:
        new_tbl (astropy.table.Table):
            New table of photometry
        old_file (str or Table):
            Path to the old table, or the old table itself
        tol (Quantity, optional):
            Maximum on-sky separation for two entries to be the same source
        debug (bool, optional):
            Unused in this function

    Returns:
        astropy.table.Table:
            Merged tables (``new_tbl`` itself if ``old_file`` is a path that
            does not exist)

    Raises:
        TypeError: if ``old_file`` is neither a str nor a Table
        ValueError: if only some of the new sources match the old table
    """
    fill_value = -999.
    # File or tbl?
    if isinstance(old_file, str):
        # New file?
        if not os.path.isfile(old_file):
            return new_tbl
        # Load me
        old_tbl = Table.read(old_file, format=table_format)
    elif isinstance(old_file, Table):
        old_tbl = old_file
    else:
        embed(header='42 of photom')
        # Without this the code below fails with an opaque NameError.
        raise TypeError("old_file must be a filename or an astropy Table")
    # Coords
    new_coords = SkyCoord(ra=new_tbl['ra'], dec=new_tbl['dec'], unit='deg')
    old_coords = SkyCoord(ra=old_tbl['ra'], dec=old_tbl['dec'], unit='deg')
    idx, d2d, _ = match_coordinates_sky(new_coords, old_coords, nthneighbor=1)
    match = d2d < tol

    # Match?
    n_match = np.sum(match)
    if n_match == len(new_coords):
        # Insist on the same RA, DEC; each new row takes the coordinates of
        # its own matched old row (not those of the first match only).
        new_tbl['ra'] = old_tbl['ra'][idx]
        new_tbl['dec'] = old_tbl['dec'][idx]
        # Join
        merge_tbl = join(old_tbl.filled(fill_value), new_tbl,
                         join_type='left').filled(fill_value)
    elif n_match == 0:
        merge_tbl = vstack([old_tbl, new_tbl]).filled(fill_value)
    else:
        embed(header='50 of photom')  # Best to avoid!!  Use photom_by_name
        # Without this the return below fails with UnboundLocalError.
        raise ValueError("Only some of the new sources match the old table")
    # Return
    return merge_tbl
def match_to_master_catalog(master_cat, sci_cat, max_distance=.05):
    """ Matches a single image extension catalog to the master catalog so
    that each row of the returned catalog is the same star

    This function takes in one final catalog and one catalog of a single
    science extension (sci_cat) and matches sources in them.  This is done by
    finding the closest point in master_cat (in sky space) to each point in
    sci_cat.  Points farther apart than max_distance are not considered to be
    the same source.  The returned table is the left join of master_cat with
    the matched rows of sci_cat.

    Note that an ``id`` column (the row number) is added to ``master_cat``
    in place; ``sci_cat`` is copied and left untouched.

    Parameters
    ----------
    master_cat : astropy.table.Table or str
        The master table (or filename) to be matched to
    sci_cat : astropy.table.Table or str
        The single science extension table (or filename) to be matched
    max_distance : float, optional
        The threshold (in arcsec) which distances must be in for
        sources to be considered a match.

    Returns
    -------
    joined : astropy.table.Table
        The matched (left-joined) table.
    """
    if isinstance(master_cat, str):
        master_cat = Table.read(master_cat, format='ascii.commented_header')
    if isinstance(sci_cat, str):
        sci_cat = Table.read(sci_cat, format='ascii.commented_header')

    master_skycoord = SkyCoord(master_cat['rbar']*u.deg,
                               master_cat['dbar']*u.deg)
    sci_skycoord = SkyCoord(sci_cat['r']*u.deg, sci_cat['d']*u.deg)

    idx, ang, _ = sci_skycoord.match_to_catalog_sky(master_skycoord)

    master_cat['id'] = range(len(master_cat))
    # Honour the caller's threshold (previously hard-coded to 0.05 arcsec);
    # -1 never equals a master id, so those rows drop out of the left join.
    idx[ang.arcsec > max_distance] = -1

    sci_copy = copy(sci_cat)
    sci_copy['id'] = idx

    joined = join(master_cat, sci_copy, keys='id', join_type='left')
    return joined
def data(**kwargs):
    """Load the abstracts and split them into talks, posters and the rest.

    Reads ``kwargs['abstracts']`` (optionally joined with the table in
    ``kwargs['registered_abstracts']``), writes the JSON abstract list via
    ``write_json_abstracts`` and returns a dict with the keys ``talks``,
    ``posters`` and ``unassigned``.  Entries without a valid type are only
    returned when ``kwargs['output_unassigned']`` is true.
    """
    if kwargs['abstracts'] is None:
        write_json_abstracts([])
        return {'talks': [], 'posters': [], 'unassigned': []}

    abstr = read_abstracts_table(kwargs['abstracts'], **kwargs)
    abstr['Email Address'] = [addr.lower() for addr in abstr['Email Address']]

    if kwargs['registered_abstracts']:
        registered = Table.read(kwargs['registered_abstracts'],
                                format='ascii.csv')
        abstr = join(abstr, registered)

    kind = abstr['type']
    is_talk = (kind == 'invited') | (kind == 'contributed')
    is_poster = kind == 'poster'

    talks = abstr[is_talk]
    talks.sort(['binary_time', 'type', 'Select a major science topic'])

    posters = abstr[is_poster]
    posters['intnumber'] = [int(num) for num in posters['poster number']]
    posters.sort(['intnumber', 'Authors'])

    # List all entries that do not have a valid type
    notype = abstr[~(is_talk | is_poster)]
    if len(notype) > 0:
        print('The following entries do not have a valid "type" entry, which would classify them as talk or poster:')
        for entry in notype:
            print(entry['Timestamp'], entry['type'], entry['Title'])

    keep_unassigned = kwargs['output_unassigned']
    if not keep_unassigned:
        abstr = abstr[abstr['type'] != '']

    abstr.sort(['binary_time', 'poster number'])
    abstr['index'] = np.arange(1.0 * len(abstr))
    write_json_abstracts(abstr)

    notype.sort(['Select a major science topic', 'Authors'])
    return {'talks': talks,
            'posters': posters,
            'unassigned': notype if keep_unassigned else []}
def _complete(self, right, key=None, join_type='left', add_col=True,
              verbose=True, debug=False):
    """Join ``self`` with ``right`` and fill masked values from ``right``.

    For every column present in both tables (other than ``key``), values
    that are masked in ``self`` but available in ``right`` are copied over,
    and only one column with the original name is kept.  When ``add_col``
    is false, the columns that exist only in ``right`` are removed from the
    result.

    Raises ``ValueError`` if ``key`` is empty.
    """
    if not key:
        raise ValueError('key is empty')

    # Shared columns, excluding the join key
    shared = intersection(self.keys(), right.keys())
    shared.remove(key)

    merged = join(self, right, join_type=join_type, keys=key)

    for name in shared:
        # TODO: eventually add a condition to check units
        # join() suffixes clashing column names with _1 (self) / _2 (right)
        mine = name + '_1'
        theirs = name + '_2'

        # Rows masked in "self" but not in "right"
        fillable = merged[mine].mask & ~merged[theirs].mask

        merged[mine].unshare_mask()
        merged[mine][fillable] = merged[theirs][fillable]

        # Keep a single column under the original name
        merged[mine].name = name
        del merged[theirs]

    # Columns that only exist in "right"
    extra = difference(right.keys(), self.keys())
    if debug:
        print(extra)

    if not add_col:
        if extra:
            if verbose:
                print('remove non shared columns from second table')
            merged.remove_columns(extra)

    return merged
def create_zoospec_catalog():
    """Join the mass catalog with selected columns of the ``fnzGZ`` FITS file.

    The columns ``OBJID``, ``BPT_CLASS``, ``LOG_MSTELLAR`` and ``L_O3`` are
    taken from the first row of the table in HDU 1 and joined to the mass
    catalog on the object id.

    Returns
    -------
    astropy.table.Table
        The joined table, with the id column named ``objID``.
    """
    mass_table = create_mass_catalog()
    mass_table.rename_column('objID', 'OBJID')

    # np.array() copies the data, so the file can be closed afterwards.
    with aio.fits.open(fnzGZ) as hdulist:
        Z_table = hdulist[1].data
        # OBJID is string must convert to ints
        ids = np.array(Z_table['OBJID'][0], dtype='int')
        print(len(ids))
        bptclass = np.array(Z_table['BPT_CLASS'][0])
        logmstellar = np.array(Z_table['LOG_MSTELLAR'][0])
        LO3 = np.array(Z_table['L_O3'][0])

    tmpT = Table([ids, bptclass, logmstellar, LO3],
                 names=['OBJID', 'BPT_CLASS', 'LOG_MSTELLAR', 'L_O3'])
    shared = ['OBJID']
    t = join(mass_table, tmpT, keys=shared)
    t.rename_column('OBJID', 'objID')
    return t
def __init__(self, input_url=None, h5file=None, **kwargs):
    """Open (or adopt) the HDF5 file and load the instrument description.

    Either ``input_url`` or an already opened ``tables.File`` (``h5file``)
    must be given.  A file opened here is marked to be closed by this
    object; a file passed in by the caller is not.

    Raises ``ValueError`` if neither source is given and ``TypeError`` if
    ``h5file`` is not a ``tables.File``.
    """
    # set before the parent constructor runs
    self._should_close = False

    # enable using input_url as posarg
    if input_url not in {None, traits.Undefined}:
        kwargs["input_url"] = input_url

    super().__init__(**kwargs)

    if h5file is None and self.input_url is None:
        raise ValueError("Need to specify either input_url or h5file")

    self._should_close = False
    if h5file is not None:
        if not isinstance(h5file, tables.File):
            raise TypeError("h5file must be a tables.File")
        self.input_url = Path(h5file.filename)
        self.h5file = h5file
    else:
        self.h5file = tables.open_file(self.input_url, mode="r")
        self._should_close = True

    self.subarray = SubarrayDescription.from_hdf(self.h5file)
    Provenance().add_input_file(self.input_url, role="Event data")

    try:
        self.structure = _get_structure(self.h5file)
    except ValueError:
        self.structure = None

    self.instrument_table = None
    if self.load_instrument:
        telescopes = self.subarray.to_table()
        optics = self.subarray.to_table(kind="optics")
        optics["optics_index"] = np.arange(len(optics))
        optics.remove_columns(["name", "description", "type"])
        instrument = join(
            telescopes,
            optics,
            keys="optics_index",
            # conflicts for TAB_VER, TAB_TYPE, not needed here, ignore
            metadata_conflicts="silent",
        )
        instrument.remove_columns(["optics_index", "camera_index"])
        self.instrument_table = instrument
def lines_match(sv_gals, release='everest', survey='sv3'):
    """Left-join emission-line fluxes onto a copy of ``sv_gals``.

    The ``*_FLUX`` columns of the fastspec table returned by ``get_lines``
    are matched to the input rows on ``TARGETID``.
    """
    # Only the fastspec table is needed from the four returned values.
    _, _, fastspec, _ = get_lines(release=release, survey=survey)

    galaxies = atable.Table(sv_gals, copy=True)

    line_names = ('OII_3726', 'OII_3729', 'HBETA', 'OIII_4363',
                  'OIII_4959', 'HALPHA', 'SII_6731', 'SII_6716')
    wanted = [name + '_FLUX' for name in line_names]
    wanted.append('TARGETID')

    return atable.join(galaxies,
                       fastspec[wanted],
                       keys=['TARGETID'],
                       join_type='left')
def _add_srcids(self, match):
    """Add a ``SRCID_<catalogue>`` column for every catalogue.

    ``match`` (a pandas DataFrame) holds, per catalogue name, row indices
    into that catalogue; each index column is replaced by the matching
    source ids.  The original row order of ``match`` is preserved.
    """
    match = Table.from_pandas(match)
    # Remember the input order, since join() re-sorts the rows.
    match['IDX'] = range(len(match))

    for cat in reversed([self.pcat] + list(self.scats)):
        index_col = cat.name
        lookup = Table()
        lookup['SRCID_' + index_col] = cat.ids.data
        lookup[index_col] = range(len(cat))

        match = join(lookup, match, keys=index_col, join_type='right')
        match.remove_column(index_col)

    # Recover original sorting
    match.sort('IDX')
    match.remove_column('IDX')
    return match
def identify_equa_componiens(sources, compare_catalog, x_col, y_col):
    """Pair sources with catalogue entries by clustering their positions.

    The ``ra``/``dec`` of ``sources`` and the ``x_col``/``y_col`` of
    ``compare_catalog`` are clustered together with DBSCAN; both tables are
    then passed through ``exclude_sources`` with their cluster labels and
    joined on ``label``.
    """
    n_src = len(sources)
    positions = np.zeros((n_src + len(compare_catalog), 2))
    coordinate_pairs = (('ra', x_col), ('dec', y_col))
    for axis, (src_name, cat_name) in enumerate(coordinate_pairs):
        positions[:n_src, axis] = sources[src_name]
        positions[n_src:, axis] = compare_catalog[cat_name]

    clustering = DBSCAN(eps=2. / 3600, min_samples=2)
    clustering.fit(positions)
    labels = clustering.labels_

    sources = exclude_sources(sources, labels[:n_src])
    compare_catalog = exclude_sources(compare_catalog, labels[n_src:])

    catalog_part = compare_catalog[[x_col, y_col, 'label']]
    source_part = sources[['ra', 'dec', 'xcentroid', 'ycentroid', 'label']]
    return join(source_part, catalog_part, keys='label')
def match(self, n1, n2):
    """Inner-join catalogs ``cat<n1>`` and ``cat<n2>`` of this object.

    An upper-case ``COADD_OBJECTS_ID`` column is renamed (in place) to
    ``coadd_objects_id``; each catalog is then reduced to unique object ids
    and grouped by id before the join.

    Parameters
    ----------
    n1, n2 : int
        Numbers of the catalogs, looked up as attributes ``cat%d``.

    Returns
    -------
    Table
        The inner join of the two de-duplicated catalogs.
    """
    print("Selecting unique objects")
    cat1 = getattr(self, "cat%d" % n1)
    cat2 = getattr(self, "cat%d" % n2)

    # The original checked for / renamed a misspelled column name.
    for cat in (cat1, cat2):
        if 'COADD_OBJECTS_ID' in cat.columns:
            cat.rename_column('COADD_OBJECTS_ID', 'coadd_objects_id')

    c1 = tb.unique(cat1, keys='coadd_objects_id')
    c2 = tb.unique(cat2, keys='coadd_objects_id')

    print("Sorting")
    c1 = c1.group_by("coadd_objects_id")
    c2 = c2.group_by("coadd_objects_id")

    # Join the de-duplicated tables (previously referenced undefined names).
    tab = tb.join(c1, c2, join_type='inner')
    return tab
def find_in_tess(catalog, raname='ra', decname='dec', namename='Name',
                 verbose=True):
    """Return the rows of ``catalog`` with TESS observations at their position.

    For every row, ``Observations.query_region`` is called with a 10 arcsec
    radius around the row's coordinates; the row is kept if any returned
    observation has ``obs_collection == 'TESS'``.

    Parameters
    ----------
    catalog : table
        Input catalog.
    raname, decname : str
        Names of the coordinate columns.
    namename : str
        Name of the column holding the object names.
    verbose : bool
        Print the name of every object found.

    Returns
    -------
    table
        Join (on ``namename``) of the found names with ``catalog``.
    """
    names = []
    # Use the configured name column (it was hard-coded to 'Name' before).
    for row in catalog:
        name = row[namename]
        ra, dec = row[raname], row[decname]
        obsTable = Observations.query_region("%s %s " % (ra, dec),
                                             radius=10 * u.arcsec)
        if np.any(obsTable['obs_collection'] == 'TESS'):
            names.append(name)
            if verbose:
                print(name)
    in_tess = join(Table({namename: np.array(names)}), catalog,
                   keys=namename)
    return in_tess
def cross_catalogs(name_field):
    """Left-join the ``try_4`` results of a field with its NN catalog.

    Parameters
    ----------
    name_field : str
        Field name used to build both FITS file names.

    Returns
    -------
    Table or str
        The joined table sorted by ``Isl_id``, or a message string when the
        results file cannot be opened.
    """
    prefix = '/data1/osinga/data/NN/try_4/'
    name = name_field + '_NN.fits'
    try:
        # Table() copies the data, so the file can be closed right away.
        with fits.open(prefix + name) as hdul:
            results1 = Table(hdul[1].data)
    except IOError:
        return 'No sources in this catalog: ' + name_field

    with fits.open('/data1/osinga/data/NN/' + name_field
                   + 'NearestNeighbours_efficient_spherical2.fits') as hdul:
        catalog1 = Table(hdul[1].data)

    a = join(results1, catalog1, join_type='left')
    a.sort('Isl_id')
    return a
def join_tables(left, right, key_left, key_right, cols_right=None):
    """Perform a left join of two tables.

    Masked entries of the result (rows of ``left`` without a match) are
    filled with ``''`` for string columns, ``0`` for integer columns and
    NaN otherwise.

    Parameters
    ----------
    left : `~astropy.Table`
        Left table for join.

    right : `~astropy.Table`
        Right table for join.  It is copied, not modified.

    key_left : str
        Key used to match elements from ``left`` table.

    key_right : str
        Key used to match elements from ``right`` table.

    cols_right : list
        Subset of columns from ``right`` table that will be appended
        to joined table.  Names that are not columns of ``right`` are
        ignored.  By default all columns are used.

    Returns
    -------
    `~astropy.Table`
        The joined table with masked values filled.
    """
    right = right.copy()

    # Rename the key first so that the column selection below never refers
    # to the old key name (which made cols_right=None fail with a KeyError
    # whenever the two key names differed).
    if key_left != key_right:
        right[key_right].name = key_left

    if cols_right is None:
        cols_right = right.colnames
    else:
        cols_right = [c for c in cols_right if c in right.colnames]

    if key_left not in cols_right:
        cols_right.append(key_left)

    out = join(left, right[cols_right], keys=key_left,
               join_type='left')

    for col in out.colnames:
        kind = out[col].dtype.kind
        if kind in ['S', 'U']:
            out[col].fill_value = ''
        elif kind in ['i', 'u']:
            # NaN cannot be used as a fill value for integer columns.
            out[col].fill_value = 0
        else:
            out[col].fill_value = np.nan

    return out.filled()
def merge_gaia(tbl: Table) -> Table:
    """Merge Gaia data into the rows of ``tbl`` that carry a Gaia id.

    Rows without a Gaia id are passed through unchanged.  For the others,
    position and distance are taken from Gaia where available and from the
    input table otherwise, while the flux prefers the input table.
    """
    def coalesce(preferred, fallback):
        # Use ``fallback`` wherever ``preferred`` is masked.
        return np.where(preferred.mask, fallback, preferred)

    with gzip.open(GAIA_PATH, 'rb') as f:
        gaia = votable.parse_single_table(f).to_table()

    g_mag = gaia['phot_g_mean_mag']
    bp_rp = gaia['bp_rp'].filled(0)
    bp_rp2 = bp_rp * bp_rp
    flux = g_mag.filled(np.nan) + 0.01760 + bp_rp * 0.006860 + bp_rp2 * 0.1732
    gaia.add_column(MaskedColumn(data=flux, name='flux', mask=g_mag.mask))

    gaia.remove_columns(['phot_g_mean_mag', 'bp_rp'])
    gaia.rename_column('source_id', 'gaia')
    gaia.rename_column('r_est', 'dist')

    without_id = tbl['gaia'].mask
    merged = join(tbl[np.logical_not(without_id)],
                  gaia,
                  keys=['gaia'],
                  join_type='left',
                  table_names=['cel', 'gaia'])

    merged['ra'] = coalesce(merged['ra_gaia'], merged['ra_cel'])
    merged['dec'] = coalesce(merged['dec_gaia'], merged['dec_cel'])

    dist = MaskedColumn(
        data=coalesce(merged['dist_gaia'], merged['dist_cel']),
        name='dist',
        mask=np.logical_and(merged['dist_gaia'].mask, merged['dist_cel'].mask))
    flux = MaskedColumn(
        data=coalesce(merged['flux_cel'], merged['flux_gaia']),
        name='flux',
        mask=np.logical_and(merged['flux_cel'].mask, merged['flux_gaia'].mask))
    merged.add_columns([dist, flux])

    # Drop the per-source columns now that they have been combined.
    merged.remove_columns([quantity + '_' + origin
                           for quantity in ('ra', 'dec', 'dist', 'flux')
                           for origin in ('cel', 'gaia')])

    return vstack([tbl[tbl['gaia'].mask], merged], join_type='exact')
def Q4minusQ1(q4File, q1File):
    """Write a CSV with the per-shRNA difference of ``nbPctg`` (Q4 - Q1).

    The output file is named ``<group>_Q4minusQ1.csv``, where ``<group>`` is
    the part of the Q4 file name before the first underscore.
    """
    group = q4File.split("/")[-1].split("_")[0]
    outName = group + "_Q4minusQ1.csv"

    q4Tab = ascii.read(q4File)
    q1Tab = ascii.read(q1File)

    # Keep only the columns needed and give nbPctg a per-quadrant name.
    for table, new_name in ((q4Tab, "nbPctg_Q4"), (q1Tab, "nbPctg_Q1")):
        del table['count']
        del table['adjPctg']
        table["nbPctg"].name = new_name

    allTab = join(q4Tab, q1Tab, join_type="inner", keys="shRNA")
    allTab["q4minusq1_nbPctg"] = [
        q4 - q1 for q4, q1 in zip(allTab["nbPctg_Q4"], allTab["nbPctg_Q1"])
    ]

    ascii.write(allTab, outName, format="csv", overwrite=True)
def process_xhip() -> Table:
    """Load XHIP, merge in the alternative spectral types and clean up.

    Where available, the ``SpType1`` value from the second table replaces
    the XHIP ``SpType``.  Distances and coordinates are then updated and the
    no longer needed source columns are dropped.
    """
    catalogue = join(load_xhip(),
                     load_tyc2specnew(),
                     keys=['HIP'],
                     join_type='left',
                     metadata_conflicts='silent')

    # Prefer SpType1, falling back to the XHIP spectral type.
    catalogue['SpType'] = catalogue['SpType1'].filled(catalogue['SpType'])
    catalogue.remove_column('SpType1')

    compute_distances(catalogue)
    update_coordinates(catalogue)

    obsolete = ['RAdeg', 'DEdeg', 'pmRA', 'pmDE', 'RV', 'Dist', 'e_Dist']
    catalogue.remove_columns(obsolete)
    return catalogue
def read_files(filter):
    """Read the SExtractor catalog and PSFEx star list and cross-match them.

    Both tables are taken from HDU 2 of their FITS files.  The ``X_IMAGE``
    and ``Y_IMAGE`` columns of both tables are truncated to integers (in the
    returned tables as well) and used as keys of an inner join.

    Parameters
    ----------
    filter : str
        Filter name inserted into the two file names.

    Returns
    -------
    tuple
        ``(merged, sex, star)``: the joined table and the two input tables.
    """
    #sex_file="/global/cscratch1/sd/amichoi/UltraVISTA/cat/UVISTA_%s_21_01_16_psfcat.fits" % (filter)
    sex_file = "/global/cscratch1/sd/amichoi/VIDEO/cat/VIDEO_%s_6_52.80_-27.71_psfcat.fits" % (
        filter)
    # Table() copies the data, so the file can be closed right away.
    with fits.open(sex_file) as dat:
        sex = Table(dat[2].data)
    print("Length of sex file: ", len(sex))

    #read in list of stars made from Sextractor and PSFEx
    #star_file="/global/homes/a/aamon/DES/DEStests/DEEP/deeppsfs/UltraVista/UVISTA_%s_21_01_16_psfex-starlist.fits" % (filter)
    star_file = "/global/cscratch1/sd/amichoi/VIDEO/psf/VIDEO_%s_6_52.80_-27.71_psfex-starlist.fits" % (
        filter)
    with fits.open(star_file) as dat:
        star = Table(dat[2].data)
    print("Length of star file: ", len(star))
    print(sex['X_IMAGE'], star['X_IMAGE'])

    #sex.rename_column('XWIN_IMAGE', 'X') #USE FOR ULTRAVISTA
    #star.rename_column('X_IMAGE', 'X')
    #sexstarmerge = join(sex, star, keys=['X'], join_type='inner')

    # Match on integer pixel positions.
    # NOTE(review): all four key columns are cast here so that both tables
    # use the same integer keys -- confirm the sex X_IMAGE cast was active.
    for table in (sex, star):
        for key in ('X_IMAGE', 'Y_IMAGE'):
            table[key] = table[key].astype(int)

    sexstarmerge = join(sex,
                        star,
                        keys=['X_IMAGE', 'Y_IMAGE'],
                        join_type='inner')
    print("length of merged cat: ", len(sexstarmerge))

    return sexstarmerge, sex, star
def steal_columns(self, tab, colnames=['z'], keys=['ra', 'dec']):
    """
    steal columns (colnames) from a table (tab) and add them to list
    (including list_good, list_except).  The table has to have identical
    objects and keys (ra, dec) as the list in batch.

    One needs to run compile_table to update compiled tables.

    Params
    ------
    self
    tab (astropy table):
        a table with identical objects and keys (ra, dec) as the list
    colnames=['z']:
        a list of columns names to steal (string)
    keys=['ra', 'dec']:
        a list of the keys used for matching.

    Raises
    ------
    Exception
        if one of ``colnames`` already exists in the list.

    Write output
    ------------
    rewrites list, list_good, and list_except, and update them as self.*.
    """
    for col in colnames:
        if col in self.list.colnames:
            raise Exception(
                "[batch] column to steal already exists in list")

    keys = list(keys)
    colnames = list(colnames)
    tab_trim = tab[keys + colnames]
    order_col = '_steal_columns_row'

    for lst in [self.list, self.list_good, self.list_except]:
        if len(lst) > 0:
            # join() returns rows sorted by the keys, so remember the row
            # order of the list and restore it before copying the columns;
            # otherwise values end up on the wrong rows whenever the list
            # is not already sorted by the keys.
            probe = lst[keys]
            probe[order_col] = list(range(len(lst)))
            lst_joined = at.join(probe, tab_trim, keys=keys, join_type='left')
            lst_joined.sort(order_col)
            columns_toadd = [lst_joined[cn] for cn in colnames]
            lst.add_columns(columns_toadd)
        else:
            # Empty list: add empty columns of the matching type.
            columns_toadd = [tab_trim[cn] for cn in colnames]
            lst.add_columns([
                at.Column(name=c.name, dtype=c.dtype, meta=c.meta)
                for c in columns_toadd
            ])

    self._write_all_lists()
def print_numobs_stats(truth, targets, zcat):
    """Print the target-class distribution for 1 to 4 observations.

    Columns of ``targets`` missing from ``truth`` are added to ``truth``
    (in place) before it is joined with ``zcat`` on ``TARGETID``.
    """
    print('Target distributions')

    #- truth and targets are row-matched, so directly add columns instead of join
    missing = [name for name in targets.colnames
               if name not in truth.colnames]
    for name in missing:
        truth[name] = targets[name]

    xcat = join(zcat, truth, keys='TARGETID')

    for times_observed in range(1, 5):
        print('\t Fraction (number) with exactly {} observations'.format(times_observed))
        selected = xcat['NUMOBS'] == times_observed
        counts = Counter(xcat['LVM_TARGET'][selected])

        total = np.sum(list(counts.values()))

        for target_bits, number in counts.items():
            print("\t\t {}: {} ({} total)".format(desi_mask.names(target_bits), number/total, number))
    return
def combine(sources, v):
    """Pair rows of ``v`` and ``sources`` that fall in the same cluster.

    The ``ra``/``dec`` of ``v`` and the ``xcentroid``/``ycentroid`` of
    ``sources`` are clustered together with DBSCAN.  A ``label`` column is
    added to both input tables (in place), and the clustered rows
    (label >= 0) are joined on it.
    """
    n_ref = len(v)
    points = np.zeros((len(sources) + n_ref, 2))
    points[:n_ref, 0] = v['ra']
    points[:n_ref, 1] = v['dec']
    points[n_ref:, 0] = sources['xcentroid']
    points[n_ref:, 1] = sources['ycentroid']

    clustering = DBSCAN(eps=10, min_samples=2)
    clustering.fit(points)

    v['label'] = clustering.labels_[:n_ref]
    sources['label'] = clustering.labels_[n_ref:]

    # DBSCAN marks unclustered points with a negative label.
    ref_rows = np.where(v['label'] >= 0)[0]
    src_rows = np.where(sources['label'] >= 0)[0]

    ref_part = v[['ra', 'dec', 'RAJ2000', 'DEJ2000', 'label']][ref_rows]
    src_part = sources[['xcentroid', 'ycentroid', 'label']][src_rows]
    return join(ref_part, src_part, keys='label')
def combine_w_apass(sources, apass):
    """Pair sources with APASS entries that fall in the same cluster.

    Source ``ra``/``dec`` and APASS ``RAJ2000``/``DEJ2000`` are clustered
    together with DBSCAN.  A ``label`` column is added to both input tables
    (in place), and the clustered rows (label >= 0) are joined on it.
    """
    n_src = len(sources)
    points = np.zeros((n_src + len(apass), 2))
    points[:n_src, 0] = sources['ra']
    points[:n_src, 1] = sources['dec']
    points[n_src:, 0] = apass['RAJ2000']
    points[n_src:, 1] = apass['DEJ2000']

    clustering = DBSCAN(eps=5./3600, min_samples=2)
    clustering.fit(points)

    sources['label'] = clustering.labels_[:n_src]
    apass['label'] = clustering.labels_[n_src:]

    # DBSCAN marks unclustered points with a negative label.
    clustered_sources = sources[np.where(sources['label'] >= 0)[0]]
    clustered_apass = apass[np.where(apass['label'] >= 0)[0]]

    return join(clustered_sources, clustered_apass, keys='label')
def reduce_data(date, tree='gama', subjectset='gama09',
                survey_id_field='provided_image_id',
                subjectcat='galaxy_zoo_subjects_lee.csv.gz',
                subjectstub=None):
    """Do everything to produce reduced table of ids and vote fractions"""
    questions, answers = parse_tree(tree)

    filename = '{}_galaxy_zoo_{}_classifications.csv'.format(date, subjectset)
    classifications = Table.read(filename, fast_reader=False)

    votes = collate_classifications(classifications, tree, questions, answers)
    votes = recalculate_odd_total(votes)
    votes = calculate_fractions(votes, questions, answers)

    # The subject stub defaults to the subject set name.
    stub = subjectset if subjectstub is None else subjectstub
    subjects = read_subjects(subjectcat, stub, survey_id_field)

    return join(votes, subjects, 'subject_id')
def get_amp_flag(self):
    """
    Generate amp flag for each fiber in the survey

    The fiber table is left-joined with the bad-amplifier table
    (``config.badamp``) on ``shotid`` and ``multiframe``.  A ``row_index``
    column is added to ``self.fiber_table`` so the original row order can
    be restored after the join.

    Parameters
    ----------
    FiberIndex Class

    Returns
    -------
    amp_flag: numpy.ndarray of bool
        The ``flag`` value of the bad-amplifier table for each fiber, in
        the row order of ``self.fiber_table``
    """
    print("Adding amplifier flags")
    t0 = time.time()
    badamps = Table.read(config.badamp)

    self.fiber_table["row_index"] = np.arange(0, len(self.fiber_table))

    join_tab = join(self.fiber_table,
                    badamps,
                    keys=["shotid", "multiframe"],
                    join_type="left")
    join_tab.rename_column("flag", "amp_flag")

    t1 = time.time()

    # join() re-sorts by key; go back to the original row order
    join_tab.sort("row_index")

    # quick check to make sure columns match
    # randint excludes ``high``, so every index is a valid row number
    # (the deprecated random_integers included it).
    n_rows = len(self.hdfile.root.FiberIndex)
    for idx in np.random.randint(0, high=n_rows, size=5000):
        if self.fiber_table["fiber_id"][idx] != join_tab["fiber_id"][idx]:
            print("Something went wrong. fiber_id columns don't match")

    print("Done adding amplifier flags in {:4.3} minutes".format(
        (t1 - t0) / 60))

    return np.array(join_tab["amp_flag"], dtype=bool)
def test_batch_steal_columns_good(batch_good):
    """
    Check that steal_columns adds the column with the right content to the
    lists and their files, and that remove_columns takes it away again.
    """
    batch = batch_good
    stolen = 'patch_id'

    def current_lists():
        # Looked up on every call, after the batch has been modified.
        return [batch.list, batch.list_good, batch.list_except]

    def list_files():
        return [batch.fp_list, batch.fp_list_good, batch.fp_list_except]

    catalog = at.Table.read(fn_cat)
    catalog.rename_column('RA', 'ra')
    catalog.rename_column('DEC', 'dec')
    # Reverse the catalog so its row order differs from the lists.
    catalog = catalog[::-1]
    assert stolen in catalog.colnames

    batch.steal_columns(tab=catalog, colnames=[stolen])

    for lst in current_lists():
        assert stolen in lst.colnames
        if len(lst) > 0:
            check = at.join(lst, catalog, keys=['ra', 'dec'],
                            join_type='left')
            assert all(check[stolen + '_1'] == check[stolen + '_2'])

    for path in list_files():
        assert stolen in at.Table.read(path).colnames

    batch.remove_columns(colnames=[stolen])

    for lst in current_lists():
        assert stolen not in lst.colnames

    for path in list_files():
        assert stolen not in at.Table.read(path).colnames
def function1():
    """
    Compare the measured FWHM with one of the seeing measurements saved in
    the header.
    """
    header_info = ascii.read('./info_header')
    star_seeing = ascii.read(
        '/media/congiu/Data/Dati/PHANGS/star_fit/reliable_pointing_ha.txt')
    merged = join(header_info, star_seeing, keys='pointing')

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    for row in merged:
        ax.errorbar(row['seeing3'], row['fwhm'], row['fwhm_err'],
                    ls='', marker='o', label=row['pointing'])

    lsq = fitting.LinearLSQFitter()
    line = models.Linear1D(1, 0)
    # NOTE(review): the fit uses 'seeing2' while the plot and chi2 use
    # 'seeing3' -- confirm which header keyword is intended.
    fit = lsq(line, merged['seeing2'], merged['fwhm'])  # , weights = 1/merged['fwhm_err'])

    predicted = fit(merged['seeing3'])
    dof = len(merged['seeing2']) - 2
    chi2 = np.sum((merged['fwhm'] - predicted)**2 / predicted) / dof

    ax.set_xlim(0.6, 1.3)
    ax.set_ylim(0.6, 1.3)
    ax.plot(merged['seeing3'], predicted, label='fit')

    # Empty artists, used only to show the fit results in the legend.
    legend_notes = ('m = {:1.2f}'.format(fit.slope[0]),
                    'q = {:1.2f}'.format(fit.intercept[0]),
                    'chi2 = {:1.3f}'.format(chi2))
    for note in legend_notes:
        ax.plot([], [], ls='', label=note)

    plt.plot([0, 2], [0, 2], c='k', ls='--')
    ax.set_xlabel('Seeing from header (arcsec)')
    ax.set_ylabel('FWHM at 6500A')
    ax.set_title('FWHMLISOBS')
    plt.legend(loc='best')
    plt.show()
def checkarchive(logf, archivef, outfn=None):
    """Report archived 2017 files that are missing from the NERSC log.

    Parameters
    ----------
    logf : str
        Table file of the NERSC log; must be readable by ``Table.read`` and
        contain ``DTACQNAM``, ``utDir`` and ``frameIndex`` columns.
    archivef : str
        Text log of the archive; lines containing
        ``/data/primefocus/bass/<utDir>/<file>`` are used.
    outfn : str, optional
        File to which ``<utDir> <file>`` of each missing file is written.
        Defaults to standard output.
    """
    pat = re.compile(r'.* /data/primefocus/bass/(.*)\/(.*)\n')
    bassfiles = []
    with open(archivef) as basslog:
        for l in basslog:
            found = pat.match(l)
            if found is None:
                continue
            bassfiles.append(found.groups())
    _archive = Table(rows=bassfiles, names=('utDir', 'DTACQNAM'))
    archive = unique(_archive, keys='DTACQNAM')
    print('archive log returned %d unique files out of %d' %
          (len(archive), len(_archive)))

    _log = Table.read(logf)
    log = unique(_log, keys='DTACQNAM')
    print('NERSC log returned %d unique files out of %d' %
          (len(log), len(_log)))

    # join the tables based on the unique (?!) file handle
    t = join(archive, log, keys='DTACQNAM', join_type='outer')

    is2017 = [str(utd).startswith('2017') for utd in t['utDir_1']]
    initmask = np.array(is2017)
    # files in archive but not on NERSC
    not_on_nersc = t['frameIndex'].mask
    # summary info about missing files
    utds = np.unique(t['utDir_1'][initmask & not_on_nersc])
    utds = np.array(utds)

    outf = open(outfn, 'w') if outfn else sys.stdout
    try:
        for utd in utds:
            if 'bad' in utd:
                continue  # files moved manually
            ii = np.where(t['utDir_1'] == utd)[0]
            missing = not_on_nersc[ii]
            print('%s missing %d/%d files' % (utd, missing.sum(), len(ii)))
            for i in ii[missing]:
                outf.write('%s %s\n' % (t['utDir_1'][i], t['DTACQNAM'][i]))
    finally:
        # Only close files opened here, never sys.stdout.
        if outfn:
            outf.close()
def zcheck(line, sample, k):
    ''' Compare redshift estimates within each cluster and write a table.

    For each cluster label, a two-sample Kolmogorov-Smirnov test is run
    between Z_PCA and each of Z_MGII, Z_CIII and Z_CIV; the statistic and
    p-value of each test are written to "z_match.txt".

    param:
    line used for clustering: c3, c4, mg2
    sample: main, mixed, or bal
    k: number of clusters: 3,4,5,6
    '''
    if sample == "main":
        s = ""
    elif sample == "mixed":
        s = "mixed_"
    else:
        s = "bal_"

    # table with catalog data
    t = Table.read("sample_" + s + "myflags.fits")
    # table with clustering results from a combination of K and line
    c = Table.read("./clusters/" + line + "_" + str(k) + "clstrs_" + sample + ".fits")
    # join tables to have both clustering results and data from catalog
    tt = join(t, c, keys="SDSS_NAME")

    header = ["Clstr", "Num", "Z_MgII", "sig", "Z_CIII", "sig", "Z_CIV", "sig"]

    # 'wr' is not a valid mode on Python 3; the context manager also makes
    # sure the file is flushed and closed.
    with open("z_match.txt", 'w') as f:
        f.write('\t'.join(header) + '\n')

        for l in range(k):
            in_cluster = tt['label'] == l
            z_pca = tt['Z_PCA'][in_cluster]

            fields = [str(len(tt[in_cluster]))]
            for z_col in ('Z_MGII', 'Z_CIII', 'Z_CIV'):
                result = stats.ks_2samp(z_pca, tt[z_col][in_cluster])
                fields.append('{:5.3f}'.format(result[0]))
                fields.append('{:5.3f}'.format(result[1]))

            # Every field is preceded by a tab and '&'; the cluster column
            # itself is left empty, as before.
            f.write(''.join('\t' + '&' + field for field in fields) + '\n')
def process_data() -> Table:
    """Clean the merged data, estimate missing values and add positions.

    Rows without a usable distance and rows listed in ``EXCLUSIONS`` are
    dropped, spectra of stars with an unknown class are estimated, absolute
    magnitudes are computed and rotated cartesian coordinates (in light
    years) are added as ``x``, ``y`` and ``z``.
    """
    colour_cols = ['B-V', 'e_B-V', 'V-I', 'e_V-I', 'V-K', 'e_V-K',
                   'J-K', 'e_J-K', 'H-K', 'e_H-K']

    data = merge_all()
    data = data[np.logical_not(data['dist_use'].mask)]
    data = data[np.isin(data['HIP'], EXCLUSIONS, invert=True)]

    estimate_magnitudes(data)
    data = parse_spectra(data)

    # Estimate a spectral class for the stars that still have none.
    is_unknown = data['CelSpec'] == CEL_UNKNOWN_STAR
    unknown_spectra = data[is_unknown][['HIP', 'teff_val'] + colour_cols]
    unknown_spectra = estimate_spectra(unknown_spectra)

    data = join(data,
                unknown_spectra['HIP', 'CelSpec'],
                keys=['HIP'],
                join_type='left',
                table_names=['data', 'est'])
    data['CelSpec'] = np.where(data['CelSpec_data'] == CEL_UNKNOWN_STAR,
                               data['CelSpec_est'].filled(CEL_UNKNOWN_STAR),
                               data['CelSpec_data'])

    data.remove_columns(['phot_g_mean_mag', 'bp_rp', 'teff_val', 'SpType']
                        + colour_cols
                        + ['CelSpec_est', 'CelSpec_data'])

    data['Vmag_abs'] = data['Vmag'] - 5*(np.log10(data['dist_use'])-1)

    print('Converting coordinates to ecliptic frame')

    data['ra'].convert_unit_to(u.rad)
    data['dec'].convert_unit_to(u.rad)
    data['dist_use'].convert_unit_to(u.lyr)

    dist = data['dist_use']
    ra = data['ra']
    dec = data['dec']
    unrotated = np.array([dist*np.cos(ra)*np.cos(dec),
                          dist*np.sin(dec),
                          -dist*np.sin(ra)*np.cos(dec)])
    coords = np.matmul(ROT_MATRIX, unrotated)

    for axis, values in zip(('x', 'y', 'z'), coords):
        data[axis] = values
        data[axis].unit = u.lyr

    return data
def add_magnitude(lcdata, metadata):
    """
    **add_magnitude**

    Adds a ``magnitude`` column to the metadata, computed from the highest
    passband-3 flux of each object and its distance modulus.

    math:  m = -2.5 * log10(f) - d

    where:

        m = magnitude

        f = maximum flux of the object in passband 3, after shifting all
            fluxes so that the smallest flux in ``lcdata`` is 0

        d = distance modulus (``distmod``)

    Note that the ``flux`` column of ``lcdata`` is shifted in place.

    Parameters
    ----------
    lcdata : table
        Light-curve data with ``object_id``, ``passband`` and ``flux``.
    metadata : table
        Object metadata with ``object_id`` and ``distmod``.

    Returns
    -------
    table
        ``metadata`` joined with the per-object flux, plus ``magnitude``.

    @author: luisarribas
    """
    print("")
    print("COMPUTING MAGNITUDES")
    print("====================")
    print("")
    print("Calculating object´s magnitude as it´s actual")
    print("brightness from a standard distance of 10 pc")
    print("")

    # Normalize data over 0 by subtracting the min value
    minflux = np.min(lcdata['flux'])
    lcdata['flux'] = lcdata['flux'] - minflux

    d = lcdata
    d = d['object_id', 'passband', 'flux']
    d = d[d['passband'] == 3]
    d = d.group_by(['object_id', 'passband'])
    d = d['object_id', 'flux'].groups.aggregate(max)
    metadata = join(metadata, d, keys='object_id')

    # Vectorised, floating-point result.  The column used to be created
    # from the integer 0, which truncated every magnitude, and relied on
    # np.math, which no longer exists in recent NumPy versions.
    metadata['magnitude'] = (
        -2.5 * np.log10(metadata['flux']) - metadata['distmod'])

    print("")
    print("OK! ")
    print(" ")
    return metadata