def sift_catalogue(cat, tolerance, iterations):
    """Remove duplicate detections from a catalogue by matching it to itself.

    Each pass finds the nearest *other* source of every row and, for every
    pair closer than ``tolerance``, drops the member with the higher row
    index so that exactly one copy of the pair survives. Several passes are
    needed because a source can be duplicated more than once (e.g. in up to
    four overlapping images) while a pass only looks at one neighbour per row.

    NOTE(review): no check is made that the two members of a pair come from
    different mosaics; only the angular separation is tested.

    Parameters
    ----------
    cat : astropy.table.Table
        Catalogue with ``'RA'`` and ``'DEC'`` columns, interpreted as degrees.
        It is modified in place (rows are removed) and also returned.
    tolerance : float
        Maximum separation, in degrees, for two rows to count as duplicates.
    iterations : int
        Maximum number of sifting passes.

    Returns
    -------
    cat : astropy.table.Table
        The sifted catalogue (same object as the input).
    cat_deleted : astropy.table.Table
        Rows of the original catalogue that are no longer in ``cat``.
    """
    print(' There are initially {0} sources...'.format(len(cat)))
    cat_original = Table(cat)  # independent copy, used to report what was removed

    for i in range(iterations):
        n_init = len(cat)
        # A 2nd-nearest-neighbour self match needs at least two sources.
        if n_init < 2:
            break
        print(' Iteration {0}...'.format(i))

        # Generate list of coordinates from catalogue
        c = SkyCoord(cat['RA'], cat['DEC'], unit=(u.deg, u.deg))

        # nthneighbor=2 because the nearest neighbour of a source in its own
        # catalogue is the source itself.
        idx, sep2d, _ = c.match_to_catalog_sky(c, 2)

        # For a mutual pair (a, b) both rows point at each other; removing
        # every matched neighbour would delete both copies. Only act on the
        # row with the lower index and drop its higher-index neighbour.
        is_duplicate = sep2d < tolerance * u.deg
        drop_neighbour = is_duplicate & (idx > np.arange(n_init))
        cat.remove_rows(np.unique(idx[drop_neighbour]))

        n_removed = n_init - len(cat)
        print(
            ' Removed {0} sources this iteration, leaving {1} sources'.format(
                n_removed, len(cat)))
        # Nothing changed, so any further pass would give the same result.
        if n_removed == 0:
            break

    cat_deleted = setdiff(cat_original, cat)  # see deleted sources
    print(' Removed {0} sources in total, leaving {1} in the catalogue'.format(
        len(cat_deleted), len(cat)))
    return cat, cat_deleted
def ghostbuster(all_sources, clean_sources, **kwargs):
    """Pair every clean source with its ghost (star duplicated by a Savart plate).

    Candidate ghosts are the rows of ``all_sources`` whose
    ``(xcentroid, ycentroid)`` does not appear in ``clean_sources``. Each clean
    source is handed to ``ghostfinder`` together with those candidates; the
    extra keyword arguments are forwarded unchanged.

    Returns a table with one row per matched pair and the columns ``ID``,
    ``real_xcentroid``, ``real_ycentroid``, ``ghost_xcentroid`` and
    ``ghost_ycentroid``. ``ID`` counts the matched pairs from 0.
    """
    logging.info('Looking for ghosts started..')

    # Everything detected that did not make it into the clean list.
    ghost_sources = setdiff(all_sources,
                            clean_sources,
                            keys=['xcentroid', 'ycentroid'])

    coordinates_chart = Table(names=('ID', 'real_xcentroid', 'real_ycentroid',
                                     'ghost_xcentroid', 'ghost_ycentroid'),
                              dtype=('i4', 'f8', 'f8', 'f8', 'f8'))

    pair_id = 0
    # Iterate over a copy so the caller's table is not touched by the loop.
    for star in clean_sources.copy():
        real, ghost = ghostfinder(star, ghost_sources, **kwargs)
        # ghostfinder signals "nothing found" with the string 'no_match'.
        if ghost == 'no_match':
            continue
        coordinates_chart.add_row([
            int(pair_id), real['xcentroid'], real['ycentroid'],
            ghost['xcentroid'], ghost['ycentroid']
        ])
        pair_id += 1

    n_found = str(len(coordinates_chart))
    logging.info('Found %s ghosts', n_found)
    logging.info('Finished')
    return (coordinates_chart)
def test_join_multiple():
    """``join_multiple`` must agree with chaining pairwise outer joins."""
    column_values = ([1, 2, 3], [8, 10, 13], [435, 13, 3.5])
    data = [HTable({"a": values, "b": values}) for values in column_values]

    multiple_joined = join_multiple(*data, join_type="outer")

    # Reference result: join the first two tables, then the third.
    first_two = join(data[0], data[1], join_type="outer")
    single_joined = join(first_two, data[2], join_type="outer")

    unmatched_rows = setdiff(multiple_joined, single_joined)
    assert len(unmatched_rows) == 0
def query_gaiadr3_names_from_dr2(input_table):
    """Look up the Gaia EDR3 counterpart of each Gaia DR2 source.

    ``input_table`` is uploaded as ``tap_upload.upload_table`` and joined, on
    its ``source_id`` column, to ``gaiaedr3.dr2_neighbourhood``.

    DR2 sources with exactly one EDR3 entry are kept as they are. For a DR2
    source with several EDR3 entries one entry is chosen as follows:

    * if the smallest absolute magnitude difference is masked (magnitude
      differences are not available), the entry with the smallest angular
      distance is used;
    * otherwise an entry is used only when it has both the smallest absolute
      magnitude difference and the smallest angular distance; if those are
      two different entries, none is added.

    Parameters
    ----------
    input_table
        Table to upload; must provide a ``source_id`` column holding the DR2
        source ids.

    Returns
    -------
    Table with one row per DR2 source for which a single EDR3 entry could be
    selected.
    """
    gaiadr3_query_string = "SELECT * FROM gaiaedr3.dr2_neighbourhood " \
                           "INNER JOIN tap_upload.upload_table ON " \
                           "gaiaedr3.dr2_neighbourhood.dr2_source_id = tap_upload.upload_table.source_id"
    job_gaiadr3_query = Gaia.launch_job(gaiadr3_query_string,
                                        upload_resource=input_table,
                                        upload_table_name="upload_table",
                                        verbose=VERBOSE)
    gaiadr3_names = job_gaiadr3_query.get_results()
    print("length of eDR3 names table: ", len(gaiadr3_names))

    # Find DR2 sources with only one EDR3 entry
    gaiadr3_unique = table.unique(gaiadr3_names, keys='dr2_source_id', keep='none')
    print("Number of unique sources: ", len(gaiadr3_unique))

    # Find the DR2 sources with multiple EDR3 matches
    dr3_dupes = setdiff(gaiadr3_names, gaiadr3_unique, keys='dr2_source_id')
    dr3_dupes_grouped = dr3_dupes.group_by('dr2_source_id')

    # Find the best EDR3 match for DR2 sources with multiple matches
    for group in dr3_dupes_grouped.groups:
        abs_mag_diff = abs(group['magnitude_difference'])
        smallest_mag_diff = min(abs_mag_diff)
        angular_distance = group['angular_distance']
        min_angdist_index = angular_distance.tolist().index(min(angular_distance))

        if np.ma.is_masked(smallest_mag_diff):
            # Magnitude differences aren't available for all sources so just
            # using the closest one. The row must be selected with the
            # angular-distance index: no magnitude index exists for this group.
            verboseprint(
                'best one found just with angular distance \n',
                group['dr2_source_id', 'dr3_source_id'][min_angdist_index],
                '\n')
            gaiadr3_unique.add_row(group[min_angdist_index])
            continue

        min_mag_index = abs_mag_diff.tolist().index(smallest_mag_diff)
        if min_angdist_index == min_mag_index:
            # Source with smallest mag difference is the same as the closest source
            verboseprint(
                'best one found \n',
                group['dr2_source_id', 'dr3_source_id'][min_angdist_index],
                '\n')
            gaiadr3_unique.add_row(group[min_mag_index])
        else:
            # The two criteria disagree: leave this DR2 source unresolved.
            verboseprint(
                'no choice for \n',
                group['dr2_source_id', 'dr3_source_id', 'magnitude_difference',
                      'angular_distance'], '\n')

    verboseprint("Number of unique sources after dupe fix:", len(gaiadr3_unique))
    return gaiadr3_unique
def test_nircam_tsimage_stage3_phot(run_pipelines):
    """Compare the photometry catalogue from the pipeline run with its truth file."""
    rtdata = run_pipelines
    rtdata.input = "jw00312-o006_20191225t115310_tso3_001_asn.json"
    rtdata.output = "jw00312-o006_t001_nircam_f210m-clear-sub64p_phot.ecsv"
    rtdata.get_truth(
        "truth/test_nircam_tsimg_stage23/jw00312-o006_t001_nircam_f210m-clear-sub64p_phot.ecsv"
    )

    produced = Table.read(rtdata.output)
    expected = Table.read(rtdata.truth)

    # An empty setdiff result means no row of the output is missing from the truth.
    differing_rows = setdiff(produced, expected)
    assert len(differing_rows) == 0
def test_nircam_tsgrism_stage3_whtlt(run_pipelines):
    """Compare the white-light table from the pipeline run with its truth file."""
    rtdata = run_pipelines
    rtdata.input = "jw00721-o012_20191119t043909_tso3_001_asn.json"
    rtdata.output = "jw00721-o012_t004_nircam_f444w-grismr-subgrism256_whtlt.ecsv"
    rtdata.get_truth(
        "truth/test_nircam_tsgrism_stages/jw00721-o012_t004_nircam_f444w-grismr-subgrism256_whtlt.ecsv"
    )

    produced = Table.read(rtdata.output)
    expected = Table.read(rtdata.truth)

    # An empty setdiff result means no row of the output is missing from the truth.
    differing_rows = setdiff(produced, expected)
    assert len(differing_rows) == 0
def test_niriss_soss_stage3_whtlt(run_pipelines):
    """Compare the white-light table from the pipeline run with its truth file."""
    rtdata = run_pipelines
    rtdata.input = "jw00625-o023_20191210t204036_tso3_001_asn.json"
    rtdata.output = "jw00625-o023_t001_niriss_clear-gr700xd-substrip256_whtlt.ecsv"
    rtdata.get_truth(
        "truth/test_niriss_soss_stages/jw00625-o023_t001_niriss_clear-gr700xd-substrip256_whtlt.ecsv"
    )

    produced = Table.read(rtdata.output)
    expected = Table.read(rtdata.truth)

    # An empty setdiff result means no row of the output is missing from the truth.
    differing_rows = setdiff(produced, expected)
    assert len(differing_rows) == 0
def test_fill():
    """Check ``fill`` with the distributed methods, a callable, and bad input."""
    tab = make_sample_table()
    tab["b"] = tab["b"].astype("float64")
    tab = tab.populate([quarter / 4 for quarter in range(4, 16)])

    # Built-in methods over the range (2, 4).
    convex = tab.fill("distributed_convex", (2, 4))
    assert list(convex.loc[3:4]["b"]) == [12, 15, 18, 21, 25]
    concave = tab.fill("distributed_concave", (2, 4))
    assert list(concave.loc[3:4]["b"]) == [12, 16, 19, 22, 25]

    # For these ranges the result, once filled with 50, must equal the
    # plainly filled table.
    tab_unmasked = tab.filled(50)
    for bounds in ((4, 5), (3.5, 3.5)):
        refilled = tab.fill("distributed_convex", bounds).filled(50)
        assert len(setdiff(tab_unmasked, refilled)) == 0

    def fill_50(df: pd.DataFrame):
        return df.fillna(50)

    # A callable is accepted as the fill method as well.
    assert len(setdiff(tab_unmasked, tab.fill(fill_50))) == 0

    with pytest.raises(ValueError) as err:
        tab.fill("distributed_convex", (2.5, 4))
    assert str(err.value) == "First and last rows must not be masked"

    with pytest.raises(ValueError) as err:
        tab.fill("not a method", (2, 4))
    assert str(err.value) == "Incorrect fill method"
def append_gaia(self, gaia_tables):
    """
    Append data from Gaia query result tables.

    Each table has its Gaia photometry columns renamed in place to the
    generic names ``mag``, ``mag1``, ``mag2`` and ``color_index``, and NaN
    values in the last three are masked. An empty catalog takes over the
    table's columns; otherwise only rows whose ``source_id`` is not yet in
    the catalog are stacked onto it.

    Parameters:
    -----------
    gaia_tables : list
        List of Astropy tables with Gaia query results
    """
    if self.log is not None:
        self.log.write('Appending data from Gaia query result tables',
                       level=3, event=43)

    assert isinstance(gaia_tables, list)

    # (Gaia column name, generic column name)
    renames = (('phot_g_mean_mag', 'mag'),
               ('phot_bp_mean_mag', 'mag1'),
               ('phot_rp_mean_mag', 'mag2'),
               ('bp_rp', 'color_index'))
    nan_masked_columns = ('mag1', 'mag2', 'color_index')

    for gaia_table in gaia_tables:
        for gaia_name, generic_name in renames:
            gaia_table.rename_column(gaia_name, generic_name)

        for col in nan_masked_columns:
            gaia_table[col] = MaskedColumn(gaia_table[col],
                                           mask=np.isnan(gaia_table[col]))

        # Empty catalog: take all data from the Gaia table.
        if len(self) == 0:
            self.columns = gaia_table.columns
            continue

        if len(gaia_table) == 0:
            if self.log is not None:
                self.log.write('Gaia query result table is empty!',
                               level=4, event=43)
            continue

        # Rows of the Gaia table that the catalog does not have yet.
        d = setdiff(gaia_table, self, keys=['source_id'])
        if len(d) > 0:
            self.columns = vstack([self, d]).columns
def test_miri_image_stage3_catalog(run_pipelines):
    """Compare the source catalogue from the pipeline run with its truth file."""
    rtdata = run_pipelines
    rtdata.input = "det_dithered_5stars_image3_asn.json"
    rtdata.output = "det_dithered_5stars_f770w_cat.ecsv"
    rtdata.get_truth(
        "truth/test_miri_image_stages/det_dithered_5stars_f770w_cat.ecsv")

    produced = Table.read(rtdata.output)
    expected = Table.read(rtdata.truth)

    # Only these columns are compared: the RA/DEC columns cannot be sorted,
    # so setdiff cannot work on the whole table.
    compared = ['id', 'xcentroid', 'ycentroid']
    table = Table([produced[name] for name in compared])
    table_truth = Table([expected[name] for name in compared])

    # setdiff returns a table of length zero if there is no difference
    differing_rows = setdiff(table, table_truth)
    assert len(differing_rows) == 0
def test_nircam_image_stage3_catalog(run_pipelines):
    """Compare the source catalogue from the pipeline run with its truth file."""
    rtdata = run_pipelines
    rtdata.input = "jw42424-o002_20191220t214154_image3_001_asn.json"
    rtdata.output = "jw42424-o002_t001_nircam_clear-f444w_cat.ecsv"
    rtdata.get_truth(
        "truth/test_nircam_image_stages/jw42424-o002_t001_nircam_clear-f444w_cat.ecsv"
    )

    produced = Table.read(rtdata.output)
    expected = Table.read(rtdata.truth)

    # Only these columns are compared: the RA/DEC columns cannot be sorted,
    # so setdiff cannot work on the whole table.
    compared = ['id', 'xcentroid', 'ycentroid']
    table = Table([produced[name] for name in compared])
    table_truth = Table([expected[name] for name in compared])

    # setdiff returns a table of length zero if there is no difference
    differing_rows = setdiff(table, table_truth)
    assert len(differing_rows) == 0
def test_1_file_preprocess(self):
    """Check the four values returned by ``file_preprocess``.

    Verified are the galaxy table, the distance limits and the two output
    file names, for ``dist_metric='redshift'``.
    """
    outputs = file_preprocess(self.galaxies_filename, '', '',
                              dist_metric='redshift')
    f_galaxy_table, f_dist_limits, f_out1_filename, f_out2_filename = outputs

    # Galaxy table: no row may be missing from the reference table.
    missing_rows = setdiff(f_galaxy_table, self.galaxies_shuffled)
    self.assertEqual(len(missing_rows), 0)

    # Distance limits: lower bound 0, upper bound c * z_max / 100. The
    # expected values are stored on the class.
    expected_limits = np.zeros(2)
    expected_limits[1] = c*self.redshift_range[-1]/100.
    TestVoidFinder.dist_limits = expected_limits
    limits_match = np.isclose(f_dist_limits, TestVoidFinder.dist_limits)
    self.assertTrue(limits_match.all())

    # Output file names.
    self.assertEqual(f_out1_filename, 'test_galaxies_redshift_maximal.txt')
    self.assertEqual(f_out2_filename, 'test_galaxies_redshift_holes.txt')
def remove_by_id(self, ids):
    """
    Returns a new ``Catalogue`` with `ids` sources removed

    The remaining ids are passed to ``select_by_id`` in their original
    catalogue order.

    Parameters
    ----------
    ids : ``list`` or ``Column``
        List of ids to be removed.
    """
    # Own ids, tagged with their position so the order can be restored.
    own_ids = Table()
    own_ids['ID'] = self.ids
    own_ids['IDX'] = range(len(self.ids))

    unwanted_ids = Table()
    unwanted_ids['ID'] = ids
    unwanted_ids['newIDX'] = range(len(ids))

    # Keep only the ids that are not in the removal list.
    remaining = setdiff(own_ids, unwanted_ids, keys='ID')
    remaining.sort('IDX')

    return self.select_by_id(remaining['ID'])
# power, use Total_Flux_2, since we know it's a component source corr_table['power'] = power(corr_table['z_best'], corr_table['Total_flux_2']) # power_thesis, use Total_Flux_1 + new_NN_Flux_1 corr_table['power_thesis'] = power( corr_table['z_best'], corr_table['Total_flux_1'] + corr_table['new_NN_Total_flux']) VA_bs_NNmatch = vstack([VA_bs_MG, corr_table]) print('Number of correct component matches (+MG):', len(VA_bs_NNmatch)) # get the previous Table + the 'Not round' 'wrong' NN sources # not round is defined as: (Maj-1)>Min # incorr_table = Table() # incorr_table['Source_Name_1'] = incorr_sourcenames NN_wrongcomp = setdiff( VA_bs_NN, corr_table, keys='Source_Name_1' ) # Get all wrong NN sources and save only the 1st component indx_notround = np.where( (NN_wrongcomp['Maj_1'] - 1) > NN_wrongcomp['Min_1']) indx_round = np.where((NN_wrongcomp['Maj_1'] - 1) <= NN_wrongcomp['Min_1']) print( 'Number of wrong component NN sources, but still useful because not round:', len(indx_notround[0])) notround_sources = NN_wrongcomp[indx_notround] # not round sources: use PA_1 notround_sources['final_PA'] = notround_sources['PA_1'] # not round sources size: use Maj_1, times two for semi-major axis notround_sources['size'] = notround_sources['Maj_1'] * 2 # not round sources size thesis, dont care about wrong match, just use NN dist notround_sources[ 'size_thesis'] = notround_sources['new_NN_distance(arcmin)'] * 60
def merge_cats(folder_dict, opt_survey='pstarrs', opt_label='PS', nir_survey='2MASS', nir_label='NTM'):
    """Merge the 2-, 3- and 4-catalogue cross-match tables into one table.

    Four FITS tables are read from the paths returned by ``xm_folders``
    (keys ``'2cat'``, ``'3catmir'``, ``'3catnir'`` and ``'4cat'``). The
    result contains, stacked in this order:

    * every 4-catalogue match, with the chi2/probability columns of the
      3- and 2-catalogue matches that share its ids joined on;
    * 3-catalogue matches (both kinds) with no 4-catalogue match;
    * 2-catalogue matches that appear in none of the above.

    NOTE(review): the tables presumably hold X-ray/optical (XO), plus
    mid-IR (XOW), plus near-IR (XON) and all four (XOWN) matches, judging
    by the id columns used as keys; confirm against ``xm_folders``.

    Parameters
    ----------
    folder_dict : mapping
        Must provide the ``'xmatch'`` entry passed on to ``xm_folders``.
    opt_survey, nir_survey : str
        Survey names passed on to ``xm_folders``; the upper-cased first
        letter of ``opt_survey`` is the tag used in column names.
    opt_label, nir_label : str
        Prefixes of the optical (``<label>objid``) and near-IR
        (``<label>objID``) id columns.

    Returns
    -------
    The merged table.
    """
    catdir = xm_folders(folder_dict['xmatch'], opt_survey, nir_survey)

    optid = '{}objid'.format(opt_label)
    nirid = '{}objID'.format(nir_label)
    otag = opt_survey[0].upper()

    # Names of the per-match chi2 / probability columns.
    chi2_xo = 'chi2Pos_X{}'.format(otag)
    chi2_xow = 'chi2Pos_X{}W'.format(otag)
    chi2_xon = 'chi2Pos_X{}N'.format(otag)
    chi2_xown = 'chi2Pos_X{}WN'.format(otag)
    proba_xo = 'proba_X{}'.format(otag)
    proba_xow = 'proba_X{}W'.format(otag)
    proba_xon = 'proba_X{}N'.format(otag)
    proba_xown = 'proba_X{}WN'.format(otag)

    # Id columns identifying a match in each table, and the shared
    # position columns kept in the output.
    xo_keys = ['XMMSRCID', optid]
    xow_keys = ['XMMSRCID', optid, 'WSID']
    xon_keys = ['XMMSRCID', optid, nirid]
    position_cols = ['posRA', 'posDec', 'ePosA', 'ePosB', 'ePosPA']

    def read_cat(key):
        return Table.read('{}.fits'.format(catdir[key]))

    xo_cat = read_cat('2cat')
    xow_cat = read_cat('3catmir')
    xon_cat = read_cat('3catnir')
    xown_cat = read_cat('4cat')

    # Sources in XOW but not in XOWN
    XOW_notXOWN = setdiff(xow_cat, xown_cat, keys=xow_keys)
    XOW_notXOWN.rename_column('chi2Pos', chi2_xow)

    # Sources in XON but not in XOWN
    XON_notXOWN = setdiff(xon_cat, xown_cat, keys=xon_keys)
    XON_notXOWN.rename_column('chi2Pos', chi2_xon)

    # Attach the XOW chi2/probability to the XOWN sources. The XOWN table's
    # own copy of the XOW probability is dropped first so that the join does
    # not produce two columns with that name.
    xow_cat_temp = xow_cat[['chi2Pos', proba_xow] + xow_keys]
    xow_cat_temp.rename_column('chi2Pos', chi2_xow)
    xown_cat.remove_column(proba_xow)
    xown_cat.rename_column('chi2Pos', chi2_xown)
    XOWN_probaXOW = join(xown_cat, xow_cat_temp,
                         join_type='left', keys=xow_keys)

    # Same for the XON chi2/probability.
    xon_cat_temp = xon_cat[['chi2Pos', proba_xon] + xon_keys]
    xon_cat_temp.rename_column('chi2Pos', chi2_xon)
    XOWN_probaXOW.remove_column(proba_xon)
    XOWN_probaXOW_probaXON = join(XOWN_probaXOW, xon_cat_temp,
                                  join_type='left', keys=xon_keys)

    # Trim every table to the output columns and stack them.
    XOW_notXOWN.keep_columns(
        position_cols + [chi2_xow, proba_xow, 'nPos'] + xow_keys)
    XON_notXOWN.keep_columns(
        position_cols + [chi2_xon, proba_xon, 'nPos'] + xon_keys)
    XOWN_probaXOW_probaXON.keep_columns(
        position_cols
        + [chi2_xow, proba_xow, chi2_xon, proba_xon, chi2_xown, proba_xown,
           'nPos', 'XMMSRCID', optid, 'WSID', nirid])

    XOWN_XOW_XON = vstack([XOWN_probaXOW_probaXON, XOW_notXOWN, XON_notXOWN])

    # Sources in XO but not XOWN_XOW_XON
    XO_notXOWN_XOW_XON = setdiff(xo_cat, XOWN_XOW_XON, keys=xo_keys)
    XO_notXOWN_XOW_XON.rename_column('chi2Pos', chi2_xo)

    # Attach the XO chi2/probability to the sources already collected.
    xo_cat_temp = xo_cat[['chi2Pos', proba_xo] + xo_keys]
    xo_cat_temp.rename_column('chi2Pos', chi2_xo)
    XOWN_XOW_XON_probaXO = join(XOWN_XOW_XON, xo_cat_temp,
                                join_type='left', keys=xo_keys)

    # Final stack: everything matched so far plus the XO-only sources.
    XO_notXOWN_XOW_XON.keep_columns(
        position_cols + [chi2_xo, proba_xo, 'nPos'] + xo_keys)

    merged_cat = vstack([XOWN_XOW_XON_probaXO, XO_notXOWN_XOW_XON])

    return merged_cat
tmass_phot_file_string = 'scripts/ingests/2MASS/2MASS_data_' + DATE_SUFFIX + '.xml' # tmass_phot.write(tmass_phot_file_string, format='votable') # read results from saved table tmass_phot = Table.read(tmass_phot_file_string, format='votable') tmass_phot_unique = unique(tmass_phot, keys='TYPED_ID', keep='first') # add 2MASS designations to Names table as needed # Find difference between all 2MASS and Names Table tmass_desig_in_db = db.query(db.Names).filter( db.Names.c.other_name.in_(tmass_designations['designation'])).table() # find sources in tmass_designations not in tmass tmass_desig_in_db['designation'] = tmass_desig_in_db['other_name'] tmass_not_in_db = setdiff(tmass_designations, tmass_desig_in_db, keys=['designation']) add_names(db, sources=tmass_not_in_db['db_names'], other_names=tmass_not_in_db['designation']) # There was a problem with 2MASS J12475047-0152142. \t in source name. Modified JSON directly. # ADD J band photometry unmasked_J_phot = np.logical_not(tmass_phot_unique['FLUX_J'].mask).nonzero() tmass_J_phot = tmass_phot_unique[unmasked_J_phot]['TYPED_ID', 'FLUX_J', 'FLUX_ERROR_J'] ingest_photometry(db, tmass_J_phot['TYPED_ID'], '2MASS.J', tmass_J_phot['FLUX_J'],
def xmatch_and_merge_cats(tab1: Table, tab2: Table, tol: units.Quantity = 1 * units.arcsec, table_names: tuple = ('1', '2'), **kwargs) -> Table: """ Given two source catalogs, cross-match and merge them. This function ensures there is a unique match between tables as opposed to the default join_skycoord behavior which matches multiple objects on the right table to a source on the left. The two tables must contain the columns 'ra' and 'dec' (case-sensitive). Args: tab1, tab2 (Table): Photometry catalogs. Must contain columns named ra and dec. tol (Quantity[Angle], optional): Maximum separation for cross-matching. table_names (tuple of str, optional): Names of the two tables for naming unique columns in the merged table. kwargs: Additional keyword arguments to be passed onto xmatch_catalogs Returns: merged_table (Table): Merged catalog. """ if table_names is not None: assert len( table_names) == 2, "Invalid number of table names for two tables." assert (type(table_names[0]) == str) & (type( table_names[1]) == str), "Table names should be strings." assert np.all(np.isin( ['ra', 'dec'], tab1.colnames)), "Table 1 doesn't have column 'ra' and/or 'dec'." assert np.all(np.isin( ['ra', 'dec'], tab2.colnames)), "Table 2 doesn't have column 'ra' and/or 'dec'." # Cross-match tables for tab1 INTERSECTION tab2. matched_tab1, matched_tab2 = xmatch_catalogs(tab1, tab2, tol, **kwargs) # tab1 INTERSECTION tab2 inner_join = hstack([matched_tab1, matched_tab2], table_names=table_names) # Remove unnecessary ra/dec columns and rename remaining coordinate # columns corectly. tab1_coord_cols = ['ra_' + table_names[0], "dec_" + table_names[0]] tab2_coord_cols = ['ra_' + table_names[1], "dec_" + table_names[1]] inner_join.remove_columns(tab2_coord_cols) inner_join.rename_columns(tab1_coord_cols, ['ra', 'dec']) # Now get all objects that weren't matched. 
not_matched_tab1 = setdiff(tab1, matched_tab1) not_matched_tab2 = setdiff(tab2, matched_tab2) # (tab1 UNION tab2) - (tab1 INTERSECTION tab2) # Are there unmatched entries in both tables? if (len(not_matched_tab1) != 0) & (len(not_matched_tab2) != 0): outer_join = join(not_matched_tab1, not_matched_tab2, keys=['ra', 'dec'], join_type='outer', table_names=table_names) merged = vstack([inner_join, outer_join]).filled(-999.) # Only table 1 has unmatched entries? elif (len(not_matched_tab1) != 0) & (len(not_matched_tab2) == 0): merged = vstack([inner_join, not_matched_tab1]) # Only table 2? elif (len(not_matched_tab1) == 0) & (len(not_matched_tab2) != 0): merged = vstack([inner_join, not_matched_tab2]) # Neither? else: merged = inner_join # Final cleanup. Just in case. weird_cols = np.isin(['ra_1', 'dec_1', 'ra_2', 'dec_2'], merged.colnames) if np.any(weird_cols): merged.remove_columns( np.array(['ra_1', 'dec_1', 'ra_2', 'dec_2'])[weird_cols]) # Fill and return. return merged.filled(-999.) '''
# Load the per-object metadata and the light-curve samples from CSV.
metadata = Table.read(metafilename, format='csv')
lcdata = Table.read(lcfilename, format='csv')
print("")
print("OK! ")
print(" ")

nobjects = len(metadata)
nosamples = len(lcdata)

# CLEAN METADATA NAN VALUES
metadata = pu.nan_to_zeros(metadata)

# SET MODEL TEST SET
# The first 2000 metadata rows form the test set. Their light-curve rows are
# selected with a right join on object_id, cut back to the light-curve
# columns, and then removed from lcdata.
test_metadata = metadata[0:2000]
test_lcdata = join(lcdata, test_metadata, join_type='right', keys='object_id')
test_lcdata = test_lcdata[list(lcdata.columns)]
lcdata = setdiff(lcdata, test_lcdata, keys=list(lcdata.columns))
# The remaining metadata rows are kept; nobjects is updated to match.
metadata = metadata[2000:nobjects]
nobjects = len(metadata)

# SPLIT METADATA BY Z
metadata = ps.z_split(metadata)

# NOISE ELIMINATION (optional, controlled by flux_noise_elimination)
if flux_noise_elimination == True:
    lcdata = pu.rem_noise(lcdata, lcdata_nr_file)

# NOISE REDUCTION (optional, controlled by bayes_flux_noise_reduction)
if bayes_flux_noise_reduction == True:
    lcdata = pu.bayes_noise_reduction(lcdata)

# ADD MAGNITUDE