def test_structured_masked_column_roundtrip():
    mc = table.MaskedColumn([(1., 2.), (3., 4.)],
                            mask=[(False, False), (False, False)],
                            dtype='f8,f8')
    assert len(mc.dtype.fields) == 2
    mc2 = table.MaskedColumn(mc)
    assert_array_equal(mc2, mc)
def test_string_truncation_warning_masked():
    """
    Test warnings associated with in-place assignment of a string
    to a masked column, specifically where the right hand side
    contains np.ma.masked.
    """
    # Test for strings, but also cover assignment of np.ma.masked to
    # int and float masked column setting.  This was previously only
    # covered in an unrelated io.ascii test (test_line_endings) which
    # showed an unexpected difference between handling of str and numeric
    # masked arrays.
    for values in (['a', 'b'], [1, 2], [1.0, 2.0]):
        mc = table.MaskedColumn(values)

        with catch_warnings() as w:
            mc[1] = np.ma.masked
            assert len(w) == 0
            assert np.all(mc.mask == [False, True])

            mc[:] = np.ma.masked
            assert len(w) == 0
            assert np.all(mc.mask == [True, True])

    mc = table.MaskedColumn(['aa', 'bb'])

    with catch_warnings() as w:
        mc[:] = [np.ma.masked, 'ggg']  # replace item with string that gets truncated
        assert mc[1] == 'gg'
        assert np.all(mc.mask == [True, False])
        assert len(w) == 1
        assert ('truncated right side string(s) longer than 2 character(s)'
                in str(w[0].message))
def __fill_catalogue(self, table, cattable, colname, n, colnumlist=None):
    if colnumlist is None:
        colnum = input(
            "Please enter the column number of %s (if this column isn't "
            "necessary, please leave blank): " % colname)
    else:
        if type(colnumlist[n]) != str:
            colnum = str(colnumlist[n])
        else:
            colnum = colnumlist[n]

    if len(colnum) == 0:
        newcol = asttab.MaskedColumn(name=colname,
                                     data=np.zeros(len(table)),
                                     mask=[True] * len(table))
        table.add_column(newcol)
        return table
    else:
        if colnum.isdigit():
            m = int(colnum)
            cattablecolnames = cattable.colnames
            newcol = asttab.MaskedColumn(
                data=cattable[cattablecolnames[m]].data,
                name=colname,
                mask=[False] * len(table))
            table.add_column(newcol)
            return table
        else:
            print("There was a problem understanding the column number, "
                  "please try again.")
            # retry with colnumlist=None so the user is prompted again
            return self.__fill_catalogue(table, cattable, colname, n)
def internal_coordmatch(d, racol, deccol, conrad, verbose=False):
    """
    Perform an internal crossmatch by sky coordinates on an astropy table
    `d`, where the column names for the coordinates have to be provided,
    as well as the cone radius (`conrad`) in arcsec. Matched rows are
    marked by a "groupID" column, with "groupsize" giving the number of
    possible matches in each group.
    """
    ntot = len(d)

    coords = coordinates.SkyCoord(ra=d[racol] * u.degree,
                                  dec=d[deccol] * u.degree)

    idxr, idxl, sep, _ = coords.search_around_sky(coords, conrad * u.arcsec)

    sep = sep.value * 3600

    groupids, _, _, _ = _identify_groups(idxl, idxr, ntot, ntot,
                                         verbose=verbose)

    if "groupID" not in d.colnames:
        d.add_column(
            T.MaskedColumn(np.ma.zeros(ntot, dtype=int), name="groupID"))
    else:
        print("INTERNAL_COORDMATCH: WARNING: column 'groupID' already "
              "present. Will overwrite...")

    if "groupsize" not in d.colnames:
        d.add_column(
            T.MaskedColumn(np.ma.zeros(ntot, dtype=int), name="groupsize"))
    else:
        print("INTERNAL_COORDMATCH: WARNING: column 'groupsize' already "
              "present. Will overwrite...")

    d["groupID"][:] = np.ma.masked
    d["groupsize"][:] = np.ma.masked

    d["groupID"][idxl] = groupids[idxl]

    # --- the group size needs to be calculated anew because the one
    #     returned from the _identify_groups routine is not applicable
    #     for an internal match
    ngroups = np.ma.max(d["groupID"])
    for i in range(ngroups):
        ids = np.where(d["groupID"] == i + 1)[0]
        d["groupsize"][ids] = len(ids)

    if verbose:
        print("INTERNAL_COORDMATCH: Real largest groupsize: ",
              np.ma.max(d["groupsize"]))
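# --- Illustrative usage sketch for internal_coordmatch (added here; not
#     part of the original source). The table and column names are made
#     up; the module-level imports used above (numpy as np, astropy
#     coordinates/units, astropy.table as T) are assumed to be present.
def _demo_internal_coordmatch():
    from astropy import table as T

    # two sources ~0.4 arcsec apart plus one isolated source
    d = T.Table({'RA_deg': [10.0, 10.0001, 50.0],
                 'DEC_deg': [-5.0, -5.0, 20.0]}, masked=True)

    internal_coordmatch(d, 'RA_deg', 'DEC_deg', 2.0, verbose=True)

    # matched rows now share a groupID and carry the group size
    print(d['groupID', 'groupsize'])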
def test_getitem_metadata_regression():
    """
    Regression test for #1471: MaskedArray does not call __array_finalize__
    so the meta-data was not getting copied over. By overloading
    _update_from we are able to work around this bug.
    """
    # Make sure that meta-data gets propagated with __getitem__
    c = table.Column(data=[1, 2], name='a', description='b',
                     unit='m', format="%i", meta={'c': 8})
    assert c[1:2].name == 'a'
    assert c[1:2].description == 'b'
    assert c[1:2].unit == 'm'
    assert c[1:2].format == '%i'
    assert c[1:2].meta['c'] == 8

    c = table.MaskedColumn(data=[1, 2], name='a', description='b',
                           unit='m', format="%i", meta={'c': 8})
    assert c[1:2].name == 'a'
    assert c[1:2].description == 'b'
    assert c[1:2].unit == 'm'
    assert c[1:2].format == '%i'
    assert c[1:2].meta['c'] == 8

    # As above, but with take() - check the method and the function
    c = table.Column(data=[1, 2, 3], name='a', description='b',
                     unit='m', format="%i", meta={'c': 8})
    for subset in [c.take([0, 1]), np.take(c, [0, 1])]:
        assert subset.name == 'a'
        assert subset.description == 'b'
        assert subset.unit == 'm'
        assert subset.format == '%i'
        assert subset.meta['c'] == 8

    # Metadata isn't copied for scalar values
    for subset in [c.take(0), np.take(c, 0)]:
        assert subset == 1
        assert subset.shape == ()
        assert not isinstance(subset, table.Column)

    c = table.MaskedColumn(data=[1, 2, 3], name='a', description='b',
                           unit='m', format="%i", meta={'c': 8})
    for subset in [c.take([0, 1]), np.take(c, [0, 1])]:
        assert subset.name == 'a'
        assert subset.description == 'b'
        assert subset.unit == 'm'
        assert subset.format == '%i'
        assert subset.meta['c'] == 8

    # Metadata isn't copied for scalar values
    for subset in [c.take(0), np.take(c, 0)]:
        assert subset == 1
        assert subset.shape == ()
        assert not isinstance(subset, table.MaskedColumn)
def test_info_serialize_method():
    """
    Unit test of context manager to set info.serialize_method.  Normally
    just used to set this for writing a Table to file (FITS, ECSV, HDF5).
    """
    t = table.Table({
        'tm': time.Time([1, 2], format='cxcsec'),
        'sc': coordinates.SkyCoord([1, 2], [1, 2], unit='deg'),
        'mc': table.MaskedColumn([1, 2], mask=[True, False]),
        'mc2': table.MaskedColumn([1, 2], mask=[True, False])
    })

    origs = {}
    for name in ('tm', 'mc', 'mc2'):
        origs[name] = deepcopy(t[name].info.serialize_method)

    # Test setting by name and getting back to originals
    with serialize_method_as(t, {'tm': 'test_tm', 'mc': 'test_mc'}):
        for name in ('tm', 'mc'):
            assert all(t[name].info.serialize_method[key] == 'test_' + name
                       for key in t[name].info.serialize_method)
        assert t['mc2'].info.serialize_method == origs['mc2']
        assert not hasattr(t['sc'].info, 'serialize_method')

    for name in ('tm', 'mc', 'mc2'):
        assert t[name].info.serialize_method == origs[name]  # dict compare
    assert not hasattr(t['sc'].info, 'serialize_method')

    # Test setting by name and class, where name takes precedence.  Also
    # test that it works for subclasses.
    with serialize_method_as(t, {'tm': 'test_tm', 'mc': 'test_mc',
                                 table.Column: 'test_mc2'}):
        for name in ('tm', 'mc', 'mc2'):
            assert all(t[name].info.serialize_method[key] == 'test_' + name
                       for key in t[name].info.serialize_method)
        assert not hasattr(t['sc'].info, 'serialize_method')

    for name in ('tm', 'mc', 'mc2'):
        assert t[name].info.serialize_method == origs[name]  # dict compare
    assert not hasattr(t['sc'].info, 'serialize_method')

    # Test supplying a single string that applies to all columns with
    # a serialize_method.
    with serialize_method_as(t, 'test'):
        for name in ('tm', 'mc', 'mc2'):
            assert all(t[name].info.serialize_method[key] == 'test'
                       for key in t[name].info.serialize_method)
        assert not hasattr(t['sc'].info, 'serialize_method')

    for name in ('tm', 'mc', 'mc2'):
        assert t[name].info.serialize_method == origs[name]  # dict compare
    assert not hasattr(t['sc'].info, 'serialize_method')
def test_masked_column_serialize_method_propagation():
    mc = table.MaskedColumn([1., 2., 3.], mask=[True, False, True])
    assert mc.info.serialize_method['ecsv'] == 'null_value'
    mc.info.serialize_method['ecsv'] = 'data_mask'
    assert mc.info.serialize_method['ecsv'] == 'data_mask'
    mc2 = mc.copy()
    assert mc2.info.serialize_method['ecsv'] == 'data_mask'
    mc3 = table.MaskedColumn(mc)
    assert mc3.info.serialize_method['ecsv'] == 'data_mask'
    mc4 = mc.view(table.MaskedColumn)
    assert mc4.info.serialize_method['ecsv'] == 'data_mask'
    mc5 = mc[1:]
    assert mc5.info.serialize_method['ecsv'] == 'data_mask'
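# --- Illustrative sketch (added; not part of the original tests): how the
#     'data_mask' setting exercised above plays out when actually writing a
#     masked column to ECSV. With 'null_value' masked entries are written
#     as empty/null fields; with 'data_mask' the data and the mask are
#     serialized explicitly so masked data values round-trip.
def _demo_serialize_method_ecsv():
    import sys
    t = table.Table({'a': table.MaskedColumn([1., 2.], mask=[True, False])})
    t['a'].info.serialize_method['ecsv'] = 'data_mask'
    t.write(sys.stdout, format='ascii.ecsv')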
def test_masked_col_unicode_sandwich():
    """
    Create a bytestring MaskedColumn and ensure that it works in Python 3
    in a convenient way like in Python 2.
    """
    c = table.MaskedColumn([b'abc', b'def'])
    c[1] = np.ma.masked
    assert isinstance(c[:0], table.MaskedColumn)
    assert isinstance(c[0], str)
    assert c[0] == 'abc'

    assert c[1] is np.ma.masked
    assert isinstance(c[:], table.MaskedColumn)
    assert c[:].dtype.char == 'S'

    ok = c == ['abc', 'def']
    assert ok[0] == True
    assert ok[1] is np.ma.masked
    assert np.all(c == [b'abc', b'def'])
    assert np.all(c == np.array(['abc', 'def']))
    assert np.all(c == np.array([b'abc', b'def']))

    for cmp in ('abc', b'abc'):
        ok = c == cmp
        assert type(ok) is np.ma.MaskedArray
        assert ok[0] == True
        assert ok[1] is np.ma.masked
def test_array_wrap(self):
    """Test that the __array_wrap__ method converts a reduction ufunc
    output that has a different shape into an ndarray view.  Without this
    a method call like c.mean() returns a Column array object with
    length=1."""
    # Mean and sum for a 1-d float column
    c = table.Column(name='a', data=[1., 2., 3.])
    assert np.allclose(c.mean(), 2.0)
    assert isinstance(c.mean(), (np.floating, float))
    assert np.allclose(c.sum(), 6.)
    assert isinstance(c.sum(), (np.floating, float))

    # Non-reduction ufunc preserves Column class
    assert isinstance(np.cos(c), table.Column)

    # Sum for a 1-d int column
    c = table.Column(name='a', data=[1, 2, 3])
    assert np.allclose(c.sum(), 6)
    assert isinstance(c.sum(), (np.integer, int))

    # Sum for a 2-d int column
    c = table.Column(name='a', data=[[1, 2, 3], [4, 5, 6]])
    assert c.sum() == 21
    assert isinstance(c.sum(), (np.integer, int))
    assert np.all(c.sum(axis=0) == [5, 7, 9])
    assert c.sum(axis=0).shape == (3,)
    assert isinstance(c.sum(axis=0), np.ndarray)

    # Sum and mean for a 1-d masked column
    c = table.MaskedColumn(name='a', data=[1., 2., 3.], mask=[0, 0, 1])
    assert np.allclose(c.mean(), 1.5)
    assert isinstance(c.mean(), (np.floating, float))
    assert np.allclose(c.sum(), 3.)
    assert isinstance(c.sum(), (np.floating, float))
def test_col_and_masked_col(self):
    c1 = table.Column(name='a', dtype=int, unit='mJy', format='%i',
                      description='test column', meta={'c': 8, 'd': 12})
    c2 = table.MaskedColumn(name='a', dtype=int, unit='mJy', format='%i',
                            description='test column',
                            meta={'c': 8, 'd': 12})

    assert c1.attrs_equal(c2)
    assert c2.attrs_equal(c1)
def test_unicode_sandwich_masked_compare():
    """Test the fix for #6839 from #6899."""
    c1 = table.MaskedColumn(['a', 'b', 'c', 'd'],
                            mask=[True, False, True, False])
    c2 = table.MaskedColumn([b'a', b'b', b'c', b'd'],
                            mask=[True, True, False, False])

    for cmp in ((c1 == c2), (c2 == c1)):
        assert cmp[0] is np.ma.masked
        assert cmp[1] is np.ma.masked
        assert cmp[2] is np.ma.masked
        assert cmp[3]

    for cmp in ((c1 != c2), (c2 != c1)):
        assert cmp[0] is np.ma.masked
        assert cmp[1] is np.ma.masked
        assert cmp[2] is np.ma.masked
        assert not cmp[3]
def test_insert_string_masked_values(self):
    c = table.MaskedColumn(['a', 'b'])
    c1 = c.insert(0, np.ma.masked)
    assert np.all(c1 == ['', 'a', 'b'])
    assert np.all(c1.mask == [True, False, False])
    assert c1.dtype == 'U1'
    c2 = c.insert(1, np.ma.MaskedArray(['ccc', 'dd'], mask=[True, False]))
    assert np.all(c2 == ['a', 'ccc', 'dd', 'b'])
    assert np.all(c2.mask == [False, True, False, False])
    assert c2.dtype == 'U3'
def parse_dwarf_table(alpha=False,
                      table_path=datapath + '/abundance_tables/dwarf_lit_all.tab'):
    def _process_column(d, elem):
        N = len(d)
        if elem not in d.colnames:
            raise ValueError(elem)
        old_col = d[elem]
        ul_col = table.MaskedColumn(data=np.zeros(N), name='ul_' + elem,
                                    dtype=int)
        # np.float was removed from recent numpy; the builtin float is
        # equivalent here
        if old_col.dtype == float:
            return old_col, ul_col
        new_col = table.MaskedColumn(data=np.zeros(N, dtype=float),
                                     name=elem)
        for i in range(N):
            if np.ma.is_masked(old_col[i]):
                new_col[i] = np.ma.masked
                ul_col[i] = np.ma.masked
            elif old_col[i][0] == '<':  # '<' marks an upper limit
                new_col[i] = float(old_col[i][1:])
                ul_col[i] = 1
            else:
                new_col[i] = float(old_col[i])
        return new_col, ul_col

    d = table.Table(ascii.read(table_path))
    elems = d.colnames[2:-4]
    for elem in elems:
        elem_col, ul_col = _process_column(d, elem)
        d.remove_column(elem)
        d.add_column(elem_col)
        d.add_column(ul_col)
    if alpha:
        col = table.MaskedColumn(np.nanmean([d['Mg'], d['CaI'], d['TiII']],
                                            axis=0),
                                 name='alpha')
        d.add_column(col)
        col = table.MaskedColumn(np.zeros(len(d)), name='ul_alpha',
                                 dtype=int)
        d.add_column(col)
    return d
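# --- Illustrative call sketch (added; the data file behind the default
#     table_path is not available here): parse_dwarf_table replaces each
#     string abundance column that may contain '<' upper limits with a
#     float column plus an integer 'ul_' flag column, and optionally adds
#     an 'alpha' average of Mg, CaI and TiII.
# d = parse_dwarf_table(alpha=True)
# print(d['Mg', 'ul_Mg', 'alpha', 'ul_alpha'])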
def test_column_value_access():
    """Can a column's underlying data consistently be accessed via
    `.value`, whether it is a `Column`, `MaskedColumn`, `Quantity`, or
    `Time`?"""
    data = np.array([1, 2, 3])
    tbl = table.QTable({'a': table.Column(data),
                        'b': table.MaskedColumn(data),
                        'c': u.Quantity(data),
                        'd': time.Time(data, format='mjd')})
    assert type(tbl['a'].value) == np.ndarray
    assert type(tbl['b'].value) == np.ma.MaskedArray
    assert type(tbl['c'].value) == np.ndarray
    assert type(tbl['d'].value) == np.ndarray
def calc_xyref(self, refcat):
    """
    Compute x- and y-positions of the sources from the image catalog
    in the reference image plane. This creates the following columns
    in the catalog's table: ``'xref'`` and ``'yref'``.
    """
    if 'RA' not in self._catalog.colnames or \
       'DEC' not in self._catalog.colnames:
        raise RuntimeError("'recalc_catalog_radec()' should have been run "
                           "prior to calc_xyref()")

    # compute x & y in the reference WCS:
    xref, yref = refcat.all_world2pix(self.catalog['RA'],
                                      self.catalog['DEC'])
    self._catalog['xref'] = table.MaskedColumn(
        xref, name='xref', dtype=np.float64, mask=False
    )
    self._catalog['yref'] = table.MaskedColumn(
        yref, name='yref', dtype=np.float64, mask=False
    )
def tobool_column(table, colname, trueval='true'):
    """
    Little helper function to convert a column (back) to bool type, e.g.,
    when written by Topcat (which uses 'true' instead of 'True')
    """
    # --- test if the column is already bool:
    if table[colname].dtype != bool:
        n = len(table)
        newcol = T.MaskedColumn(np.zeros(n, dtype=bool), name=colname)
        idt = table[colname] == trueval
        newcol[idt] = True
        table.replace_column(colname, newcol)
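# --- Illustrative usage sketch for tobool_column (added; the table below
#     is made up, and the module-level imports used above are assumed):
def _demo_tobool_column():
    from astropy import table as T
    t = T.Table({'flag': ['true', 'false', 'true']}, masked=True)
    tobool_column(t, 'flag')
    assert t['flag'].dtype == bool
    assert list(t['flag']) == [True, False, True]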
def test_insert_masked_multidim(self):
    c = table.MaskedColumn([[1, 2], [3, 4]], name='a', dtype=int)

    c1 = c.insert(1, [100, 200], mask=True)
    assert np.all(c1.data.data == [[1, 2], [100, 200], [3, 4]])
    assert np.all(c1.data.mask == [[False, False], [True, True],
                                   [False, False]])

    c1 = c.insert(1, [100, 200], mask=[True, False])
    assert np.all(c1.data.data == [[1, 2], [100, 200], [3, 4]])
    assert np.all(c1.data.mask == [[False, False], [True, False],
                                   [False, False]])

    with pytest.raises(ValueError):
        c1 = c.insert(1, [100, 200], mask=[True, False, True])
def test_insert_masked(self):
    c = table.MaskedColumn([0, 1, 2], name='a', fill_value=9999,
                           mask=[False, True, False])

    # Basic insert
    c1 = c.insert(1, 100)
    assert np.all(c1.data.data == [0, 100, 1, 2])
    assert c1.fill_value == 9999
    assert np.all(c1.data.mask == [False, False, True, False])
    assert type(c) is type(c1)

    for mask in (False, True):
        c1 = c.insert(1, 100, mask=mask)
        assert np.all(c1.data.data == [0, 100, 1, 2])
        assert np.all(c1.data.mask == [False, mask, True, False])
def test_data_info():
    """
    Test getting info for just a column.
    """
    cols = [table.Column([1.0, 2.0, np.nan], name='name',
                         description='description', unit='m/s'),
            table.MaskedColumn([1.0, 2.0, 3.0], name='name',
                               description='description', unit='m/s',
                               mask=[False, False, True])]
    for c in cols:
        # Test getting the full ordered dict
        cinfo = c.info(out=None)
        assert cinfo == OrderedDict([('name', 'name'),
                                     ('dtype', 'float64'),
                                     ('shape', ''),
                                     ('unit', 'm / s'),
                                     ('format', ''),
                                     ('description', 'description'),
                                     ('class', type(c).__name__),
                                     ('n_bad', 1),
                                     ('length', 3)])

        # Test the console (string) version which omits trivial values
        out = StringIO()
        c.info(out=out)
        exp = ['name = name',
               'dtype = float64',
               'unit = m / s',
               'description = description',
               'class = {0}'.format(type(c).__name__),
               'n_bad = 1',
               'length = 3']
        assert out.getvalue().splitlines() == exp

        # repr(c.info) gives the same as c.info()
        assert repr(c.info) == out.getvalue()

        # Test stats info
        cinfo = c.info('stats', out=None)
        assert cinfo == OrderedDict([('name', 'name'),
                                     ('mean', '1.5'),
                                     ('std', '0.5'),
                                     ('min', '1.0'),
                                     ('max', '2.0'),
                                     ('n_bad', 1),
                                     ('length', 3)])
def create_group_catalog(self):
    """
    Combine members' image catalogs into a single group catalog.

    Returns
    -------
    group_catalog : astropy.table.Table
        Combined group catalog.

    """
    catalogs = []
    catno = 0
    for image in self._images:
        catlen = len(image.catalog)

        if image.name is None:
            catname = 'Catalog #{:d}'.format(catno)
        else:
            catname = image.name

        col_catname = table.MaskedColumn(catlen * [catname],
                                         name='cat_name')
        col_imcatidx = table.MaskedColumn(catlen * [catno],
                                          name='_imcat_idx')
        col_id = table.MaskedColumn(image.catalog['id'])
        col_x = table.MaskedColumn(image.catalog['x'], dtype=np.float64)
        col_y = table.MaskedColumn(image.catalog['y'], dtype=np.float64)
        ra, dec = image.all_pix2world(
            image.catalog['x'], image.catalog['y']
        )
        col_ra = table.MaskedColumn(ra, dtype=np.float64, name='RA')
        col_dec = table.MaskedColumn(dec, dtype=np.float64, name='DEC')

        cat = table.Table(
            [col_imcatidx, col_catname, col_id, col_x, col_y,
             col_ra, col_dec],
            masked=True
        )

        catalogs.append(cat)
        catno += 1

    return table.vstack(catalogs, join_type='exact')
def tbl_to_astropy(tbl):
    typedict = {"int": "i4",
                "long": "i8",
                "float": "f4",
                "double": "f8",
                "real": "f8",
                "char": "S",
                "date": "S"}

    collist = []
    for n in tbl.colnames:
        col = tbl.cols[n]
        longtype = IPACExpandType(col.type)
        m = [not x for x in col.mask]
        newC = aptb.MaskedColumn(col.data, name=n, mask=m,
                                 dtype=typedict[longtype])
        collist.append(newC)
    newT = aptb.Table(collist)

    return newT
def test_masked_multidim_as_list(self):
    data = np.ma.MaskedArray([1, 2], mask=[True, False])
    c = table.MaskedColumn([data])
    assert c.shape == (1, 2)
    assert np.all(c[0].mask == [True, False])
def match2ref(self, refcat, minobj=15, searchrad=1.0,
              searchunits='arcseconds', separation=0.5,
              use2dhist=True, xoffset=0.0, yoffset=0.0, tolerance=1.0):
    """ Uses xyxymatch to cross-match sources between this catalog and
        a reference catalog.

    Parameters
    ----------
    refcat : RefCatalog
        A `RefCatalog` object that contains a catalog of reference sources
        as well as a valid reference WCS.

    minobj : int, None, optional
        Minimum number of identified objects from each input image to use
        in matching objects from other images. If the default `None` value
        is used then `align` will automatically determine the minimum
        number of sources from the value of the `fitgeom` parameter.

    searchrad : float, optional
        The search radius for a match.

    searchunits : str, optional
        Units for search radius.

    separation : float, optional
        The minimum separation for sources in the input and reference
        catalogs in order to be considered to be distinct sources.
        Objects closer together than 'separation' pixels are removed
        from the input and reference coordinate lists prior to matching.
        This parameter gets passed directly to
        :py:func:`~stsci.stimage.xyxymatch` for use in matching the object
        lists from each image with the reference image's object list.

    use2dhist : bool, optional
        Use 2D histogram to find initial offset?

    xoffset : float, optional
        Initial estimate for the offset in X between the images and the
        reference frame. This offset will be used for all input images
        provided. This parameter is ignored when `use2dhist` is `True`.

    yoffset : float, optional
        Initial estimate for the offset in Y between the images and the
        reference frame. This offset will be used for all input images
        provided. This parameter is ignored when `use2dhist` is `True`.

    tolerance : float, optional
        The matching tolerance in pixels after applying an initial
        solution derived from the 'triangles' algorithm.  This parameter
        gets passed directly to :py:func:`~stsci.stimage.xyxymatch` for
        use in matching the object lists from each image with the
        reference image's object list.

    """
    colnames = self._catalog.colnames

    if 'xref' not in colnames or 'yref' not in colnames:
        raise RuntimeError("'calc_xyref()' should have been run prior "
                           "to match2ref()")

    im_xyref = np.asanyarray([self._catalog['xref'],
                              self._catalog['yref']]).T
    refxy = np.asanyarray([refcat.catalog['xref'],
                           refcat.catalog['yref']]).T

    log.info("Matching sources from '{}' with sources from reference "
             "{:s} '{}'".format(self.name, 'image', refcat.name))

    # convert tolerance from units of arcseconds to pixels, as needed
    if searchunits == 'arcseconds':
        searchrad /= refcat.pscale

    xyoff = (xoffset, yoffset)

    if use2dhist:
        # Determine xyoff (X,Y offset) and tolerance
        # to be used with xyxymatch:
        zpxoff, zpyoff, flux, zpqual = matchutils.build_xy_zeropoint(
            im_xyref,
            refxy,
            searchrad=searchrad
        )

        if zpqual is not None:
            xyoff = (zpxoff, zpyoff)
            # set tolerance as well
            # This value allows initial guess to be off by 1 in both and
            # still pick up the identified matches
            tolerance = 1.5

    matches = xyxymatch(
        im_xyref,
        refxy,
        origin=xyoff,
        tolerance=tolerance,
        separation=separation
    )

    nmatches = len(matches)
    self._catalog.meta['nmatches'] = nmatches
    minput_idx = matches['input_idx']

    catlen = len(self._catalog)

    # matched_ref_id:
    if 'matched_ref_id' not in colnames:
        c = table.MaskedColumn(name='matched_ref_id', dtype=int,
                               length=catlen, mask=True)
        self._catalog.add_column(c)
    else:
        self._catalog['matched_ref_id'].mask = True
    self._catalog['matched_ref_id'][minput_idx] = \
        self._catalog['id'][minput_idx]
    self._catalog['matched_ref_id'].mask[minput_idx] = False

    # this is needed to index reference catalog directly without using
    # astropy table indexing which at this moment is experimental:
    if '_raw_matched_ref_idx' not in colnames:
        c = table.MaskedColumn(name='_raw_matched_ref_idx',
                               dtype=int, length=catlen, mask=True)
        self._catalog.add_column(c)
    else:
        self._catalog['_raw_matched_ref_idx'].mask = True
    self._catalog['_raw_matched_ref_idx'][minput_idx] = \
        matches['ref_idx']
    self._catalog['_raw_matched_ref_idx'].mask[minput_idx] = False

    log.info("Found {:d} matches for '{}'...".format(nmatches, self.name))

    return matches
def crossmatch_tables(d1, d2, fout=None, join_type="1 and 2", ra1='RA_deg',
                      dec1="DEC_deg", ra2='RA_deg', vizier=False,
                      dec2="DEC_deg", conrad=3.0, verbose=False,
                      d2_unicol=None, match_sel="best for both",
                      mark_groups=True):
    """
    Crossmatch two astropy tables by coordinates. At the moment all
    matches within the cone radius are considered, not only the closest
    (see `match_sel`). If the vizier flag is set, then d1 has to be the
    local table, while d2 has to be the vizier ID of the online table (in
    this case, obviously, the options "1 or 2" and "2 not 1" do not work).
    """
    # --- make sure the tables support masking
    dN = T.Table(d1, masked=True)
    nN = len(dN)
    ncolsN = len(dN.columns)
    # print(dN.columns)

    join_type = join_type.lower()
    match_sel = match_sel.lower()

    joins = ["1 and 2", "1 not 2", "2 not 1", "1 or 2", "all from 1",
             "all from 2"]
    matches = ["best for both", "best for 1", "best for 2", "all"]

    if join_type not in joins:
        print("CROSSMATCH_TABLES: ERROR: selected join type not valid: ",
              join_type)
        print(" - available types are: ", joins)
        return (-1)

    if match_sel not in matches:
        print("CROSSMATCH_TABLES: ERROR: selected match selection not "
              "valid: ", match_sel)
        print(" - available selections are: ", matches)
        return (-1)

    if not vizier:
        dC = T.Table(d2, masked=True)
        nC = len(dC)
        ncolsC = len(dC.columns)

        if np.sum(dC[ra2].mask) + np.sum(dC[dec2].mask) > 0:
            print("CROSSMATCH_TABLES: ERROR: Table 2 contains empty "
                  "coordinates! Aborting...")
            return (-1)

        coordsN = coordinates.SkyCoord(ra=dN[ra1] * u.degree,
                                       dec=dN[dec1] * u.degree)
        coordsC = coordinates.SkyCoord(ra=dC[ra2] * u.degree,
                                       dec=dC[dec2] * u.degree)

    if verbose:
        print(timestamp() + ": CROSSMATCH_TABLES: Input parameters: ")
        print(" - input rows (left): ", nN)
        if not vizier:
            print(" - input rows (right): ", nC)
        print(" - join type: ", join_type)
        print(" - match selection: ", match_sel)
        print(" - cone radius: ", conrad)
        print(" - vizier XMATCH: ", vizier)
        print(" - RA 1: ", ra1)
        print(" - DEC 1: ", dec1)
        print(" - RA 2: ", ra2)
        print(" - DEC 2: ", dec2)
        print(" - output file: ", fout)

    # --- the crossmatch cannot handle masked coordinates!
    if np.sum(dN[ra1].mask) + np.sum(dN[dec1].mask) > 0:
        print("CROSSMATCH_TABLES: ERROR: Table 1 contains empty "
              "coordinates! Aborting...")
        return (-1)

    if verbose:
        print(timestamp() + ": CROSSMATCH_TABLES: Doing the crossmatch...")
        sys.stdout.flush()

    # --- in case of the online XMATCH with vizier table:
    if vizier:
        if join_type == "1 or 2" or join_type == "2 not 1":
            print("CROSSMATCH_TABLES: ERROR: For XMATCH with VIZIER "
                  "'1 or 2' and '2 not 1' are not available! Aborting...")
            return (-1)

        # --- create a small table for upload (the columns cannot be of
        #     masked type either)
        dtemp = T.Table({"UniID": range(nN),
                         ra1: np.array(dN[ra1]),
                         dec1: np.array(dN[dec1])})

        dC = XMatch.query(cat1=dtemp, cat2=d2,
                          max_distance=conrad * u.arcsec,
                          colRA1=ra1, colDec1=dec1)

        idxl = np.array(dC["UniID"])
        idxr = np.array(range(len(dC)))
        sep = dC["angDist"]

        nC = len(dC)

        del dC[ra1]
        del dC[dec1]
        del dC["UniID"]
        del dC["angDist"]

        ncolsC = len(dC.columns)

        # --- now we need to reconstruct the right side table
        if d2_unicol is not None:
            d2IDs = np.array(dC[d2_unicol])
            d2unique = np.unique(d2IDs)
            nrunique = len(d2unique)
        else:
            d2IDs = None

    else:
        idxr, idxl, sep, _ = coordsN.search_around_sky(coordsC,
                                                       conrad * u.arcsec)
        nrunique = len(np.unique(idxr))
        sep = sep.value * 3600
        d2IDs = None

    nmatch = len(idxr)
    # nmatch_vol = np.sum(dN['NEDredshift'][idxr] < zlim)
    lunique = np.unique(idxl)
    nlunique = len(lunique)

    if verbose:
        print(timestamp() +
              ": CROSSMATCH_TABLES: Total number of found matches: ",
              nmatch)
        print(" CROSSMATCH_TABLES: Number of unique left sources in "
              "match: ", nlunique)
        print(" CROSSMATCH_TABLES: Number of unique right sources in "
              "match: ", nrunique)

    # --- if we are only interested in the non-matches we are done here:
    if join_type == "1 not 2":
        # --- ids of those not in the match
        idNnomatch = [x for x in range(nN) if x not in idxl]
        dout = dN[idNnomatch]

        if fout is not None:
            dout.write(fout, delimiter=',', format='ascii',
                       fill_values=[(ascii.masked, '')], overwrite=True)

        if verbose:
            print("CROSSMATCH_TABLES: Number of non-matches from 1: ",
                  len(dout))

        return (dout)

    elif join_type == "2 not 1":
        # --- ids of those not in the match
        idCnomatch = [x for x in range(nC) if x not in idxr]
        dout = dC[idCnomatch]

        if fout is not None:
            dout.write(fout, delimiter=',', format='ascii',
                       fill_values=[(ascii.masked, '')], overwrite=True)

        if verbose:
            print("CROSSMATCH_TABLES: Number of non-matches from 2: ",
                  len(dout))

        return (dout)

    # --- Groups and Duplicates
    # --- get all left side duplicates
    id_uni_l, lcounts = np.unique(idxl, return_counts=True)
    id_dupl_l = np.array(id_uni_l[lcounts > 1])
    n_dupl_l = len(id_dupl_l)

    # --- get all right side duplicates
    if vizier and d2_unicol is not None:
        id_uni_r, rcounts = np.unique(d2IDs, return_counts=True)
        id_dupl_r = np.array(id_uni_r[rcounts > 1])
        n_dupl_r = len(id_dupl_r)
    else:
        id_uni_r, rcounts = np.unique(idxr, return_counts=True)
        id_dupl_r = id_uni_r[rcounts > 1]
        n_dupl_r = len(id_dupl_r)

    # --- In case of best matches only, select correspondingly
    select = np.ones(nmatch, dtype=bool)

    if match_sel == "best for both" or match_sel == "best for 1":
        if verbose:
            print(timestamp() +
                  "CROSSMATCH_TABLES: Finding best match on the left "
                  "side...")

        for i in tqdm(range(n_dupl_l)):
            ids = np.where(idxl == id_dupl_l[i])[0]
            exclude = np.where(sep[ids] > np.nanmin(sep[ids]))[0]
            keep = np.where(sep[ids] == np.nanmin(sep[ids]))[0]
            if len(keep) > 1:
                if verbose:
                    print(" - WARNING: more than 1 object at minimum "
                          "separation: ", len(keep))
            select[ids[exclude]] = False
            # print(i, idxl[ids], np.nanmin(sep[idxl[ids]]), exclude,
            #       idxl[ids[exclude]])

    if match_sel == "best for both" or match_sel == "best for 2":
        if verbose:
            print(timestamp() +
                  "CROSSMATCH_TABLES: Finding best match on the right "
                  "side...")

        for i in tqdm(range(n_dupl_r)):
            ids = np.where(idxr == id_dupl_r[i])[0]
            exclude = np.where(sep[ids] > np.nanmin(sep[ids]))[0]
            select[ids[exclude]] = False

    if verbose:
        print("CROSSMATCH_TABLES: Number of excluded matches: ",
              np.sum(np.invert(select)))

    idxl = idxl[select]
    idxr = idxr[select]
    sep = sep[select]

    if vizier:
        # idxr = np.array(range(len(idxr)))
        d2IDs = d2IDs[select]

    # --- Group business is only required if multiple matches are allowed
    if match_sel != "best for both" and mark_groups:

        # --- Add the columns of the tables to each other
        if verbose:
            print(timestamp() + ": CROSSMATCH_TABLES: Adding columns...")

        for i in range(ncolsN):
            if dN.columns[i].name not in dC.columns:
                dC.add_column(
                    T.MaskedColumn(np.ma.zeros(nC,
                                               dtype=dN.columns[i].dtype),
                                   name=dN.columns[i].name))
                dC[dN.columns[i].name] = np.ma.masked
                # print(" Added column: ", dN.columns[i].name)

        dC.add_column(
            T.MaskedColumn(np.ma.zeros(nC, dtype=float),
                           name="separation_as"))
        dC["separation_as"] = np.ma.masked
        dC.add_column(
            T.MaskedColumn(np.ma.zeros(nC, dtype=int), name="groupID"))
        dC.add_column(
            T.MaskedColumn(np.ma.zeros(nC, dtype=int), name="groupsize"))

        for i in range(ncolsC):
            if dC.columns[i].name not in dN.columns:
                dN.add_column(
                    T.MaskedColumn(np.ma.zeros(nN,
                                               dtype=dC.columns[i].dtype),
                                   name=dC.columns[i].name))
                dN[dC.columns[i].name] = np.ma.masked

        # print(dN.columns)
        dN.add_column(
            T.MaskedColumn(np.ma.zeros(nN, dtype=float),
                           name="separation_as"))
        dN["separation_as"] = np.ma.masked
        dN.add_column(
            T.MaskedColumn(np.ma.zeros(nN, dtype=int), name="groupID"))
        dN.add_column(
            T.MaskedColumn(np.ma.zeros(nN, dtype=int), name="groupsize"))

        # --- now identify the groups
        if verbose:
            print(timestamp() +
                  ": CROSSMATCH_TABLES: Identifying groups...")

        lgroupids, lgroupsizes, rgroupids, rgroupsizes = _identify_groups(
            idxl, idxr, nN, nC, match_sel=match_sel, d2IDs=d2IDs,
            verbose=verbose)

        dN['groupID'][idxl] = lgroupids[idxl]
        dC['groupID'][idxr] = rgroupids[idxr]
        dN['groupsize'][idxl] = lgroupsizes[idxl]
        dC['groupsize'][idxr] = rgroupsizes[idxr]

    # --- then we build a table with the matches
    dmatch = dN[idxl]
    for i in range(ncolsC):
        dmatch[dC.columns[i].name] = dC[dC.columns[i].name][idxr]

    # --- fill in the separations
    dmatch['separation_as'] = sep
    # for i in range(len(dmatch)):
    #     print(dmatch['NEDname'][i], " <--> ", dmatch['CDSname'][i])

    if verbose:
        print(timestamp() + ": CROSSMATCH_TABLES: Preparing output...")

    # --- now the output options:
    if join_type == "1 or 2":
        # --- ids of those not in the match
        idNnomatch = [x for x in range(nN) if x not in idxl]
        idCnomatch = [x for x in range(nC) if x not in idxr]
        dout = T.vstack([dN[idNnomatch], dmatch, dC[idCnomatch]])

    elif join_type == "all from 1":
        # --- ids of those not in the match
        idNnomatch = [x for x in range(nN) if x not in idxl]
        dout = T.vstack([dN[idNnomatch], dmatch])

    elif join_type == "all from 2":
        # --- ids of those not in the match
        idCnomatch = [x for x in range(nC) if x not in idxr]
        dout = T.vstack([dC[idCnomatch], dmatch])

    else:  # join_type == "1 and 2"
        dout = dmatch

    # --- mask the empty values
    if 'groupID' in dout.colnames:
        idnull = dout['groupID'] == 0
        dout['groupID'][idnull] = np.ma.masked
        dout['groupsize'][idnull] = np.ma.masked

    if fout is not None:
        dout.write(fout, delimiter=',', format='ascii',
                   fill_values=[(ascii.masked, '')], overwrite=True)

    if verbose:
        print(timestamp() + ": CROSSMATCH_TABLES: Number of output "
              "lines: ", len(dout))

    return (dout, idxl, idxr)
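# --- Illustrative usage sketch for crossmatch_tables (added; the two
#     tables are made up and use the default coordinate column names, and
#     the module-level imports used above are assumed):
def _demo_crossmatch_tables():
    from astropy import table as T
    d1 = T.Table({'RA_deg': [10.0, 50.0], 'DEC_deg': [-5.0, 20.0],
                  'name1': ['a', 'b']}, masked=True)
    d2 = T.Table({'RA_deg': [10.0002, 80.0], 'DEC_deg': [-5.0, 30.0],
                  'name2': ['x', 'y']}, masked=True)
    dout, idxl, idxr = crossmatch_tables(d1, d2, join_type="1 and 2",
                                         conrad=3.0)
    print(len(dout))  # one pair matches within the 3-arcsec cone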
def read_res_file(d, database, ids, i, infile, inname, inra, indec, fail,
                  found_by_name=None, found_by_coord=None, racol=None,
                  deccol=None, typecol=None, preftypes=None, verbose=False):
    """
    Helper routine to load the results of a query from a file and add them
    to a table
    """
    # --- check whether there is a matching file with the query results
    if not os.path.isfile(infile):
        fail[i] = 1
        if verbose:
            print(" - WARNING: No results found for object: ", i, inname)
        return (-1)

    else:
        res = ascii.read(infile, header_start=0, delimiter=',',
                         guess=False)
        nres = len(res)

        if nres == 0:
            fail[i] = 1
            if verbose:
                print(" - WARNING: Length of results = 0: ", i, inname)
            return (-1)

        # --- convert types into correct strings
        for j in range(nres):
            res[typecol][j] = res[typecol][j].replace("b'", "").replace("'", "")

        # --- the SDSS query at the moment is still not astroquery
        #     conform and does not return the separation so that
        #     we have to calculate it
        if "Separation" not in res.colnames:
            res.add_column(T.MaskedColumn(np.ma.zeros(nres, dtype=float),
                                          name="Separation"))

            if database != "SDSS":
                found_by_name[i] = 1
            else:
                found_by_coord[i] = 1

        # --- it seems to be added but empty in fact
        else:
            # --- if it exists, make sure the column has the right type
            res['Separation'] = res['Separation'].astype(float)

            if T.MaskedColumn(res['Separation']).mask[0]:
                found_by_name[i] = 1
            else:
                found_by_coord[i] = 1

        # --- just to be sure, (re)compute the separations
        for j in range(nres):
            res['Separation'][j] = ang_dist(inra, indec, res[racol][j],
                                            res[deccol][j])

        # --- sort the results by separation so that the closest comes first
        res = res[np.argsort(res['Separation'])]

        # --- get the objects in the result table with preferred type
        idg = [x for x, t in enumerate(res[typecol]) if t in preftypes]

        if len(idg) > 0:
            sel = idg[0]
        else:
            # --- if no preferred object is available, just take the closest
            sel = 0
            if verbose:
                print(" - WARNING: No object of preferred type found: ",
                      i, inname[i])

        if verbose:
            print(inname, " ---> ", res[sel])

        # --- fill the corresponding table values
        if database == "NED":
            d["NED_name"][ids[i]] = res['Object Name'][sel].replace(
                "b'", "").replace("'", "")
            if not T.MaskedColumn(res["Redshift"]).mask[sel]:
                d["NED_redshift"][ids[i]] = float(res["Redshift"][sel])
            d["NED_RA_deg"][ids[i]] = float(res[racol][sel])
            d["NED_DEC_deg"][ids[i]] = float(res[deccol][sel])
            d["NED_type"][ids[i]] = res[typecol][sel]

        elif database == "SIMBAD":
            d["CDS_name"][ids[i]] = res['MAIN_ID'][sel]
            if "b'" in d["CDS_name"][ids[i]]:
                d["CDS_name"][ids[i]] = d["CDS_name"][ids[i]].replace(
                    "b'", "").replace("'", "")
            elif 'b"' in d["CDS_name"][ids[i]]:
                d["CDS_name"][ids[i]] = d["CDS_name"][ids[i]].replace(
                    '"b""', "").replace('"""', "")
            if not T.MaskedColumn(res["Z_VALUE"]).mask[sel]:
                d["CDS_redshift"][ids[i]] = float(res["Z_VALUE"][sel])
            d["CDS_RA_deg"][ids[i]] = float(res[racol][sel])
            d["CDS_DEC_deg"][ids[i]] = float(res[deccol][sel])
            d["CDS_type"][ids[i]] = res[typecol][sel]

        elif database == "SDSS":
            # --- construct the SDSS name
            coords = coordinates.SkyCoord(ra=res[racol][sel] * u.degree,
                                          dec=res[deccol][sel] * u.degree)
            d["SDSS_name"][ids[i]] = 'SDSS J{0}{1}'.format(
                coords.ra.to_string(unit=u.hourangle, sep='', precision=2,
                                    pad=True),
                coords.dec.to_string(sep='', precision=1, alwayssign=True,
                                     pad=True))

            if not T.MaskedColumn(res["z"]).mask[sel]:
                d["SDSS_redshift"][ids[i]] = float(res["z"][sel])
            d["SDSS_RA_deg"][ids[i]] = float(res[racol][sel])
            d["SDSS_DEC_deg"][ids[i]] = float(res[deccol][sel])
            d["SDSS_spectype"][ids[i]] = res[typecol][sel]
            d["SDSS_specID"][ids[i]] = res['specobjid'][sel]
            if not T.MaskedColumn(res['subclass']).mask[sel]:
                d["SDSS_specclass"][ids[i]] = res['subclass'][sel].replace(
                    "b'", "").replace("'", "")
            d["SDSS_warn"][ids[i]] = res['zWarning'][sel]
            d["SDSS_redshift_unc"][ids[i]] = res['zErr'][sel]
            if res['type'][sel] == 3:
                d["SDSS_phottype"][ids[i]] = "GALAXY"
            elif res['type'][sel] == 6:
                d["SDSS_phottype"][ids[i]] = "STAR"

        # --- associate the right coordinate offsets
        if database == "NED":
            if d["origin"][ids[i]] == "SIMBAD":
                d["NED-CDS_sep_as"][ids[i]] = res['Separation'][sel]
            elif d["origin"][ids[i]] == "SDSS":
                d["NED-SDSS_sep_as"][ids[i]] = res['Separation'][sel]
            elif d["origin"][ids[i]] == "2MRS":
                d["NED-2MRS_sep_as"][ids[i]] = res['Separation'][sel]
            elif d["origin"][ids[i]] == "B70":
                d["NED-B70_sep_as"][ids[i]] = res['Separation'][sel]
        elif database == "SIMBAD":
            if d["origin"][ids[i]] == "NED":
                d["NED-CDS_sep_as"][ids[i]] = res['Separation'][sel]
        elif database == "SDSS":
            if d["origin"][ids[i]] == "NED":
                d["NED-SDSS_sep_as"][ids[i]] = res['Separation'][sel]

    return (0)
def simplify_catalog(mastercat, quickld=True):
    """
    Removes most of the unnecessary columns from the master catalog and
    joins fields where relevant

    Parameters
    ----------
    mastercat : astropy.table.Table
        The table from initial_catalog
    quickld : bool
        If True, means do the "quick" version of the luminosity distance
        calculation (takes <1 sec as opposed to a min or so, but is only
        good to a few kpc)
    """
    from astropy import table
    from astropy.constants import c

    ckps = c.to(u.km / u.s).value

    tab = table.Table()

    # RADEC: use NSA unless it's missing, in which case use LEDA
    ras = mastercat['al2000'] * 15
    ras[~mastercat['RA'].mask] = mastercat['RA'][~mastercat['RA'].mask]
    decs = mastercat['de2000']
    decs[~mastercat['DEC'].mask] = mastercat['DEC'][~mastercat['DEC'].mask]

    tab.add_column(table.MaskedColumn(name='RA', data=ras, unit=u.deg))
    tab.add_column(table.MaskedColumn(name='Dec', data=decs, unit=u.deg))

    # Names/IDs:
    pgc = mastercat['pgc'].copy()
    pgc.mask = mastercat['pgc'] < 0
    tab.add_column(table.MaskedColumn(name='PGC#', data=pgc))
    tab.add_column(table.MaskedColumn(name='NSAID', data=mastercat['NSAID']))

    # do these in order of how 'preferred' the object name is.
    nameorder = ('Objname', 'Name_eddkk', 'objname', 'Name_2mass')
    # this is: EDD, KK, LEDA, 2MASS

    # need to figure out which has the *largest* name strings, because we
    # have a fixed number of characters
    largestdt = np.dtype('S1')
    for nm in nameorder:
        if mastercat.dtype[nm] > largestdt:
            largestdt = mastercat.dtype[nm]
            largestdtnm = nm
    # these will all be overwritten - just use it for shape
    names = mastercat[largestdtnm].copy()
    for nm in nameorder:
        msk = ~mastercat[nm].mask
        names[msk] = mastercat[nm][msk]
    tab.add_column(table.MaskedColumn(name='othername', data=names))

    # After this, everything should have either an NSAID, a PGC#, or a
    # name (or more than one)

    # VELOCITIES/redshifts
    # start with LEDA
    vs = mastercat['v'].astype(float)
    v_errs = mastercat['e_v'].astype(float)

    # Now add vhelio from the EDD
    eddvhel = mastercat['Vhel_eddkk']
    vs[~eddvhel.mask] = eddvhel[~eddvhel.mask]
    # EDD has no v-errors, so mask them
    v_errs[~eddvhel.mask] = 0
    v_errs.mask[~eddvhel.mask] = True

    # then the NSA *observed* velocity, if available (NOT the same as
    # distance)
    vs[~mastercat['Z'].mask] = mastercat['Z'][~mastercat['Z'].mask] * ckps
    v_errs.mask[~mastercat['Z'].mask] = True
    # v_errs[~mastercat['Z_ERR'].mask] = mastercat['Z_ERR'][~mastercat['Z_ERR'].mask] * ckps

    # finally, KK when present if its not available from one of the above
    kkvh = mastercat['Vh']
    vs[~kkvh.mask] = kkvh[~kkvh.mask]
    # KK has no v-errors, so mask them
    v_errs[~kkvh.mask] = 0
    v_errs.mask[~kkvh.mask] = True

    # DISTANCES
    # start with all inf, and all masked
    dist = np.ones_like(mastercat['Dist_edd']) * np.inf
    dist.mask[:] = True

    # first populate those that are in EDD with CMD-based distance
    msk = mastercat['So_eddkk'] == 1
    dist[msk] = mastercat['Dist_edd'][msk]

    # now populate from the NSA if not in the above
    msk = (dist.mask) & (~mastercat['ZDIST'].mask)
    dist[msk] = mastercat['ZDIST'][msk] * ckps / WMAP9.H(0).value

    # finally, add in anything in the KK that's not elsewhere
    msk = (dist.mask) & (~mastercat['Dist_kk'].mask)
    dist[msk] = mastercat['Dist_kk'][msk]

    # # for those *without* EDD or KK, use the redshift's luminosity distance
    # premsk = dist.mask.copy()
    # zs = vs[premsk]/ckps
    # if quickld:
    #     ldx = np.linspace(zs.min(), zs.max(), 1000)
    #     ldy = WMAP9.luminosity_distance(ldx).to(u.Mpc).value
    #     ld = np.interp(zs, ldx, ldy)
    # else:
    #     ld = WMAP9.luminosity_distance(zs).to(u.Mpc).value
    # dist[premsk] = ld
    # dist.mask[premsk] = vs.mask[premsk]

    distmod = 5 * np.log10(dist) + 25  # used in phot section

    tab.add_column(table.MaskedColumn(name='vhelio', data=vs))
    # decided to remove v-errors
    # tab.add_column(table.MaskedColumn(name='vhelio_err', data=v_errs))
    tab.add_column(table.MaskedColumn(name='distance', data=dist,
                                      unit=u.Mpc))

    # PHOTOMETRY
    tab.add_column(
        table.MaskedColumn(name='r', data=mastercat['ABSMAG_r'] + distmod))
    tab.add_column(
        table.MaskedColumn(name='i', data=mastercat['ABSMAG_i'] + distmod))
    tab.add_column(
        table.MaskedColumn(name='z', data=mastercat['ABSMAG_z'] + distmod))
    tab.add_column(table.MaskedColumn(name='I', data=mastercat['it']))
    tab.add_column(table.MaskedColumn(name='K', data=mastercat['K_tc']))
    tab.add_column(table.MaskedColumn(name='K_err', data=mastercat['e_K']))

    # Stellar mass/SFR
    tab.add_column(
        table.MaskedColumn(name='M_star',
                           data=mastercat['MASS'] *
                           (WMAP9.H(0).value / 100) ** -2))
    tab.add_column(table.MaskedColumn(name='SFR_B300',
                                      data=mastercat['B300']))
    tab.add_column(table.MaskedColumn(name='SFR_B1000',
                                      data=mastercat['B1000']))

    return tab
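# --- Illustrative call sketch (added): `mastercat` is the hypothetical
#     joined LEDA/NSA/EDD/KK master table produced by the upstream
#     initial_catalog step referenced in the docstring.
# simple = simplify_catalog(mastercat, quickld=True)
# simple['RA', 'Dec', 'vhelio', 'distance'].pprint()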
def add_6df(simplifiedmastercat, sixdf, tol=1 * u.arcmin):
    """
    Adds entries in the catalog for the 6dF survey, or updates v when
    missing
    """
    from astropy import table
    from astropy.coordinates import SkyCoord
    from astropy.constants import c

    ckps = c.to(u.km / u.s).value

    catcoo = SkyCoord(simplifiedmastercat['RA'].view(np.ndarray) * u.deg,
                      simplifiedmastercat['Dec'].view(np.ndarray) * u.deg)
    sixdfcoo = SkyCoord(sixdf['obsra'].view(np.ndarray) * u.deg,
                        sixdf['obsdec'].view(np.ndarray) * u.deg)

    idx, dd, d3d = sixdfcoo.match_to_catalog_sky(catcoo)
    msk = dd < tol
    sixdfnomatch = sixdf[~msk]

    t = table.Table()
    t.add_column(table.MaskedColumn(name='RA', data=sixdfnomatch['obsra']))
    t.add_column(table.MaskedColumn(name='Dec',
                                    data=sixdfnomatch['obsdec']))
    t.add_column(
        table.MaskedColumn(name='PGC#',
                           data=-np.ones(len(sixdfnomatch), dtype=int),
                           mask=np.ones(len(sixdfnomatch), dtype=bool)))
    t.add_column(
        table.MaskedColumn(name='NSAID',
                           data=-np.ones(len(sixdfnomatch), dtype=int),
                           mask=np.ones(len(sixdfnomatch), dtype=bool)))
    t.add_column(
        table.MaskedColumn(name='othername',
                           data=sixdfnomatch['targetname']))
    t.add_column(
        table.MaskedColumn(name='vhelio',
                           data=sixdfnomatch['z_helio'] * ckps))
    # t.add_column(table.MaskedColumn(name='vhelio_err',
    #                                 data=sixdfnomatch['zfinalerr'] * ckps))
    t.add_column(
        table.MaskedColumn(name='distance',
                           data=WMAP9.luminosity_distance(
                               sixdfnomatch['z_helio']).value))

    # fill in anything else needed with -999 and masked
    for nm in simplifiedmastercat.colnames:
        if nm not in t.colnames:
            t.add_column(
                table.MaskedColumn(
                    name=nm,
                    data=-999 * np.ones(len(sixdfnomatch), dtype=int),
                    mask=np.ones(len(sixdfnomatch), dtype=bool)))

    t = table.vstack([simplifiedmastercat, t], join_type='exact')

    # now update anything that *did* match but doesn't have another
    # velocity
    tcoo = SkyCoord(t['RA'].view(np.ndarray) * u.deg,
                    t['Dec'].view(np.ndarray) * u.deg)

    idx, dd, d3d = sixdfcoo.match_to_catalog_sky(tcoo)
    msk = dd < tol

    catmatch = t[idx[msk]]
    sixdfmatch = sixdf[msk]

    msk2 = t['vhelio'][idx[msk]].mask
    # msk2 indexes the matched subset, so select the matched rows first
    # and then apply it (the original `idx[msk & msk2]` combined masks
    # of different lengths)
    t['vhelio'][idx[msk][msk2]] = sixdfmatch['z_helio'][msk2] * ckps

    return t
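# --- Illustrative call sketch (added): `sixdf` is a hypothetical 6dFGS
#     table with 'obsra', 'obsdec', 'targetname' and 'z_helio' columns,
#     matching what add_6df expects; `simple` is the output of
#     simplify_catalog above.
# combined = add_6df(simple, sixdf, tol=1 * u.arcmin)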