def testDetectDtype(self):
    """
    Test that DBObject.execute_arbitrary can correctly detect the dtypes
    of the rows it is returning
    """
    db_name = os.path.join(self.scratch_dir, 'testDBObject_dtype_DB.db')
    if os.path.exists(db_name):
        os.unlink(db_name)

    sentence_value = 'this, has; punctuation'

    # numpy spells fixed-width string dtypes differently under Python 2 and 3
    if sys.version_info.major == 2:
        sentence_dtype = '|S22'
    else:
        sentence_dtype = '<U22'

    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()
        try:
            # NOTE: 'sentence' is declared int but is populated with text below
            c.execute('''CREATE TABLE testTable (id int, val real, sentence int)''')
            conn.commit()
        except sqlite3.Error:
            raise RuntimeError("Error creating database.")

        # bound parameters instead of string-built SQL; val is rounded to
        # the five decimal places that the old '%.5f' formatting stored
        rows = [(ii, round(5.234*ii, 5), sentence_value) for ii in range(10)]
        c.executemany('''INSERT INTO testTable VALUES (?, ?, ?)''', rows)
        conn.commit()
    finally:
        # always release the connection, even if table creation failed
        conn.close()

    def check_chunks(chunk_iter):
        """Validate dtype and contents of every chunk of the 3-column query"""
        ct = 0
        for chunk in chunk_iter:
            self.assertEqual(str(chunk.dtype['id']), 'int64')
            self.assertEqual(str(chunk.dtype['val']), 'float64')
            # check the dtype of the chunk itself, not of an earlier result
            self.assertEqual(str(chunk.dtype['sentence']), sentence_dtype)
            self.assertEqual(len(chunk.dtype), 3)
            for line in chunk:
                ct += 1
                self.assertEqual(line['sentence'], sentence_value)
                self.assertAlmostEqual(line['val'], line['id']*5.234, 5)
                self.assertEqual(line['id']%2, 0)
        self.assertEqual(ct, 5)

    try:
        db = DBObject(database=db_name, driver='sqlite')
        query = 'SELECT id, val, sentence FROM testTable WHERE id%2 = 0'
        results = db.execute_arbitrary(query)

        np.testing.assert_array_equal(results['id'], np.arange(0,9,2,dtype=int))
        np.testing.assert_array_almost_equal(results['val'], 5.234*np.arange(0,9,2), decimal=5)
        for sentence in results['sentence']:
            self.assertEqual(sentence, sentence_value)

        self.assertEqual(str(results.dtype['id']), 'int64')
        self.assertEqual(str(results.dtype['val']), 'float64')
        self.assertEqual(str(results.dtype['sentence']), sentence_dtype)
        self.assertEqual(len(results.dtype), 3)

        # now test that it works when getting a ChunkIterator
        check_chunks(db.get_arbitrary_chunk_iterator(query, chunk_size=3))

        # test that doing a different query does not spoil dtype detection
        query = 'SELECT id, sentence FROM testTable WHERE id%2 = 0'
        results = db.execute_arbitrary(query)
        self.assertGreater(len(results), 0)
        self.assertEqual(len(results.dtype.names), 2)
        self.assertEqual(str(results.dtype['id']), 'int64')
        self.assertEqual(str(results.dtype['sentence']), sentence_dtype)

        query = 'SELECT id, val, sentence FROM testTable WHERE id%2 = 0'
        check_chunks(db.get_arbitrary_chunk_iterator(query, chunk_size=3))
    finally:
        # remove the scratch database even when an assertion fails
        if os.path.exists(db_name):
            os.unlink(db_name)
# Connect to the remote catalog database holding the galaxy table
db = DBObject(database='LSSTCATSIM',
              host='fatboy.phys.washington.edu',
              port=1433,
              driver='mssql+pymssql')

# Select AGN (rows with a varParamStr) in a small patch straddling ra=0
query = ''.join(('SELECT magnorm_agn, redshift, varParamStr FROM ',
                 'galaxy WHERE varParamStr IS NOT NULL ',
                 'AND dec BETWEEN -2.5 AND 2.5 ',
                 'AND (ra<2.5 OR ra>357.5)'))

dtype = np.dtype([('magnorm', float),
                  ('redshift', float),
                  ('varParamStr', str, 400)])

data_iter = db.get_arbitrary_chunk_iterator(query,
                                            dtype=dtype,
                                            chunk_size=10000)

with open('data/dc1_agn_params.txt', 'w') as out_file:
    out_file.write('# z m_i M_i tau sfu sfg sfr sfi sfz sfy\n')
    for chunk in data_iter:
        chunk_redshift = chunk['redshift']
        # per-row distance modulus and k-correction for this chunk
        DM = cosmo.distanceModulus(redshift=chunk_redshift)
        k_corr = np.interp(chunk_redshift, z_grid, k_grid)
        for i_row, (agn, dist_mod, k_val) in enumerate(zip(chunk, DM, k_corr)):
            # fresh copy of the base SED, normalized and redshifted for this AGN
            ss = Sed(wavelen=base_sed.wavelen, flambda=base_sed.flambda)
            fnorm = getImsimFluxNorm(ss, agn['magnorm'])
            ss.multiplyFluxNorm(fnorm)
            ss.redshiftSED(agn['redshift'], dimming=True)
            mag = ss.calcMag(bp)
            abs_m_i = mag - dist_mod - k_val
def _sprinkled_lc_pointings_in_trixel(htmid, htmid_dict, mjd_dict,
                                      filter_dict, filter_to_int):
    """
    Find the pointings whose trixel bounds contain htmid.

    Parameters
    ----------
    htmid is the trixel id being tested

    htmid_dict maps obshistid to an iterable of (min, max) inclusive bounds

    mjd_dict maps obshistid to the mjd of the pointing

    filter_dict maps obshistid to the filter name of the pointing

    filter_to_int maps a filter name to its integer code

    Returns
    -------
    None if no pointing overlaps htmid; otherwise a tuple
    (mjd_arr, obs_arr, filter_arr) of numpy arrays sorted by mjd
    """
    mjd_list = []
    obs_list = []
    filter_list = []
    for obshistid in htmid_dict:
        if any(bounds[0] <= htmid <= bounds[1]
               for bounds in htmid_dict[obshistid]):
            mjd_list.append(mjd_dict[obshistid])
            obs_list.append(obshistid)
            filter_list.append(filter_to_int[filter_dict[obshistid]])

    if len(mjd_list) == 0:
        return None

    mjd_arr = np.array(mjd_list)
    sorted_dex = np.argsort(mjd_arr)
    return (mjd_arr[sorted_dex],
            np.array(obs_list)[sorted_dex],
            np.array(filter_list)[sorted_dex])


def write_sprinkled_lc(out_file_name, total_obs_md,
                       pointing_dir, opsim_db_name,
                       ra_colname='descDitheredRA',
                       dec_colname='descDitheredDec',
                       rottel_colname = 'descDitheredRotTelPos',
                       sql_file_name=None,
                       bp_dict=None):
    """
    Create database of light curves

    Note: this is still under development.  It has not yet been
    used for a production-level truth catalog

    Parameters
    ----------
    out_file_name is the name of the sqlite file to be written

    total_obs_md is an ObservationMetaData covering the whole
    survey area (not referenced in the body of this function)

    pointing_dir contains a series of files that are two columns:
    obshistid, mjd.  The files must each have 'visits' in their name.
    These specify the pointings for which we are assembling data.
    See:
        https://github.com/LSSTDESC/DC2_Repo/tree/master/data/Run1.1
    for an example.

    opsim_db_name is the name of the OpSim database to be queried for pointings

    ra_colname is the column used for RA of the pointing
    (default: descDitheredRA)

    dec_colname is the column used for the Dec of the pointing
    (default: descDitheredDec)

    rottel_colname is the column used for the rotTelPos of the pointing
    (default: descDitheredRotTelPos)

    sql_file_name is the name of the parameter database produced by
    write_sprinkled_param_db to be used

    bp_dict is a BandpassDict of the telescope filters to be used

    Returns
    -------
    None

    Raises
    ------
    RuntimeError if sql_file_name is None or is not an existing file

    Writes out a database to out_file_name.  The tables of this database and
    their columns are:

    light_curves:
        - uniqueId -- an int unique to all objects
        - obshistid -- an int unique to all pointings
        - mag -- the magnitude observed for this object at that pointing

    obs_metadata:
        - obshistid -- an int unique to all pointings
        - mjd -- the date of the pointing
        - filter -- an int corresponding to the telescope filter (0==u, 1==g..)

    variables_and_transients:
        - uniqueId -- an int unique to all objects
        - galaxy_id -- an int indicating the host galaxy
        - ra -- in degrees
        - dec -- in degrees
        - (the is_sprinkled flag of the object is inserted here; the column
          name is set by create_sprinkled_sql_file)
        - agn -- ==1 if object is an AGN
        - sn -- ==1 if object is a supernova
    """
    # local import so that this function does not depend on the module header
    from contextlib import closing

    t0_master = time.time()

    # os.path.isfile(None) raises TypeError, so test for None explicitly
    if sql_file_name is None or not os.path.isfile(sql_file_name):
        raise RuntimeError('%s does not exist' % sql_file_name)

    sn_simulator = SneSimulator(bp_dict)
    sed_dir = os.environ['SIMS_SED_LIBRARY_DIR']

    create_sprinkled_sql_file(out_file_name)

    t_start = time.time()

    # get data about the pointings being simulated
    # NOTE(review): rottel_colname is accepted by this function but is not
    # forwarded here -- confirm whether get_pointing_htmid should receive it
    (htmid_dict,
     mjd_dict,
     filter_dict,
     obsmd_dict) = get_pointing_htmid(pointing_dir, opsim_db_name,
                                      ra_colname=ra_colname,
                                      dec_colname=dec_colname)

    t_htmid_dict = time.time()-t_start

    # single mapping from filter name to the int stored in the output tables
    filter_to_int = {'u':0, 'g':1, 'r':2, 'i':3, 'z':4, 'y':5}

    # put the data about the pointings in the obs_metadata table;
    # 'with conn' only manages the transaction, so closing() is needed
    # to actually release the connection
    with closing(sqlite3.connect(out_file_name)) as conn, conn:
        cursor = conn.cursor()
        values = ((int(obs), mjd_dict[obs], filter_to_int[filter_dict[obs]])
                  for obs in mjd_dict)
        cursor.executemany('''INSERT INTO obs_metadata VALUES (?,?,?)''', values)
        cursor.execute('''CREATE INDEX obs_filter ON obs_metadata (obshistid, filter)''')
        conn.commit()

    print('\ngot htmid_dict -- %d in %e seconds' % (len(htmid_dict), t_htmid_dict))

    db = DBObject(sql_file_name, driver='sqlite')

    # get a list of htmid corresponding to trixels in which
    # variables and transients can be found
    query = 'SELECT DISTINCT htmid FROM zpoint WHERE is_agn=1 OR is_sn=1'
    dtype = np.dtype([('htmid', int)])
    results = db.execute_arbitrary(query, dtype=dtype)
    object_htmid = results['htmid']

    agn_dtype = np.dtype([('uniqueId', int), ('galaxy_id', int),
                          ('ra', float), ('dec', float),
                          ('redshift', float), ('sed', str, 500),
                          ('magnorm', float), ('varParamStr', str, 500),
                          ('is_sprinkled', int)])

    agn_base_query = 'SELECT uniqueId, galaxy_id, '
    agn_base_query += 'raJ2000, decJ2000, '
    agn_base_query += 'redshift, sedFilepath, '
    agn_base_query += 'magNorm, varParamStr, is_sprinkled '
    agn_base_query += 'FROM zpoint WHERE is_agn=1 '

    sn_dtype = np.dtype([('uniqueId', int), ('galaxy_id', int),
                         ('ra', float), ('dec', float),
                         ('redshift', float), ('sn_truth_params', str, 500),
                         ('is_sprinkled', int)])

    sn_base_query = 'SELECT uniqueId, galaxy_id, '
    sn_base_query += 'raJ2000, decJ2000, '
    sn_base_query += 'redshift, sn_truth_params, is_sprinkled '
    sn_base_query += 'FROM zpoint WHERE is_sn=1 '

    n_floats = 0
    with closing(sqlite3.connect(out_file_name)) as conn, conn:
        cursor = conn.cursor()
        t_before_htmid = time.time()

        # loop over trixels containing variables and transients, simulating
        # the light curves of those objects
        for htmid_dex, htmid in enumerate(object_htmid):
            if htmid_dex>0:
                htmid_duration = (time.time()-t_before_htmid)/3600.0
                htmid_prediction = len(object_htmid)*htmid_duration/htmid_dex
                print('%d htmid out of %d in %e hours; predict %e hours remaining' %
                      (htmid_dex, len(object_htmid), htmid_duration,htmid_prediction-htmid_duration))

            # Find only those pointings which overlap the current trixel
            pointings = _sprinkled_lc_pointings_in_trixel(htmid, htmid_dict,
                                                          mjd_dict, filter_dict,
                                                          filter_to_int)
            if pointings is None:
                continue
            (mjd_arr, obs_arr, filter_arr) = pointings

            agn_query = agn_base_query + 'AND htmid=%d' % htmid

            agn_iter = db.get_arbitrary_chunk_iterator(agn_query,
                                                       dtype=agn_dtype,
                                                       chunk_size=10000)

            for agn_results in agn_iter:
                # put static data about the AGN (position, etc.) into the
                # variables_and_transients table
                values = ((int(agn_results['uniqueId'][i_obj]),
                           int(agn_results['galaxy_id'][i_obj]),
                           np.degrees(agn_results['ra'][i_obj]),
                           np.degrees(agn_results['dec'][i_obj]),
                           int(agn_results['is_sprinkled'][i_obj]),
                           1,0)
                          for i_obj in range(len(agn_results)))

                cursor.executemany('''INSERT INTO variables_and_transients VALUES (?,?,?,?,?,?,?)''', values)

                agn_simulator = AgnSimulator(agn_results['redshift'])

                # one row of six magnitudes per AGN, before variability
                quiescent_mag = np.zeros((len(agn_results), 6), dtype=float)
                for i_obj, (sed_name, zz, mm) in enumerate(zip(agn_results['sed'],
                                                               agn_results['redshift'],
                                                               agn_results['magnorm'])):
                    spec = Sed()
                    spec.readSED_flambda(os.path.join(sed_dir, sed_name))
                    fnorm = getImsimFluxNorm(spec, mm)
                    spec.multiplyFluxNorm(fnorm)
                    spec.redshiftSED(zz, dimming=True)
                    mag_list = bp_dict.magListForSed(spec)
                    quiescent_mag[i_obj] = mag_list

                # simulate AGN variability; indexed below as
                # dmag[filter][object][time]
                dmag = agn_simulator.applyVariability(agn_results['varParamStr'],
                                                      expmjd=mjd_arr)

                # loop over pointings that overlap the current trixel, writing
                # out simulated photometry for each AGN observed in that pointing
                for i_time, obshistid in enumerate(obs_arr):
                    # only include objects that were actually on a detector
                    are_on_chip = _actually_on_chip(np.degrees(agn_results['ra']),
                                                    np.degrees(agn_results['dec']),
                                                    obsmd_dict[obshistid])
                    valid_agn = np.where(are_on_chip)
                    if len(valid_agn[0])==0:
                        continue

                    values = ((int(agn_results['uniqueId'][i_obj]),
                               int(obs_arr[i_time]),
                               quiescent_mag[i_obj][filter_arr[i_time]]+
                               dmag[filter_arr[i_time]][i_obj][i_time])
                              for i_obj in valid_agn[0])
                    cursor.executemany('''INSERT INTO light_curves VALUES (?,?,?)''', values)

                conn.commit()
                n_floats += len(dmag.flatten())

            sn_query = sn_base_query + 'AND htmid=%d' % htmid

            sn_iter = db.get_arbitrary_chunk_iterator(sn_query,
                                                      dtype=sn_dtype,
                                                      chunk_size=10000)

            for sn_results in sn_iter:
                t0_sne = time.time()

                # write static information about SNe to the
                # variables_and_transients table
                values = ((int(sn_results['uniqueId'][i_obj]),
                           int(sn_results['galaxy_id'][i_obj]),
                           np.degrees(sn_results['ra'][i_obj]),
                           np.degrees(sn_results['dec'][i_obj]),
                           int(sn_results['is_sprinkled'][i_obj]),
                           0,1)
                          for i_obj in range(len(sn_results)))

                cursor.executemany('''INSERT INTO variables_and_transients VALUES (?,?,?,?,?,?,?)''', values)
                conn.commit()

                # indexed below as sn_mags[object][time]
                sn_mags = sn_simulator.calculate_sn_magnitudes(sn_results['sn_truth_params'],
                                                               mjd_arr, filter_arr)
                print(' did %d sne in %e seconds' % (len(sn_results), time.time()-t0_sne))

                # loop over pointings that overlap the current trixel, writing
                # out simulated photometry for each SNe observed in that pointing
                for i_time, obshistid in enumerate(obs_arr):
                    # only include objects that fell on a detector
                    are_on_chip = _actually_on_chip(np.degrees(sn_results['ra']),
                                                    np.degrees(sn_results['dec']),
                                                    obsmd_dict[obshistid])
                    valid_obj = np.where(np.logical_and(np.isfinite(sn_mags[:,i_time]),
                                                        are_on_chip))
                    if len(valid_obj[0]) == 0:
                        continue

                    values = ((int(sn_results['uniqueId'][i_obj]),
                               int(obs_arr[i_time]),
                               sn_mags[i_obj][i_time])
                              for i_obj in valid_obj[0])
                    cursor.executemany('''INSERT INTO light_curves VALUES (?,?,?)''', values)
                    conn.commit()
                    n_floats += len(valid_obj[0])

        cursor.execute('CREATE INDEX unq_obs ON light_curves (uniqueId, obshistid)')
        conn.commit()

    print('n_floats %d' % n_floats)
    print('in %e seconds' % (time.time()-t0_master))
def testDetectDtype(self):
    """
    Test that DBObject.execute_arbitrary can correctly detect the dtypes
    of the rows it is returning
    """
    db_name = os.path.join(self.scratch_dir, 'testDBObject_dtype_DB.db')
    if os.path.exists(db_name):
        os.unlink(db_name)

    sentence_value = 'this, has; punctuation'

    # numpy spells fixed-width string dtypes differently under Python 2 and 3
    if sys.version_info.major == 2:
        sentence_dtype = '|S22'
    else:
        sentence_dtype = '<U22'

    conn = sqlite3.connect(db_name)
    try:
        c = conn.cursor()
        try:
            # NOTE: 'sentence' is declared int but is populated with text below
            c.execute('''CREATE TABLE testTable (id int, val real, sentence int)''')
            conn.commit()
        except sqlite3.Error:
            raise RuntimeError("Error creating database.")

        # bound parameters instead of string-built SQL; val is rounded to
        # the five decimal places that the old '%.5f' formatting stored
        rows = [(ii, round(5.234*ii, 5), sentence_value) for ii in range(10)]
        c.executemany('''INSERT INTO testTable VALUES (?, ?, ?)''', rows)
        conn.commit()
    finally:
        # always release the connection, even if table creation failed
        conn.close()

    def check_chunks(chunk_iter):
        """Validate dtype and contents of every chunk of the 3-column query"""
        ct = 0
        for chunk in chunk_iter:
            self.assertEqual(str(chunk.dtype['id']), 'int64')
            self.assertEqual(str(chunk.dtype['val']), 'float64')
            # check the dtype of the chunk itself, not of an earlier result
            self.assertEqual(str(chunk.dtype['sentence']), sentence_dtype)
            self.assertEqual(len(chunk.dtype), 3)
            for line in chunk:
                ct += 1
                self.assertEqual(line['sentence'], sentence_value)
                self.assertAlmostEqual(line['val'], line['id']*5.234, 5)
                self.assertEqual(line['id']%2, 0)
        self.assertEqual(ct, 5)

    try:
        db = DBObject(database=db_name, driver='sqlite')
        query = 'SELECT id, val, sentence FROM testTable WHERE id%2 = 0'
        results = db.execute_arbitrary(query)

        np.testing.assert_array_equal(results['id'], np.arange(0,9,2,dtype=int))
        np.testing.assert_array_almost_equal(results['val'], 5.234*np.arange(0,9,2), decimal=5)
        for sentence in results['sentence']:
            self.assertEqual(sentence, sentence_value)

        self.assertEqual(str(results.dtype['id']), 'int64')
        self.assertEqual(str(results.dtype['val']), 'float64')
        self.assertEqual(str(results.dtype['sentence']), sentence_dtype)
        self.assertEqual(len(results.dtype), 3)

        # now test that it works when getting a ChunkIterator
        check_chunks(db.get_arbitrary_chunk_iterator(query, chunk_size=3))

        # test that doing a different query does not spoil dtype detection
        query = 'SELECT id, sentence FROM testTable WHERE id%2 = 0'
        results = db.execute_arbitrary(query)
        self.assertGreater(len(results), 0)
        self.assertEqual(len(results.dtype.names), 2)
        self.assertEqual(str(results.dtype['id']), 'int64')
        self.assertEqual(str(results.dtype['sentence']), sentence_dtype)

        query = 'SELECT id, val, sentence FROM testTable WHERE id%2 = 0'
        check_chunks(db.get_arbitrary_chunk_iterator(query, chunk_size=3))
    finally:
        # remove the scratch database even when an assertion fails
        if os.path.exists(db_name):
            os.unlink(db_name)
class AGN_postprocessing_mixin(object):

    def _postprocess_results(self, master_chunk):
        """
        Look up magNorm and varParamStr in the sqlite database named by
        self.agn_params_db and copy them into the matching rows of
        master_chunk (rows are matched on galaxy_id).

        Column names in master_chunk are prefixed with self.agn_objid + '_'
        when self.agn_objid is not None.

        Returns master_chunk unchanged if self.agn_params_db is None;
        otherwise returns self._final_pass(master_chunk).

        Raises RuntimeError if self.agn_params_db does not exist on disk.
        """
        prefix = '' if self.agn_objid is None else self.agn_objid + '_'
        gid_name = prefix + 'galaxy_id'
        varpar_name = prefix + 'varParamStr'
        magnorm_name = prefix + 'magNorm'

        params_db = self.agn_params_db
        if params_db is None:
            return master_chunk

        if not os.path.exists(params_db):
            raise RuntimeError('\n%s\n\ndoes not exist' % params_db)

        # the database connection and its dtype are created once and cached
        if not hasattr(self, '_agn_dbo'):
            self._agn_dbo = DBObject(database=params_db, driver='sqlite')
            self._agn_dtype = np.dtype([('galaxy_id', int),
                                        ('magNorm', float),
                                        ('varParamStr', str, 500)])

        gid_arr = master_chunk[gid_name].astype(float)
        id_bounds = (np.nanmin(gid_arr), np.nanmax(gid_arr))

        # only fetch the galaxy_id range spanned by this chunk
        query = ''.join(['SELECT galaxy_id, magNorm, varParamStr ',
                         'FROM agn_params ',
                         'WHERE galaxy_id BETWEEN %d AND %d ' % id_bounds,
                         'ORDER BY galaxy_id'])

        agn_data_iter = self._agn_dbo.get_arbitrary_chunk_iterator(query,
                                                                   dtype=self._agn_dtype,
                                                                   chunk_size=1000000)

        order = np.argsort(gid_arr)
        sorted_ids = gid_arr[order]
        column_names = master_chunk.dtype.names

        for agn_chunk in agn_data_iter:
            agn_ids = agn_chunk['galaxy_id']

            # rows of agn_chunk that have a counterpart in master_chunk
            a_dex = np.in1d(agn_ids, sorted_ids)

            # rows of master_chunk (in galaxy_id order) that have a
            # counterpart in agn_chunk
            m_dex = order[np.in1d(sorted_ids, agn_ids)]

            # make sure we have matched elements correctly
            np.testing.assert_array_equal(agn_ids[a_dex],
                                          master_chunk[gid_name][m_dex])

            if varpar_name in column_names:
                master_chunk[varpar_name][m_dex] = agn_chunk['varParamStr'][a_dex]

            if magnorm_name in column_names:
                master_chunk[magnorm_name][m_dex] = agn_chunk['magNorm'][a_dex]

        return self._final_pass(master_chunk)
import os
import time

import numpy as np

from mdwarf_utils import activity_type_from_color_z, xyz_from_lon_lat_px

t_start = time.time()

# seeded generator handed to activity_type_from_color_z below
rng = np.random.RandomState(args.seed)

# two integer columns followed by six float columns, in query order
int_columns = ('htmid', 'id')
float_columns = ('lon', 'lat', 'parallax', 'sdssr', 'sdssi', 'sdssz')
dtype = np.dtype([(col, int) for col in int_columns] +
                 [(col, float) for col in float_columns])

chunk_iter = db.get_arbitrary_chunk_iterator(query,
                                             chunk_size=args.chunk_size,
                                             dtype=dtype)

out_name = os.path.join(args.out_dir,
                        '%s_flaring_varParamStr.txt' % args.table)
print('outname ',out_name)

has_written = False
for data_chunk in chunk_iter:
    lon_deg = np.degrees(data_chunk['lon'])
    lat_deg = np.degrees(data_chunk['lat'])
    scaled_parallax = data_chunk['parallax']*0.001
    xyz = xyz_from_lon_lat_px(lon_deg, lat_deg, scaled_parallax)

    # colors used to assign an activity class and spectral type
    r_minus_i = data_chunk['sdssr']-data_chunk['sdssi']
    i_minus_z = data_chunk['sdssi']-data_chunk['sdssz']
    (activity_class,
     spectral_type) = activity_type_from_color_z(r_minus_i, i_minus_z,
                                                 xyz[2], rng)
# actually construct the query: one inclusive htmid range per trixel pair,
# shifted left by n_bits_off bits
# NOTE(review): max_tx is built from pair[1]+1 but compared with '<=',
# which looks like it admits one htmid beyond the range -- confirm whether
# '<' was intended
htmid_clauses = []
for pair in tx_list:
    min_tx = int(pair[0]<<n_bits_off)
    max_tx = int((pair[1]+1)<<n_bits_off)
    htmid_clauses.append('(htmid>=%d AND htmid<=%d)' % (min_tx, max_tx))

query = 'SELECT ra, dec, phot_g_mean_mag '
query += 'FROM gaia_2016 '
query += 'WHERE '
query += ' OR '.join(htmid_clauses)

dtype = np.dtype([('ra', float), ('dec', float), ('mag', float)])

results = gaia_db.get_arbitrary_chunk_iterator(query, dtype=dtype, chunk_size=10000)

# NOTE(review): this materializes every chunk and keeps only the first;
# presumably fails with IndexError when the query yields no chunks -- verify
result = list(results)[0]
distances = angularSeparation(result['ra'], result['dec'], ra[i], dec[i])  # Degrees
result = result[np.where(distances < radius)]
# (a leftover pdb.set_trace() breakpoint was removed from this point)

# I could think of setting the chunksize to something really large, then only doing one chunk?
# Or maybe setting up a way to break out of the loop if everything gets really dense?
tempHist = np.zeros(np.size(bins)-1, dtype=float)
counter = 0
for chunk in results:
    # accumulate the histogram of colName over all chunks
    chunkHist, bins = np.histogram(chunk[colName], bins)
    tempHist += chunkHist
    counter += chunk_size