def test_database_function(self):

    # SETUP ALL DATABASE CONNECTIONS
    from sherlock import database
    db = database(
        log=log,
        settings=settings
    )
    dbConns, dbVersions = db.connect()
    self.transientsDbConn = dbConns["transients"]
    self.cataloguesDbConn = dbConns["catalogues"]

    from fundamentals.mysql import readquery
    sqlQuery = u"""
        SELECT VERSION();
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=self.transientsDbConn,
        quiet=False
    )
    print(rows)

    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )
    print(rows)
def _execute_query(self, sqlQuery):
    """*execute query and trim results*

    **Key Arguments**

    - ``sqlQuery`` -- the sql database query to grab low-resolution results.

    **Return**

    - ``databaseRows`` -- the database rows found on HTM trixels with requested IDs
    """
    self.log.debug('starting the ``_execute_query`` method')

    try:
        databaseRows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )
    except Exception as e:
        if "Unknown column 'htm" in str(e):
            message = "Please add and populate the HTM columns to this database table BEFORE running any conesearches. You can use HMpTy to do this: http://hmpty.readthedocs.io/en/stable/"
            self.log.error(message)
            raise IOError(message)
        elif "Truncated incorrect DOUBLE value" in str(e) or "Truncated incorrect DECIMAL value" in str(e):
            databaseRows = readquery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn,
                quiet=True
            )
        else:
            print(sqlQuery)
            raise e

    if self.distinct and (self.columns != "*" and (self.raCol.lower() not in self.columns.lower() or self.decCol.lower() not in self.columns.lower())):
        distinctRows = []
        theseKeys = []
        for r in databaseRows:
            constraintKey = ""
            for k, v in list(r.items()):
                if k.lower() != self.raCol.lower() and k.lower() != self.decCol.lower():
                    constraintKey += str(v)
            # INCLUDE THE COORDINATES IN THE KEY ONLY IF THEY WERE EXPLICITLY REQUESTED
            if self.raCol.lower() in self.columns.lower():
                constraintKey += str(r[self.raCol])
            if self.decCol.lower() in self.columns.lower():
                constraintKey += str(r[self.decCol])
            if constraintKey not in theseKeys:
                theseKeys.append(constraintKey)
                distinctRows.append(r)
        databaseRows = distinctRows

    self.log.debug('completed the ``_execute_query`` method')
    return databaseRows
def test_update_summaries_function(self):
    from fundamentals.mysql import writequery
    sqlQuery = """INSERT IGNORE INTO `fs_user_added` (`id`,`candidateID`,`ra_deg`,`dec_deg`,`mag`,`magErr`,`filter`,`observationMJD`,`discDate`,`discMag`,`suggestedType`,`catalogType`,`hostZ`,`targetImageURL`,`objectURL`,`summaryRow`,`ingested`,`htm16ID`,`survey`,`author`,`dateCreated`,`dateLastModified`,`suggestedClassification`,`htm13ID`,`htm10ID`,`transientBucketId`) VALUES (856,'TestSource',155.125958333,-15.1787369444,20.3,NULL,NULL,57627.5,'2016-08-27',20.3,'SN',NULL,0.34,'http://thespacedoctor.co.uk/images/thespacedoctor_icon_white_circle.png','http://thespacedoctor.co.uk',1,0,NULL,'testSurvey','None','2019-07-30 14:25:39','2019-07-30 14:25:39',NULL,NULL,NULL,NULL);""" % locals()
    writequery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )

    from marshallEngine.feeders.useradded.data import data
    ingester = data(
        log=log,
        settings=settings,
        dbConn=dbConn
    ).insert_into_transientBucket(updateTransientSummaries=False)

    from fundamentals.mysql import readquery
    sqlQuery = u"""
        SELECT transientBucketId FROM fs_user_added order by dateLastModified desc limit 1;
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
    )
    transientBucketId = rows[0]["transientBucketId"]

    ingester = data(
        log=log,
        settings=settings,
        dbConn=dbConn
    ).insert_into_transientBucket(updateTransientSummaries=transientBucketId)
def test_readquery_function(self):
    from fundamentals.mysql import readquery
    rows = readquery(
        log=log,
        sqlQuery="show tables",
        dbConn=dbConn,
        quiet=False
    )
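# NOTE: a minimal consumption sketch, not part of the original tests. `readquery`
# returns the result set as a list of row dictionaries keyed by column name (as the
# other snippets in this collection assume); `example_table` is a hypothetical table
# name used purely for illustration.
def readquery_usage_sketch():
    from fundamentals.mysql import readquery
    rows = readquery(
        log=log,
        sqlQuery="SELECT * FROM example_table LIMIT 5",
        dbConn=dbConn,
        quiet=False
    )
    # each row behaves like a dict keyed by column name
    for row in rows:
        print(list(row.keys()), list(row.values()))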
def _get_stream_view_infos(self, trimmed=False):
    """query the sherlock-catalogues database streamed data tables' metadata
    """
    self.log.debug('starting the ``_get_stream_view_infos`` method')

    sqlQuery = u"""
        SELECT * FROM tcs_helper_catalogue_tables_info where legacy_table = 0 and table_name not like "legacy%%" and table_name like "%%stream" order by number_of_rows desc;
    """ % locals()
    streamInfo = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )

    if trimmed:
        cleanTable = []
        for r in streamInfo:
            orow = collections.OrderedDict()
            for c in self.basicColumns:
                if c in r:
                    orow[c] = r[c]
            cleanTable.append(orow)
        streamInfo = cleanTable

    self.log.debug('completed the ``_get_stream_view_infos`` method')
    return streamInfo
def _get_data_to_send(
        self,
        docType):
    """*Select the rows from the reading-list table in the database containing webarticles or PDFs that need to be sent to a kindle device and/or apps*

    **Key Arguments:**
        - ``docType`` -- either PDFs or webpages. [pdf|web]

    **Return:**
        - ``data`` -- a list of dictionaries giving details of the data to send to kindle
    """
    self.log.info('starting the ``_get_data_to_send`` method')

    if docType == "pdf":
        sqlQuery = u"""
            select * from `reading-list` where kind = "imported-pdf" and sentToKindle = 0 and workflowStage = "reading" and pdfName is not null order by dateCreated limit 30
        """ % locals()
    elif docType == "web":
        sqlQuery = u"""
            select * from `reading-list` where kind = "webpage" and sentToKindle = 0 and workflowStage = "reading" and pdfName is not null order by dateCreated limit 10
        """ % locals()

    data = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn,
        quiet=False
    )

    self.log.info('completed the ``_get_data_to_send`` method')
    return data
def _get_view_infos(self, trimmed=False):
    """query the sherlock-catalogues database view metadata
    """
    self.log.debug('starting the ``_get_view_infos`` method')

    sqlQuery = u"""
        SELECT v.*, t.description as "master table" FROM tcs_helper_catalogue_views_info as v, tcs_helper_catalogue_tables_info AS t where v.legacy_view = 0 and v.view_name not like "legacy%%" and t.id=v.table_id order by number_of_rows desc
    """ % locals()
    viewInfo = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )

    if trimmed:
        cleanTable = []
        for r in viewInfo:
            orow = collections.OrderedDict()
            for c in self.basicColumns:
                if c in r:
                    orow[c] = r[c]
            cleanTable.append(orow)
        viewInfo = cleanTable

    self.log.debug('completed the ``_get_view_infos`` method')
    return viewInfo
def _select_exposures_requiring_dophot_extraction(self, batch=10):
    """*select exposures requiring dophot extraction*

    **Key Arguments:**
        - ``batch`` -- the batch size of dophot files to process

    **Return:**
        - ``expnames`` -- the names of the exposures in the batch
        - ``remaining`` -- the number of exposures remaining that require orbfit/dophot crossmatching
    """
    self.log.info('starting the ``_select_exposures_requiring_dophot_extraction`` method')

    sqlQuery = u"""
        select expname, floor(mjd) as mjd from atlas_exposures where local_data = 1 and dophot_match = 0 and orbfit_positions = 1;
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.atlasMoversDBConn,
        quiet=False
    )
    remaining = len(rows)

    expnames = []
    expnames[:] = [(r["expname"], int(r["mjd"])) for r in rows[:batch]]

    self.log.info('completed the ``_select_exposures_requiring_dophot_extraction`` method')
    return expnames, remaining
def _get_ned_sources_needing_metadata(self):
    """*Get the names of 50,000 or fewer NED sources that still require metadata in the database*

    **Return:**
        - ``len(self.theseIds)`` -- the number of NED IDs returned

    *Usage:*

    .. code-block:: python

        numberSources = stream._get_ned_sources_needing_metadata()
    """
    self.log.debug('starting the ``_get_ned_sources_needing_metadata`` method')

    tableName = self.dbTableName

    # SELECT THE DATA FROM NED TABLE
    sqlQuery = u"""
        select ned_name from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null) limit 50000;
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )

    self.theseIds = []
    self.theseIds[:] = [r["ned_name"] for r in rows]

    self.log.debug('completed the ``_get_ned_sources_needing_metadata`` method')
    return len(self.theseIds)
def _count_ned_sources_in_database_requiring_metadata(self):
    """*Count the sources in the NED table requiring metadata*

    **Return:**
        - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED

    *Usage:*

    .. code-block:: python

        totalRemaining, numberOfBatches = stream._count_ned_sources_in_database_requiring_metadata()
    """
    self.log.debug('starting the ``_count_ned_sources_in_database_requiring_metadata`` method')

    tableName = self.dbTableName
    sqlQuery = u"""
        select count(*) as count from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null)
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )
    self.total = rows[0]["count"]
    self.batches = int(self.total / 50000.) + 1

    if self.total == 0:
        self.batches = 0

    self.log.debug('completed the ``_count_ned_sources_in_database_requiring_metadata`` method')
    return self.total, self.batches
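# NOTE: a quick worked illustration of the batch arithmetic used above (not from the
# source); the total is a made-up value. Batches mirror the 50,000-row query limit in
# _get_ned_sources_needing_metadata.
total = 120000
batches = int(total / 50000.) + 1   # -> 3 batches of at most 50,000 sources each
if total == 0:
    batches = 0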
def _get_exposures_requiring_pyephem_positions(self, concurrentSnapshots=10):
    """*get next batch of exposures requiring pyephem positions*

    **Key Arguments:**
        - ``concurrentSnapshots`` -- number of concurrent PyEphem snapshots to process
    """
    self.log.info('starting the ``_get_exposures_requiring_pyephem_positions`` method')

    if self.dev_flag == True:
        dev_flag = " and dev_flag = 1"
    else:
        dev_flag = ""

    sqlQuery = u"""
        select distinct pyephem_mjd from atlas_exposures where pyephem_positions = 0 and local_data = 1 %(dev_flag)s order by pyephem_mjd asc
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.atlasMoversDBConn,
        quiet=False
    )
    snapshotsRequired = len(rows)

    if snapshotsRequired == 0:
        return [], [], 0

    nextMjds = []
    nextMjds[:] = [r["pyephem_mjd"] for r in rows[:concurrentSnapshots]]

    exposures = []
    for nextMjd in nextMjds:
        sqlQuery = u"""
            select * from atlas_exposures where pyephem_positions = 0 %(dev_flag)s and pyephem_mjd = %(nextMjd)s
        """ % locals()
        theseExps = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.atlasMoversDBConn,
            quiet=False
        )
        exposures.append(list(theseExps))

    self.log.info('completed the ``_get_exposures_requiring_pyephem_positions`` method')
    return nextMjds, exposures, snapshotsRequired
def _remove_processed_data(self):
    """*remove processed data*
    """
    self.log.info('starting the ``_remove_processed_data`` method')

    archivePath = self.settings["atlas archive path"]

    from fundamentals.mysql import readquery, writequery
    sqlQuery = u"""
        select mjd from (SELECT DISTINCT FLOOR(mjd) as mjd FROM atlas_exposures WHERE local_data = 1 AND dophot_match > 0) as a where mjd NOT IN (SELECT * FROM (SELECT DISTINCT FLOOR(mjd) FROM atlas_exposures WHERE local_data = 1 AND dophot_match = 0) AS a);
    """ % locals()
    mjds = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.atlasMoversDBConn
    )
    if not len(mjds):
        return None

    oldMjds = []
    oldMjds[:] = [str(int(o["mjd"])) for o in mjds]

    for m in oldMjds:
        for i in ["01a", "02a"]:
            datapath = archivePath + "/%(i)s/%(m)s" % locals()
            try:
                shutil.rmtree(datapath)
            except:
                self.log.warning(
                    "The path %(datapath)s does not exist - no need to delete" % locals())

    mjdString = (',').join(oldMjds)

    sqlQuery = """
        update day_tracker set local_data = 0 where floor(mjd) in (%(mjdString)s);
        update atlas_exposures set local_data = 0 where floor(mjd) in (%(mjdString)s) and dophot_match != 0;
    """ % locals()
    print(sqlQuery)
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.atlasMoversDBConn
    )

    self.log.info('completed the ``_remove_processed_data`` method')
    return None
def get_crossmatch_catalogues_column_map(dbConn, log):
    """*Query the sherlock-catalogues helper tables to generate a map of the important columns of each catalogue*

    Within your sherlock-catalogues database you need to manually map the inhomogeneous column-names from the sherlock-catalogues to an internal homogeneous name-set which includes *ra*, *dec*, *redshift*, *object name*, *magnitude*, *filter* etc. The column-name map is set within the two database helper tables called `tcs_helper_catalogue_views_info` and `tcs_helper_catalogue_tables_info`. See the *'Checklist for Adding A New Reference Catalogue to the Sherlock Catalogues Database'* for more information.

    .. todo::

        - write a checklist for adding a new catalogue to the sherlock database and reference it from here (use the image below of the tcs_helper_catalogue_views_info table)

    .. image:: https://farm5.staticflickr.com/4604/38429536400_eafa991580_o.png
        :width: 200 px

    **Key Arguments**

    - ``dbConn`` -- the sherlock-catalogues database connection
    - ``log`` -- logger

    **Return**

    - ``colMaps`` -- dictionary of dictionaries with the name of the database-view (e.g. `tcs_view_agn_milliquas_v4_5`) as the key and the column-name dictionary map as value (`{view_name: {columnMap}}`).

    **Usage**

    To collect the column map dictionary of dictionaries from the catalogues database, use the ``get_crossmatch_catalogues_column_map`` function:

    ```python
    from sherlock.commonutils import get_crossmatch_catalogues_column_map
    colMaps = get_crossmatch_catalogues_column_map(
        log=log,
        dbConn=cataloguesDbConn
    )
    ```
    """
    log.debug('starting the ``get_crossmatch_catalogues_column_map`` function')

    # GRAB THE NAMES OF THE IMPORTANT COLUMNS FROM DATABASE
    sqlQuery = u"""
        SELECT * FROM tcs_helper_catalogue_views_info v, tcs_helper_catalogue_tables_info t WHERE v.table_id = t.id
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )

    colMaps = {}
    for row in rows:
        colMaps[row["view_name"]] = row

    log.debug('completed the ``get_crossmatch_catalogues_column_map`` function')
    return colMaps
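# NOTE: a minimal access sketch (not from the source) showing the shape of the returned
# `colMaps` dictionary-of-dictionaries. The view name comes from the docstring example;
# the 'raColName' key is hypothetical and used purely for illustration - the real keys
# depend on the helper-table schema.
from sherlock.commonutils import get_crossmatch_catalogues_column_map

colMaps = get_crossmatch_catalogues_column_map(
    log=log,
    dbConn=cataloguesDbConn
)
milliquasMap = colMaps["tcs_view_agn_milliquas_v4_5"]
print(milliquasMap.get("raColName"))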
def test_readquery_function(self):
    from fundamentals.mysql import readquery
    rows = readquery(
        log=log,
        sqlQuery="show tables",
        dbConn=dbConn,
        quiet=False
    )
    print(rows)
def ingest(
        self,
        withinLastDays):
    """*Ingest the data into the marshall feeder survey table*

    **Key Arguments**

    - ``withinLastDays`` -- within the last number of days. *Default: 50*
    """
    self.log.info('starting the ``ingest`` method')

    allLists = []

    # MIGHT NEED SOMETHING LIKE THIS ... OTHERWISE DELETE AND ADD ANOTHER IMPORT METHOD
    # csvDicts = self.get_csv_data(
    #     url=self.settings["panstarrs urls"]["ps13pi"]["summary csv"],
    #     user=self.settings["credentials"]["ps13pi"]["username"],
    #     pwd=self.settings["credentials"]["ps13pi"]["password"]
    # )
    # allLists.extend(self._clean_data_pre_ingest(
    #     surveyName="ps13pi", withinLastDays=withinLastDays))

    self.dictList = allLists
    self._import_to_feeder_survey_table()

    self.insert_into_transientBucket(updateTransientSummaries=False)

    sqlQuery = u"""
        select transientBucketId from fs_user_added where transientBucketId is not null order by dateCreated desc limit 1
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )
    if len(rows):
        transientBucketId = rows[0]["transientBucketId"]
        print(transientBucketId)
    else:
        transientBucketId = False

    # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
    updater = update_transient_summaries(
        log=self.log,
        settings=self.settings,
        dbConn=self.dbConn,
        transientBucketId=transientBucketId
    )
    updater.update()

    self.log.info('completed the ``ingest`` method')
    return None
def test_readquery_error_function_quiet(self):
    from fundamentals.mysql import readquery
    try:
        rows = readquery(
            log=log,
            sqlQuery="rubbish query",
            dbConn=dbConn,
            quiet=True
        )
        assert True
    except Exception as e:
        assert False
def table_exists(
        dbConn,
        log,
        dbTableName):
    """*Probe a database to determine if a given table exists*

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dbTableName`` -- the database tablename

    **Return:**
        - ``tableExists`` -- True or False

    **Usage:**

    To test if a table exists in a database:

    .. code-block:: python

        from fundamentals.mysql import table_exists
        exists = table_exists(
            dbConn=dbConn,
            log=log,
            dbTableName="stupid_named_table"
        )
        print(exists)

        # OUTPUT: False
    """
    log.info('starting the ``table_exists`` function')

    sqlQuery = u"""
        SELECT count(*) FROM information_schema.tables WHERE table_name = '%(dbTableName)s'
    """ % locals()
    tableExists = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )

    if tableExists[0]["count(*)"] == 0:
        tableExists = False
    else:
        tableExists = True

    log.info('completed the ``table_exists`` function')
    return tableExists
def table_exists(
        dbConn,
        log,
        dbTableName):
    """*Probe a database to determine if a given table exists*

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dbTableName`` -- the database tablename

    **Return:**
        - ``tableExists`` -- True or False

    **Usage:**

    To test if a table exists in a database:

    .. code-block:: python

        from fundamentals.mysql import table_exists
        exists = table_exists(
            dbConn=dbConn,
            log=log,
            dbTableName="stupid_named_table"
        )
        print(exists)

        # OUTPUT: False
    """
    log.debug('starting the ``table_exists`` function')

    sqlQuery = u"""
        SELECT count(*) FROM information_schema.tables WHERE table_name = '%(dbTableName)s'
    """ % locals()
    tableExists = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )

    if tableExists[0]["count(*)"] == 0:
        tableExists = False
    else:
        tableExists = True

    log.debug('completed the ``table_exists`` function')
    return tableExists
def test_readquery_error_function_quiet(self):
    from fundamentals.mysql import readquery
    try:
        rows = readquery(
            log=log,
            sqlQuery="rubbish query",
            dbConn=dbConn,
            quiet=True
        )
        assert True
    except Exception as e:
        print(str(e))
        assert False
def _get_ps1_pointings(
        self,
        gwid,
        inPastDays,
        inFirstDays):
    """*get ps1 pointings to add to the plot*

    **Key Arguments:**
        - ``gwid`` -- the unique ID of the gravitational wave to plot
        - ``inPastDays`` -- used for the `history` plots (looking back from today)
        - ``inFirstDays`` -- used in the `timeline` plots (looking forward from wave detection)

    **Return:**
        - ``ps1Pointings`` -- the pointings to place on the plot
    """
    self.log.debug('starting the ``_get_ps1_pointings`` method')

    # DETERMINE THE TEMPORAL CONSTRAINTS FOR MYSQL QUERY
    if inPastDays != False or inPastDays == 0:
        nowMjd = now(
            log=self.log
        ).get_mjd()
        mjdStart = nowMjd - inPastDays
        mjdEnd = 10000000000
        if inPastDays == 0:
            mjdStart = 0.0
        else:
            print(inPastDays)

    if inFirstDays:
        mjdStart = self.settings["gravitational waves"][gwid]["mjd"] + inFirstDays[0]
        mjdEnd = self.settings["gravitational waves"][gwid]["mjd"] + inFirstDays[1]
        if inFirstDays[1] == 0:
            mjdEnd = 10000000000

    sqlQuery = u"""
        SELECT raDeg, decDeg FROM ps1_pointings where gw_id = "%(gwid)s" and mjd between %(mjdStart)s and %(mjdEnd)s
    """ % locals()
    ps1Pointings = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.ligo_virgo_wavesDbConn
    )

    self.log.debug('completed the ``_get_ps1_pointings`` method')
    return ps1Pointings
def test_database_function(self):

    # SETUP ALL DATABASE CONNECTIONS
    from fundamentals.mysql import database
    dbConn = database(
        log=log,
        dbSettings=dbSettings
    ).connect()

    from fundamentals.mysql import readquery
    sqlQuery = u"""
        SELECT VERSION();
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )
    print(rows)
def get_database_table_column_names(
        dbConn,
        log,
        dbTable):
    """get database table column names

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dbTable`` -- database tablename

    **Return:**
        - ``columnNames`` -- table column names

    **Usage:**

    To get the column names of a table in a given database:

    .. code-block:: python

        from fundamentals.mysql import get_database_table_column_names
        columnNames = get_database_table_column_names(
            dbConn=dbConn,
            log=log,
            dbTable="test_table"
        )
    """
    log.debug('starting the ``get_database_table_column_names`` function')
    sqlQuery = """SELECT * FROM %s LIMIT 1""" \
        % (dbTable, )

    # ############### >ACTION(S) ################
    try:
        rows = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
        )
    except Exception as e:
        log.error(
            'could not find column names for dbTable %s - failed with this error: %s ' % (dbTable, str(e)))
        return -1

    columnNames = rows[0].keys()

    log.debug('completed the ``get_database_table_column_names`` function')
    return columnNames
def _execute_query(
        self,
        sqlQuery):
    """*execute query and trim results*

    **Key Arguments:**
        - ``sqlQuery`` -- the sql database query to grab low-resolution results.

    **Return:**
        - ``databaseRows`` -- the database rows found on HTM trixels with requested IDs
    """
    self.log.info('starting the ``_execute_query`` method')

    try:
        databaseRows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )
    except Exception as e:
        if "Unknown column 'htm" in str(e):
            message = "Please add and populate the HTM columns to this database table BEFORE running any conesearches. You can use HMpTy to do this: http://hmpty.readthedocs.io/en/stable/"
            self.log.error(message)
            raise IOError(message)
        else:
            raise e

    if self.distinct and (self.columns != "*" and (self.raCol.lower() not in self.columns.lower() or self.decCol.lower() not in self.columns.lower())):
        distinctRows = []
        theseKeys = []
        for r in databaseRows:
            constraintKey = ""
            for k, v in r.items():
                if k.lower() != self.raCol.lower() and k.lower() != self.decCol.lower():
                    constraintKey += str(v)
            # INCLUDE THE COORDINATES IN THE KEY ONLY IF THEY WERE EXPLICITLY REQUESTED
            if self.raCol.lower() in self.columns.lower():
                constraintKey += str(r[self.raCol])
            if self.decCol.lower() in self.columns.lower():
                constraintKey += str(r[self.decCol])
            if constraintKey not in theseKeys:
                theseKeys.append(constraintKey)
                distinctRows.append(r)
        databaseRows = distinctRows

    self.log.info('completed the ``_execute_query`` method')
    return databaseRows
def get_database_table_column_names(dbConn, log, dbTable):
    """get database table column names

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``dbTable`` -- database tablename

    **Return:**
        - ``columnNames`` -- table column names

    **Usage:**

    To get the column names of a table in a given database:

    .. code-block:: python

        from fundamentals.mysql import get_database_table_column_names
        columnNames = get_database_table_column_names(
            dbConn=dbConn,
            log=log,
            dbTable="test_table"
        )
    """
    log.debug('starting the ``get_database_table_column_names`` function')
    sqlQuery = """SELECT * FROM %s LIMIT 1""" \
        % (dbTable, )

    # ############### >ACTION(S) ################
    try:
        rows = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn,
        )
    except Exception as e:
        log.error(
            'could not find column names for dbTable %s - failed with this error: %s ' % (dbTable, str(e)))
        return -1

    columnNames = rows[0].keys()

    log.debug('completed the ``get_database_table_column_names`` function')
    return columnNames
def _get_3000_galaxies_needing_metadata(self):
    """get 3000 galaxies needing metadata

    **Return**

    - ``len(self.theseIds)`` -- the number of NED IDs returned

    .. todo::

        - update key arguments values and definitions with defaults
        - update return values and definitions
        - update usage examples and text
        - update docstring text
        - check sublime snippet exists
        - clip any useful text to docs mindmap
        - regenerate the docs and check rendering of this docstring
    """
    self.log.debug('starting the ``_get_3000_galaxies_needing_metadata`` method')

    tableName = self.dbTableName

    # SELECT THE DATA FROM NED TABLE
    self.theseIds = {}
    sqlQuery = u"""
        select primaryId, primary_ned_id from %(tableName)s where master_row = 1 and in_ned is null limit 3000;
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )
    for row in rows:
        self.theseIds[row["primary_ned_id"]] = row["primaryId"]

    self.log.debug('completed the ``_get_3000_galaxies_needing_metadata`` method')
    return len(self.theseIds)
def _count_galaxies_requiring_metadata(self):
    """count galaxies requiring metadata

    **Return**

    - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED

    .. todo::

        - update key arguments values and definitions with defaults
        - update return values and definitions
        - update usage examples and text
        - update docstring text
        - check sublime snippet exists
        - clip any useful text to docs mindmap
        - regenerate the docs and check rendering of this docstring
    """
    self.log.debug('starting the ``_count_galaxies_requiring_metadata`` method')

    tableName = self.dbTableName
    sqlQuery = u"""
        select count(*) as count from %(tableName)s where master_row = 1 and in_ned is null
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )
    self.total = rows[0]["count"]
    self.batches = int(old_div(self.total, 3000.)) + 1

    if self.total == 0:
        self.batches = 0

    self.log.debug('completed the ``_count_galaxies_requiring_metadata`` method')
    return self.total, self.batches
def ingest(self, withinLastDays):
    """*Ingest the data into the marshall feeder survey table*

    **Key Arguments**

    - ``withinLastDays`` -- within the last number of days. *Default: 50*
    """
    self.log.info('starting the ``ingest`` method')

    allLists = []
    self.dictList = allLists
    self._import_to_feeder_survey_table()

    self.insert_into_transientBucket(updateTransientSummaries=False)

    sqlQuery = u"""
        select transientBucketId from fs_user_added where transientBucketId is not null order by dateCreated desc limit 1
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )
    if len(rows):
        transientBucketId = rows[0]["transientBucketId"]
        print(transientBucketId)
    else:
        transientBucketId = False

    # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
    updater = update_transient_summaries(
        log=self.log,
        settings=self.settings,
        dbConn=self.dbConn,
        transientBucketId=transientBucketId
    )
    updater.update()

    # CLEAN UP TASKS TO MAKE THE TICKET UPDATE
    self.clean_up()

    self.log.info('completed the ``ingest`` method')
    return None
def _get_xephem_orbital_elements(self):
    """*get xephem orbital elements*

    **Return:**
        - ``xephemOE`` -- a list of xephem database format strings for use with pyephem
    """
    self.log.info('starting the ``_get_xephem_orbital_elements`` method')

    print("Getting the XEphem orbital element strings from the database")

    sqlQuery = u"""
        select pyephem_string, name, mpc_number from orbital_elements where include_in_match = 1
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.atlasMoversDBConn,
        quiet=False
    )
    xephemOE = list(rows)

    self.log.info('completed the ``_get_xephem_orbital_elements`` method')
    return xephemOE
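# NOTE: a minimal sketch (not from the source) of feeding the returned XEphem database
# strings to pyephem, assuming the `pyephem_string` column holds valid XEphem-format
# entries; the date passed to compute() is purely illustrative.
import ephem

# xephemOE as returned by _get_xephem_orbital_elements() above
for row in xephemOE:
    body = ephem.readdb(row["pyephem_string"])
    body.compute("2016/08/27")
    print(row["name"], body.ra, body.dec)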
def _create_dictionary_of_marshall(self, marshallQuery, marshallTable):
    """create a list of dictionaries containing all the rows in the marshall stream

    **Key Arguments:**
        - ``marshallQuery`` -- the query used to lift the required data from the marshall database.
        - ``marshallTable`` -- the name of the marshall table we are lifting the data from.

    **Return:**
        - ``dictList`` -- a list of dictionaries containing all the rows in the marshall stream
    """
    self.log.debug('starting the ``_create_dictionary_of_marshall`` method')

    dictList = []
    tableName = self.dbTableName

    rows = readquery(
        log=self.log,
        sqlQuery=marshallQuery,
        dbConn=self.pmDbConn,
        quiet=False
    )

    totalCount = len(rows)
    count = 0
    for row in rows:
        if "dateCreated" in row:
            del row["dateCreated"]
        count += 1
        if count > 1:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        print("%(count)s / %(totalCount)s `%(tableName)s` data added to memory" % locals())
        dictList.append(dict(row))

    self.log.debug('completed the ``_create_dictionary_of_marshall`` method')
    return dictList
def add_htm_ids_to_mysql_database_table(raColName, declColName, tableName, dbConn, log, primaryIdColumnName="primaryId", cartesian=False, batchSize=50000, reindex=False, dbSettings=False): """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table* **Key Arguments** - ``raColName`` -- ra in sexegesimal - ``declColName`` -- dec in sexegesimal - ``tableName`` -- name of table to add htmid info to - ``dbConn`` -- database hosting the above table - ``log`` -- logger - ``primaryIdColumnName`` -- the primary id for the table - ``cartesian`` -- add cartesian columns. Default *False* - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *2500* - ``reindex`` -- reindex the entire table - ``dbSettings`` -- yaml settings for database **Return** - None **Usage** ```python from HMpTy.mysql import add_htm_ids_to_mysql_database_table add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="my_big_star_table", dbConn=dbConn, log=log, primaryIdColumnName="primaryId", reindex=False ) ``` """ log.debug('starting the ``add_htm_ids_to_mysql_database_table`` function') # TEST TABLE EXIST sqlQuery = """show tables""" rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn) log.debug("""Checking the table %(tableName)s exists in the database""" % locals()) tableList = [] for row in rows: tableList.append(list(row.values())[0].lower()) if tableName.lower() not in tableList: message = "The %s table does not exist in the database" % (tableName, ) log.critical(message) raise IOError(message) log.debug( """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals()) # TEST COLUMNS EXISTS cursor = dbConn.cursor(ms.cursors.DictCursor) sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName, ) cursor.execute(sqlQuery) rows = cursor.fetchall() desc = cursor.description existingColumns = [] for i in range(len(desc)): existingColumns.append(desc[i][0]) if (raColName not in existingColumns) or (declColName not in existingColumns): message = 'Please make sure you have got the naes of the RA and DEC columns correct' log.critical(message) raise IOError(message) if cartesian: # ACTION(S) ## htmCols = { 'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT', 'cx': 'DOUBLE', 'cy': 'DOUBLE', 'cz': 'DOUBLE' } else: htmCols = {'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT'} # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM for key in list(htmCols.keys()): try: log.debug( 'attempting to check and generate the HTMId columns for the %s db table' % (tableName, )) colExists = \ """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \ % (key, tableName) colExists = readquery(log=log, sqlQuery=colExists, dbConn=dbConn) switch = 0 if not colExists: if switch == 0: print("Adding the HTMCircle columns to %(tableName)s" % locals()) switch = 1 sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \ key + ' ' + htmCols[key] + ' DEFAULT NULL' writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) except Exception as e: log.critical( 'could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' % (tableName, str(e))) raise e log.debug( """Counting the number of rows still requiring HTMID information""" % locals()) if reindex: sqlQuery = u""" SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and 
COLUMN_NAME = "%(primaryIdColumnName)s"; """ % locals() keyname = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)[0]["INDEX_NAME"] if keyname != "PRIMARY": log.error( 'To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals()) return sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals() writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn) sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals( ) elif cartesian: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null and %(raColName)s is not null""" % locals( ) else: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL and %(raColName)s is not null""" % locals( ) log.debug("""SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals()) rowCount = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False) totalCount = rowCount[0]["count"] # ADD HTMIDs IN BATCHES total = totalCount batches = int(old_div(total, batchSize)) count = 0 lastId = False # NOW GENERATE THE HTMLIds FOR THESE ROWS for i in range(batches + 1): if total == 0: continue count += batchSize if count > batchSize: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount start = time.time() log.debug( """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals()) if reindex: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET if lastId: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize) else: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize) elif cartesian: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and ((htm10ID is NULL or cx is null)) limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) else: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and htm10ID is NULL limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) batch = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn) if reindex and len(batch): lastId = batch[-1][primaryIdColumnName] log.debug( """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals()) raList = [] decList = [] pIdList = [] raList[:] = [r[raColName] for r in batch] decList[:] = [r[declColName] for r in batch] pIdList[:] = [r[primaryIdColumnName] for r in batch] from HMpTy import htm mesh16 = htm.HTM(16) mesh13 = htm.HTM(13) mesh10 = htm.HTM(10) log.debug('calculating htmIds for batch of %s rows in %s db table' % ( batchSize, tableName, )) htm16Ids = mesh16.lookup_id(raList, decList) htm13Ids = mesh13.lookup_id(raList, decList) htm10Ids = mesh10.lookup_id(raList, decList) log.debug( 'finshed calculating htmIds for batch of %s rows in %s db table' % ( batchSize, tableName, )) if cartesian: log.debug( 'calculating cartesian coordinates for batch of %s rows in %s db table' % ( batchSize, tableName, )) cx = [] cy = [] cz = [] 
for r, d in zip(raList, decList): r = math.radians(r) d = math.radians(d) cos_dec = math.cos(d) cx.append(math.cos(r) * cos_dec) cy.append(math.sin(r) * cos_dec) cz.append(math.sin(d)) updates = [] updates[:] = [{ "htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid, "cx": float(ccx), "cy": float(ccy), "cz": float(ccz) } for h16, h13, h10, pid, ccx, ccy, ccz in zip( htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz)] log.debug( 'finished calculating cartesian coordinates for batch of %s rows in %s db table' % ( batchSize, tableName, )) else: log.debug('building the sqlquery') updates = [] # updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16, # h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)] updates[:] = [{ "htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid } for h16, h13, h10, pid in zip( htm16Ids, htm13Ids, htm10Ids, pIdList)] log.debug('finshed building the sqlquery') if len(updates): log.debug( 'starting to update the HTMIds for new objects in the %s db table' % (tableName, )) # USE dbSettings & dbConn TO ACTIVATE MULTIPROCESSING insert_list_of_dictionaries_into_database_tables( dbConn=dbConn, log=log, dictList=updates, dbTableName=tableName, uniqueKeyList=[], dateModified=False, batchSize=20000, replace=True, dbSettings=dbSettings, dateCreated=False) # writequery( # log=log, # sqlQuery=sqlQuery, # dbConn=dbConn, # ) log.debug( 'finished updating the HTMIds for new objects in the %s db table' % (tableName, )) else: log.debug('no HTMIds to add to the %s db table' % (tableName, )) percent = float(count) * 100. / float(totalCount) print( "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals()) end = time.time() timediff = end - start timediff = timediff * 1000000. / float(batchSize) print("Update speed: %(timediff)0.2fs/1e6 rows\n" % locals()) # APPLY INDEXES IF NEEDED sqlQuery = "" for index in ["htm10ID", "htm13ID", "htm16ID"]: log.debug('adding %(index)s index to %(tableName)s' % locals()) iname = "idx_" + index asqlQuery = u""" SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s'; """ % locals() count = readquery(log=log, sqlQuery=asqlQuery, dbConn=dbConn)[0]["IndexIsThere"] if count == 0: if not len(sqlQuery): sqlQuery += u""" ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC) """ % locals() else: sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals( ) if len(sqlQuery): writequery( log=log, sqlQuery=sqlQuery + ";", dbConn=dbConn, ) log.debug('finished adding indexes to %(tableName)s' % locals()) if reindex: print("Re-enabling keys within the '%(tableName)s' table" % locals()) sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals() writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn) print("All HTMIds added to %(tableName)s" % locals()) log.debug('completed the ``add_htm_ids_to_mysql_database_table`` function') return None
def _sampled_area_only_points(self):
    """*sampled area only points*
    """
    self.log.debug('starting the ``_sampled_area_only_points`` method')

    coords1 = [
        (140.0, 6.0),
        (153.0, -12.0)
    ]
    coords2 = [
        (149.0, 2.0),
        (152.0, -7.0)
    ]
    coords3 = [
        (132.0, 4.0),
        (154.0, -16.5)
    ]
    final = [
        (140.0, 6.0),
        (154.0, -16.5)
    ]
    coords = final

    # CREATE AN ARRAY OF RELEVANT HTMIDS AND FIND MAX AND MIN
    mesh16 = htm.HTM(16)
    theseArrays = []
    radius = 2.5
    ra = []
    dec = []
    for co in coords:
        ra1 = co[0]
        dec1 = co[1]
        thisArray = mesh16.intersect(
            ra1, dec1, radius, inclusive=True)
        hmax = thisArray.max()
        hmin = thisArray.min()
        ratio = float(hmax - hmin + 1) / float(thisArray.size)
        if ratio < 100 or thisArray.size > 2000:
            htmWhereClause = "where htm16ID between %(hmin)s and %(hmax)s" % locals()
        else:
            s = StringIO()
            np.savetxt(s, thisArray, fmt='%d', newline=",")
            thesHtmIds = s.getvalue()[:-1]
            htmWhereClause = "where htm16ID in (%(thesHtmIds)s)" % locals()

        # FINALLY BUILD THE FULL QUERY
        sqlQuery = """select raDeg, decDeg, redshift, object_type from tcs_cat_ned_stream %(htmWhereClause)s and redshift is not null and redshift < 0.15 and (redshift_quality is null or redshift_quality not like 'PHOT%%') and (object_type is null or object_type not like "%%*%%") """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn
        )

        raList = []
        decList = []
        for row in rows:
            raList.append(row["raDeg"])
            decList.append(row["decDeg"])

        tRa = np.array([ra1])
        tDec = np.array([dec1])
        raList = np.array(raList)
        decList = np.array(decList)
        indexList1, indexList2, separation = mesh16.match(
            tRa, tDec, raList, decList, radius, maxmatch=0)
        redshiftList = []
        for i in range(indexList1.size):
            ra.append(rows[indexList2[i]]["raDeg"])
            dec.append(rows[indexList2[i]]["decDeg"])

    ra = np.array(ra)
    dec = np.array(dec)

    self.log.debug('completed the ``_sampled_area_only_points`` method')
    return ra, dec
def get(self):
    """*get the panstarrs_location_stamps object*
    """
    self.log.debug('starting the ``get`` method')

    # FOR A SINGLE TRANSIENT
    if self.transientId:
        transientId = self.transientId
        sqlQuery = u"""
            select t.transientBucketId, t.raDeg,t.decDeg from pesstoObjects p, transientBucketSummaries t where p.transientBucketId = t.transientBucketId and t.transientBucketId = %(transientId)s;
        """ % locals()
    # OR THE NEXT 200 TRANSIENTS NEEDING STAMPS
    else:
        # GET NEXT 200 TRANSIENTS NEEDING PANSTARRS STAMPS
        sqlQuery = u"""
            select * from pesstoObjects p, transientBucketSummaries t where (ps1_map is null or ps1_map not in (0,1)) and p.transientBucketId = t.transientBucketId order by t.transientBucketId desc limit 200;
        """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )

    # FOR EACH TRANSIENT DOWNLOAD STAMP TO CACHE DIRECTORY
    downloadDirectoryPath = self.settings["cache-directory"] + "/transients/"

    for row in rows:
        transientBucketId = row["transientBucketId"]
        downloadPath = f"{downloadDirectoryPath}/{transientBucketId}"
        ra = row["raDeg"]
        dec = row["decDeg"]

        fitsPaths, jpegPaths, colorPath = downloader(
            log=self.log,
            settings=self.settings,
            downloadDirectory=downloadPath,
            fits=False,
            jpeg=False,
            arcsecSize=60,
            filterSet='gri',
            color=True,
            singleFilters=False,
            ra=ra,
            dec=dec,
            imageType="stack"  # warp | stack
        ).get()

        # CHECK FOR FAILED IMAGES AND FLAG IN DATABASE
        if len(colorPath) == 0 or not colorPath[0]:
            sqlQuery = u"""
                update pesstoObjects p, transientBucketSummaries t set p.ps1_map = 0 where p.transientBucketId=t.transientBucketId and (ps1_map is null or ps1_map != 0) and t.decDeg < -40;
                update pesstoObjects set ps1_map = 2 where transientBucketId = %(transientBucketId)s and ps1_map is null;
                update pesstoObjects set ps1_map = 2+ps1_map where transientBucketId = %(transientBucketId)s and ps1_map is not null;
                update pesstoObjects set ps1_map = 0 where transientBucketId = %(transientBucketId)s and ps1_map > 10;
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )
            continue

        source = colorPath[0]
        destination = downloadPath + "/ps1_map_color.jpeg"
        try:
            os.rename(source, destination)

            # DOWNLOAD THE COLOR IMAGE
            myimage = image(
                log=self.log,
                settings=self.settings,
                imagePath=destination,
                arcsecSize=60,
                crosshairs=True,
                transient=False,
                scale=True,
                invert=False,
                greyscale=False
            ).get()

            # UPDATE DATABASE FLAG
            sqlQuery = u"""
                update pesstoObjects set ps1_map = 1 where transientBucketId = %(transientBucketId)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )
        except:
            self.log.warning(
                "Could not process the image %(destination)s" % locals())

    self.log.debug('completed the ``get`` method')
    return None
def convert_dictionary_to_mysql_table( log, dictionary, dbTableName, uniqueKeyList=[], dbConn=False, createHelperTables=False, dateModified=False, returnInsertOnly=False, replace=False, batchInserts=True, reDatetime=False, skipChecks=False, dateCreated=True): """convert dictionary to mysql table **Key Arguments:** - ``log`` -- logger - ``dictionary`` -- python dictionary - ``dbConn`` -- the db connection - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist) - ``uniqueKeyList`` - a lists column names that need combined to create the primary key - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc - ``returnInsertOnly`` -- returns only the insert command (does not execute it) - ``dateModified`` -- add a modification date and updated flag to the mysql table - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required) - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``) - ``skipChecks`` -- skip reliability checks. Less robust but a little faster. - ``dateCreated`` -- add a timestamp for dateCreated? **Return:** - ``returnInsertOnly`` -- the insert statement if requested **Usage:** To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist: .. code-block:: python from fundamentals.mysql import convert_dictionary_to_mysql_table dictionary = {"a newKey": "cool", "and another": "super cool", "uniquekey1": "cheese", "uniqueKey2": "burgers"} convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=False, replace=True ) Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database: insertCommand, valueTuple = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=True, replace=False, batchInserts=True ) print insertCommand, valueTuple # OUT: 'INSERT IGNORE INTO `testing_table` # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES # (%s, %s, %s, %s, %s)', ('cool', 'super cool', # '2016-06-21T12:08:59', 'burgers', 'cheese') You can also return a list of single insert statements using ``batchInserts = False``. 
Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found: inserts = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=True, replace=True, batchInserts=False ) print inserts # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese") # ON DUPLICATE KEY UPDATE a_newKey="cool", and_another="super # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers", # uniquekey1="cheese" """ log.debug('starting the ``convert_dictionary_to_mysql_table`` function') if not reDatetime: reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') if not replace: insertVerb = "INSERT" else: insertVerb = "INSERT IGNORE" if returnInsertOnly == False: # TEST THE ARGUMENTS if str(type(dbConn).__name__) != "Connection": message = 'Please use a valid MySQL DB connection.' log.critical(message) raise TypeError(message) if not isinstance(dictionary, dict): message = 'Please make sure "dictionary" argument is a dict type.' log.critical(message) raise TypeError(message) if not isinstance(uniqueKeyList, list): message = 'Please make sure "uniqueKeyList" is a list' log.critical(message) raise TypeError(message) for i in uniqueKeyList: if i not in dictionary.keys(): message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert' log.critical(message) raise ValueError(message) for k, v in dictionary.iteritems(): # log.debug('k: %s, v: %s' % (k, v,)) if isinstance(v, list) and len(v) != 2: message = 'Please make sure the list values in "dictionary" 2 items in length' log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) if isinstance(v, list): if not (isinstance(v[0], str) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], long) or isinstance(v[0], datetime.date) or v[0] == None): message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool' log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) else: if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, long) or isinstance(v, unicode) or isinstance(v, datetime.date) or v == None): this = type(v) message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals( ) log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) if not isinstance(createHelperTables, bool): message = 'Please make sure "createHelperTables" is a True or False' log.critical(message) raise TypeError(message) # TEST IF TABLE EXISTS if not skipChecks: tableExists = table_exists.table_exists( dbConn=dbConn, log=log, dbTableName=dbTableName ) else: tableExists = False # CREATE THE TABLE IF IT DOES NOT EXIST if tableExists is False: sqlQuery = """ CREATE TABLE IF NOT EXISTS `%(dbTableName)s` (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter', `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, `updated` tinyint(4) DEFAULT '0', PRIMARY KEY (`primaryId`)) 
ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1; """ % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) qCreateColumn = '' formattedKey = '' formattedKeyList = [] myValues = [] # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this? if dateModified: dictionary['dateLastModified'] = [ str(times.get_now_sql_datetime()), "date row was modified"] if replace == False: dictionary['updated'] = [0, "this row has been updated"] else: dictionary['updated'] = [1, "this row has been updated"] # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE # NAME OF THE KEY, IF IT DOES NOT EXIST count = len(dictionary) i = 1 for (key, value) in dictionary.items(): if (isinstance(value, list) and value[0] is None): del dictionary[key] # SORT THE DICTIONARY BY KEY odictionary = c.OrderedDict(sorted(dictionary.items())) for (key, value) in odictionary.iteritems(): formattedKey = key.replace(" ", "_").replace("-", "_") # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST if formattedKey == "dec": formattedKey = "decl" if formattedKey == "DEC": formattedKey = "DECL" formattedKeyList.extend([formattedKey]) if len(key) > 0: # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A # STRING TO MYSQL) if isinstance(value, list) and (isinstance(value[0], list)): value[0] = yaml.dump(value[0]) value[0] = str(value[0]) # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL # if type(value[0]) == str or type(value[0]) == unicode: # value[0] = value[0].replace('"', """'""") # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL # COMMAND LATER if isinstance(value, str): value = value.replace('\\', '\\\\') value = value.replace('"', '\\"') try: udata = value.decode("utf-8", "ignore") value = udata.encode("ascii", "ignore") except: log.error('cound not decode value %(value)s' % locals()) # log.debug('udata: %(udata)s' % locals()) if isinstance(value, unicode): value = value.replace('"', '\\"') value = value.encode("ascii", "ignore") if isinstance(value, list) and isinstance(value[0], unicode): myValues.extend(['%s' % value[0].strip()]) elif isinstance(value, list): myValues.extend(['%s' % (value[0], )]) else: myValues.extend(['%s' % (value, )]) if returnInsertOnly == False: # CHECK IF COLUMN EXISTS YET colExists = \ "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \ formattedKey + "'AND TABLE_NAME='" + dbTableName + """'""" try: # log.debug('checking if the column '+formattedKey+' exists # in the '+dbTableName+' table') rows = readquery( log=log, sqlQuery=colExists, dbConn=dbConn, ) except Exception as e: log.error('something went wrong' + str(e) + '\n') # IF COLUMN DOESN'T EXIT - GENERATE IT if len(rows) == 0: qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % ( dbTableName, formattedKey) if not isinstance(value, list): value = [value] if reDatetime.search(str(value[0])): # log.debug('Ok - a datetime string was found') qCreateColumn += '` datetime DEFAULT NULL' elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \ == 'feedName' or formattedKey == 'title': qCreateColumn += '` varchar(100) DEFAULT NULL' elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) < 30: qCreateColumn += '` varchar(100) DEFAULT NULL' elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) >= 30 and len(value[0]) < 80: qCreateColumn += '` varchar(100) DEFAULT NULL' elif isinstance(value[0], str) or isinstance(value[0], unicode): columnLength = 450 + 
len(value[0]) * 2 qCreateColumn += '` varchar(' + str( columnLength) + ') DEFAULT NULL' elif isinstance(value[0], int) and abs(value[0]) <= 9: qCreateColumn += '` tinyint DEFAULT NULL' elif isinstance(value[0], int): qCreateColumn += '` int DEFAULT NULL' elif isinstance(value[0], float) or isinstance(value[0], long): qCreateColumn += '` double DEFAULT NULL' elif isinstance(value[0], bool): qCreateColumn += '` tinyint DEFAULT NULL' elif isinstance(value[0], list): qCreateColumn += '` varchar(1024) DEFAULT NULL' else: # log.debug('Do not know what format to add this key in # MySQL - removing from dictionary: %s, %s' # % (key, type(value[0]))) formattedKeyList.pop() myValues.pop() qCreateColumn = None if qCreateColumn: # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR # MYSQL if key is not formattedKey: qCreateColumn += " COMMENT 'original keyword: " + \ key + """'""" # CREATE THE COLUMN IF IT DOES NOT EXIST try: log.info('creating the ' + formattedKey + ' column in the ' + dbTableName + ' table') writequery( log=log, sqlQuery=qCreateColumn, dbConn=dbConn ) except Exception as e: # log.debug('qCreateColumn: %s' % (qCreateColumn, # )) log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName + ' table -- ' + str(e) + '\n') if returnInsertOnly == False: # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST if len(uniqueKeyList): for i in range(len(uniqueKeyList)): uniqueKeyList[i] = uniqueKeyList[ i].replace(" ", "_").replace("-", "_") if uniqueKeyList[i] == "dec": uniqueKeyList[i] = "decl" if uniqueKeyList[i] == "DEC": uniqueKeyList[i] = "DECL" indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_") for i in range(len(uniqueKeyList) - 1): indexName += '_' + uniqueKeyList[i + 1] indexName = indexName.lower().replace(" ", " ").replace(" ", "_") sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \ dbTableName + """' AND INDEX_NAME = '""" + indexName + """'""" rows = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False ) exists = rows[0]['COUNT(*)'] # log.debug('uniqueKeyList: %s' % (uniqueKeyList,)) if exists == 0: if isinstance(uniqueKeyList, list): uniqueKeyList = ','.join(uniqueKeyList) addUniqueKey = 'ALTER TABLE `' + dbTableName + \ '` ADD unique ' + indexName + \ """ (""" + uniqueKeyList + ')' # log.debug('HERE IS THE COMMAND:'+addUniqueKey) writequery( log=log, sqlQuery=addUniqueKey, dbConn=dbConn ) if returnInsertOnly == True and batchInserts == True: myKeys = '`,`'.join(formattedKeyList) valueString = ("%s, " * len(myValues))[:-2] insertCommand = insertVerb + """ INTO `""" + dbTableName + \ """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \ valueString + """, NOW())""" mv = [] mv[:] = [None if m == "None" else m for m in myValues] valueTuple = tuple(mv) dup = "" if replace: dup = " ON DUPLICATE KEY UPDATE " for k, v in zip(formattedKeyList, mv): dup = """%(dup)s %(k)s=values(%(k)s),""" % locals() insertCommand = insertCommand + dup insertCommand = insertCommand.replace('\\""', '\\" "') insertCommand = insertCommand.replace('""', "null") insertCommand = insertCommand.replace('!!python/unicode:', '') insertCommand = insertCommand.replace('!!python/unicode', '') insertCommand = insertCommand.replace('"None"', 'null') if not dateCreated: insertCommand = insertCommand.replace( ", dateCreated)", ")").replace(", NOW())", ")") return insertCommand, valueTuple # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES myKeys = 
'`,`'.join(formattedKeyList) myValues = '" ,"'.join(myValues) # log.debug(myValues+" ------ PRESTRIP") # REMOVE SOME CONVERSION NOISE myValues = myValues.replace('time.struct_time', '') myValues = myValues.replace( '- !!python/object/new:feedparser.FeedParserDict', '') myValues = myValues.replace( '!!python/object/new:feedparser.FeedParserDict', '') myValues = myValues.replace('dictitems:', '') myValues = myValues.replace('dictitems', '') myValues = myValues.replace('!!python/unicode:', '') myValues = myValues.replace('!!python/unicode', '') myValues = myValues.replace('"None"', 'null') # myValues = myValues.replace('"None', 'null') if myValues[-4:] != 'null': myValues += '"' dup = "" if replace: dupValues = ('"' + myValues).split(" ,") dupKeys = formattedKeyList dup = dup + " ON DUPLICATE KEY UPDATE " for k, v in zip(dupKeys, dupValues): dup = """%(dup)s `%(k)s`=%(v)s,""" % locals() if dateModified: dup = """%(dup)s updated=IF(""" % locals() for k, v in zip(dupKeys, dupValues): if v == "null": dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() else: dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() dup = dup[:-5] + ", 0, 1), dateLastModified=IF(" for k, v in zip(dupKeys, dupValues): if v == "null": dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() else: dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() dup = dup[:-5] + ", dateLastModified, NOW())" else: dup = dup[:-1] # log.debug(myValues+" ------ POSTSTRIP") addValue = insertVerb + """ INTO `""" + dbTableName + \ """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \ myValues + """, NOW()) %(dup)s """ % locals() if not dateCreated: addValue = addValue.replace( ", dateCreated)", ")").replace(", NOW())", ")", 1) addValue = addValue.replace('\\""', '\\" "') addValue = addValue.replace('""', "null") addValue = addValue.replace('!!python/unicode:', '') addValue = addValue.replace('!!python/unicode', '') addValue = addValue.replace('"None"', 'null') # log.debug(addValue) if returnInsertOnly == True: return addValue message = "" try: # log.debug('adding new data to the %s table; query: %s' % # (dbTableName, addValue))" writequery( log=log, sqlQuery=addValue, dbConn=dbConn ) except Exception as e: log.error("could not add new data added to the table '" + dbTableName + "' : " + str(e) + '\n') log.debug('completed the ``convert_dictionary_to_mysql_table`` function') return None, None
def connect(self): """connect to the various databases, the credientals and settings of which are found in the rockAtlas settings file **Return:** - ``atlas3DbConn`` -- the database hosting the atlas3 metadata - ``atlas4DbConn`` -- the database hosting the atlas4 metadata See the class docstring for usage """ self.log.debug('starting the ``get`` method') atlas3Settings = self.settings["database settings"]["atlas3"] atlas4Settings = self.settings["database settings"]["atlas4"] atlasMovers = self.settings["database settings"]["atlasMovers"] dbConns = [] for dbSettings in [atlas3Settings, atlas4Settings, atlasMovers]: port = False if dbSettings["tunnel"]: port = self._setup_tunnel( tunnelParameters=dbSettings["tunnel"]) # SETUP A DATABASE CONNECTION FOR atlas4 host = dbSettings["host"] user = dbSettings["user"] passwd = dbSettings["password"] dbName = dbSettings["db"] thisConn = ms.connect( host=host, user=user, passwd=passwd, db=dbName, port=port, use_unicode=True, charset='utf8', client_flag=ms.constants.CLIENT.MULTI_STATEMENTS, connect_timeout=3600) thisConn.autocommit(True) dbConns.append(thisConn) # CREATE A DICTIONARY OF DATABASES dbConns = { "atlas3": dbConns[0], "atlas4": dbConns[1], "atlasMovers": dbConns[2], } dbVersions = {} for k, v in dbConns.iteritems(): sqlQuery = u""" SELECT VERSION() as v; """ % locals() rows = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=v, quiet=False) version = rows[0]['v'] dbVersions[k] = version self.log.debug('completed the ``get`` method') return dbConns, dbVersions
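# USAGE SKETCH (illustrative, not part of the library): how the connection and
# version dictionaries returned by `connect()` might be consumed. Assumes a
# `log` logger and a `settings` dict shaped like the rockAtlas settings file;
# the database class is passed in to keep the sketch import-free, and the
# `atlas_exposures` table name is only an example.
def _example_use_connections(log, settings, databaseClass):
    from fundamentals.mysql import readquery
    dbConns, dbVersions = databaseClass(log=log, settings=settings).connect()
    for name, version in dbVersions.items():
        log.info('%s database is running MySQL %s' % (name, version))
    rows = readquery(
        log=log,
        sqlQuery="SELECT count(*) as count FROM atlas_exposures",
        dbConn=dbConns["atlasMovers"]
    )
    return rows[0]["count"]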
def _get_matched_sources( self, gwid, plotParameters, redshiftLimit=False, allNed=False, match2mass=False): """ *get matched sources* **Key Arguments: ** - ``gwid`` -- gravitational wave ID - ``plotParameters`` -- plot parameters from settings - ``redshiftLimit`` -- limit in redshift for returned sources - ``allNed`` -- no limits on query - ``match2mass`` -- NED sources need to be 2MASS sources with semi-major axis measurement **Return: ** - ``ra`` -- array of match NED source RAs - ``dec`` -- array of match NED source DECs """ self.log.debug('starting the ``_get_matched_sources`` method') # return self._sampled_area_only_points() if allNed == True: # UNPACK THE PLOT PARAMETERS centralCoordinate = plotParameters["centralCoordinate"] raRange = plotParameters["raRange"] decRange = plotParameters["decRange"] raMax = centralCoordinate[0] + raRange / 2. raMin = centralCoordinate[0] - raRange / 2. decMax = centralCoordinate[1] + decRange / 2. decMin = centralCoordinate[1] - decRange / 2. sqlQuery = u""" select raDeg, decDeg from tcs_cat_ned_stream where raDeg > %(raMin)s and raDeg < %(raMax)s and decDeg > %(decMin)s and decDeg < %(decMax)s """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn ) else: if redshiftLimit: redshiftClause = " and t.z is not null and t.z < %(redshiftLimit)s and (t.z_quality is null or t.z_quality not like 'PHOT%%') and (t.catalogue_object_subtype is null or t.catalogue_object_subtype not like '%%*%%')" % locals( ) else: redshiftClause = "" if match2mass: match2massClause = " and t.2mass_id is not null and t.major_axis_arcsec is not null" else: match2massClause = "" tcs_cross_matches = "tcs_%(gwid)s_catalogued_sources" % locals() sqlQuery = u""" select t.catalogue_object_ra as raDeg, t.catalogue_object_dec as decDeg from ps1_pointings p, %(tcs_cross_matches)s t where p.ps1_exp_id=t.transient_object_id and gw_id = "%(gwid)s" %(redshiftClause)s %(match2massClause)s; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn ) ra = [] dec = [] ra[:] = [row["raDeg"] for row in rows] dec[:] = [row["decDeg"] for row in rows] ra = np.array(ra) dec = np.array(dec) self.log.debug('completed the ``_get_matched_sources`` method') return ra, dec
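# USAGE SKETCH (illustrative): pulling the plot parameters for a wave out of
# the settings file and fetching the matched NED sources. `plotter` stands for
# an instance of the plotting class that defines `_get_matched_sources`; the
# "G211117" wave id and the 0.15 redshift cut are example values only.
def _example_matched_sources(plotter):
    gwid = "G211117"
    plotParameters = plotter.settings["gravitational waves"][gwid]["plot"]
    ra, dec = plotter._get_matched_sources(
        gwid=gwid,
        plotParameters=plotParameters,
        redshiftLimit=0.15,   # only sources with z < 0.15
        match2mass=True       # require a 2MASS match with a semi-major axis
    )
    return len(ra)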
def label_pointings_with_gw_ids( self): """ *Attempt to label the PS1 pointing with the GW IDs* The GW metadata used to associate PS1 pointings is taken from the settings file **Return:** - None **Usage:** .. code-block:: python # ATTEMPT TO LABEL PS1 POINTINGS IN DATABASE WITH A GW ID from breaker import update_ps1_atlas_footprint_tables dbUpdater = update_ps1_atlas_footprint_tables( log=log, settings=settings ) dbUpdater.label_pointings_with_gw_ids() """ self.log.debug('starting the ``label_pointings_with_gw_ids`` method') # WAVE METADATA FOUND IN SETTINGS FILE for wave in self.settings["gravitational waves"]: # UNPACK THE PLOT PARAMETERS FROM THE SETTINGS FILE centralCoordinate = self.settings["gravitational waves"][ wave]["plot"]["centralCoordinate"] raRange = float(self.settings["gravitational waves"][ wave]["plot"]["raRange"]) decRange = float(self.settings["gravitational waves"][ wave]["plot"]["decRange"]) raMax = (centralCoordinate[0] + raRange / 2.) + 5. raMin = (centralCoordinate[0] - raRange / 2.) - 5. decMax = (centralCoordinate[1] + decRange / 2.) + 5. decMin = (centralCoordinate[1] - decRange / 2.) - 5. mjdLower = self.settings["gravitational waves"][ wave]["mjd"] - 21. mjdUpper = self.settings["gravitational waves"][ wave]["mjd"] + 31 if raMin > 0. and raMax < 360.: raWhere = """(raDeg > %(raMin)s and raDeg < %(raMax)s)""" % locals( ) elif raMin < 0.: raMin2 = raMin + 360. raWhere = """((raDeg > 0. and raDeg < %(raMax)s) or raDeg > %(raMin2)s)""" % locals( ) elif raMax > 360.: raMax2 = raMax - 360. raWhere = """((raDeg > %(raMin)s and raDeg < 360.) or raDeg < %(raMax2)s)""" % locals( ) decWhere = """(decDeg > %(decMin)s and decDeg < %(decMax)s)""" % locals( ) mjdWhere = "(mjd>%(mjdLower)s and mjd<%(mjdUpper)s)" % locals() sqlQuery = u""" update ps1_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" update ps1_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%"; """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" update atlas_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" update atlas_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%"; """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) mjdWhere = mjdWhere.replace("mjd", "mjd_registered") sqlQuery = u""" update ps1_nightlogs set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null and type = "OBJECT" """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" update ps1_nightlogs set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and type = "OBJECT" and gw_id not like "%%%(wave)s%%"; """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" select count(*) as count from ps1_pointings where gw_id is null; """ % locals() count = readquery( log=self.log, sqlQuery=sqlQuery, 
dbConn=self.ligo_virgo_wavesDbConn, quiet=False )[0]["count"] print "PS1 pointings labelled with their associated GW id" if count == 0: print " Note: all pointings have been labelled with a GW ID" else: print " %(count)s pointings remain unlabelled with a GW ID" % locals() self.log.debug('completed the ``label_pointings_with_gw_ids`` method') return None
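# CLARIFYING SKETCH (illustrative, not part of the original class): the RA
# where-clause built in `label_pointings_with_gw_ids` has to cope with search
# boxes that wrap through RA = 0/360 deg. This standalone helper mirrors that
# branching so the three cases are easier to see.
def _example_ra_where_clause(raMin, raMax):
    if raMin > 0. and raMax < 360.:
        # simple case - the box does not cross RA = 0/360
        return "(raDeg > %(raMin)s and raDeg < %(raMax)s)" % locals()
    elif raMin < 0.:
        # box spills below 0 deg - wrap the lower edge round to ~360
        raMin2 = raMin + 360.
        return "((raDeg > 0. and raDeg < %(raMax)s) or raDeg > %(raMin2)s)" % locals()
    else:
        # box spills above 360 deg - wrap the upper edge round past 0
        raMax2 = raMax - 360.
        return "((raDeg > %(raMin)s and raDeg < 360.) or raDeg < %(raMax2)s)" % locals()

# e.g. _example_ra_where_clause(-3.0, 41.0)
#  -> "((raDeg > 0. and raDeg < 41.0) or raDeg > 357.0)"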
def populate_ps1_subdisk_table( self): """ *Calculate 49 subdisks for each of the PS1 pointings (used to query NED in manageable sized batches) and add them to the ``ps1_pointings_subdisks`` table of the database* .. image:: http://i.imgur.com/y3G0aax.png :width: 600 px **Return:** - None **Usage:** .. code-block:: python # SPLIT PS1 POINTINGS INTO SUB-DISKS AND ADD TO LV DATABASE from breaker import update_ps1_atlas_footprint_tables dbUpdater = update_ps1_atlas_footprint_tables( log=log, settings=settings ) dbUpdater.populate_ps1_subdisk_table() """ self.log.debug( 'completed the ````populate_ps1_subdisk_table`` method') # SELECT THE PS1 POINTINGS NEEDING SUBDISKS CALCULATED sqlQuery = u""" select ps1_exp_id, raDeg, decDeg from ps1_pointings where subdisks_calculated = 0 and raDeg is not null """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, quiet=False ) ps1PointNum = len(rows) # CALCULATE ALL OF THE SUBDISKS inserts = [] expIds = [] for row in rows: subDiskCoordinates = self._get_subdisk_parameters( row["raDeg"], row["decDeg"], 1.5) ps1_exp_id = row["ps1_exp_id"] expIds.append(ps1_exp_id) for i, c in enumerate(subDiskCoordinates): insert = { "raDeg": c[0], "decDeg": c[1], "ps1_exp_id": ps1_exp_id, "circleId": i + 1 } inserts.append(insert) # ADD SUBDISKS TO DATABASE if len(inserts): insert_list_of_dictionaries_into_database_tables( dbConn=self.ligo_virgo_wavesDbConn, log=self.log, dictList=inserts, dbTableName="ps1_pointings_subdisks", uniqueKeyList=["ps1_exp_id", "circleId"], dateModified=False, batchSize=2500, replace=True ) # UPDATE POINTINGS TABLE TO INDICATE SUBDISKS HAVE BEEN CALCULATED theseIds = ",".join(expIds) sqlQuery = u""" update ps1_pointings set subdisks_calculated = 1 where ps1_exp_id in (%(theseIds)s) """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) if ps1PointNum == 0: print "All PS1 pointings have been split into their 49 sub-disks" % locals() else: print "%(ps1PointNum)s new PS1 pointings have been split into 49 sub-disks - parameters added to the `ps1_pointings_subdisks` database table" % locals() # APPEND HTMIDs TO THE ps1_pointings_subdisks TABLE add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="ps1_pointings_subdisks", dbConn=self.ligo_virgo_wavesDbConn, log=self.log, primaryIdColumnName="primaryId" ) self.log.debug( 'completed the ``populate_ps1_subdisk_table`` method') return None
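# ILLUSTRATIVE SKETCH ONLY: the real sub-disk geometry lives in
# `_get_subdisk_parameters`; this is just one plausible way to tile a PS1
# pointing with a 7 x 7 grid of 49 small-disk centres, to make the idea of
# "sub-disks" concrete. The 1.5 deg half-width and the cos(dec) stretch of the
# RA spacing are assumptions here, not the package's actual numbers.
import math

def _example_subdisk_centres(raDeg, decDeg, halfWidth=1.5, gridSize=7):
    centres = []
    step = (2. * halfWidth) / gridSize
    for i in range(gridSize):
        for j in range(gridSize):
            dec = decDeg - halfWidth + (j + 0.5) * step
            # stretch the RA offset so the on-sky spacing stays roughly even
            ra = raDeg + (-halfWidth + (i + 0.5) * step) / math.cos(math.radians(dec))
            centres.append((ra % 360., dec))
    return centres   # 49 (ra, dec) tuples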
def update_ned_database_table( self): """ *Use Sherlock & Neddy to query NED and update the catalogues database for previously unseen/stale PS1 footprint areas* **Return:** - None **Usage:** .. code-block:: python # UPDATE THE NED STREAM FOR NEW PS1 FOOTPRINTS from breaker import update_ps1_atlas_footprint_tables dbUpdater = update_ps1_atlas_footprint_tables( log=log, settings=settings ) dbUpdater.update_ned_database_table() """ self.log.debug('starting the ``update_ned_database_table`` method') from sherlock.update_ned_stream import update_ned_stream numDisksToConesearch = 100 rowCount = 100 while rowCount > 0: sqlQuery = u""" select primaryId, raDeg as "ra", decDeg as "dec", htm16ID from ps1_pointings_subdisks where nedQueried = 0 limit %(numDisksToConesearch)s """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, quiet=False ) rowCount = len(rows) ids = [] ids[:] = [str(row["primaryId"]) for row in rows] ids = ",".join(ids) if rowCount > 0: print "Selecting the next %(rowCount)s subdisks areas to conesearch against NED from the `ps1_pointings_subdisks` table" % locals() else: print "NED stream is up-to-date, no queries required" % locals() update_ned_stream( log=self.log, cataloguesDbConn=self.cataloguesDbConn, settings=self.settings, transientsMetadataList=rows ).get() if len(ids): sqlQuery = u""" update ps1_pointings_subdisks set nedQueried = 1 where primaryId in (%(ids)s) """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) sqlQuery = u""" select count(*) as count from ps1_pointings_subdisks where nedQueried = 0 """ % locals() count = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, quiet=False ) count = count[0]["count"] if rowCount > 0: print "NED stream updated for %(rowCount)s PS1 pointing sub-disks (%(count)s to go)" % locals() print "-----\n\n" self.log.debug('completed the ``update_ned_database_table`` method') return None
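# CLARIFYING SKETCH (illustrative): the `transientsMetadataList` handed to
# `update_ned_stream` above is simply the list of row-dicts returned by the
# sub-disk query, keyed exactly as the SQL aliases them. The values below are
# made up purely to show the expected shape.
exampleTransientsMetadataList = [
    {"primaryId": 101, "ra": 155.12596, "dec": -15.17874, "htm16ID": 58963278341},
    {"primaryId": 102, "ra": 156.03411, "dec": -14.90022, "htm16ID": 58963278902}
]
# update_ned_stream(log=log, cataloguesDbConn=cataloguesDbConn,
#                   settings=settings,
#                   transientsMetadataList=exampleTransientsMetadataList).get()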
def update_gravity_event_annotations( self): """*update gravity event annotations* **Key Arguments:** # - **Return:** - None **Usage:** .. todo:: - add usage info - create a sublime snippet for usage - write a command-line tool for this method - update package tutorial with command-line tool info if needed .. code-block:: python usage code """ self.log.debug( 'completed the ````update_gravity_event_annotations`` method') from breaker.transients import annotator # CREATE THE ANNOTATION HELPER TABLES IF THEY DON"T EXIST moduleDirectory = os.path.dirname(__file__) mysql_scripts = moduleDirectory + "/resources/mysql" for db in ["ps1gw", "ps13pi", "atlas"]: directory_script_runner( log=self.log, pathToScriptDirectory=mysql_scripts, databaseName=self.settings["database settings"][db]["db"], loginPath=self.settings["database settings"][db]["loginPath"], waitForResult=True, successRule=False, failureRule=False ) for db in ["ligo_virgo_waves"]: directory_script_runner( log=self.log, pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables", databaseName=self.settings["database settings"][db]["db"], loginPath=self.settings["database settings"][db]["loginPath"], waitForResult=True, successRule=False, failureRule=False ) # UPDATE THE TABLE WITH THE METADATA OF EACH GRAVITY EVENT sqlQuery = "" for g in self.settings["gravitational waves"]: h = self.settings["gravitational waves"][g]["human-name"] m = self.settings["gravitational waves"][g]["mjd"] cmd = """insert ignore into tcs_gravity_events (`gracedb_id`, `gravity_event_id`, `mjd`) VALUES ("%(g)s", "%(h)s", %(m)s) on duplicate key update mjd=%(m)s;\n""" % locals( ) sqlQuery += cmd for db in [self.atlasDbConn, self.ps1gwDbConn, self.ps13piDbConn]: writequery( log=self.log, sqlQuery=sqlQuery, dbConn=db ) sqlQuery = sqlQuery.replace("tcs_gravity_events", "gravity_events") writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ligo_virgo_wavesDbConn, ) for db in ["ps1gw", "ps13pi", "atlas"]: directory_script_runner( log=self.log, pathToScriptDirectory=mysql_scripts, databaseName=self.settings["database settings"][db]["db"], loginPath=self.settings["database settings"][db]["loginPath"], waitForResult=True, successRule=False, failureRule=False ) for db in ["ligo_virgo_waves"]: directory_script_runner( log=self.log, pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables", databaseName=self.settings["database settings"][db]["db"], loginPath=self.settings["database settings"][db]["loginPath"], waitForResult=True, successRule=False, failureRule=False ) dbDict = { "ps1gw": self.ps1gwDbConn, "atlas": self.atlasDbConn, "ps13pi": self.ps13piDbConn, "ligo_virgo_waves": self.ligo_virgo_wavesDbConn } for db in dbDict.keys(): for g in self.settings["gravitational waves"]: h = self.settings["gravitational waves"][g]["human-name"] print "Annotating new transients associated with gravity event %(h)s" % locals() m = self.settings["gravitational waves"][g]["mjd"] mapPath = self.settings["gravitational waves"][g]["mapPath"] mapName = os.path.basename(mapPath) thisDbConn = dbDict[db] if thisDbConn in [self.ps1gwDbConn, self.ps13piDbConn]: sqlQuery = u""" SELECT a.transient_object_id, a.gracedb_id, t.ra_psf, t.dec_psf FROM tcs_transient_objects t, tcs_gravity_event_annotations a WHERE a.transient_object_id = t.id AND t.detection_list_id != 0 AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=thisDbConn, quiet=False ) transients = {} for r in rows: 
transients[r["transient_object_id"]] = ( r["ra_psf"], r["dec_psf"]) an = annotator( log=self.log, settings=self.settings, gwid=g ) transientNames, probs = an.annotate(transients) if thisDbConn in [self.atlasDbConn]: sqlQuery = u""" SELECT a.transient_object_id, a.gracedb_id, t.ra, t.dec FROM atlas_diff_objects t, tcs_gravity_event_annotations a WHERE a.transient_object_id = t.id AND t.detection_list_id != 0 AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=thisDbConn, quiet=False ) transients = {} for r in rows: transients[r["transient_object_id"]] = ( r["ra"], r["dec"]) an = annotator( log=self.log, settings=self.settings, gwid=g ) transientNames, probs = an.annotate(transients) if thisDbConn in [self.ligo_virgo_wavesDbConn]: # PANSTARRS SKYCELLS sqlQuery = u""" SELECT a.skycell_id, a.gracedb_id, t.raDeg, t.decDeg FROM ps1_skycell_map t, ps1_skycell_gravity_event_annotations a WHERE a.skycell_id = t.skycell_id AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=thisDbConn, quiet=False ) exposures = {} for r in rows: exposures[r["skycell_id"]] = ( r["raDeg"], r["decDeg"]) stats = survey_footprint( log=self.log, settings=self.settings, gwid=g ) exposureIDs, probs = stats.annotate_exposures( exposures=exposures, pointingSide=0.4 ) dataList = [] for p, t in zip(probs, exposureIDs): dataList.append({ "skycell_id": t, "prob_coverage": p, "gracedb_id": g, "map_name": mapName }) tableName = "ps1_skycell_gravity_event_annotations" dataSet = list_of_dictionaries( log=self.log, listOfDictionaries=dataList, reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') ) # RECURSIVELY CREATE MISSING DIRECTORIES if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()): os.makedirs("/tmp/mysqlinsert/%(db)s" % locals()) now = datetime.now() now = now.strftime("%Y%m%dt%H%M%S%f") mysqlData = dataSet.mysql( tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False) # ATLAS EXPOSURES sqlQuery = u""" SELECT atlas_object_id, gracedb_id, raDeg, decDeg FROM atlas_exposure_gravity_event_annotations WHERE (map_name != "%(mapName)s" or map_name is null) AND gracedb_id="%(g)s"; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=thisDbConn, quiet=False ) exposures = {} for r in rows: exposures[r["atlas_object_id"]] = ( r["raDeg"], r["decDeg"]) stats = survey_footprint( log=self.log, settings=self.settings, gwid=g ) exposureIDs, probs = stats.annotate_exposures( exposures=exposures, pointingSide=5.46 ) dataList = [] for p, t in zip(probs, exposureIDs): dataList.append({ "atlas_object_id": t, "prob_coverage": p, "gracedb_id": g, "map_name": mapName }) tableName = "atlas_exposure_gravity_event_annotations" dataSet = list_of_dictionaries( log=self.log, listOfDictionaries=dataList, reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') ) # RECURSIVELY CREATE MISSING DIRECTORIES if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()): os.makedirs("/tmp/mysqlinsert/%(db)s" % locals()) now = datetime.now() now = now.strftime("%Y%m%dt%H%M%S%f") mysqlData = dataSet.mysql( tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False) if thisDbConn not in [self.ligo_virgo_wavesDbConn]: dataList = [] for p, t in zip(probs, transientNames): dataList.append({ "transient_object_id": t, "enclosing_contour": p, "gracedb_id": g, 
"map_name": mapName }) tableName = "tcs_gravity_event_annotations" dataSet = list_of_dictionaries( log=self.log, listOfDictionaries=dataList, reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') ) # RECURSIVELY CREATE MISSING DIRECTORIES if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()): os.makedirs("/tmp/mysqlinsert/%(db)s" % locals()) now = datetime.now() now = now.strftime("%Y%m%dt%H%M%S%f") mysqlData = dataSet.mysql( tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False) for db in dbDict.keys(): directory_script_runner( log=self.log, pathToScriptDirectory="/tmp/mysqlinsert/%(db)s" % locals(), databaseName=self.settings["database settings"][db]["db"], loginPath=self.settings["database settings"][db]["loginPath"], waitForResult=True, successRule=False, failureRule=False ) self.log.debug( 'completed the ``update_gravity_event_annotations`` method') return None
def convert_dictionary_to_mysql_table(log, dictionary, dbTableName, uniqueKeyList=[], dbConn=False, createHelperTables=False, dateModified=False, returnInsertOnly=False, replace=False, batchInserts=True, reDatetime=False, skipChecks=False): """convert dictionary to mysql table **Key Arguments:** - ``log`` -- logger - ``dictionary`` -- python dictionary - ``dbConn`` -- the db connection - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist) - ``uniqueKeyList`` - a lists column names that need combined to create the primary key - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc - ``returnInsertOnly`` -- returns only the insert command (does not execute it) - ``dateModified`` -- add a modification date to the mysql table - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required) - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``) - ``skipChecks`` -- skip reliability checks. Less robust but a little faster. **Return:** - ``returnInsertOnly`` -- the insert statement if requested **Usage:** To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist: .. code-block:: python from fundamentals.mysql import convert_dictionary_to_mysql_table dictionary = {"a newKey": "cool", "and another": "super cool", "uniquekey1": "cheese", "uniqueKey2": "burgers"} convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=False, replace=True ) Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database: insertCommand, valueTuple = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=True, replace=False, batchInserts=True ) print insertCommand, valueTuple # OUT: 'INSERT IGNORE INTO `testing_table` # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES # (%s, %s, %s, %s, %s)', ('cool', 'super cool', # '2016-06-21T12:08:59', 'burgers', 'cheese') You can also return a list of single insert statements using ``batchInserts = False``. 
Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found: inserts = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=dictionary, dbTableName="testing_table", uniqueKeyList=["uniquekey1", "uniqueKey2"], dateModified=False, returnInsertOnly=True, replace=True, batchInserts=False ) print inserts # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese") # ON DUPLICATE KEY UPDATE a_newKey="cool", and_another="super # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers", # uniquekey1="cheese" """ log.info('starting the ``convert_dictionary_to_mysql_table`` function') if not reDatetime: reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') if not replace: insertVerb = "INSERT" else: insertVerb = "INSERT IGNORE" if returnInsertOnly == False: # TEST THE ARGUMENTS if str(type(dbConn).__name__) != "Connection": message = 'Please use a valid MySQL DB connection.' log.critical(message) raise TypeError(message) if not isinstance(dictionary, dict): message = 'Please make sure "dictionary" argument is a dict type.' log.critical(message) raise TypeError(message) if not isinstance(uniqueKeyList, list): message = 'Please make sure "uniqueKeyList" is a list' log.critical(message) raise TypeError(message) for i in uniqueKeyList: if i not in dictionary.keys(): message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert' log.critical(message) raise ValueError(message) for k, v in dictionary.iteritems(): # log.debug('k: %s, v: %s' % (k, v,)) if isinstance(v, list) and len(v) != 2: message = 'Please make sure the list values in "dictionary" 2 items in length' log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) if isinstance(v, list): if not (isinstance(v[0], str) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], long) or isinstance(v[0], datetime.date) or v[0] == None): message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool' log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) else: if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, long) or isinstance(v, unicode) or isinstance(v, datetime.date) or v == None): this = type(v) message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals( ) log.critical("%s: in %s we have a %s (%s)" % (message, k, v, type(v))) raise ValueError(message) if not isinstance(createHelperTables, bool): message = 'Please make sure "createHelperTables" is a True or False' log.critical(message) raise TypeError(message) # TEST IF TABLE EXISTS if skipChecks: tableExists = table_exists.table_exists(dbConn=dbConn, log=log, dbTableName=dbTableName) else: tableExists = False # CREATE THE TABLE IF IT DOES NOT EXIST if tableExists is False: sqlQuery = """ CREATE TABLE `%(dbTableName)s` (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter', `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, `updated` tinyint(4) DEFAULT '0', PRIMARY KEY (`primaryId`)) ENGINE=MyISAM AUTO_INCREMENT=0 
DEFAULT CHARSET=latin1; """ % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) qCreateColumn = '' formattedKey = '' formattedKeyList = [] myValues = [] # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this? if dateModified and replace == False: dictionary['dateLastModified'] = [ str(times.get_now_sql_datetime()), "date row was modified" ] dictionary['updated'] = [0, "this row has been updated"] # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE # NAME OF THE KEY, IF IT DOES NOT EXIST count = len(dictionary) i = 1 for (key, value) in dictionary.items(): if (isinstance(value, list) and value[0] is None): del dictionary[key] # SORT THE DICTIONARY BY KEY odictionary = c.OrderedDict(sorted(dictionary.items())) for (key, value) in odictionary.iteritems(): formattedKey = key.replace(" ", "_").replace("-", "_") # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST if formattedKey == "dec": formattedKey = "decl" if formattedKey == "DEC": formattedKey = "DECL" formattedKeyList.extend([formattedKey]) if len(key) > 0: # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A # STRING TO MYSQL) if isinstance(value, list) and (isinstance(value[0], list)): value[0] = yaml.dump(value[0]) value[0] = str(value[0]) # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL # if type(value[0]) == str or type(value[0]) == unicode: # value[0] = value[0].replace('"', """'""") # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL # COMMAND LATER if isinstance(value, str): value = value.replace('\\', '\\\\') value = value.replace('"', '\\"') try: udata = value.decode("utf-8", "ignore") value = udata.encode("ascii", "ignore") except: log.error('cound not decode value %(value)s' % locals()) # log.debug('udata: %(udata)s' % locals()) if isinstance(value, unicode): value = value.replace('"', '\\"') value = value.encode("ascii", "ignore") if isinstance(value, list) and isinstance(value[0], unicode): myValues.extend(['%s' % value[0].strip()]) elif isinstance(value, list): myValues.extend(['%s' % (value[0], )]) else: myValues.extend(['%s' % (value, )]) if returnInsertOnly == False: # CHECK IF COLUMN EXISTS YET colExists = \ "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \ formattedKey + "'AND TABLE_NAME='" + dbTableName + """'""" try: # log.debug('checking if the column '+formattedKey+' exists # in the '+dbTableName+' table') rows = readquery( log=log, sqlQuery=colExists, dbConn=dbConn, ) except Exception as e: log.error('something went wrong' + str(e) + '\n') # IF COLUMN DOESN'T EXIT - GENERATE IT if len(rows) == 0: qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % ( dbTableName, formattedKey) if not isinstance(value, list): value = [value] if reDatetime.search(str(value[0])): # log.debug('Ok - a datetime string was found') qCreateColumn += '` datetime DEFAULT NULL' elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \ == 'feedName' or formattedKey == 'title': qCreateColumn += '` varchar(100) DEFAULT NULL' elif (isinstance(value[0], str) or isinstance( value[0], unicode)) and len(value[0]) < 30: qCreateColumn += '` varchar(100) DEFAULT NULL' elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len( value[0]) >= 30 and len(value[0]) < 80: qCreateColumn += '` varchar(100) DEFAULT NULL' elif isinstance(value[0], str) or isinstance( value[0], unicode): columnLength = 450 + len(value[0]) * 2 qCreateColumn += '` varchar(' + str( columnLength) + ') DEFAULT NULL' elif 
isinstance(value[0], int) and abs(value[0]) <= 9: qCreateColumn += '` tinyint DEFAULT NULL' elif isinstance(value[0], int): qCreateColumn += '` int DEFAULT NULL' elif isinstance(value[0], float) or isinstance( value[0], long): qCreateColumn += '` double DEFAULT NULL' elif isinstance(value[0], bool): qCreateColumn += '` tinyint DEFAULT NULL' elif isinstance(value[0], list): qCreateColumn += '` varchar(1024) DEFAULT NULL' else: # log.debug('Do not know what format to add this key in # MySQL - removing from dictionary: %s, %s' # % (key, type(value[0]))) formattedKeyList.pop() myValues.pop() qCreateColumn = None if qCreateColumn: # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR # MYSQL if key is not formattedKey: qCreateColumn += " COMMENT 'original keyword: " + \ key + """'""" # CREATE THE COLUMN IF IT DOES NOT EXIST try: log.info('creating the ' + formattedKey + ' column in the ' + dbTableName + ' table') writequery(log=log, sqlQuery=qCreateColumn, dbConn=dbConn) except Exception as e: # log.debug('qCreateColumn: %s' % (qCreateColumn, # )) log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName + ' table -- ' + str(e) + '\n') if returnInsertOnly == False: # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST if len(uniqueKeyList): for i in range(len(uniqueKeyList)): uniqueKeyList[i] = uniqueKeyList[i].replace(" ", "_").replace( "-", "_") if uniqueKeyList[i] == "dec": uniqueKeyList[i] = "decl" if uniqueKeyList[i] == "DEC": uniqueKeyList[i] = "DECL" indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_") for i in range(len(uniqueKeyList) - 1): indexName += '_' + uniqueKeyList[i + 1] indexName = indexName.lower().replace(" ", " ").replace(" ", "_") sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \ dbTableName + """' AND INDEX_NAME = '""" + indexName + """'""" rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False) exists = rows[0]['COUNT(*)'] # log.debug('uniqueKeyList: %s' % (uniqueKeyList,)) if exists == 0: if isinstance(uniqueKeyList, list): uniqueKeyList = ','.join(uniqueKeyList) addUniqueKey = 'ALTER TABLE `' + dbTableName + \ '` ADD unique ' + indexName + \ """ (""" + uniqueKeyList + ')' # log.debug('HERE IS THE COMMAND:'+addUniqueKey) writequery(log=log, sqlQuery=addUniqueKey, dbConn=dbConn) if returnInsertOnly == True and batchInserts == True: myKeys = '`,`'.join(formattedKeyList) valueString = ("%s, " * len(myValues))[:-2] insertCommand = insertVerb + """ INTO `""" + dbTableName + \ """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \ valueString + """, NOW())""" mv = [] mv[:] = [None if m == "None" else m for m in myValues] valueTuple = tuple(mv) dup = "" if replace: dup = " ON DUPLICATE KEY UPDATE " for k, v in zip(formattedKeyList, mv): dup = """%(dup)s %(k)s=values(%(k)s),""" % locals() dup = """%(dup)s updated=1, dateLastModified=NOW()""" % locals() insertCommand = insertCommand + dup insertCommand = insertCommand.replace('\\""', '\\" "') insertCommand = insertCommand.replace('""', "null") insertCommand = insertCommand.replace('!!python/unicode:', '') insertCommand = insertCommand.replace('!!python/unicode', '') insertCommand = insertCommand.replace('"None"', 'null') return insertCommand, valueTuple # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES myKeys = '`,`'.join(formattedKeyList) myValues = '" ,"'.join(myValues) # log.debug(myValues+" ------ PRESTRIP") # REMOVE SOME CONVERSION NOISE myValues = 
myValues.replace('time.struct_time', '') myValues = myValues.replace( '- !!python/object/new:feedparser.FeedParserDict', '') myValues = myValues.replace( '!!python/object/new:feedparser.FeedParserDict', '') myValues = myValues.replace('dictitems:', '') myValues = myValues.replace('dictitems', '') myValues = myValues.replace('!!python/unicode:', '') myValues = myValues.replace('!!python/unicode', '') myValues = myValues.replace('"None"', 'null') # myValues = myValues.replace('"None', 'null') if myValues[-4:] != 'null': myValues += '"' dup = "" if replace: dupValues = ('"' + myValues).split(" ,") dupKeys = formattedKeyList dup = dup + " ON DUPLICATE KEY UPDATE " for k, v in zip(dupKeys, dupValues): dup = """%(dup)s `%(k)s`=%(v)s,""" % locals() dup = """%(dup)s updated=IF(""" % locals() for k, v in zip(dupKeys, dupValues): if v == "null": dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() else: dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() dup = dup[:-5] + ", 0, 1), dateLastModified=IF(" for k, v in zip(dupKeys, dupValues): if v == "null": dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() else: dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() dup = dup[:-5] + ", dateLastModified, NOW())" # log.debug(myValues+" ------ POSTSTRIP") addValue = insertVerb + """ INTO `""" + dbTableName + \ """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \ myValues + """, NOW()) %(dup)s """ % locals() addValue = addValue.replace('\\""', '\\" "') addValue = addValue.replace('""', "null") addValue = addValue.replace('!!python/unicode:', '') addValue = addValue.replace('!!python/unicode', '') addValue = addValue.replace('"None"', 'null') # log.debug(addValue) if returnInsertOnly == True: return addValue message = "" try: # log.debug('adding new data to the %s table; query: %s' % # (dbTableName, addValue))" writequery(log=log, sqlQuery=addValue, dbConn=dbConn) except Exception as e: log.error("could not add new data added to the table '" + dbTableName + "' : " + str(e) + '\n') log.info('completed the ``convert_dictionary_to_mysql_table`` function') return None, None
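# CLARIFYING SKETCH (illustrative, not called by the function above): the
# column-creation branch in `convert_dictionary_to_mysql_table` maps a python
# value to a MySQL column type. This helper roughly mirrors that decision
# table so it can be read in one place (the special-cased keyword names and
# python-2 unicode/long handling are omitted).
def _example_mysql_column_type(value, reDatetime):
    if reDatetime.search(str(value)):
        return "datetime DEFAULT NULL"
    if isinstance(value, str) and len(value) < 80:
        return "varchar(100) DEFAULT NULL"
    if isinstance(value, str):
        # long strings get generous headroom
        return "varchar(%s) DEFAULT NULL" % (450 + len(value) * 2,)
    if isinstance(value, bool):
        return "tinyint DEFAULT NULL"
    if isinstance(value, int) and abs(value) <= 9:
        return "tinyint DEFAULT NULL"
    if isinstance(value, int):
        return "int DEFAULT NULL"
    if isinstance(value, float):
        return "double DEFAULT NULL"
    if isinstance(value, list):
        return "varchar(1024) DEFAULT NULL"
    return None   # unsupported type - the original drops the key instead

# e.g. _example_mysql_column_type("2016-09-14T13:12:08",
#          re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T'))  -> "datetime DEFAULT NULL"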
def main(arguments=None): """ *The main function used when ``find_atlas_exposure_containing_ssobject.py`` is run as a single script from the cl* """ # SETUP VARIABLES # MAKE SURE HEALPIX SMALL ENOUGH TO MATCH FOOTPRINTS CORRECTLY nside = 1024 pi = (4 * math.atan(1.0)) DEG_TO_RAD_FACTOR = pi / 180.0 RAD_TO_DEG_FACTOR = 180.0 / pi tileSide = 5.46 i = 0 outputList = [] rsyncContent = [] obscodes = {"02": "T05", "01": "T08"} # SETUP THE COMMAND-LINE UTIL SETTINGS su = tools(arguments=arguments, docString=__doc__, logLevel="WARNING", options_first=False, projectName=False) arguments, settings, log, dbConn = su.setup() # UNPACK REMAINING CL ARGUMENTS USING `EXEC` TO SETUP THE VARIABLE NAMES # AUTOMATICALLY for arg, val in arguments.iteritems(): if arg[0] == "-": varname = arg.replace("-", "") + "Flag" else: varname = arg.replace("<", "").replace(">", "") if isinstance(val, str) or isinstance(val, unicode): exec(varname + " = '%s'" % (val, )) else: exec(varname + " = %s" % (val, )) if arg == "--dbConn": dbConn = val log.debug('%s = %s' % ( varname, val, )) dbSettings = { 'host': '127.0.0.1', 'user': '******', 'tunnel': { 'remote ip': 'starbase.mp.qub.ac.uk', 'remote datbase host': 'dormammu', 'remote user': '******', 'port': 5003 }, 'password': '******', 'db': 'atlas_moving_objects' } # SETUP DATABASE CONNECTIONS dbConn = database(log=log, dbSettings=dbSettings).connect() # GRAB THE EXPOSURE LISTING for expPrefix, obscode in obscodes.iteritems(): exposureList = [] mjds = [] sqlQuery = "select * from atlas_exposures where expname like '%(expPrefix)s%%'" % locals( ) connected = 0 while connected == 0: try: rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False) connected = 1 except: # SETUP DATABASE CONNECTIONS dbConn = database(log=log, dbSettings=dbSettings).connect() print "Can't connect to DB - try again" time.sleep(2) t = len(rows) print "There are %(t)s '%(expPrefix)s' exposures to check - hang tight" % locals( ) for row in rows: row["mjd"] = row["mjd"] + row["exp_time"] / (2. * 60 * 60 * 24) exposureList.append(row) mjds.append(row["mjd"]) results = [] batchSize = 500 total = len(mjds[1:]) batches = int(total / batchSize) start = 0 end = 0 theseBatches = [] for i in range(batches + 1): end = end + batchSize start = i * batchSize thisBatch = mjds[start:end] theseBatches.append(thisBatch) i = 0 totalLen = len(theseBatches) index = 0 for batch in theseBatches: i += 1 if index > 1: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") print "Requesting batch %(i)04d/%(totalLen)s from JPL" % locals() index += 1 eph = jpl_horizons_ephemeris(log=log, objectId=[ssobject], mjd=batch, obscode=obscode, verbose=False) for b in batch: match = 0 # print b for row in eph: if math.floor(row["mjd"] * 10000 + 0.01) == math.floor(b * 10000 + 0.01): match = 1 results.append(row) if match == 0: for row in eph: if math.floor(row["mjd"] * 10000) == math.floor(b * 10000): match = 1 results.append(row) if match == 0: results.append(None) this = math.floor(b * 10000 + 0.01) print "MJD %(b)s (%(this)s) is missing" % locals() for row in eph: print math.floor(row["mjd"] * 10000 + 0.00001) print "" print "Finding the exopsures containing the SS object" for e, r in zip(exposureList, results): # CALCULATE SEPARATION IN ARCSEC if not r: continue calculator = separations( log=log, ra1=r["ra_deg"], dec1=r["dec_deg"], ra2=e["raDeg"], dec2=e["decDeg"], ) angularSeparation, north, east = calculator.get() sep = float(angularSeparation) / 3600. 
if sep < 5.: # THE SKY-LOCATION AS A HEALPIXEL ID pinpoint = hp.ang2pix(nside, theta=r["ra_deg"], phi=r["dec_deg"], lonlat=True) decCorners = (e["decDeg"] - tileSide / 2, e["decDeg"] + tileSide / 2) corners = [] for d in decCorners: if d > 90.: d = 180. - d elif d < -90.: d = -180 - d raCorners = ( e["raDeg"] - (tileSide / 2) / np.cos(d * DEG_TO_RAD_FACTOR), e["raDeg"] + (tileSide / 2) / np.cos(d * DEG_TO_RAD_FACTOR)) for rc in raCorners: if rc > 360.: rc = 720. - rc elif rc < 0.: rc = 360. + rc corners.append(hp.ang2vec(rc, d, lonlat=True)) # NEAR THE POLES RETURN SQUARE INTO TRIANGE - ALMOST DEGENERATE pole = False for d in decCorners: if d > 87.0 or d < -87.0: pole = True if pole == True: corners = corners[1:] else: # FLIP CORNERS 3 & 4 SO HEALPY UNDERSTANDS POLYGON SHAPE corners = [corners[0], corners[1], corners[3], corners[2]] # RETURN HEALPIXELS IN EXPOSURE AREA expPixels = hp.query_polygon(nside, np.array(corners)) if pinpoint in expPixels: outputList.append({ "obs": e["expname"], "mjd": e["mjd"], "raDeg": r["ra_deg"], "decDeg": r["dec_deg"], "mag": r["apparent_mag"], "sep": sep }) thisMjd = int(math.floor(e["mjd"])) expname = e["expname"] ssobject_ = ssobject.replace(" ", "_") raStr = r["ra_deg"] decStr = r["dec_deg"] rsyncContent.append( "rsync -av [email protected]:/atlas/red/%(expPrefix)sa/%(thisMjd)s/%(expname)s.fits.fz %(ssobject_)s_atlas_exposures/" % locals()) rsyncContent.append( "touch %(ssobject_)s_atlas_exposures/%(expname)s.location" % locals()) rsyncContent.append( 'echo "_RAJ2000,_DEJ2000,OBJECT\n%(raStr)s,%(decStr)s,%(ssobject)s" > %(ssobject_)s_atlas_exposures/%(expname)s.location' % locals()) dataSet = list_of_dictionaries( log=log, listOfDictionaries=outputList, # use re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') for mysql reDatetime=False) ssobject = ssobject.replace(" ", "_") csvData = dataSet.csv( filepath="./%(ssobject)s_atlas_exposure_matches.csv" % locals()) rsyncContent = ("\n").join(rsyncContent) pathToWriteFile = "./%(ssobject)s_atlas_exposure_rsync.sh" % locals() try: log.debug("attempting to open the file %s" % (pathToWriteFile, )) writeFile = codecs.open(pathToWriteFile, encoding='utf-8', mode='w') except IOError, e: message = 'could not open the file %s' % (pathToWriteFile, ) log.critical(message) raise IOError(message)
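# CLARIFYING SKETCH (illustrative): the core of the footprint test above -
# turn the object's position into a healpix pixel, turn the exposure's four
# corners into a healpix polygon, and ask whether the pixel falls inside it.
# This stripped-down version ignores the pole and RA-wrap corrections the full
# script applies, so treat it as a sketch only.
import healpy as hp
import numpy as np

def _example_point_in_square_footprint(raObj, decObj, raExp, decExp,
                                       tileSide=5.46, nside=1024):
    # the object's sky-location as a healpixel id
    pinpoint = hp.ang2pix(nside, theta=raObj, phi=decObj, lonlat=True)
    half = tileSide / 2.
    # the exposure footprint as an ordered square of unit vectors
    corners = [
        hp.ang2vec(raExp - half, decExp - half, lonlat=True),
        hp.ang2vec(raExp + half, decExp - half, lonlat=True),
        hp.ang2vec(raExp + half, decExp + half, lonlat=True),
        hp.ang2vec(raExp - half, decExp + half, lonlat=True)
    ]
    expPixels = hp.query_polygon(nside, np.array(corners))
    return pinpoint in expPixels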
def add_htm_ids_to_mysql_database_table( raColName, declColName, tableName, dbConn, log, primaryIdColumnName="primaryId", cartesian=False, batchSize=25000, reindex=False): """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table* **Key Arguments:** - ``raColName`` -- ra in sexegesimal - ``declColName`` -- dec in sexegesimal - ``tableName`` -- name of table to add htmid info to - ``dbConn`` -- database hosting the above table - ``log`` -- logger - ``primaryIdColumnName`` -- the primary id for the table - ``cartesian`` -- add cartesian columns. Default *False* - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *2500* - ``reindex`` -- reindex the entire table **Return:** - None **Usage:** .. code-block:: python from HMpTy.mysql import add_htm_ids_to_mysql_database_table add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="my_big_star_table", dbConn=dbConn, log=log, primaryIdColumnName="primaryId", reindex=False ) """ log.info('starting the ``add_htm_ids_to_mysql_database_table`` function') # TEST TABLE EXIST sqlQuery = """show tables""" rows = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) log.debug( """Checking the table %(tableName)s exists in the database""" % locals()) tableList = [] for row in rows: tableList.extend(row.values()) if tableName not in tableList: message = "The %s table does not exist in the database" % (tableName,) log.critical(message) raise IOError(message) log.debug( """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals()) # TEST COLUMNS EXISTS cursor = dbConn.cursor(ms.cursors.DictCursor) sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,) cursor.execute(sqlQuery) rows = cursor.fetchall() desc = cursor.description existingColumns = [] for i in range(len(desc)): existingColumns.append(desc[i][0]) if (raColName not in existingColumns) or (declColName not in existingColumns): message = 'Please make sure you have got the naes of the RA and DEC columns correct' log.critical(message) raise IOError(message) if cartesian: # ACTION(S) ## htmCols = { 'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT', 'cx': 'DOUBLE', 'cy': 'DOUBLE', 'cz': 'DOUBLE' } else: htmCols = { 'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT' } # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM for key in htmCols.keys(): try: log.debug( 'attempting to check and generate the HTMId columns for the %s db table' % (tableName, )) colExists = \ """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \ % (key, tableName) colExists = readquery( log=log, sqlQuery=colExists, dbConn=dbConn ) switch = 0 if not colExists: if switch == 0: print "Adding the HTMCircle columns to %(tableName)s" % locals() switch = 1 sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \ key + ' ' + htmCols[key] + ' DEFAULT NULL' writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) except Exception as e: log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' % (tableName, str(e))) raise e log.debug( """Counting the number of rows still requiring HTMID information""" % locals()) if reindex: sqlQuery = u""" SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s"; """ % locals() keyname = readquery( log=log, 
sqlQuery=sqlQuery, dbConn=dbConn )[0]["INDEX_NAME"] if keyname != "PRIMARY": log.error('To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals()) return sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals( ) elif cartesian: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null""" % locals( ) else: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL""" % locals( ) log.debug( """SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals()) rowCount = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False ) totalCount = rowCount[0]["count"] # ADD HTMIDs IN BATCHES total = totalCount batches = int(total / batchSize) count = 0 lastId = False # NOW GENERATE THE HTMLIds FOR THESE ROWS for i in range(batches + 1): if total == 0: continue count += batchSize if count > batchSize: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount start = time.time() log.debug( """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals()) if reindex: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET if lastId: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize) else: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize) elif cartesian: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and ((htm10ID is NULL or cx is null)) limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) else: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and htm10ID is NULL limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) batch = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) if reindex and len(batch): lastId = batch[-1][primaryIdColumnName] log.debug( """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals()) raList = [] decList = [] pIdList = [] raList[:] = [r[raColName] for r in batch] decList[:] = [r[declColName] for r in batch] pIdList[:] = [r[primaryIdColumnName] for r in batch] from HMpTy import htm mesh16 = htm.HTM(16) mesh13 = htm.HTM(13) mesh10 = htm.HTM(10) log.debug( 'calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) htm16Ids = mesh16.lookup_id(raList, decList) htm13Ids = mesh13.lookup_id(raList, decList) htm10Ids = mesh10.lookup_id(raList, decList) log.debug( 'finshed calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) if cartesian: log.debug( 'calculating cartesian coordinates for batch of %s rows in %s db table' % (batchSize, tableName, )) cx = [] cy = [] cz = [] for r, d in zip(raList, decList): r = math.radians(r) d = math.radians(d) cos_dec = math.cos(d) cx.append(math.cos(r) * cos_dec) cy.append(math.sin(r) 
* cos_dec) cz.append(math.sin(d)) sqlQuery = "" for h16, h13, h10, pid, cxx, cyy, czz in zip(htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz): sqlQuery += \ """UPDATE `%s` SET htm16ID=%s, htm13ID=%s, htm10ID=%s, cx=%s, cy=%s, cz=%s where `%s` = '%s';\n""" \ % ( tableName, h16, h13, h10, cxx, cyy, czz, primaryIdColumnName, pid ) log.debug( 'finished calculating cartesian coordinates for batch of %s rows in %s db table' % ( batchSize, tableName, )) else: log.debug('building the sqlquery') updates = [] updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16, h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)] sqlQuery = "\n".join(updates) log.debug('finshed building the sqlquery') if len(sqlQuery): log.debug( 'starting to update the HTMIds for new objects in the %s db table' % (tableName, )) writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) log.debug( 'finished updating the HTMIds for new objects in the %s db table' % (tableName, )) else: log.debug( 'no HTMIds to add to the %s db table' % (tableName, )) percent = float(count) * 100. / float(totalCount) print "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals() end = time.time() timediff = end - start timediff = timediff * 1000000. / float(batchSize) print "Update speed: %(timediff)0.2fs/1e6 rows\n" % locals() # APPLY INDEXES IF NEEDED sqlQuery = "" for index in ["htm10ID", "htm13ID", "htm16ID"]: log.debug('adding %(index)s index to %(tableName)s' % locals()) iname = "idx_" + index asqlQuery = u""" SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s'; """ % locals() count = readquery( log=log, sqlQuery=asqlQuery, dbConn=dbConn )[0]["IndexIsThere"] if count == 0: if not len(sqlQuery): sqlQuery += u""" ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC) """ % locals() else: sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals() if len(sqlQuery): writequery( log=log, sqlQuery=sqlQuery + ";", dbConn=dbConn, ) log.debug('finished adding indexes to %(tableName)s' % locals()) if reindex: print "Re-enabling keys within the '%(tableName)s' table" % locals() sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) print "All HTMIds added to %(tableName)s" % locals() log.info('completed the ``add_htm_ids_to_mysql_database_table`` function') return None
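# CLARIFYING SKETCH (illustrative): the HTM ids written above come from
# HMpTy's `HTM.lookup_id`, which maps lists of RA/Dec (decimal degrees) to
# trixel ids at a given mesh depth. Shown here in isolation; the coordinates
# in the example call are made up.
from HMpTy import htm

def _example_htm_ids(raList, decList):
    mesh16 = htm.HTM(16)
    mesh10 = htm.HTM(10)
    htm16Ids = mesh16.lookup_id(raList, decList)
    htm10Ids = mesh10.lookup_id(raList, decList)
    return list(zip(raList, decList, htm16Ids, htm10Ids))

# e.g. _example_htm_ids([155.1259], [-15.1787])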
def import_new_atlas_pointings( self, recent=False): """ *Import any new ATLAS GW pointings from the atlas3 database into the ``atlas_pointings`` table of the Ligo-Virgo Waves database* **Key Arguments:** - ``recent`` -- only sync the most recent 2 months of data (speeds things up) **Return:** - None **Usage:** .. code-block:: python # IMPORT NEW ATLAS POINTINGS FROM ATLAS DATABASE INTO # LIGO-VIRGO WAVES DATABASE from breaker import update_ps1_atlas_footprint_tables dbUpdater = update_ps1_atlas_footprint_tables( log=log, settings=settings ) dbUpdater.import_new_atlas_pointings() """ self.log.debug('starting the ``import_new_atlas_pointings`` method') if recent: mjd = mjdnow( log=self.log ).get_mjd() recent = mjd - 62 recent = " mjd_obs > %(recent)s " % locals() else: recent = "1=1" # SELECT ALL OF THE POINTING INFO REQUIRED FROM THE ps1gw DATABASE sqlQuery = u""" SELECT `dec` as `decDeg`, `exptime` as `exp_time`, `filter`, `mjd_obs` as `mjd`, `ra` as `raDeg`, if(mjd_obs<57855.0,mag5sig-0.75,mag5sig) as `limiting_magnitude`, `object` as `atlas_object_id` from atlas_metadata where %(recent)s and object like "TA%%" order by mjd_obs desc; """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.atlasDbConn, quiet=False ) # TIDY RESULTS BEFORE IMPORT entries = list(rows) # ADD THE NEW RESULTS TO THE ps1_pointings TABLE insert_list_of_dictionaries_into_database_tables( dbConn=self.ligo_virgo_wavesDbConn, log=self.log, dictList=entries, dbTableName="atlas_pointings", uniqueKeyList=["raDeg", "decDeg", "mjd"], dateModified=False, batchSize=2500, replace=True ) # APPEND HTMIDs TO THE ps1_pointings TABLE add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="atlas_pointings", dbConn=self.ligo_virgo_wavesDbConn, log=self.log, primaryIdColumnName="primaryId" ) print "ATLAS pointings synced between `atlas_metadata` and `altas_pointings` database tables" self.log.debug('completed the ``import_new_atlas_pointings`` method') return None
def import_new_ps1_pointings( self, recent=False): """ *Import any new PS1 GW pointings from the ps1gw database into the ``ps1_pointings`` table of the Ligo-Virgo Waves database* **Key Arguments:** - ``recent`` -- only sync the most recent 2 months of data (speeds things up) **Return:** - None **Usage:** .. code-block:: python # IMPORT NEW PS1 POINTINGS FROM PS1 GW DATABASE INTO LIGO-VIRGO # WAVES DATABASE from breaker import update_ps1_atlas_footprint_tables dbUpdater = update_ps1_atlas_footprint_tables( log=log, settings=settings ) dbUpdater.import_new_ps1_pointings() """ self.log.debug('starting the ``import_new_ps1_pointings`` method') if recent: mjd = mjdnow( log=self.log ).get_mjd() recent = mjd - 62 recent = " and mjd_obs > %(recent)s " % locals() else: recent = "" # SELECT ALL OF THE POINTING INFO REQUIRED FROM THE ps1gw DATABASE tables = ["ps1_warp_stack_diff_skycells", "ps1_stack_stack_diff_skycells"] filenameMatch = ["ws", "ss"] for t, f in zip(tables, filenameMatch): sqlQuery = u""" SELECT imageid, ppsub_input, filename, m.exptime exp_time, TRUNCATE(mjd_obs, 8) mjd, LEFT(fpa_filter, 1) AS filter, IF(deteff_counts < 200, m.zero_pt + m.deteff_magref+2.5*log(10,exptime), m.zero_pt + m.deteff_magref + m.deteff_calculated_offset+2.5*log(10,exptime)) AS limiting_mag FROM tcs_cmf_metadata m where filename like "%%.%(f)s.%%" %(recent)s """ % locals() rows = readquery( log=self.log, sqlQuery=sqlQuery, dbConn=self.ps1gwDbConn, quiet=False ) # TIDY RESULTS BEFORE IMPORT entries = [] converter = unit_conversion( log=self.log ) for row in rows: e = {} e["exp_time"] = row["exp_time"] e["mjd"] = row["mjd"] e["filter"] = row["filter"] e["ps1_exp_id"] = row["imageid"] e["limiting_mag"] = row["limiting_mag"] e["filename"] = row["filename"] e["skycell_id"] = (".").join(row["filename"].split(".")[0:5]) e["target_image"] = row["ppsub_input"] entries.append(e) # ADD THE NEW RESULTS TO THE ps1_pointings TABLE insert_list_of_dictionaries_into_database_tables( dbConn=self.ligo_virgo_wavesDbConn, log=self.log, dictList=entries, dbTableName=t, uniqueKeyList=["filename"], dateModified=False, batchSize=2500, replace=True ) print "PS1 skycells synced between `tcs_cmf_metadata` and `%(t)s` database tables" % locals() self.log.debug('completed the ``import_new_ps1_pointings`` method') return None
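# CLARIFYING SKETCH (illustrative): the `skycell_id` stored above is simply the
# first five dot-separated components of the PS1 CMF filename.
def _example_skycell_id(filename):
    return ".".join(filename.split(".")[0:5])

# e.g. _example_skycell_id("a.b.c.d.e.f.cmf") -> "a.b.c.d.e"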
def _list_images_needing_cached(self, failedImage=False):
    """*get lists of the transientBucketIds and images needing cached for those transients*

    **Key Arguments**

    - ``failedImage`` -- second pass attempt to download alternative image for transients

    **Return**

    - ``transientBucketIds, subtractedUrls, targetUrls, referenceUrls, tripletUrls`` -- synced lists of transientBucketIds, subtracted-, target-, reference- and triplet-image urls. All lists are the same size.
    """
    self.log.debug('starting the ``_list_images_needing_cached`` method')

    subtractedUrls, targetUrls, referenceUrls, tripletUrls = [], [], [], []
    for imageType, v in list(self.stampFlagColumns.items()):
        if not v:
            continue
        imageUrl = imageType + "ImageUrl"

        # CREATE THE STAMP WHERE CLAUSE
        if not failedImage:
            stampWhere = v + " IS NULL "
        else:
            stampWhere = v + " = 2 "

        # CREATE THE SURVEY WHERE CLAUSE
        dbSurveyNames = "survey LIKE '%%" + \
            ("%%' OR survey LIKE '%%").join(self.dbSurveyNames) + "%%'"
        dbSurveyNames2 = dbSurveyNames.replace("survey L", "a.survey L")

        # NOW GENERATE SQL TO GET THE URLS OF STAMPS NEEDING DOWNLOADED
        if self.survey == "useradded":
            sqlQuery = u"""
                SELECT a.transientBucketId, a.%(imageUrl)s FROM transientBucket a
                JOIN (SELECT MIN(magnitude) AS mag, transientBucketId
                      FROM transientBucket
                      WHERE magnitude IS NOT NULL
                      AND %(imageUrl)s IS NOT NULL
                      AND transientBucketId in (select transientBucketId from fs_user_added)
                      AND transientBucketId IN (SELECT transientBucketId FROM pesstoObjects WHERE %(stampWhere)s and limitingMag = 0)
                      GROUP BY transientBucketId
                      ORDER BY transientBucketId) AS b
                ON a.transientBucketId = b.transientBucketId AND a.magnitude = b.mag
                WHERE limitingMag = 0
                GROUP BY transientBucketId;
            """ % locals()
        else:
            sqlQuery = u"""
                SELECT distinct a.transientBucketId, a.%(imageUrl)s FROM transientBucket a
                JOIN (SELECT MIN(magnitude) AS mag, transientBucketId
                      FROM transientBucket
                      WHERE magnitude IS NOT NULL
                      AND %(imageUrl)s IS NOT NULL
                      AND transientBucketId IN (SELECT transientBucketId FROM pesstoObjects WHERE %(stampWhere)s)
                      AND (%(dbSurveyNames)s)
                      and limitingMag = 0
                      GROUP BY transientBucketId
                      ORDER BY transientBucketId) AS b
                ON a.transientBucketId = b.transientBucketId AND a.magnitude = b.mag
                WHERE (%(dbSurveyNames2)s) and limitingMag = 0 and magnitude IS NOT NULL AND %(imageUrl)s IS NOT NULL
                GROUP BY a.transientBucketId;
            """ % locals()

        if failedImage:
            sqlQuery = sqlQuery.replace(
                "AND a.magnitude = b.mag", "").replace("GROUP BY a.transientBucketId;", "")

        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn,
        )

        # SPLIT URLS INTO STAMP TYPES AND ORDER ALONGSIDE
        # TRANSIENTBUCKETIDs
        transientBucketIds = []
        for row in rows:
            transientBucketIds.append(row["transientBucketId"])
            if imageType == "subtracted":
                subtractedUrls.append(row["subtractedImageUrl"])
            if imageType == "target":
                targetUrls.append(row["targetImageUrl"])
            if imageType == "reference":
                referenceUrls.append(row["referenceImageUrl"])
            if imageType == "triplet":
                tripletUrls.append(row["tripletImageUrl"])

    # FILL THE LISTS FOR ANY STAMP TYPES DISABLED FOR THIS SURVEY SO ALL
    # RETURNED LISTS REMAIN THE SAME LENGTH
    for imageType, v in list(self.stampFlagColumns.items()):
        if not v:
            if imageType == "subtracted":
                subtractedUrls = [None] * len(transientBucketIds)
            if imageType == "target":
                targetUrls = [None] * len(transientBucketIds)
            if imageType == "reference":
                referenceUrls = [None] * len(transientBucketIds)
            if imageType == "triplet":
                tripletUrls = [None] * len(transientBucketIds)

    self.log.debug('completed the ``_list_images_needing_cached`` method')

    self.transientBucketIds = transientBucketIds

    return transientBucketIds, subtractedUrls, targetUrls, referenceUrls, tripletUrls
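# A minimal consumption sketch (not from the source): the five lists returned above are
# index-aligned, so a caching step can walk them in lock-step. `stamps` is assumed to be
# an instance of the class this method belongs to, and `cache_image` is a hypothetical
# helper named here only for illustration.
transientBucketIds, subtractedUrls, targetUrls, referenceUrls, tripletUrls = \
    stamps._list_images_needing_cached(failedImage=False)

for tid, sub, tar, ref, tri in zip(
        transientBucketIds, subtractedUrls, targetUrls, referenceUrls, tripletUrls):
    # a URL is None when that stamp type is disabled for the survey
    for imageType, url in (("subtracted", sub), ("target", tar),
                           ("reference", ref), ("triplet", tri)):
        if url:
            cache_image(transientBucketId=tid, imageType=imageType, url=url)  # hypothetical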
def connect(self):
    """connect to the various databases, the credentials and settings of which are found in the sherlock settings file

    **Return:**
        - ``transientsDbConn`` -- the database hosting the transient source data
        - ``cataloguesDbConn`` -- connection to the database hosting the contextual catalogues the transients are to be crossmatched against
        - ``pmDbConn`` -- connection to the PESSTO Marshall database

    See the class docstring for usage

    .. todo ::

        - update key arguments values and definitions with defaults
        - update return values and definitions
        - update usage examples and text
        - update docstring text
        - check sublime snippet exists
        - clip any useful text to docs mindmap
        - regenerate the docs and check rendering of this docstring
    """
    self.log.debug('starting the ``connect`` method')

    transientSettings = self.settings["database settings"]["transients"]
    catalogueSettings = self.settings["database settings"][
        "static catalogues"]
    if "pessto marshall" in self.settings["database settings"]:
        marshallSettings = self.settings["database settings"][
            "pessto marshall"]
    else:
        marshallSettings = False

    dbConns = []
    for dbSettings in [transientSettings, catalogueSettings, marshallSettings]:
        port = False
        if dbSettings and dbSettings["tunnel"]:
            port = self._setup_tunnel(
                tunnelParameters=dbSettings["tunnel"])

        if dbSettings:
            # SETUP A DATABASE CONNECTION FOR THESE CREDENTIALS
            host = dbSettings["host"]
            user = dbSettings["user"]
            passwd = dbSettings["password"]
            dbName = dbSettings["db"]
            thisConn = ms.connect(
                host=host,
                user=user,
                passwd=passwd,
                db=dbName,
                port=port,
                use_unicode=True,
                charset='utf8',
                client_flag=ms.constants.CLIENT.MULTI_STATEMENTS,
                connect_timeout=3600
            )
            thisConn.autocommit(True)
            dbConns.append(thisConn)
        else:
            dbConns.append(None)

    # CREATE A DICTIONARY OF DATABASES
    dbConns = {
        "transients": dbConns[0],
        "catalogues": dbConns[1],
        "marshall": dbConns[2]
    }

    dbVersions = {}
    for k, v in dbConns.items():
        if v:
            sqlQuery = u"""
                SELECT VERSION() as v;
            """
            rows = readquery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=v,
                quiet=False
            )
            version = rows[0]['v']
            dbVersions[k] = version
        else:
            dbVersions[k] = None

    self.log.debug('completed the ``connect`` method')
    return dbConns, dbVersions
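# For reference, a hedged sketch of the settings shape that connect() reads. All values
# below are placeholders; only the key names are taken from the code above. The
# "pessto marshall" section is optional, and "tunnel" may be False or a dict of
# parameters handed to _setup_tunnel().
settings = {
    "database settings": {
        "transients": {
            "host": "127.0.0.1",
            "user": "dbuser",
            "password": "dbpass",
            "db": "transients",
            "tunnel": False
        },
        "static catalogues": {
            "host": "127.0.0.1",
            "user": "dbuser",
            "password": "dbpass",
            "db": "crossmatch_catalogues",
            "tunnel": False
        }
    }
}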
def _update_sdss_coverage(
        self):
    """ update sdss coverage

    .. todo ::

        - update key arguments values and definitions with defaults
        - update return values and definitions
        - update usage examples and text
        - update docstring text
        - check sublime snippet exists
        - clip any useful text to docs mindmap
        - regenerate the docs and check rendering of this docstring
    """
    self.log.debug('starting the ``_update_sdss_coverage`` method')

    tableName = self.dbTableName

    # SELECT THE LOCATIONS NEEDING TO BE CHECKED
    sqlQuery = u"""
        select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage
        from %(tableName)s
        where sdss_coverage is null
        and master_row = 1
        and in_ned = 1
        order by dist_mpc;
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )

    totalCount = len(rows)
    count = 0
    for row in rows:
        count += 1
        if count > 1:
            # CURSOR UP THREE LINES AND CLEAR THE PREVIOUS PROGRESS OUTPUT
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")

        if count > totalCount:
            count = totalCount
        percent = (float(count) / float(totalCount)) * 100.

        primaryID = row["primaryID"]
        raDeg = float(row["raDeg"])
        decDeg = float(row["decDeg"])
        primary_ned_id = row["primary_ned_id"]

        # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN
        time.sleep(1.1)
        print("%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals())
        print("NED NAME: ", primary_ned_id)

        # covered = True | False | 999 (i.e. not sure)
        sdss_coverage = check_coverage(
            log=self.log,
            ra=raDeg,
            dec=decDeg
        ).get()

        if sdss_coverage == 999:
            sdss_coverage_flag = "null"
        elif sdss_coverage == True:
            sdss_coverage_flag = 1
        elif sdss_coverage == False:
            sdss_coverage_flag = 0
        else:
            self.log.error('could not get sdss coverage')
            sys.exit(0)

        # UPDATE THE DATABASE FLAG
        sqlQuery = u"""
            update %(tableName)s
            set sdss_coverage = %(sdss_coverage_flag)s
            where primaryID = %(primaryID)s
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

    self.log.debug('completed the ``_update_sdss_coverage`` method')
    return None
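# A hedged restatement (the helper name is mine, not from the source) of the three-way
# mapping above, from the check_coverage() result onto the value written to the
# `sdss_coverage` column: 999 -> SQL NULL (undetermined), True -> 1, False -> 0.
def sdss_coverage_to_flag(sdss_coverage):
    """Map a check_coverage() result onto the database flag value."""
    if sdss_coverage == 999:
        return "null"
    if sdss_coverage is True:
        return 1
    if sdss_coverage is False:
        return 0
    raise ValueError("could not interpret sdss coverage: %r" % (sdss_coverage,))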