def test_table_exists_function(self): from fundamentals.mysql import writequery sqlQuery = "CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))" writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, Force=False, manyValueList=False ) from fundamentals.mysql import table_exists tableName = "testing_table" this = table_exists( dbConn=dbConn, log=log, dbTableName=tableName ) print "%(tableName)s exists: %(this)s" % locals() from fundamentals.mysql import writequery sqlQuery = "DROP TABLE `testing_table`;" writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, Force=False, manyValueList=False )
def test_writequery_function(self):
    """Exercise ``writequery`` with a simple CREATE TABLE statement."""
    from fundamentals.mysql import writequery

    createSql = "CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))"
    writequery(
        log=log,
        sqlQuery=createSql,
        dbConn=dbConn,
        Force=False,
        manyValueList=False
    )
def test_writequery_function_delete(self):
    """Exercise ``writequery`` with a DROP TABLE statement."""
    from fundamentals.mysql import writequery

    dropSql = "DROP TABLE `testing_table`;"
    writequery(
        log=log,
        sqlQuery=dropSql,
        dbConn=dbConn,
        Force=False,
        manyValueList=False
    )
def test_writequery_error_force(self):
    """Send ``writequery`` an invalid statement with ``Force=True`` (should not raise)."""
    from fundamentals.mysql import writequery

    badSql = "rubbish query;"
    writequery(
        log=log,
        sqlQuery=badSql,
        dbConn=dbConn,
        Force=True,
        manyValueList=False
    )
def test_manyvalue_insert(self):
    """Create a table then bulk-insert twelve rows via ``manyValueList``."""
    from fundamentals.mysql import writequery

    # BUILD THE FIXTURE TABLE
    writequery(
        log=log,
        sqlQuery="CREATE TABLE `testing_table` (`id` INT NOT NULL, PRIMARY KEY (`id`))",
        dbConn=dbConn,
        Force=False,
        manyValueList=False
    )

    # INSERT IDS 1-12 AS A PARAMETERISED MANY-VALUE LIST
    writequery(
        log=log,
        sqlQuery="""INSERT INTO testing_table (id) values (%s)""",
        dbConn=dbConn,
        Force=False,
        manyValueList=[(i,) for i in range(1, 13)]
    )
def _update_database_for_sent_item(
        self,
        primaryId,
        success):
    """*update the database to indicate that the PDFs have been sent to kindle(s)*

    **Key Arguments:**
        - ``primaryId`` -- unique ID of database entry to update
        - ``success`` -- outcome of the send; ``True`` marks the item as
          sent (``sentToKindle = 1``), ``404`` marks it as not found
          (``sentToKindle = -1``); any other value leaves the row untouched

    **Return:**
        - None
    """
    self.log.info(
        'starting the ``_update_database_for_sent_item`` method')

    # MAP OUTCOME -> FLAG STORED IN THE DATABASE, THEN RUN A SINGLE
    # PARAMETERISED UPDATE (the two original branches were identical
    # apart from the flag value)
    if success == True:
        sentFlag = 1
    elif success == 404:
        sentFlag = -1
    else:
        sentFlag = None

    if sentFlag is not None:
        sqlQuery = u"""
            update `reading-list` set sentToKindle = %(sentFlag)s where primaryId = %(primaryId)s
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn
        )

    self.log.info(
        'completed the ``_update_database_for_sent_item`` method')
    return None
def update_gravity_event_annotations(
        self):
    """*update gravity event annotations*

    Creates the gravity-event helper tables in each survey database (if
    missing), refreshes the per-event metadata tables, then annotates
    transients/exposures in each database against the localisation map
    of every configured gravitational-wave event, writing the results
    out as SQL insert scripts which are finally loaded into each
    database.

    **Return:**
        - None
    """
    # NOTE(review): message says 'completed' but is logged on entry --
    # looks like a copy/paste slip; string left untouched
    self.log.debug(
        'completed the ````update_gravity_event_annotations`` method')

    from breaker.transients import annotator

    # CREATE THE ANNOTATION HELPER TABLES IF THEY DON"T EXIST
    moduleDirectory = os.path.dirname(__file__)
    mysql_scripts = moduleDirectory + "/resources/mysql"
    for db in ["ps1gw", "ps13pi", "atlas"]:
        directory_script_runner(
            log=self.log,
            pathToScriptDirectory=mysql_scripts,
            databaseName=self.settings["database settings"][db]["db"],
            loginPath=self.settings["database settings"][db]["loginPath"],
            waitForResult=True,
            successRule=False,
            failureRule=False
        )
    for db in ["ligo_virgo_waves"]:
        directory_script_runner(
            log=self.log,
            pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables",
            databaseName=self.settings["database settings"][db]["db"],
            loginPath=self.settings["database settings"][db]["loginPath"],
            waitForResult=True,
            successRule=False,
            failureRule=False
        )

    # UPDATE THE TABLE WITH THE METADATA OF EACH GRAVITY EVENT
    sqlQuery = ""
    for g in self.settings["gravitational waves"]:
        h = self.settings["gravitational waves"][g]["human-name"]
        m = self.settings["gravitational waves"][g]["mjd"]
        cmd = """insert ignore into tcs_gravity_events (`gracedb_id`, `gravity_event_id`, `mjd`) VALUES ("%(g)s", "%(h)s", %(m)s) on duplicate key update mjd=%(m)s;\n""" % locals()
        sqlQuery += cmd
    for db in [self.atlasDbConn, self.ps1gwDbConn, self.ps13piDbConn]:
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=db
        )
    # THE WAVES DATABASE USES A DIFFERENTLY NAMED EVENTS TABLE
    sqlQuery = sqlQuery.replace("tcs_gravity_events", "gravity_events")
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.ligo_virgo_wavesDbConn,
    )

    # RE-RUN THE HELPER-TABLE SCRIPTS NOW THE EVENT METADATA EXISTS
    for db in ["ps1gw", "ps13pi", "atlas"]:
        directory_script_runner(
            log=self.log,
            pathToScriptDirectory=mysql_scripts,
            databaseName=self.settings["database settings"][db]["db"],
            loginPath=self.settings["database settings"][db]["loginPath"],
            waitForResult=True,
            successRule=False,
            failureRule=False
        )
    for db in ["ligo_virgo_waves"]:
        directory_script_runner(
            log=self.log,
            pathToScriptDirectory=mysql_scripts + "/ps1_skycell_help_tables",
            databaseName=self.settings["database settings"][db]["db"],
            loginPath=self.settings["database settings"][db]["loginPath"],
            waitForResult=True,
            successRule=False,
            failureRule=False
        )

    # MAP DATABASE NAMES TO THEIR OPEN CONNECTIONS
    dbDict = {
        "ps1gw": self.ps1gwDbConn,
        "atlas": self.atlasDbConn,
        "ps13pi": self.ps13piDbConn,
        "ligo_virgo_waves": self.ligo_virgo_wavesDbConn
    }

    # ANNOTATE EVERY DATABASE AGAINST EVERY CONFIGURED GRAVITY EVENT
    for db in dbDict.keys():
        for g in self.settings["gravitational waves"]:
            h = self.settings["gravitational waves"][g]["human-name"]
            print "Annotating new transients associated with gravity event %(h)s" % locals()
            m = self.settings["gravitational waves"][g]["mjd"]
            mapPath = self.settings["gravitational waves"][g]["mapPath"]
            mapName = os.path.basename(mapPath)
            thisDbConn = dbDict[db]

            # PS1 DATABASES: ANNOTATE TRANSIENTS NOT YET MATCHED TO THIS MAP
            if thisDbConn in [self.ps1gwDbConn, self.ps13piDbConn]:
                sqlQuery = u""" SELECT a.transient_object_id, a.gracedb_id, t.ra_psf, t.dec_psf FROM tcs_transient_objects t, tcs_gravity_event_annotations a WHERE a.transient_object_id = t.id AND t.detection_list_id != 0 AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals()
                rows = readquery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=thisDbConn,
                    quiet=False
                )
                transients = {}
                for r in rows:
                    transients[r["transient_object_id"]] = (
                        r["ra_psf"], r["dec_psf"])
                an = annotator(
                    log=self.log,
                    settings=self.settings,
                    gwid=g
                )
                transientNames, probs = an.annotate(transients)

            # ATLAS DATABASE: SAME ANNOTATION, DIFFERENT SOURCE TABLE/COLUMNS
            if thisDbConn in [self.atlasDbConn]:
                sqlQuery = u""" SELECT a.transient_object_id, a.gracedb_id, t.ra, t.dec FROM atlas_diff_objects t, tcs_gravity_event_annotations a WHERE a.transient_object_id = t.id AND t.detection_list_id != 0 AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals()
                rows = readquery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=thisDbConn,
                    quiet=False
                )
                transients = {}
                for r in rows:
                    transients[r["transient_object_id"]] = (
                        r["ra"], r["dec"])
                an = annotator(
                    log=self.log,
                    settings=self.settings,
                    gwid=g
                )
                transientNames, probs = an.annotate(transients)

            # WAVES DATABASE: ANNOTATE SURVEY FOOTPRINT EXPOSURES INSTEAD
            if thisDbConn in [self.ligo_virgo_wavesDbConn]:
                # PANSTARRS SKYCELLS
                sqlQuery = u""" SELECT a.skycell_id, a.gracedb_id, t.raDeg, t.decDeg FROM ps1_skycell_map t, ps1_skycell_gravity_event_annotations a WHERE a.skycell_id = t.skycell_id AND (a.map_name != "%(mapName)s" or a.map_name is null) AND a.gracedb_id="%(g)s"; """ % locals()
                rows = readquery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=thisDbConn,
                    quiet=False
                )
                exposures = {}
                for r in rows:
                    exposures[r["skycell_id"]] = (
                        r["raDeg"], r["decDeg"])
                stats = survey_footprint(
                    log=self.log,
                    settings=self.settings,
                    gwid=g
                )
                # pointingSide: side length (deg) of a PS1 skycell
                exposureIDs, probs = stats.annotate_exposures(
                    exposures=exposures,
                    pointingSide=0.4
                )
                dataList = []
                for p, t in zip(probs, exposureIDs):
                    dataList.append({
                        "skycell_id": t,
                        "prob_coverage": p,
                        "gracedb_id": g,
                        "map_name": mapName
                    })
                tableName = "ps1_skycell_gravity_event_annotations"
                dataSet = list_of_dictionaries(
                    log=self.log,
                    listOfDictionaries=dataList,
                    reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                )
                # RECURSIVELY CREATE MISSING DIRECTORIES
                if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                    os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                now = datetime.now()
                now = now.strftime("%Y%m%dt%H%M%S%f")
                mysqlData = dataSet.mysql(
                    tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

                # ATLAS EXPOSURES
                sqlQuery = u""" SELECT atlas_object_id, gracedb_id, raDeg, decDeg FROM atlas_exposure_gravity_event_annotations WHERE (map_name != "%(mapName)s" or map_name is null) AND gracedb_id="%(g)s"; """ % locals()
                rows = readquery(
                    log=self.log,
                    sqlQuery=sqlQuery,
                    dbConn=thisDbConn,
                    quiet=False
                )
                exposures = {}
                for r in rows:
                    exposures[r["atlas_object_id"]] = (
                        r["raDeg"], r["decDeg"])
                stats = survey_footprint(
                    log=self.log,
                    settings=self.settings,
                    gwid=g
                )
                # pointingSide: side length (deg) of an ATLAS exposure
                exposureIDs, probs = stats.annotate_exposures(
                    exposures=exposures,
                    pointingSide=5.46
                )
                dataList = []
                for p, t in zip(probs, exposureIDs):
                    dataList.append({
                        "atlas_object_id": t,
                        "prob_coverage": p,
                        "gracedb_id": g,
                        "map_name": mapName
                    })
                tableName = "atlas_exposure_gravity_event_annotations"
                dataSet = list_of_dictionaries(
                    log=self.log,
                    listOfDictionaries=dataList,
                    reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                )
                # RECURSIVELY CREATE MISSING DIRECTORIES
                if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                    os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                now = datetime.now()
                now = now.strftime("%Y%m%dt%H%M%S%f")
                mysqlData = dataSet.mysql(
                    tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

            # TRANSIENT DATABASES: DUMP THE ANNOTATIONS TO INSERT SCRIPTS
            # NOTE(review): relies on `transientNames`/`probs` having been
            # bound in one of the branches above -- confirm every non-waves
            # connection always matches one of those branches
            if thisDbConn not in [self.ligo_virgo_wavesDbConn]:
                dataList = []
                for p, t in zip(probs, transientNames):
                    dataList.append({
                        "transient_object_id": t,
                        "enclosing_contour": p,
                        "gracedb_id": g,
                        "map_name": mapName
                    })
                tableName = "tcs_gravity_event_annotations"
                dataSet = list_of_dictionaries(
                    log=self.log,
                    listOfDictionaries=dataList,
                    reDatetime=re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
                )
                # RECURSIVELY CREATE MISSING DIRECTORIES
                if not os.path.exists("/tmp/mysqlinsert/%(db)s" % locals()):
                    os.makedirs("/tmp/mysqlinsert/%(db)s" % locals())
                now = datetime.now()
                now = now.strftime("%Y%m%dt%H%M%S%f")
                mysqlData = dataSet.mysql(
                    tableName=tableName, filepath="/tmp/mysqlinsert/%(db)s/%(now)s.sql" % locals(), createStatement=False)

    # LOAD ALL OF THE GENERATED INSERT SCRIPTS INTO EACH DATABASE
    for db in dbDict.keys():
        directory_script_runner(
            log=self.log,
            pathToScriptDirectory="/tmp/mysqlinsert/%(db)s" % locals(),
            databaseName=self.settings["database settings"][db]["db"],
            loginPath=self.settings["database settings"][db]["loginPath"],
            waitForResult=True,
            successRule=False,
            failureRule=False
        )

    self.log.debug(
        'completed the ``update_gravity_event_annotations`` method')
    return None
def _do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
        self, batchCount):
    """*Query NED via name search and add result metadata to database*

    **Key Arguments**

    - ``batchCount`` - the index number of the batch sent to NED (only
      needed for printing to STDOUT to give user idea of progress)

    *Usage:*

    ```python
    numberSources = stream._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(batchCount=10)
    ```
    """
    self.log.debug(
        'starting the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method'
    )

    # ASTROCALC UNIT CONVERTER OBJECT
    converter = unit_conversion(log=self.log)
    tableName = self.dbTableName

    # QUERY NED WITH BATCH
    totalCount = len(self.theseIds)
    print(
        "requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals())

    # QUERY THE ONLINE NED DATABASE USING NEDDY'S NAMESEARCH METHOD
    search = namesearch(log=self.log, names=self.theseIds, quiet=True)
    results = search.get()
    print("results returned from ned -- starting to add to database" % locals())

    # CLEAN THE RETURNED DATA AND UPDATE DATABASE
    totalCount = len(results)
    count = 0
    sqlQuery = ""
    dictList = []
    for thisDict in results:
        thisDict["tableName"] = tableName
        count += 1
        # NORMALISE EACH VALUE: EMPTIES -> "null", STRIP ":?<" FROM
        # DIAMETER COLUMNS, ESCAPE DOUBLE QUOTES FOR SQL
        for k, v in list(thisDict.items()):
            if not v or len(v) == 0:
                thisDict[k] = "null"
            if k in ["major_diameter_arcmin", "minor_diameter_arcmin"
                     ] and (":" in v or "?" in v or "<" in v):
                thisDict[k] = v.replace(":", "").replace("?", "").replace("<", "")
            if isinstance(v, ("".__class__, u"".__class__)) and '"' in v:
                thisDict[k] = v.replace('"', '\\"')
        # SKIP UNMATCHED / DUPLICATE NED RESPONSES
        if "Input name not" not in thisDict[
                "input_note"] and "Same object as" not in thisDict[
                "input_note"]:
            try:
                thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
                    ra=thisDict["ra"])
                thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
                    dec=thisDict["dec"])
            except:
                name = thisDict["input_name"]
                self.log.warning(
                    "Could not convert the RA & DEC for the %(name)s NED source" % locals())
                continue
            thisDict["eb_v"] = thisDict["eb-v"]
            thisDict["ned_name"] = thisDict["input_name"]
            # KEEP ONLY THE COLUMNS DESTINED FOR THE DATABASE TABLE,
            # TRANSLATING THE "null" SENTINEL BACK TO None
            row = {}
            for k in [
                "redshift_quality", "redshift", "hierarchy",
                "object_type", "major_diameter_arcmin", "morphology",
                "magnitude_filter", "ned_notes", "eb_v", "raDeg",
                "radio_morphology", "activity_type",
                "minor_diameter_arcmin", "decDeg", "redshift_err",
                "ned_name"
            ]:
                if thisDict[k] == "null":
                    row[k] = None
                else:
                    row[k] = thisDict[k]
            dictList.append(row)

    self.add_data_to_database_table(
        dictList=dictList,
        createStatement="""SET SESSION sql_mode="";""")

    # FLAG THE WHOLE BATCH AS PROCESSED
    # NOTE(review): this sets download_error = 1 for every id in the
    # batch -- presumably cleared elsewhere for successful rows; verify
    theseIds = ("\", \"").join(self.theseIds)
    sqlQuery = u""" update %(tableName)s set download_error = 1 where ned_name in ("%(theseIds)s"); """ % locals()
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
    )

    print(
        "%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % locals())
    if count < totalCount:
        # Cursor up one line and clear line
        sys.stdout.write("\x1b[1A\x1b[2K")

    # RECORD WHEN THIS TABLE WAS LAST UPDATED
    sqlQuery = u""" update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s" """ % locals()
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
    )

    self.log.debug(
        'completed the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method'
    )
    return None
def _add_dictlist_to_database_via_load_in_file(masterListIndex, log, dbTablename, dbSettings):
    """*load a list of dictionaries into a database table with load data infile*

    **Key Arguments:**
        - ``masterListIndex`` -- the index of the sharedList of dictionary lists to process
        - ``dbTablename`` -- the name of the database table to add the list to
        - ``dbSettings`` -- the dictionary of database settings
        - ``log`` -- logger

    **Return:**
        - None
    """
    log.info(
        'starting the ``_add_dictlist_to_database_via_load_in_file`` function')

    # PICK THIS WORKER'S SLICE OUT OF THE MODULE-LEVEL SHARED LIST
    # NOTE(review): `totalCount` is read from module scope -- assumed to
    # be set by the coordinating function; confirm
    global sharedList
    dictList = sharedList[masterListIndex][0]
    count = sharedList[masterListIndex][1]
    if count > totalCount:
        count = totalCount
    ltotalCount = totalCount

    # SETUP ALL DATABASE CONNECTIONS
    dbConn = database(log=log, dbSettings=dbSettings).connect()

    # USE A TIMESTAMPED NAME SO CONCURRENT WORKERS DON'T COLLIDE
    now = datetime.now()
    tmpTable = now.strftime("tmp_%Y%m%dt%H%M%S%f")

    # CREATE A TEMPORY TABLE TO ADD DATA TO (same schema, zero rows)
    sqlQuery = """CREATE TEMPORARY TABLE %(tmpTable)s SELECT * FROM %(dbTablename)s WHERE 1=0;""" % locals(
    )
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    # UNION OF ALL KEYS ACROSS THE DICTIONARIES = CSV COLUMN SET
    csvColumns = [k for d in dictList for k in d.keys()]
    csvColumns = list(set(csvColumns))
    csvColumnsString = (', ').join(csvColumns)

    # DUMP THE DATA TO A PIPE-SEPARATED FILE, ENCODING MISSING VALUES AS
    # MYSQL'S \N NULL MARKER
    df = pd.DataFrame(dictList)
    df.replace(['nan', 'None', '', 'NaN', np.nan], '\\N', inplace=True)
    df.to_csv('/tmp/%(tmpTable)s' % locals(), sep="|", index=False,
              escapechar="\\", quotechar='"', columns=csvColumns)

    # BULK-LOAD THE FILE INTO THE TEMPORARY TABLE
    sqlQuery = """LOAD DATA LOCAL INFILE '/tmp/%(tmpTable)s' INTO TABLE %(tmpTable)s FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"' IGNORE 1 LINES (%(csvColumnsString)s);""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    # MERGE TEMP TABLE INTO THE REAL TABLE, UPDATING EXISTING ROWS
    updateStatement = ""
    for i in csvColumns:
        updateStatement += "`%(i)s` = VALUES(`%(i)s`), " % locals()
    updateStatement += "dateLastModified = NOW(), updated = 1"
    sqlQuery = """ INSERT IGNORE INTO %(dbTablename)s SELECT * FROM %(tmpTable)s ON DUPLICATE KEY UPDATE %(updateStatement)s;""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    # CLEAN UP THE TEMPORARY TABLE AND THE CSV FILE (best effort)
    sqlQuery = """DROP TEMPORARY TABLE %(tmpTable)s;""" % locals()
    writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
    try:
        os.remove('/tmp/%(tmpTable)s' % locals())
    except:
        pass

    log.info(
        'completed the ``_add_dictlist_to_database_via_load_in_file`` function'
    )
    return None
def populate_ps1_subdisk_table(
        self):
    """*Calculate 49 subdisks for each of the PS1 pointings (used to query NED in manageable sized batches) and add them to the ``ps1_pointings_subdisks`` table of the database*

    .. image:: http://i.imgur.com/y3G0aax.png
        :width: 600 px

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            # SPLIT PS1 POINTINGS INTO SUB-DISKS AND ADD TO LV DATABASE
            from breaker import update_ps1_atlas_footprint_tables
            dbUpdater = update_ps1_atlas_footprint_tables(
                log=log,
                settings=settings
            )
            dbUpdater.populate_ps1_subdisk_table()
    """
    # NOTE(review): message says 'completed' but is logged on entry --
    # looks like a copy/paste slip; string left untouched
    self.log.debug(
        'completed the ````populate_ps1_subdisk_table`` method')

    # SELECT THE PS1 POINTINGS NEEDING SUBDISKS CALCULATED
    sqlQuery = u""" select ps1_exp_id, raDeg, decDeg from ps1_pointings where subdisks_calculated = 0 and raDeg is not null """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.ligo_virgo_wavesDbConn,
        quiet=False
    )
    ps1PointNum = len(rows)

    # CALCULATE ALL OF THE SUBDISKS (49 per pointing, 1.5 deg radius)
    inserts = []
    expIds = []
    for row in rows:
        subDiskCoordinates = self._get_subdisk_parameters(
            row["raDeg"], row["decDeg"], 1.5)
        ps1_exp_id = row["ps1_exp_id"]
        expIds.append(ps1_exp_id)
        for i, c in enumerate(subDiskCoordinates):
            insert = {
                "raDeg": c[0],
                "decDeg": c[1],
                "ps1_exp_id": ps1_exp_id,
                "circleId": i + 1
            }
            inserts.append(insert)

    # ADD SUBDISKS TO DATABASE
    if len(inserts):
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.ligo_virgo_wavesDbConn,
            log=self.log,
            dictList=inserts,
            dbTableName="ps1_pointings_subdisks",
            uniqueKeyList=["ps1_exp_id", "circleId"],
            dateModified=False,
            batchSize=2500,
            replace=True
        )

    # UPDATE POINTINGS TABLE TO INDICATE SUBDISKS HAVE BEEN CALCULATED
    # NOTE(review): if no pointings were selected `theseIds` is empty and
    # this issues "... in ()" -- confirm writequery tolerates that
    theseIds = ",".join(expIds)
    sqlQuery = u""" update ps1_pointings set subdisks_calculated = 1 where ps1_exp_id in (%(theseIds)s) """ % locals()
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.ligo_virgo_wavesDbConn,
    )

    if ps1PointNum == 0:
        print "All PS1 pointings have been split into their 49 sub-disks" % locals()
    else:
        print "%(ps1PointNum)s new PS1 pointings have been split into 49 sub-disks - parameters added to the `ps1_pointings_subdisks` database table" % locals()

    # APPEND HTMIDs TO THE ps1_pointings_subdisks TABLE
    add_htm_ids_to_mysql_database_table(
        raColName="raDeg",
        declColName="decDeg",
        tableName="ps1_pointings_subdisks",
        dbConn=self.ligo_virgo_wavesDbConn,
        log=self.log,
        primaryIdColumnName="primaryId"
    )

    self.log.debug(
        'completed the ``populate_ps1_subdisk_table`` method')
    return None
def generate_atlas_lightcurves(dbConn, log, settings):
    """generate all atlas FP lightcurves (clipped and stacked)

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- settings for the marshall.

    ```python
    from marshallEngine.feeders.atlas.lightcurve import generate_atlas_lightcurves
    generate_atlas_lightcurves(
        log=log,
        dbConn=dbConn,
        settings=settings
    )
    ```
    """
    log.debug('starting the ``generate_atlas_lightcurves`` function')

    # SELECT SOURCES THAT NEED THEIR ATLAS FP LIGHTCURVES CREATED/UPDATED
    sqlQuery = u""" SELECT t.transientBucketId FROM transientBucket t ,pesstoObjects p WHERE p.transientBucketId=t.transientBucketId and t.survey = 'ATLAS FP' and t.limitingMag = 0 and ((p.atlas_fp_lightcurve < t.dateCreated and p.atlas_fp_lightcurve != 0) or p.atlas_fp_lightcurve is null) GROUP BY t.transientBucketId; """
    rows = readquery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)
    transientIds = [r["transientBucketId"] for r in rows]
    total = len(transientIds)

    # CAP EACH RUN AT 1000 SOURCES
    if total > 1000:
        print("ATLAS lightcurves need generated for %(total)s sources - generating next 1000" % locals())
        transientIds = transientIds[:1000]
        total = len(transientIds)
    else:
        print("Generating ATLAS lightcurves for %(total)s sources" % locals())

    # SETUP THE INITIAL FIGURE FOR THE PLOT (ONLY ONCE)
    fig = plt.figure(
        num=None,
        figsize=(10, 10),
        dpi=100,
        facecolor=None,
        edgecolor=None,
        frameon=True)
    mpl.rc('ytick', labelsize=18)
    mpl.rc('xtick', labelsize=18)
    mpl.rcParams.update({'font.size': 22})

    # FORMAT THE AXES
    ax = fig.add_axes(
        [0.1, 0.1, 0.8, 0.8],
        polar=False,
        frameon=True)
    ax.set_xlabel('MJD', labelpad=20)
    ax.set_yticks([2.2])

    # RHS AXIS TICKS
    plt.setp(ax.xaxis.get_majorticklabels(),
             rotation=45, horizontalalignment='right')
    ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%5.0f'))
    y_formatter = mpl.ticker.FormatStrFormatter("%2.1f")
    ax.yaxis.set_major_formatter(y_formatter)
    ax.xaxis.grid(False)

    # ADD SECOND Y-AXIS
    ax2 = ax.twinx()
    ax2.yaxis.set_major_formatter(y_formatter)
    ax2.set_ylabel('Flux ($\mu$Jy)', rotation=-90., labelpad=27)
    ax2.grid(False)

    # ADD SECOND X-AXIS
    ax3 = ax.twiny()
    ax3.grid(True)
    plt.setp(ax3.xaxis.get_majorticklabels(),
             rotation=45, horizontalalignment='left')

    # CONVERTER TO CONVERT MJD TO DATE
    converter = conversions(log=log)

    # PLOT SERIALLY FOR TINY BATCHES, OTHERWISE FAN OUT WITH
    # MULTIPROCESSING (shared fig/ax passed to every worker)
    if len(transientIds) < 3:
        plotPaths = []
        for transientBucketId in transientIds:
            plotPaths.append(plot_single_result(
                log=log, transientBucketId=transientBucketId, fig=fig, converter=converter, ax=ax, settings=settings))
    else:
        log.info("""starting multiprocessing""")
        plotPaths = fmultiprocess(log=log, function=plot_single_result,
                                  inputArray=transientIds, poolSize=False, timeout=7200, fig=fig, converter=converter, ax=ax, settings=settings)
        log.info("""finished multiprocessing""")

    # REMOVE MISSING PLOTS
    transientIdGood = [t for p, t in zip(plotPaths, transientIds) if p]
    transientIdBad = [t for p, t in zip(plotPaths, transientIds) if p is None]

    # UPDATE THE atlas_fp_lightcurve DATE FOR TRANSIENTS WE HAVE JUST
    # GENERATED PLOTS FOR
    if len(transientIdGood):
        transientIdGood = (",").join([str(t) for t in transientIdGood])
        sqlQuery = f"""update pesstoObjects set atlas_fp_lightcurve = NOW() where transientBucketID in ({transientIdGood})"""
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    # FLAG THE TRANSIENTS WHOSE PLOTS FAILED SO THEY ARE NOT RETRIED
    # IMMEDIATELY (atlas_fp_lightcurve = 0)
    if len(transientIdBad):
        transientIdBad = (",").join([str(t) for t in transientIdBad])
        sqlQuery = f"""update pesstoObjects set atlas_fp_lightcurve = 0 where transientBucketID in ({transientIdBad})"""
        writequery(log=log, sqlQuery=sqlQuery, dbConn=dbConn)

    log.debug('completed the ``generate_atlas_lightcurves`` function')
    return None
def plot(self):
    """*generate a batch of lightcurves using multiprocessing given their transientBucketIds*

    **Return**

    - ``filepath`` -- path to the last generated plot file (``False`` if no
      lightcurve was generated)

    **Usage**

    ```python
    from marshallEngine.lightcurves import marshall_lightcurves
    lc = marshall_lightcurves(
        log=log,
        dbConn=dbConn,
        settings=settings,
        transientBucketIds=[28421489, 28121353, 4637952, 27409808]
    )
    lc.plot()
    ```
    """
    self.log.debug('starting the ``plot`` method')

    # DEFINE AN INPUT ARRAY
    total = len(self.transientBucketIds)
    thisDict = {"database settings": self.settings["database settings"]}
    if total:
        print("updating lightcurves for %(total)s transients" % locals())
        print()

    # USE IF ISSUES IN _plot_one FUNCTION
    # for transientBucketId in self.transientBucketIds:
    #     _plot_one(
    #         transientBucketId=transientBucketId,
    #         log=self.log,
    #         settings=self.settings
    #     )

    # FAN OUT THE PLOTTING WORK ACROSS PROCESSES; EACH RESULT IS
    # (filepath, currentMagnitude, gradient)
    results = fmultiprocess(log=self.log, function=_plot_one,
                            inputArray=self.transientBucketIds, poolSize=False, timeout=3600, settings=self.settings)

    # ACCUMULATE ONE UPDATE STATEMENT PER SUCCESSFULLY PLOTTED TRANSIENT
    sqlQuery = ""
    updatedTransientBucketIds = []
    for t, r in zip(self.transientBucketIds, results):
        if not r[0]:
            # LIGHTCURVE NOT GENERATED
            continue
        updatedTransientBucketIds.append(t)
        filepath = r[0]
        currentMagnitude = r[1]
        gradient = r[2]
        sqlQuery += """update transientBucketSummaries set currentMagnitudeEstimate = %(currentMagnitude)s, currentMagnitudeEstimateUpdated = NOW(), recentSlopeOfLightcurve = %(gradient)s where transientBucketId = %(t)s; """ % locals()

    # FLAG THE UPDATED TRANSIENTS AS HAVING A MASTER LIGHTCURVE
    ids = []
    ids[:] = [str(i) for i in updatedTransientBucketIds]
    updatedTransientBucketIds = (",").join(ids)
    sqlQuery += "update pesstoObjects set master_pessto_lightcurve = 1 where transientBucketId in (%(updatedTransientBucketIds)s);" % locals(
    )

    # ONLY HIT THE DATABASE IF AT LEAST ONE LIGHTCURVE WAS GENERATED
    # (updatedTransientBucketIds is a comma-joined string at this point);
    # otherwise report failure via the return value
    if len(updatedTransientBucketIds):
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.dbConn,
        )
    else:
        filepath = False

    self.log.debug('completed the ``plot`` method')
    return filepath
def _clean_up_columns(
        self):
    """*Backfill housekeeping columns in the catalogue tables/views info tables.*

    Sets defaults for null ``old_table_name``/``old_view_name``,
    ``version_number``, ``in_ned`` and ``vizier_link`` columns, then
    derives ``object_type`` and ``table_id`` for any views still missing
    them.
    """
    self.log.debug('starting the ``_clean_up_columns`` method')

    # BACKFILL SIMPLE DEFAULTS IN THE INFO TABLES
    sqlQueries = [
        "update tcs_helper_catalogue_tables_info set old_table_name = table_name where old_table_name is null;",
        "update tcs_helper_catalogue_tables_info set version_number = 'stream' where table_name like '%%stream' and version_number is null;",
        """update tcs_helper_catalogue_tables_info set in_ned = 0 where table_name like '%%stream' and in_ned is null;""",
        """update tcs_helper_catalogue_tables_info set vizier_link = 0 where table_name like '%%stream' and vizier_link is null;""",
        "update tcs_helper_catalogue_views_info set old_view_name = view_name where old_view_name is null;",
    ]
    for sqlQuery in sqlQueries:
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

    # VIEW OBJECT TYPES
    # (derived from the view name: tcs_view_<objectType>_...)
    sqlQuery = u""" SELECT view_name FROM tcs_helper_catalogue_views_info where legacy_view = 0 and object_type is null; """ % locals()
    rows = readquery(log=self.log,
                     sqlQuery=sqlQuery,
                     dbConn=self.cataloguesDbConn,
                     quiet=False)
    for row in rows:
        view_name = row["view_name"]
        object_type = view_name.replace("tcs_view_", "").split("_")[0]
        sqlQuery = u""" update tcs_helper_catalogue_views_info set object_type = "%(object_type)s" where view_name = "%(view_name)s" """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

    # MASTER TABLE ID FOR VIEWS
    # (derived by mapping tcs_view_<type>_<rest> -> tcs_cat_<rest>)
    sqlQuery = u""" SELECT view_name FROM tcs_helper_catalogue_views_info where legacy_view = 0 and table_id is null; """ % locals()
    rows = readquery(log=self.log,
                     sqlQuery=sqlQuery,
                     dbConn=self.cataloguesDbConn,
                     quiet=False)
    for row in rows:
        view_name = row["view_name"]
        table_name = view_name.replace("tcs_view_", "").split("_")[1:]
        table_name = ("_").join(table_name)
        table_name = "tcs_cat_%(table_name)s" % locals()
        sqlQuery = u""" update tcs_helper_catalogue_views_info set table_id = (select id from tcs_helper_catalogue_tables_info where table_name = "%(table_name)s") where view_name = "%(view_name)s" """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

    self.log.debug('completed the ``_clean_up_columns`` method')
    return None
def add_htm_ids_to_mysql_database_table(
        raColName,
        declColName,
        tableName,
        dbConn,
        log,
        primaryIdColumnName="primaryId",
        cartesian=False,
        batchSize=50000,
        reindex=False,
        dbSettings=False):
    """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table*

    **Key Arguments**

    - ``raColName`` -- ra in sexegesimal
    - ``declColName`` -- dec in sexegesimal
    - ``tableName`` -- name of table to add htmid info to
    - ``dbConn`` -- database hosting the above table
    - ``log`` -- logger
    - ``primaryIdColumnName`` -- the primary id for the table
    - ``cartesian`` -- add cartesian columns. Default *False*
    - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *50000*
    - ``reindex`` -- reindex the entire table
    - ``dbSettings`` -- yaml settings for database

    **Return**

    - None

    **Raises**

    - ``IOError`` -- if the table, or the RA/DEC columns, do not exist

    **Usage**

    ```python
    from HMpTy.mysql import add_htm_ids_to_mysql_database_table
    add_htm_ids_to_mysql_database_table(
        raColName="raDeg",
        declColName="decDeg",
        tableName="my_big_star_table",
        dbConn=dbConn,
        log=log,
        primaryIdColumnName="primaryId",
        reindex=False
    )
    ```
    """
    log.debug('starting the ``add_htm_ids_to_mysql_database_table`` function')

    # TEST TABLE EXISTS
    sqlQuery = """show tables"""
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )
    log.debug(
        """Checking the table %(tableName)s exists in the database""" % locals())
    tableList = []
    for row in rows:
        tableList.append(list(row.values())[0].lower())
    if tableName.lower() not in tableList:
        message = "The %s table does not exist in the database" % (tableName,)
        log.critical(message)
        raise IOError(message)

    log.debug(
        """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals())
    # TEST COLUMNS EXIST - PULL ONE ROW AND INSPECT THE CURSOR DESCRIPTION
    cursor = dbConn.cursor(ms.cursors.DictCursor)
    sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,)
    cursor.execute(sqlQuery)
    rows = cursor.fetchall()
    desc = cursor.description
    existingColumns = []
    for i in range(len(desc)):
        existingColumns.append(desc[i][0])
    if (raColName not in existingColumns) or (declColName not in existingColumns):
        # FIXED TYPO: "naes" --> "names"
        message = 'Please make sure you have got the names of the RA and DEC columns correct'
        log.critical(message)
        raise IOError(message)

    # COLUMNS THAT NEED TO EXIST BEFORE HTMIDs CAN BE WRITTEN
    if cartesian:
        htmCols = {
            'htm16ID': 'BIGINT(20)',
            'htm13ID': 'INT',
            'htm10ID': 'INT',
            'cx': 'DOUBLE',
            'cy': 'DOUBLE',
            'cz': 'DOUBLE'
        }
    else:
        htmCols = {
            'htm16ID': 'BIGINT(20)',
            'htm13ID': 'INT',
            'htm10ID': 'INT'
        }

    # CHECK IF COLUMNS EXIST YET - IF NOT CREATE THEM
    # BUG FIX: `switch` is now initialised ONCE, outside the loop, so the
    # "Adding the HTMCircle columns" notice prints a single time instead of
    # once per missing column
    switch = 0
    for key in list(htmCols.keys()):
        try:
            log.debug(
                'attempting to check and generate the HTMId columns for the %s db table' % (tableName,))
            colExists = \
                """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \
                % (key, tableName)
            colExists = readquery(
                log=log,
                sqlQuery=colExists,
                dbConn=dbConn
            )
            if not colExists:
                if switch == 0:
                    print("Adding the HTMCircle columns to %(tableName)s" % locals())
                    switch = 1
                sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
                    key + ' ' + htmCols[key] + ' DEFAULT NULL'
                writequery(
                    log=log,
                    sqlQuery=sqlQuery,
                    dbConn=dbConn,
                )
        except Exception as e:
            log.critical(
                'could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' % (tableName, str(e)))
            raise e

    log.debug(
        """Counting the number of rows still requiring HTMID information""")
    if reindex:
        # A FULL REINDEX WALKS THE TABLE IN PRIMARY-KEY ORDER, SO THE
        # SUBMITTED ID COLUMN MUST BE THE (UNIQUE) PRIMARY KEY
        sqlQuery = u"""
            SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s";
        """ % locals()
        keyname = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )[0]["INDEX_NAME"]
        if keyname != "PRIMARY":
            log.error(
                'To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals())
            return
        # DISABLE KEYS FOR FASTER BULK UPDATES
        sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )
        sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals()
    elif cartesian:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null and %(raColName)s is not null""" % locals()
    else:
        # COUNT ROWS WHERE HTMIDs ARE NOT SET
        sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL and %(raColName)s is not null""" % locals()
    log.debug("""SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals())
    rowCount = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        quiet=False
    )
    totalCount = rowCount[0]["count"]

    # ADD HTMIDs IN BATCHES
    total = totalCount
    batches = int(old_div(total, batchSize))
    count = 0
    lastId = False

    # HOISTED OUT OF THE BATCH LOOP - THE MESHES ARE LOOP-INVARIANT AND
    # WERE PREVIOUSLY REBUILT ON EVERY ITERATION
    from HMpTy import htm
    mesh16 = htm.HTM(16)
    mesh13 = htm.HTM(13)
    mesh10 = htm.HTM(10)

    # NOW GENERATE THE HTMIds FOR THESE ROWS
    for i in range(batches + 1):
        if total == 0:
            continue
        count += batchSize
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount
        start = time.time()
        log.debug(
            """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals())
        if reindex:
            # WALK THE TABLE IN PRIMARY-KEY ORDER, KEYSET-PAGINATING ON THE
            # LAST ID SEEN
            if lastId:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize)
            else:
                sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % (
                    primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize)
        elif cartesian:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and ((htm10ID is NULL or cx is null)) limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
        else:
            # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
            sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and htm10ID is NULL limit %s""" % (
                primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
        batch = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )
        if reindex and len(batch):
            lastId = batch[-1][primaryIdColumnName]
        log.debug(
            """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals())

        raList = []
        decList = []
        pIdList = []
        raList[:] = [r[raColName] for r in batch]
        decList[:] = [r[declColName] for r in batch]
        pIdList[:] = [r[primaryIdColumnName] for r in batch]

        log.debug('calculating htmIds for batch of %s rows in %s db table' % (
            batchSize, tableName,))
        htm16Ids = mesh16.lookup_id(raList, decList)
        htm13Ids = mesh13.lookup_id(raList, decList)
        htm10Ids = mesh10.lookup_id(raList, decList)
        log.debug(
            'finished calculating htmIds for batch of %s rows in %s db table' % (
                batchSize, tableName,))

        if cartesian:
            log.debug(
                'calculating cartesian coordinates for batch of %s rows in %s db table' % (
                    batchSize, tableName,))
            # UNIT-SPHERE CARTESIAN COORDINATES FROM RA/DEC
            cx = []
            cy = []
            cz = []
            for r, d in zip(raList, decList):
                r = math.radians(r)
                d = math.radians(d)
                cos_dec = math.cos(d)
                cx.append(math.cos(r) * cos_dec)
                cy.append(math.sin(r) * cos_dec)
                cz.append(math.sin(d))
            updates = []
            updates[:] = [{
                "htm16ID": int(h16),
                "htm13ID": int(h13),
                "htm10ID": int(h10),
                primaryIdColumnName: pid,
                "cx": float(ccx),
                "cy": float(ccy),
                "cz": float(ccz)
            } for h16, h13, h10, pid, ccx, ccy, ccz in zip(
                htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz)]
            log.debug(
                'finished calculating cartesian coordinates for batch of %s rows in %s db table' % (
                    batchSize, tableName,))
        else:
            log.debug('building the sqlquery')
            updates = []
            updates[:] = [{
                "htm16ID": int(h16),
                "htm13ID": int(h13),
                "htm10ID": int(h10),
                primaryIdColumnName: pid
            } for h16, h13, h10, pid in zip(
                htm16Ids, htm13Ids, htm10Ids, pIdList)]
            log.debug('finished building the sqlquery')

        if len(updates):
            log.debug(
                'starting to update the HTMIds for new objects in the %s db table' % (tableName,))
            # USE dbSettings & dbConn TO ACTIVATE MULTIPROCESSING
            insert_list_of_dictionaries_into_database_tables(
                dbConn=dbConn,
                log=log,
                dictList=updates,
                dbTableName=tableName,
                uniqueKeyList=[],
                dateModified=False,
                batchSize=20000,
                replace=True,
                dbSettings=dbSettings,
                dateCreated=False
            )
            log.debug(
                'finished updating the HTMIds for new objects in the %s db table' % (tableName,))
        else:
            log.debug('no HTMIds to add to the %s db table' % (tableName,))

        percent = float(count) * 100. / float(totalCount)
        print(
            "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals())
        end = time.time()
        timediff = end - start
        # NORMALISE TO SECONDS PER MILLION ROWS
        timediff = timediff * 1000000. / float(batchSize)
        print("Update speed: %(timediff)0.2fs/1e6 rows\n" % locals())

    # APPLY INDEXES IF NEEDED - BUILD ONE COMBINED ALTER STATEMENT
    sqlQuery = ""
    for index in ["htm10ID", "htm13ID", "htm16ID"]:
        log.debug('adding %(index)s index to %(tableName)s' % locals())
        iname = "idx_" + index
        asqlQuery = u"""
            SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s';
        """ % locals()
        count = readquery(
            log=log,
            sqlQuery=asqlQuery,
            dbConn=dbConn
        )[0]["IndexIsThere"]
        if count == 0:
            if not len(sqlQuery):
                sqlQuery += u"""ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals()
            else:
                sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals()
    if len(sqlQuery):
        writequery(
            log=log,
            sqlQuery=sqlQuery + ";",
            dbConn=dbConn,
        )
    log.debug('finished adding indexes to %(tableName)s' % locals())

    if reindex:
        print("Re-enabling keys within the '%(tableName)s' table" % locals())
        sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

    print("All HTMIds added to %(tableName)s" % locals())
    log.debug('completed the ``add_htm_ids_to_mysql_database_table`` function')
    return None
def get(self, days): """ *download a cache of ATLAS nights data* **Key Arguments:** - ``days`` -- the number of days data to cache locally **Return:** - None **Usage:** See class docstring """ self.log.info('starting the ``get`` method') self._remove_processed_data() archivePath = self.settings["atlas archive path"] self._update_day_tracker_table() mjds = self._determine_mjds_to_download(days=days) if len(mjds) == 0: return dbConn = self.atlasMoversDBConn # DOWNLOAD THE DATA IN PARALLEL results = fmultiprocess(log=self.log, function=_download_one_night_of_atlas_data, timeout=3600, inputArray=mjds, archivePath=archivePath) global dbSettings dbSettings = self.settings["database settings"]["atlasMovers"] for d in results: if d and len(d[0]): insert_list_of_dictionaries_into_database_tables( dbConn=dbConn, log=self.log, dictList=d[0], dbTableName="atlas_exposures", dateModified=True, batchSize=10000, replace=True, dbSettings=dbSettings) # UPDATE BOOKKEEPING mjds = [] mjds[:] = [r[1] for r in results if (r and r[1] is not None)] mjds = (',').join(mjds) if len(mjds): sqlQuery = """update atlas_exposures set local_data = 1 where floor(mjd) in (%(mjds)s); update day_tracker set processed = 1 where mjd in (%(mjds)s);""" % locals( ) writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.atlasMoversDBConn, ) bk = bookkeeper(log=self.log, settings=self.settings, fullUpdate=False) bk.clean_all() self.log.info('completed the ``get`` method') return None
def _updated_row_counts_in_tcs_helper_catalogue_tables_info(self): """ updated row counts in tcs catalogue tables .. todo :: - update key arguments values and definitions with defaults - update return values and definitions - update usage examples and text - update docstring text - check sublime snippet exists - clip any useful text to docs mindmap - regenerate the docs and check redendering of this docstring """ self.log.debug( 'starting the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method' ) sqlQuery = u""" select * from tcs_helper_catalogue_tables_info where table_name like "%%stream" or (number_of_rows is null and legacy_table = 0) """ % locals() rows = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, quiet=False) for row in rows: tbName = row["table_name"] sqlQuery = u""" update tcs_helper_catalogue_tables_info set number_of_rows = (select count(*) as count from %(tbName)s) where table_name = "%(tbName)s" """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, ) sqlQuery = u""" select * from tcs_helper_catalogue_views_info where (number_of_rows is null and legacy_view = 0) """ % locals() rows = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, quiet=False) for row in rows: tbName = row["view_name"] print tbName sqlQuery = u""" update tcs_helper_catalogue_views_info set number_of_rows = (select count(*) as count from %(tbName)s) where view_name = "%(tbName)s" """ % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, ) self.log.debug( 'completed the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method' ) return None
raise IOError(message) fitsDict = {} for l in thisData.split("\n"): kw = l.split("=")[0].strip() if kw in fitskw.keys() and kw not in fitsDict.keys(): fitsDict[fitskw[kw]] = l.split("=")[1].split( "/")[0].strip().replace("'", "") if len(fitsDict) == 7: allData.append(fitsDict) sqlQuery = """ update atlas_exposures set dev_flag = 1 where dev_flag = 0 and floor(mjd) in (select mjd from day_tracker where dev_flag = 1);""" % locals( ) writequery(log=log, sqlQuery=sqlQuery, dbConn=atlasMoversDBConn) return (allData, str(int(mjd))) class download(): """ *The worker class for the download module* **Key Arguments:** - ``log`` -- logger - ``settings`` -- the settings dictionary **Usage:** To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).
def _create_tcs_help_tables(
        self):
    """* create tcs help tables*

    Creates (if they do not already exist) the two bookkeeping tables
    used by the catalogue importers:
    ``tcs_helper_catalogue_tables_info`` and
    ``tcs_helper_catalogue_views_info``.

    **Key Arguments**

    #

    **Return**

    - None

    **Usage**

    ```python
    usage code
    ```

    ---

    ```eval_rst
    .. todo::

        - add usage info
        - create a sublime snippet for usage
        - write a command-line tool for this method
        - update package tutorial with command-line tool info if needed
    ```
    """
    self.log.debug('starting the ``_create_tcs_help_tables`` method')

    # MASTER TABLE OF IMPORTED CATALOGUE TABLES - ONE ROW PER CATALOGUE,
    # RECORDING PROVENANCE, ROW COUNTS AND THE COLUMN-NAME MAP USED BY
    # THE CROSSMATCH CODE (ra/dec/z/mag columns etc)
    sqlQuery = """
CREATE TABLE IF NOT EXISTS `tcs_helper_catalogue_tables_info` (
  `id` smallint(5) unsigned NOT NULL,
  `table_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `description` varchar(60) COLLATE utf8_unicode_ci DEFAULT NULL,
  `url` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
  `number_of_rows` bigint(20) DEFAULT NULL,
  `reference_url` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
  `reference_text` varchar(70) COLLATE utf8_unicode_ci DEFAULT NULL,
  `notes` text COLLATE utf8_unicode_ci,
  `vizier_link` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
  `in_ned` tinyint(4) DEFAULT NULL,
  `object_types` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `version_number` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `last_updated` datetime DEFAULT NULL,
  `legacy_table` tinyint(4) DEFAULT '0',
  `old_table_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `raColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `decColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `catalogue_object_subtypeColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `catalogue_object_idColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `zColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `distanceColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `object_type_accuracy` tinyint(2) DEFAULT NULL,
  `semiMajorColName` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `semiMajorToArcsec` float DEFAULT NULL,
  `transientStream` tinyint(4) DEFAULT '0',
  `photoZColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `photoZErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `UColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `UErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `BColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `BErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `VColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `VErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `RColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `RErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `IColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `IErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `JColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `JErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `HColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `HErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `KColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `KErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_uColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_uErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_gColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_gErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_rColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_rErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_iColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_iErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_zColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_zErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_yColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `_yErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `unkMagColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `unkMagErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `GColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  `GErrColName` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn
    )

    # COMPANION TABLE OF THE DATABASE VIEWS BUILT ON TOP OF THE CATALOGUE
    # TABLES - `table_id` POINTS BACK INTO THE TABLES-INFO TABLE ABOVE
    sqlQuery = """
CREATE TABLE IF NOT EXISTS `tcs_helper_catalogue_views_info` (
  `id` smallint(5) unsigned NOT NULL AUTO_INCREMENT,
  `view_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `number_of_rows` bigint(20) DEFAULT NULL,
  `object_type` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `legacy_view` tinyint(4) DEFAULT '0',
  `old_view_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
  `table_id` int(11) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=50 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn
    )

    self.log.debug('completed the ``_create_tcs_help_tables`` method')
    return None
def ingest(self, withinLastDays=False): """*Ingest the data into the marshall feeder survey table* **Key Arguments** - ``withinLastDays`` -- note this will be handle by the transientNamer import to the database """ self.log.debug('starting the ``ingest`` method') # UPDATE THE TNS SPECTRA TABLE WITH EXTRA INFOS from fundamentals.mysql import writequery sqlQuery = """CALL `update_tns_tables`();""" % locals() writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn) # PARSE TNS tns = search(log=self.log, discInLastDays=withinLastDays, settings=self.settings) lists = [tns.sources, tns.photometry, tns.files, tns.spectra] tableNames = [ "tns_sources", "tns_photometry", "tns_files", "tns_spectra" ] for l, t in zip(lists, tableNames): # USE dbSettings TO ACTIVATE MULTIPROCESSING - INSERT LIST OF # DICTIONARIES INTO DATABASE insert_list_of_dictionaries_into_database_tables( dbConn=self.dbConn, log=self.log, dictList=l, dbTableName=t, dateModified=True, dateCreated=True, batchSize=2500, replace=True, dbSettings=self.settings["database settings"]) # INSERT THE SOURCES TABLE self.insert_into_transientBucket() # NOW THE SPECTRA TABLE self.fsTableName = "tns_spectra" self.survey = "tns" self.insert_into_transientBucket(importUnmatched=False) # NOW THE PHOTOMETRY TABLE self.fsTableName = "tns_photometry" self.survey = "tns" self.insert_into_transientBucket(importUnmatched=False) # ALSO MATCH NEW ASTRONOTES sqlQuery = """CALL sync_marshall_feeder_survey_transientBucketId('astronotes_transients');""" % locals( ) writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn) # CLEAN UP TASKS TO MAKE THE TICKET UPDATE self.clean_up() self.log.debug('completed the ``ingest`` method') return None
def _feeder_survey_transientbucket_crossmatch(self):
    """*crossmatch remaining unique, unmatched sources in feeder survey with sources in the transientbucket & copy matched feeder survey rows to the transientbucket*

    **Return**

    - ``unmatched`` -- a list of the unmatched (i.e. new to the marshall) feeder survey surveys
    """
    self.log.debug(
        'starting the ``_feeder_survey_transientbucket_crossmatch`` method')

    fsTableName = self.fsTableName

    # GET THE COLUMN MAP FOR THE FEEDER SURVEY TABLE
    # (maps generic transientBucket column names to this survey's own
    # column names)
    sqlQuery = u"""
        SELECT * FROM marshall_fs_column_map where fs_table_name = '%(fsTableName)s' and transientBucket_column in ('name','raDeg','decDeg','limitingMag')
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn,
        quiet=False
    )
    columns = {}
    for row in rows:
        columns[row["transientBucket_column"]] = row["fs_table_column"]

    # WITHOUT AN RA COLUMN MAPPING THERE IS NOTHING TO CROSSMATCH ON
    if "raDeg" not in columns:
        print(f"No coordinates to match in the {fsTableName} table")
        return []

    # BUILD QUERY TO GET UNIQUE UN-MATCHED SOURCES
    fs_name = columns["name"]
    self.fs_name = fs_name
    fs_ra = columns["raDeg"]
    fs_dec = columns["decDeg"]
    # IF A LIMITING-MAG COLUMN IS MAPPED, EXCLUDE LIMIT (NON-DETECTION) ROWS
    if 'limitingMag' in columns:
        fs_lim = columns["limitingMag"]
        limitClause = " and %(fs_lim)s = 0 " % locals()
    else:
        limitClause = ""
    # AVERAGE COORDINATES PER SOURCE NAME ACROSS ALL UN-INGESTED ROWS
    sqlQuery = u"""
        select %(fs_name)s, avg(%(fs_ra)s) as %(fs_ra)s, avg(%(fs_dec)s) as %(fs_dec)s from %(fsTableName)s where ingested = 0 %(limitClause)s and %(fs_ra)s is not null and %(fs_dec)s is not null group by %(fs_name)s
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn,
        quiet=False
    )

    # STOP IF NO MATCHES
    if not len(rows):
        return []

    # SPLIT INTO BATCHES SO NOT TO OVERWHELM MEMORY
    batchSize = 200
    total = len(rows)
    batches = int(old_div(total, batchSize))
    start = 0
    end = 0
    theseBatches = []
    for i in range(batches + 1):
        end = end + batchSize
        start = i * batchSize
        thisBatch = rows[start:end]
        theseBatches.append(thisBatch)

    unmatched = []
    ticker = 0
    for batch in theseBatches:
        # ROWS WITH ra == 0/None ARE FILTERED OUT OF ALL THREE LISTS IN
        # LOCKSTEP (same `row[fs_ra]` predicate keeps indices aligned)
        fs_name_list = []
        fs_ra_list = []
        fs_dec_list = []
        fs_name_list = [row[fs_name] for row in batch if row[fs_ra]]
        fs_ra_list = [row[fs_ra] for row in batch if row[fs_ra]]
        fs_dec_list = [row[fs_dec] for row in batch if row[fs_ra]]
        ticker += len(fs_name_list)
        print(
            "Matching %(ticker)s/%(total)s sources in the %(fsTableName)s against the transientBucket table" % locals())

        # CONESEARCH TRANSIENT BUCKET FOR PRE-KNOWN SOURCES FROM OTHER
        # SURVEYS
        from HMpTy.mysql import conesearch
        cs = conesearch(
            log=self.log,
            dbConn=self.dbConn,
            tableName="transientBucket",
            columns="transientBucketId, name",
            ra=fs_ra_list,
            dec=fs_dec_list,
            radiusArcsec=3.5,
            separations=True,
            distinct=True,
            sqlWhere="masterIDFlag=1",
            closest=True
        )
        matchIndies, matches = cs.search()

        # CREATE SQL QUERY TO UPDATE MATCHES IN FS TABLE WITH MATCHED
        # TRANSIENTBUCKET IDs
        # NOTE(review): SQL built by string concatenation - a source name
        # containing a single quote would break/inject this statement;
        # values come from our own feeder-survey tables, but consider
        # parameterising
        updates = []
        originalList = matches.list
        originalTotal = len(originalList)
        print(
            "Adding %(originalTotal)s new %(fsTableName)s transient detections to the transientBucket table" % locals())
        if originalTotal:
            updates = []
            updates[:] = ["update " + fsTableName + " set transientBucketId = " + str(o['transientBucketId']) + " where " + fs_name + " = '" + str(
                fs_name_list[m]) + "' and transientBucketId is null;" for m, o in zip(matchIndies, originalList)]
            updates = ("\n").join(updates)
            writequery(
                log=self.log,
                sqlQuery=updates,
                dbConn=self.dbConn
            )

        # RETURN UNMATCHED TRANSIENTS (i.e. new to the marshall)
        for i, v in enumerate(fs_name_list):
            if i not in matchIndies:
                unmatched.append(v)

    # COPY MATCHED ROWS TO TRANSIENTBUCKET
    self._feeder_survey_transientbucket_name_match_and_import()

    self.log.debug(
        'completed the ``_feeder_survey_transientbucket_crossmatch`` method')
    return unmatched
def _update_tcs_helper_catalogue_views_info_with_new_views(self): """ update tcs helper catalogue tables info with new tables .. todo :: - update key arguments values and definitions with defaults - update return values and definitions - update usage examples and text - update docstring text - check sublime snippet exists - clip any useful text to docs mindmap - regenerate the docs and check redendering of this docstring """ self.log.debug( 'starting the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method' ) sqlQuery = u""" SELECT max(id) as thisId FROM tcs_helper_catalogue_views_info; """ % locals() thisId = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, quiet=False) try: highestId = thisId[0]["thisId"] + 1 except: highestId = 1 sqlQuery = u""" SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE='VIEW' AND TABLE_SCHEMA like '%%catalogues%%' and TABLE_NAME like "tcs_view%%" and TABLE_NAME not like "%%helper%%"; """ % locals() tablesInDatabase = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, quiet=False) sqlQuery = u""" SELECT view_name FROM tcs_helper_catalogue_views_info; """ % locals() tableList = readquery(log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, quiet=False) tbList = [] for tb in tableList: tbList.append(tb["view_name"]) for tb in tablesInDatabase: if tb["TABLE_NAME"] not in tbList: thisViewName = tb["TABLE_NAME"] print "`%(thisViewName)s` added to `tcs_helper_catalogue_views_info` database table" % locals( ) sqlQuery = u""" INSERT INTO tcs_helper_catalogue_views_info ( id, view_name ) VALUES ( %(highestId)s, "%(thisViewName)s" )""" % locals() writequery( log=self.log, sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, ) highestId += 1 self.log.debug( 'completed the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method' ) return None
"/Users/Dave/.config/HMpTy/HMpTy.yaml", 'r') settings = yaml.load(stream) stream.close() # SETUP AND TEARDOWN FIXTURE FUNCTIONS FOR THE ENTIRE MODULE moduleDirectory = os.path.dirname(__file__) utKit = utKit(moduleDirectory) log, dbConn, pathToInputDir, pathToOutputDir = utKit.setupModule() utKit.tearDownModule() from fundamentals.mysql import writequery sqlQuery = """ALTER TABLE tcs_cat_ned_d_v10_2_0 DROP COLUMN htm16ID, DROP COLUMN htm10ID, DROP COLUMN htm13ID""" try: writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) except: pass class test_add_htm_ids_to_mysql_database_table(): def test_add_htm_ids_to_mysql_database_table_function(self): from HMpTy.mysql import add_htm_ids_to_mysql_database_table add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="tcs_cat_ned_d_v10_2_0",
def _extract_phot_from_exposure(expIdIndex, log, cachePath, settings): """* extract phot from exposure* **Key Arguments:** - ``expIdIndex`` -- index of the exposure to extract the dophot photometry from. A tuple of expId and integer MJD - ``cachePath`` -- path to the cache of ATLAS data **Return:** - ``dophotRows`` -- the list of matched dophot rows """ log.info('starting the ``_extract_phot_from_exposure`` method') global exposureIds expId = exposureIds[expIdIndex] # SETUP A DATABASE CONNECTION FOR THE remote database host = settings["database settings"]["atlasMovers"]["host"] user = settings["database settings"]["atlasMovers"]["user"] passwd = settings["database settings"]["atlasMovers"]["password"] dbName = settings["database settings"]["atlasMovers"]["db"] try: sshPort = settings["database settings"]["atlasMovers"]["tunnel"][ "port"] except: sshPort = False thisConn = ms.connect(host=host, user=user, passwd=passwd, db=dbName, port=sshPort, use_unicode=True, charset='utf8', client_flag=ms.constants.CLIENT.MULTI_STATEMENTS, connect_timeout=3600) thisConn.autocommit(True) matchRadius = float(settings["dophot"]["search radius"]) dophotFilePath = cachePath + "/" + \ expId[0][:3] + "/" + str(expId[1]) + "/" + expId[0] + ".dph" # TEST THE FILE EXISTS exists = os.path.exists(dophotFilePath) expId = expId[0] if not exists: sqlQuery = """update atlas_exposures set dophot_match = 99 where expname = "%(expId)s" """ % locals( ) writequery( log=log, sqlQuery=sqlQuery, dbConn=thisConn, ) log.info( 'the dophot file %(expId)s.dph is missing from the local ATLAS data cache' % locals()) return [] try: log.debug("attempting to open the file %s" % (dophotFilePath, )) dophotFile = codecs.open(dophotFilePath, encoding='utf-8', mode='r') dophotData = dophotFile.read() dophotFile.close() except IOError, e: message = 'could not open the file %s' % (dophotFilePath, ) log.critical(message) raise IOError(message)
def get(self):
    """
    *get the panstarrs_location_stamps object*

    Downloads a PanSTARRS colour stamp for each transient needing one,
    renames it to ``ps1_map_color.jpeg`` in the transient's cache
    directory, annotates it, and records success (``ps1_map = 1``) or
    failure (``ps1_map = 0``) against the object in the database.

    **Return:**
        - None
    """
    self.log.debug('starting the ``get`` method')

    # FOR A SINGLE TRANSIENT
    if self.transientId:
        transientId = self.transientId
        sqlQuery = u"""
            select t.transientBucketId, t.raDeg,t.decDeg from pesstoObjects p, transientBucketSummaries t where p.transientBucketId = t.transientBucketId and t.transientBucketId = %(transientId)s;
        """ % locals()
    # OR THE NEXT 200 TRANSIENTS NEEDING STAMPS
    else:
        # GET NEXT 200 TRANSIENTS NEEDING PANSTARRS STAMPS
        sqlQuery = u"""
            select t.transientBucketId, t.raDeg,t.decDeg from pesstoObjects p, transientBucketSummaries t where ps1_map is null and p.transientBucketId = t.transientBucketId order by t.transientBucketId desc limit 200;
        """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )

    # FOR EACH TRANSIENT DOWNLOAD STAMP TO CACHE DIRECTORY
    downloadDirectoryPath = self.settings["downloads"][
        "transient cache directory"]

    for row in rows:
        transientBucketId = row["transientBucketId"]
        downloadPath = "%s/%s" % (downloadDirectoryPath, transientBucketId)
        ra = row["raDeg"]
        dec = row["decDeg"]

        fitsPaths, jpegPaths, colorPath = downloader(
            log=self.log,
            settings=self.settings,
            downloadDirectory=downloadPath,
            fits=False,
            jpeg=False,
            arcsecSize=60,
            filterSet='gri',
            color=True,
            singleFilters=False,
            ra=ra,
            dec=dec,
            imageType="stack"  # warp | stack
        ).get()

        # CHECK FOR FAILED IMAGES AND FLAG IN DATABASE
        if len(colorPath) == 0 or not colorPath[0]:
            sqlQuery = u"""
                update pesstoObjects set ps1_map = 0 where transientBucketId = %(transientBucketId)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )
            continue

        source = colorPath[0]
        destination = downloadPath + "/ps1_map_color.jpeg"
        try:
            os.rename(source, destination)

            # DOWNLOAD THE COLOR IMAGE
            myimage = image(
                log=self.log,
                settings=self.settings,
                imagePath=destination,
                arcsecSize=60,
                crosshairs=True,
                transient=False,
                scale=True,
                invert=False,
                greyscale=False
            ).get()

            # UPDATE DATABASE FLAG
            sqlQuery = u"""
                update pesstoObjects set ps1_map = 1 where transientBucketId = %(transientBucketId)s
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.dbConn
            )
        # deliberate best-effort: any failure in rename/annotate is logged
        # and the transient is skipped (ps1_map stays null for a retry)
        except:
            self.log.warning(
                "Could not process the image %(destination)s" % locals())

    self.log.debug('completed the ``get`` method')
    return None
def main(arguments=None):
    """
    *The main function used when `cl_utils.py` is run as a single script from the cl, or when installed as a cl command*

    **Key Arguments:**
        - ``arguments`` -- pre-parsed command-line arguments (docopt dict); when None the CL is parsed by the `tools` helper

    **Return:**
        - None
    """
    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName="marshallEngine",
        defaultSettingsFile=True
    )
    arguments, settings, log, dbConn = su.setup()

    # tab completion for raw_input
    readline.set_completer_delims(' \t\n;')
    readline.parse_and_bind("tab: complete")
    readline.set_completer(tab_complete)

    # UNPACK REMAINING CL ARGUMENTS USING `EXEC` TO SETUP THE VARIABLE NAMES
    # AUTOMATICALLY
    # flags (--foo) become "fooFlag"; positionals (<bar>) become "bar"
    a = {}
    for arg, val in list(arguments.items()):
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        a[varname] = val
        if arg == "--dbConn":
            dbConn = val
            a["dbConn"] = val
        log.debug('%s = %s' % (varname, val,))

    ## START LOGGING ##
    startTime = times.get_now_sql_datetime()
    log.info(
        '--- STARTING TO RUN THE cl_utils.py AT %s' %
        (startTime,))

    init = a["init"]
    clean = a["clean"]
    iimport = a["import"]
    lightcurve = a["lightcurve"]
    transientBucketId = a["transientBucketId"]
    survey = a["survey"]
    withInLastDay = a["withInLastDay"]
    settingsFlag = a["settingsFlag"]

    # set options interactively if user requests
    if "interactiveFlag" in a and a["interactiveFlag"]:

        # load previous settings
        moduleDirectory = os.path.dirname(__file__) + "/resources"
        pathToPickleFile = "%(moduleDirectory)s/previousSettings.p" % locals()
        try:
            with open(pathToPickleFile):
                pass
            previousSettingsExist = True
        except:
            previousSettingsExist = False
        previousSettings = {}
        if previousSettingsExist:
            previousSettings = pickle.load(open(pathToPickleFile, "rb"))

        # x-raw-input
        # x-boolean-raw-input
        # x-raw-input-with-default-value-from-previous-settings

        # save the most recently used requests
        # NOTE(review): pickleMeObjects is empty, so nothing is currently
        # persisted — placeholder scaffolding for interactive mode
        pickleMeObjects = []
        pickleMe = {}
        theseLocals = locals()
        for k in pickleMeObjects:
            pickleMe[k] = theseLocals[k]
        pickle.dump(pickleMe, open(pathToPickleFile, "wb"))

    if a["init"]:
        from os.path import expanduser
        home = expanduser("~")
        filepath = home + "/.config/marshallEngine/marshallEngine.yaml"
        # try both macOS `open` and windows `start`; one of them will no-op
        try:
            cmd = """open %(filepath)s""" % locals()
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        except:
            pass
        try:
            cmd = """start %(filepath)s""" % locals()
            p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        except:
            pass
        return

    # CALL FUNCTIONS/OBJECTS
    # DEFAULT VALUES
    if not withInLastDay:
        withInLastDay = 30

    # CALL FUNCTIONS/OBJECTS
    if clean:
        # RESCUE ORPHANED TRANSIENTS - NO MASTER ID FLAG
        print("rescuing orphaned transients")
        from fundamentals.mysql import writequery
        procedureNames = [
            "update_transients_with_no_masteridflag",
            "insert_new_transients_into_transientbucketsummaries",
            "resurrect_objects",
            "update_sherlock_xmatch_counts",
            "update_inbox_auto_archiver",
            "update_transient_akas"
        ]
        # CALL EACH PROCEDURE
        for p in procedureNames:
            sqlQuery = "CALL `%(p)s`();" % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

        # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
        from marshallEngine.housekeeping import update_transient_summaries
        updater = update_transient_summaries(
            log=log,
            settings=settings,
            dbConn=dbConn
        ).update()

    if iimport:
        # SELECT THE FEEDER MODULES FOR THE REQUESTED SURVEY
        if survey.lower() == "panstarrs":
            from marshallEngine.feeders.panstarrs.data import data
            from marshallEngine.feeders.panstarrs import images
        if survey.lower() == "atlas":
            from marshallEngine.feeders.atlas.data import data
            from marshallEngine.feeders.atlas import images
        if survey.lower() == "useradded":
            from marshallEngine.feeders.useradded.data import data
            from marshallEngine.feeders.useradded import images
        if survey.lower() == "tns":
            from marshallEngine.feeders.tns.data import data
            from marshallEngine.feeders.tns import images
        if survey.lower() == "ztf":
            from marshallEngine.feeders.ztf.data import data
            from marshallEngine.feeders.ztf import images
        ingester = data(
            log=log,
            settings=settings,
            dbConn=dbConn
        ).ingest(withinLastDays=withInLastDay)
        cacher = images(
            log=log,
            settings=settings,
            dbConn=dbConn
        ).cache(limit=3000)

        # REFRESH ANY MISSING PS1 LOCATION STAMPS
        from marshallEngine.services import panstarrs_location_stamps
        ps_stamp = panstarrs_location_stamps(
            log=log,
            settings=settings,
            dbConn=dbConn
        ).get()

    if lightcurve:
        from marshallEngine.lightcurves import marshall_lightcurves
        lc = marshall_lightcurves(
            log=log,
            dbConn=dbConn,
            settings=settings,
            transientBucketIds=transientBucketId
        )
        filepath = lc.plot()
        print("The lightcurve plot for transient %(transientBucketId)s can be found here: %(filepath)s" % locals())

    if "dbConn" in locals() and dbConn:
        dbConn.commit()
        dbConn.close()

    ## FINISH LOGGING ##
    endTime = times.get_now_sql_datetime()
    runningTime = times.calculate_time_difference(startTime, endTime)
    log.info(
        '-- FINISHED ATTEMPT TO RUN THE cl_utils.py AT %s (RUNTIME: %s) --' %
        (endTime, runningTime,))

    return
# Recursively create missing directories if not os.path.exists(pathToOutputDir): os.makedirs(pathToOutputDir) # SETUP ALL DATABASE CONNECTIONS from sherlock import database db = database(log=log, settings=settings) dbConns, dbVersions = db.connect() transientsDbConn = dbConns["transients"] cataloguesDbConn = dbConns["catalogues"] try: from fundamentals.mysql import writequery sqlQuery = """drop table IF EXISTS tcs_cat_ned_stream;""" % locals() writequery(log=log, sqlQuery=sqlQuery, dbConn=cataloguesDbConn) except: pass class test_ned(unittest.TestCase): def test_ned_function(self): coordinateList = ["23.2323 -43.23434"] from sherlock.imports import ned catalogue = ned(log=log, settings=settings, coordinateList=coordinateList, radiusArcsec=300) catalogue.ingest() def test_ned_function_exception(self):
def ingest(self, withinLastDays):
    """*Ingest the data into the marshall feeder survey table*

    Downloads the summary and recurrence CSVs for each of the three
    panstarrs surveys (ps13pi, ps23pi, pso3), cleans the rows, imports
    them into the feeder survey table and crossmatches/copies them into
    the transientbucket.

    **Key Arguments**

    - ``withinLastDays`` -- within the last number of days. *Default: 50*

    **Return:**
        - None
    """
    self.log.debug('starting the ``ingest`` method')

    allLists = []

    # EACH SURVEY EXPOSES A SUMMARY AND A RECURRENCE CSV -- DOWNLOAD AND
    # CLEAN EACH IN TURN (get_csv_data presumably caches the downloaded
    # rows on the instance for _clean_data_pre_ingest to pick up --
    # its return value was never used in the original code)
    for surveyName in ("ps13pi", "ps23pi", "pso3"):
        for csvType in ("summary csv", "recurrence csv"):
            self.get_csv_data(
                url=self.settings["panstarrs urls"][surveyName][csvType],
                user=self.settings["credentials"][surveyName]["username"],
                pwd=self.settings["credentials"][surveyName]["password"])
            allLists.extend(self._clean_data_pre_ingest(
                surveyName=surveyName, withinLastDays=withinLastDays))

    self.dictList = allLists
    self._import_to_feeder_survey_table()
    self.insert_into_transientBucket()

    # FIX ODD PANSTARRS COORDINATES (negative RAs wrap to 0-360 range)
    sqlQuery = """update transientBucket set raDeg = raDeg+360.0 where raDeg < 0;"""
    writequery(log=self.log, sqlQuery=sqlQuery, dbConn=self.dbConn)

    # CLEAN UP TASKS TO MAKE THE TICKET UPDATE
    self.clean_up()

    self.log.debug('completed the ``ingest`` method')
    return None
def _insert_single_batch_into_database(batchIndex, log, dbTableName, uniqueKeyList, dateModified, replace, batchSize, reDatetime):
    """*Insert one batch of dictionary rows into a mysql table (multiprocessing worker)*

    Builds a single multi-value INSERT for the batch and executes it; if the
    fast path fails, falls back to generating per-row inserts via
    ``convert_dictionary_to_mysql_table``.

    **Key Arguments:**
        - ``batchIndex`` -- index into the module-global ``sharedList`` identifying the batch to insert
        - ``log`` -- logger
        - ``dbTableName`` -- table to insert into
        - ``uniqueKeyList`` -- column names combined into the primary key (used by the fallback path)
        - ``dateModified`` -- add a modification date + updated flag (fallback path)
        - ``replace`` -- append an ON DUPLICATE KEY UPDATE clause so existing rows are refreshed
        - ``batchSize`` -- nominal batch size (not used directly in this function)
        - ``reDatetime`` -- pre-compiled datetime regex, passed through to avoid recompilation

    **Return:**
        - the string ``"None"``

    NOTE(review): relies on the module globals ``totalCount``, ``globalDbConn``
    and ``sharedList`` being primed by the coordinating process before the
    worker pool starts.
    """
    log.info('starting the ``_insert_single_batch_into_database`` function')

    global totalCount
    global globalDbConn
    global sharedList

    batch = sharedList[batchIndex]
    reDate = reDatetime

    # globalDbConn is either ready-made connection or a settings dict from
    # which each worker opens its own connection
    if isinstance(globalDbConn, dict):
        # SETUP ALL DATABASE CONNECTIONS
        dbConn = database(
            log=log,
            dbSettings=globalDbConn,
            autocommit=False
        ).connect()
    else:
        dbConn = globalDbConn

    count = batch[1]
    if count > totalCount:
        count = totalCount
    # NOTE(review): ltotalCount is never used below
    ltotalCount = totalCount

    inserted = False
    while inserted == False:

        # NOTE(review): both branches assign the same verb -- looks like the
        # non-replace branch was meant to differ (e.g. plain INSERT); the
        # replace behaviour is instead delivered by the ON DUPLICATE clause
        if not replace:
            insertVerb = "INSERT IGNORE"
        else:
            insertVerb = "INSERT IGNORE"

        # UNION OF ALL KEYS ACROSS THE BATCH, SANITISED FOR MYSQL COLUMN NAMES
        uniKeys = set().union(*(d.keys() for d in batch[0]))
        tmp = []
        tmp[:] = [m.replace(" ", "_").replace("-", "_") for m in uniKeys]
        uniKeys = tmp
        myKeys = '`,`'.join(uniKeys)

        # ONE VALUE-TUPLE PER ROW; None/"None" become SQL NULLs downstream
        vals = [tuple([None if d[k] in ["None", None] else str(d[k])
                       for k in uniKeys]) for d in batch[0]]
        valueString = ("%s, " * len(vals[0]))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k in uniKeys:
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
            dup = """%(dup)s updated=1, dateLastModified=NOW()""" % locals()

        insertCommand = insertCommand + dup

        # NORMALISE QUOTING ARTEFACTS INTO NULLS
        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('"None"', 'null')

        message = ""
        # log.debug('adding new data to the %s table; query: %s' %
        #           (dbTableName, addValue))
        try:
            message = writequery(
                log=log,
                sqlQuery=insertCommand,
                dbConn=dbConn,
                Force=True,
                manyValueList=vals
            )
        except:
            # FALLBACK: REGENERATE PER-ROW INSERTS VIA THE SLOW-BUT-ROBUST
            # DICTIONARY CONVERTER AND RETRY AS A MANY-VALUE INSERT
            theseInserts = []
            for aDict in batch[0]:
                insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    returnInsertOnly=True,
                    replace=replace,
                    reDatetime=reDate,
                    skipChecks=True
                )
                theseInserts.append(valueTuple)
            message = ""
            # log.debug('adding new data to the %s table; query: %s' %
            #           (dbTableName, addValue))
            message = writequery(
                log=log,
                sqlQuery=insertCommand,
                dbConn=dbConn,
                Force=True,
                manyValueList=theseInserts
            )

        if message == "unknown column":
            # MISSING COLUMNS -- LET THE CONVERTER ADD THEM, THEN LOOP AND
            # RETRY THE BATCH
            # NOTE(review): iterates `batch` (the [rows, count] pair), not
            # `batch[0]` (the rows) as elsewhere -- confirm intended
            for aDict in batch:
                convert_dictionary_to_mysql_table(
                    dbConn=dbConn,
                    log=log,
                    dictionary=aDict,
                    dbTableName=dbTableName,
                    uniqueKeyList=uniqueKeyList,
                    dateModified=dateModified,
                    reDatetime=reDate,
                    replace=replace
                )
        else:
            inserted = True
            # autocommit is off for worker-opened connections; flush now
            dbConn.commit()

    log.info('completed the ``_insert_single_batch_into_database`` function')
    return "None"
pass # COPY INPUT TO OUTPUT DIR shutil.copytree(pathToInputDir, pathToOutputDir) # Recursively create missing directories if not os.path.exists(pathToOutputDir): os.makedirs(pathToOutputDir) # xt-setup-unit-testing-files-and-folders try: from fundamentals.mysql import writequery sqlQuery = """drop table IF EXISTS tcs_cat_ned_d_v1_0;""" % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) except: pass class test_ned_d(unittest.TestCase): def test_ned_d_function(self): from sherlock.imports import ned_d catalogue = ned_d( log=log, settings=settings, pathToDataFile=pathToInputDir + "/ned_d_test.csv",
def label_pointings_with_gw_ids(
        self):
    """
    *Attempt to label the PS1 pointing with the GW IDs*

    The GW metadata used to associate PS1 pointings is taken from the settings file

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            # ATTEMPT TO LABEL PS1 POINTINGS IN DATABASE WITH A GW ID
            from breaker import update_ps1_atlas_footprint_tables
            dbUpdater = update_ps1_atlas_footprint_tables(
                log=log,
                settings=settings
            )
            dbUpdater.label_pointings_with_gw_ids()
    """
    self.log.debug('starting the ``label_pointings_with_gw_ids`` method')

    # WAVE METADATA FOUND IN SETTINGS FILE
    for wave in self.settings["gravitational waves"]:

        # UNPACK THE PLOT PARAMETERS FROM THE SETTINGS FILE
        centralCoordinate = self.settings["gravitational waves"][
            wave]["plot"]["centralCoordinate"]
        raRange = float(self.settings["gravitational waves"][
            wave]["plot"]["raRange"])
        decRange = float(self.settings["gravitational waves"][
            wave]["plot"]["decRange"])

        # PAD THE SEARCH BOX BY 5 DEGREES ON EVERY SIDE
        raMax = (centralCoordinate[0] + raRange / 2.) + 5.
        raMin = (centralCoordinate[0] - raRange / 2.) - 5.
        decMax = (centralCoordinate[1] + decRange / 2.) + 5.
        decMin = (centralCoordinate[1] - decRange / 2.) - 5.

        # TIME WINDOW: 21 DAYS BEFORE THE EVENT MJD TO 31 DAYS AFTER
        mjdLower = self.settings["gravitational waves"][
            wave]["mjd"] - 21.
        mjdUpper = self.settings["gravitational waves"][
            wave]["mjd"] + 31

        # BUILD THE RA CLAUSE, WRAPPING AROUND THE 0/360 DEG MERIDIAN WHEN
        # THE PADDED BOX SPILLS OVER EITHER EDGE
        # NOTE(review): if raMin == 0. exactly (and raMax <= 360.) none of
        # these branches fires and `raWhere` is unbound/stale from the
        # previous wave -- confirm whether the first test should be `>=`
        if raMin > 0. and raMax < 360.:
            raWhere = """(raDeg > %(raMin)s and raDeg < %(raMax)s)""" % locals(
            )
        elif raMin < 0.:
            raMin2 = raMin + 360.
            raWhere = """((raDeg > 0. and raDeg < %(raMax)s) or raDeg > %(raMin2)s)""" % locals(
            )
        elif raMax > 360.:
            raMax2 = raMax - 360.
            raWhere = """((raDeg > %(raMin)s and raDeg < 360.) or raDeg < %(raMax2)s)""" % locals(
            )

        decWhere = """(decDeg > %(decMin)s and decDeg < %(decMax)s)""" % locals(
        )
        mjdWhere = "(mjd>%(mjdLower)s and mjd<%(mjdUpper)s)" % locals()

        # LABEL PS1 POINTINGS NOT YET ASSOCIATED WITH ANY WAVE
        sqlQuery = u"""
            update ps1_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

        # APPEND THIS WAVE ID TO PS1 POINTINGS ALREADY LABELLED WITH OTHERS
        sqlQuery = u"""
            update ps1_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%";
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

        # SAME TWO UPDATES FOR THE ATLAS POINTINGS TABLE
        sqlQuery = u"""
            update atlas_pointings set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

        sqlQuery = u"""
            update atlas_pointings set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and gw_id not like "%%%(wave)s%%";
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

        # THE NIGHT-LOG TABLE RECORDS mjd_registered RATHER THAN mjd
        mjdWhere = mjdWhere.replace("mjd", "mjd_registered")
        sqlQuery = u"""
            update ps1_nightlogs set gw_id = "%(wave)s" where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is null and type = "OBJECT"
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

        sqlQuery = u"""
            update ps1_nightlogs set gw_id = CONCAT(gw_id, " %(wave)s") where %(raWhere)s and %(decWhere)s and %(mjdWhere)s and gw_id is not null and type = "OBJECT" and gw_id not like "%%%(wave)s%%";
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
        )

    # REPORT HOW MANY PS1 POINTINGS REMAIN UNASSOCIATED
    sqlQuery = u"""
        select count(*) as count from ps1_pointings where gw_id is null;
    """ % locals()
    count = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.ligo_virgo_wavesDbConn,
        quiet=False
    )[0]["count"]

    print "PS1 pointings labelled with their associated GW id"

    if count == 0:
        print " Note all pointings have been labelled with GW ID"
    else:
        print " %(count)s pointings remain unlabelled with a GW ID" % locals()

    self.log.debug('completed the ``label_pointings_with_gw_ids`` method')
    return None
def _update_sdss_coverage(
        self):
    """*Check and record SDSS coverage for NED galaxies in the catalogue table*

    For every master NED row still missing an ``sdss_coverage`` value, query
    the SDSS footprint service (rate-limited to ~60 queries/min) and write
    the result back to the table: 1 = covered, 0 = not covered,
    null = undetermined.

    **Return:**
        - None
    """
    self.log.debug('starting the ``_update_sdss_coverage`` method')

    tableName = self.dbTableName

    # SELECT THE LOCATIONS NEEDING TO BE CHECKED
    # (nearest galaxies first -- dist_mpc ascending)
    sqlQuery = u"""
        select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage from %(tableName)s where sdss_coverage is null and master_row = 1 and in_ned = 1 order by dist_mpc;
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        quiet=False
    )

    totalCount = len(rows)
    count = 0
    for row in rows:
        count += 1
        if count > 1:
            # Cursor up three lines and clear (redraw in-place progress)
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")

        if count > totalCount:
            count = totalCount
        percent = (float(count) / float(totalCount)) * 100.

        primaryID = row["primaryID"]
        raDeg = float(row["raDeg"])
        decDeg = float(row["decDeg"])
        primary_ned_id = row["primary_ned_id"]

        # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN
        time.sleep(1.1)
        print "%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals()
        print "NED NAME: ", primary_ned_id

        # covered = True | False | 999 (i.e. not sure)
        sdss_coverage = check_coverage(
            log=self.log,
            ra=raDeg,
            dec=decDeg
        ).get()

        if sdss_coverage == 999:
            sdss_coverage_flag = "null"
        elif sdss_coverage == True:
            sdss_coverage_flag = 1
        elif sdss_coverage == False:
            sdss_coverage_flag = 0
        else:
            # NOTE(review): message contains a typo ('cound') and the
            # `% locals()` is redundant -- left untouched here
            self.log.error('cound not get sdss coverage' % locals())
            sys.exit(0)

        # UPDATE THE DATABASE FLAG
        sqlQuery = u"""
            update %(tableName)s set sdss_coverage = %(sdss_coverage_flag)s where primaryID = %(primaryID)s
        """ % locals()
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

    self.log.debug('completed the ``_update_sdss_coverage`` method')
    return None
def update_ned_database_table(
        self):
    """
    *Use Sherlock & Neddy to query NED and update the catalogues database for previously unseen/stale PS1 footprint areas*

    **Return:**
        - None

    **Usage:**

        .. code-block:: python

            # UPDATE THE NED STREAM FOR NEW PS1 FOOTPRINTS
            from breaker import update_ps1_atlas_footprint_tables
            dbUpdater = update_ps1_atlas_footprint_tables(
                log=log,
                settings=settings
            )
            dbUpdater.update_ned_database_table()
    """
    self.log.debug('starting the ``update_ned_database_table`` method')

    from sherlock.update_ned_stream import update_ned_stream

    # WORK THROUGH THE SUBDISKS IN BATCHES OF 100 UNTIL NONE REMAIN
    # UNQUERIED
    numDisksToConesearch = 100
    rowCount = 100
    while rowCount > 0:
        # GRAB THE NEXT BATCH OF SUBDISKS NOT YET QUERIED AGAINST NED
        sqlQuery = u"""
            select primaryId, raDeg as "ra", decDeg as "dec", htm16ID from ps1_pointings_subdisks where nedQueried = 0 limit %(numDisksToConesearch)s
        """ % locals()
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
            quiet=False
        )
        rowCount = len(rows)
        ids = []
        ids[:] = [str(row["primaryId"]) for row in rows]
        ids = ",".join(ids)

        if rowCount > 0:
            print "Selecting the next %(rowCount)s subdisks areas to conesearch against NED from the `ps1_pointings_subdisks` table" % locals()
        else:
            # NOTE(review): `% locals()` here is redundant (no placeholders)
            print "NED stream is up-to-date, no queries required" % locals()

        # CONESEARCH NED FOR THESE AREAS AND INGEST THE RESULTS INTO THE
        # CATALOGUES DATABASE
        update_ned_stream(
            log=self.log,
            cataloguesDbConn=self.cataloguesDbConn,
            settings=self.settings,
            transientsMetadataList=rows
        ).get()

        # MARK THIS BATCH OF SUBDISKS AS QUERIED
        if len(ids):
            sqlQuery = u"""
                update ps1_pointings_subdisks set nedQueried = 1 where primaryId in (%(ids)s)
            """ % locals()
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.ligo_virgo_wavesDbConn,
            )

        # REPORT HOW MANY SUBDISKS ARE STILL OUTSTANDING
        sqlQuery = u"""
            select count(*) as count from ps1_pointings_subdisks where nedQueried = 0
        """ % locals()
        count = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.ligo_virgo_wavesDbConn,
            quiet=False
        )
        count = count[0]["count"]

        if rowCount > 0:
            print "NED stream updated for %(rowCount)s PS1 pointing sub-disks (%(count)s to go)" % locals()
            print "-----\n\n"

    self.log.debug('completed the ``update_ned_database_table`` method')
    return None
def _update_ned_query_history(self):
    """*Update the database helper table to give details of the ned cone searches performed*

    One row is recorded per coordinate searched, carrying the RA, Dec and
    search radius; the helper table is then HTM-indexed for later lookups.

    *Usage:*

        .. code-block:: python

            stream._update_ned_query_history()
    """
    self.log.debug('starting the ``_update_ned_query_history`` method')

    myPid = self.myPid

    # ASTROCALC UNIT CONVERTER OBJECT
    converter = unit_conversion(log=self.log)

    # ONE ROW PER CONE-SEARCH PERFORMED: RA, DEC & SEARCH RADIUS
    dataList = []
    for entry in self.coordinateList:
        if isinstance(entry, str):
            parts = entry.split(" ")
            raValue = parts[0]
            decValue = parts[1]
        elif isinstance(entry, tuple) or isinstance(entry, list):
            raValue = entry[0]
            decValue = entry[1]
        dataList.append({
            "raDeg": raValue,
            "decDeg": decValue,
            "arcsecRadius": self.radiusArcsec
        })

    # NOTHING SEARCHED, NOTHING TO RECORD
    if len(dataList) == 0:
        return None

    # CREATE TABLE IF NOT EXIST
    createStatement = """CREATE TABLE IF NOT EXISTS `tcs_helper_ned_query_history` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `arcsecRadius` int(11) DEFAULT NULL,
  `dateQueried` datetime DEFAULT CURRENT_TIMESTAMP,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `dateQueried` (`dateQueried`),
  KEY `dateHtm16` (`dateQueried`,`htm16ID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
"""
    writequery(
        log=self.log,
        sqlQuery=createStatement,
        dbConn=self.cataloguesDbConn)

    # USE dbSettings TO ACTIVATE MULTIPROCESSING
    insert_list_of_dictionaries_into_database_tables(
        dbConn=self.cataloguesDbConn,
        log=self.log,
        dictList=dataList,
        dbTableName="tcs_helper_ned_query_history",
        uniqueKeyList=[],
        dateModified=True,
        batchSize=10000,
        replace=True,
        dbSettings=self.settings["database settings"]["static catalogues"])

    # INDEX THE TABLE FOR LATER SEARCHES
    add_htm_ids_to_mysql_database_table(
        raColName="raDeg",
        declColName="decDeg",
        tableName="tcs_helper_ned_query_history",
        dbConn=self.cataloguesDbConn,
        log=self.log,
        primaryIdColumnName="primaryId")

    self.log.debug('completed the ``_update_ned_query_history`` method')
    return None
def add_data_to_database_table(self, dictList, createStatement=False):
    """*Import data in the list of dictionaries in the requested database table*

    Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catlogue was last updated

    **Key Arguments:**
        - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported
        - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*

    **Usage:**

        .. code-block:: python

            self.add_data_to_database_table(
                dictList=dictList,
                createStatement=createStatement
            )

    .. todo ::

        - Write a checklist for creating a new sherlock database importer
    """
    self.log.debug('starting the ``add_data_to_database_table`` method')

    # NOTHING TO IMPORT
    if len(dictList) == 0:
        return

    myPid = self.myPid
    dbTableName = self.dbTableName

    # GENERATE THE TABLE IF IT DOES NOT YET EXIST
    if createStatement:
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.cataloguesDbConn,
        )

    # USE dbSettings TO ACTIVATE MULTIPROCESSING
    insert_list_of_dictionaries_into_database_tables(
        dbConn=self.cataloguesDbConn,
        log=self.log,
        dictList=dictList,
        dbTableName=dbTableName,
        uniqueKeyList=[],
        dateModified=True,
        dateCreated=True,
        batchSize=10000,
        replace=True,
        dbSettings=self.settings["database settings"]["static catalogues"])

    # SPATIALLY INDEX THE NEWLY IMPORTED ROWS
    self._add_htmids_to_database_table()

    # REFRESH THE SHERLOCK CATALOGUE HELPER TABLES
    cleaner = database_cleaner(
        log=self.log,
        settings=self.settings
    )
    cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()
    self._update_database_helper_table()

    # PRINT A MANUAL FOLLOW-UP CHECKLIST FOR THE OPERATOR
    # (python 2 print statement; string text preserved verbatim)
    print """Now:

- [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
- [ ] retire any previous version of this catlogue in the database. Renaming the catalogue-table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
- [ ] dupliate views from the previous catalogue version to point towards the new version and then delete the old views
- [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
- [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
- [ ] run a test batch of transients to make sure catalogue is installed as expected

""" % locals()

    self.log.debug('completed the ``add_data_to_database_table`` method')
    return None
def insert_into_transientBucket(self, importUnmatched=True, updateTransientSummaries=True):
    """*insert objects/detections from the feeder survey table into the transientbucket*

    **Key Arguments**

    - ``importUnmatched`` -- import unmatched (new) transients into the marshall (not wanted in some circumstances)
    - ``updateTransientSummaries`` -- update the transient summaries and lightcurves? Can be True or False, or alternatively a specific transientBucketId

    The method keeps crossmatching work and database load down by:

    1. assigning transientBucketIds by object-name match alone (no spatial crossmatch needed) and copying those feeder survey rows over,
    2. spatially crossmatching the remaining unique, unmatched sources against the transientbucket and copying the newly matched rows over, and
    3. assigning fresh transientBucketIds to anything still unmatched and importing those rows as brand new transient detections.

    **Return**

    - None

    **Usage**

    ```python
    ingester.insert_into_transientBucket()
    ```
    """
    self.log.debug(
        'starting the ``crossmatch_with_transientBucket`` method')

    fsTableName = self.fsTableName

    # STEP 1: NAME-MATCH FEEDER SURVEY DETECTIONS AGAINST THE
    # TRANSIENTBUCKET AND COPY THE MATCHED ROWS OVER
    self._feeder_survey_transientbucket_name_match_and_import()

    # STEP 2: SPATIAL CROSSMATCH FOR THE REMAINING UNIQUE, UNMATCHED
    # SOURCES -- MAKE SURE THE TRANSIENTBUCKET IS HTM-INDEXED FIRST
    from HMpTy.mysql import add_htm_ids_to_mysql_database_table
    add_htm_ids_to_mysql_database_table(
        raColName="raDeg",
        declColName="decDeg",
        tableName="transientBucket",
        dbConn=self.dbConn,
        log=self.log,
        primaryIdColumnName="primaryKeyId",
        dbSettings=self.settings["database settings"])
    unmatched = self._feeder_survey_transientbucket_crossmatch()

    # STEP 3: ANYTHING STILL UNMATCHED IS A NEW TRANSIENT
    if importUnmatched:
        self._import_unmatched_feeder_survey_sources_to_transientbucket(
            unmatched)

    # UPDATE OBSERVATION DATES FROM MJDs
    writequery(
        log=self.log,
        sqlQuery="call update_transientbucket_observation_dates()",
        dbConn=self.dbConn)

    # UPDATE THE TRANSIENT BUCKET SUMMARY TABLE IN THE MARSHALL DATABASE
    if updateTransientSummaries:
        # A NON-BOOLEAN INTEGER MEANS "REFRESH ONLY THIS TRANSIENT"
        isSingleId = isinstance(updateTransientSummaries, int) and not isinstance(
            updateTransientSummaries, bool)
        transientBucketId = updateTransientSummaries if isSingleId else False
        from marshallEngine.housekeeping import update_transient_summaries
        update_transient_summaries(
            log=self.log,
            settings=self.settings,
            dbConn=self.dbConn,
            transientBucketId=transientBucketId).update()

    self.log.debug(
        'completed the ``crossmatch_with_transientBucket`` method')
    return None
def convert_sqlite_to_mysql(self):
    """*copy the contents of the sqlite database into the mysql database*

    Walks every table in the sqlite file, rewrites its CREATE statement
    into mysql dialect (prefixed with ``self.tablePrefix`` and extended
    with dateCreated/dateLastModified/updated housekeeping columns), then
    bulk-inserts the rows.

    See class docstring for usage
    """
    from fundamentals.renderer import list_of_dictionaries
    from fundamentals.mysql import directory_script_runner
    self.log.debug('starting the ``convert_sqlite_to_mysql`` method')

    con = lite.connect(self.pathToSqlite)
    con.row_factory = lite.Row
    cur = con.cursor()

    # GET ALL TABLE NAMES
    cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cur.fetchall()

    createStatements = []
    inserts = []
    for table in tables:
        table = table['name']
        # sqlite's internal bookkeeping table -- not wanted in mysql
        if table == "sqlite_sequence":
            continue

        # CREATE TABLE collection_books (folder_id, fingerprint, primary key(folder_id, fingerprint));
        # GENEREATE THE MYSQL CREATE STATEMENTS FOR EACH TABLE
        cur.execute(
            "SELECT sql FROM sqlite_master WHERE name = '%(table)s';" % locals())
        createStatement = cur.fetchone()
        createStatement = createStatement[0].replace('"', '`') + ";"

        # SQLITE ALLOWS UNTYPED COLUMNS; GIVE THEM A VARCHAR TYPE IN MYSQL
        if "DEFAULT" not in createStatement:
            if "primary key(" in createStatement:
                tmp = createStatement.split("primary key(")
                tmp[0] = tmp[0].replace(
                    ",", " varchar(150) DEFAULT NULL,")
                createStatement = ("primary key(").join(tmp)
            if "primary key," in createStatement:
                tmp = createStatement.split("primary key,")
                tmp[1] = tmp[1].replace(
                    ",", " varchar(150) DEFAULT NULL,")
                tmp[1] = tmp[1].replace(
                    ");", " varchar(150) DEFAULT NULL);")
                createStatement = ("primary key,").join(tmp)

        # TRANSLATE SQLITE KEYWORDS/LITERALS INTO MYSQL EQUIVALENTS
        createStatement = createStatement.replace(
            "INTEGER PRIMARY KEY", "INTEGER AUTO_INCREMENT PRIMARY KEY")
        createStatement = createStatement.replace(
            "AUTOINCREMENT", "AUTO_INCREMENT")
        createStatement = createStatement.replace(
            "DEFAULT 't'", "DEFAULT '1'")
        createStatement = createStatement.replace(
            "DEFAULT 'f'", "DEFAULT '0'")
        createStatement = createStatement.replace(",'t'", ",'1'")
        createStatement = createStatement.replace(",'f'", ",'0'")

        # PREFIX THE TABLE NAME AND MAKE THE CREATE IDEMPOTENT
        if "CREATE TABLE `" in createStatement:
            createStatement = createStatement.replace(
                "CREATE TABLE `", "CREATE TABLE IF NOT EXISTS `" + self.tablePrefix)
        else:
            createStatement = createStatement.replace(
                "CREATE TABLE ", "CREATE TABLE IF NOT EXISTS " + self.tablePrefix)

        # ADD THE STANDARD HOUSEKEEPING COLUMNS
        if ", primary key(" in createStatement:
            createStatement = createStatement.replace(", primary key(", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
primary key(""")
        else:
            createStatement = createStatement.replace(");", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0');
""")

        # COLUMN-SPECIFIC FIXES FOR KNOWN PROBLEM COLUMNS
        createStatement = createStatement.replace(
            " text primary key", " varchar(100) primary key")
        createStatement = createStatement.replace(
            "`EntryText` TEXT NOT NULL,", "`EntryText` TEXT,")
        createStatement = createStatement.replace(
            "`SelectionText` TEXT NOT NULL", "`SelectionText` TEXT")
        createStatement = createStatement.replace(
            "`Filename` INTEGER NOT NULL,", "`Filename` TEXT NOT NULL,")
        createStatement = createStatement.replace(
            "`SessionPartUUID` TEXT NOT NULL UNIQUE,", "`SessionPartUUID` VARCHAR(100) NOT NULL UNIQUE,")
        createStatement = createStatement.replace(
            "`Name` TEXT PRIMARY KEY NOT NULL", "`Name` VARCHAR(100) PRIMARY KEY NOT NULL")
        createStatement = createStatement.replace(
            " VARCHAR ", " VARCHAR(100) ")
        createStatement = createStatement.replace(
            " VARCHAR,", " VARCHAR(100),")

        # AVOID A DUPLICATE dateCreated COLUMN IF THE TABLE ALREADY HAD ONE
        if len(createStatement.lower().split("datecreated")) > 2:
            createStatement = createStatement.replace(
                "`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,\n", "")

        # GRAB THE DATA TO ADD TO THE MYSQL DATABASE TABLES
        cur.execute("SELECT * from '%(table)s';" % locals())
        rows = cur.fetchall()
        allRows = []
        for row in rows:
            allRows.append(dict(row))

        # RECURSIVELY CREATE MISSING DIRECTORIES
        if not os.path.exists("/tmp/headjack/"):
            os.makedirs("/tmp/headjack/")

        # CREATE THE DESTINATION TABLE IN MYSQL
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.dbConn,
        )

        from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.dbConn,
            log=self.log,
            dictList=allRows,
            dbTableName=self.tablePrefix + table,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"])

        # # DUMP THE DATA INTO A MYSQL DATABASE
        # dataSet = list_of_dictionaries(
        #     log=self.log,
        #     listOfDictionaries=allRows
        # )
        # originalList = dataSet.list
        # now = datetime.now()
        # now = now.strftime("%Y%m%dt%H%M%S%f.sql")
        # mysqlData = dataSet.mysql(
        #     tableName=self.tablePrefix + table, filepath="/tmp/headjack/" +
        #     now, createStatement=createStatement)
        # directory_script_runner(
        #     log=self.log,
        #     pathToScriptDirectory="/tmp/headjack/",
        #     databaseName=self.settings["database settings"]["db"],
        #     loginPath=self.settings["database settings"]["loginPath"],
        #     successRule="delete",
        #     failureRule="failed"
        # )

    con.close()

    self.log.debug('completed the ``convert_sqlite_to_mysql`` method')
    return None
def _import_unmatched_feeder_survey_sources_to_transientbucket(
        self,
        unmatched):
    """*assign a new transientbucketid to any feeder survey source not yet matched in steps. Copy these unmatched feeder survey rows to the transientbucket as new transient detections.*

    **Key Arguments**

    - ``unmatched`` -- the remaining unmatched feeder survey object names.
    """
    self.log.debug(
        'starting the ``_import_unmatched_feeder_survey_sources_to_transientbucket`` method')

    # NOTHING LEFT UNMATCHED - NOTHING TO IMPORT
    if not len(unmatched):
        return None

    fsTableName = self.fsTableName
    fs_name = self.fs_name

    # READ MAX TRANSIENTBUCKET ID FROM TRANSIENTBUCKET
    sqlQuery = u"""
        select max(transientBucketId) as maxId from transientBucket
    """ % locals()
    rows = readquery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )
    # FIRST FREE ID (START AT 1 IF THE TABLE IS EMPTY)
    if not len(rows) or not rows[0]["maxId"]:
        nextId = 1
    else:
        nextId = rows[0]["maxId"] + 1

    # PAIR EVERY UNMATCHED SOURCE WITH A FRESH, SEQUENTIAL TRANSIENTBUCKET ID
    # AND STAMP IT INTO THE FEEDER SURVEY TABLE
    newTransientBucketIds = [str(nextId + offset)
                             for offset in range(len(unmatched))]
    updateStatements = [
        "update " + fsTableName + " set transientBucketId = " + newId +
        " where " + fs_name + " = '" + str(sourceName) + "';"
        for newId, sourceName in zip(newTransientBucketIds, unmatched)]
    writequery(
        log=self.log,
        sqlQuery=("\n").join(updateStatements),
        dbConn=self.dbConn
    )

    # COPY FEEDER SURVEY ROWS TO TRANSIENTBUCKET
    self._feeder_survey_transientbucket_name_match_and_import()

    # FLAG THE FIRST (LOWEST primaryKeyId) ROW OF EACH NEW TRANSIENT AS THE
    # MASTER ROW IN THE TRANSIENTBUCKET
    newTransientBucketIds = (",").join(newTransientBucketIds)
    sqlQuery = """UPDATE transientBucket t
    JOIN
    (SELECT transientBucketId, MIN(primaryKeyId) AS minpk
    FROM transientBucket
    WHERE transientBucketId IN (%(newTransientBucketIds)s)
    GROUP BY transientBucketId) tmin ON t.primaryKeyId = tmin.minpk SET masterIDFlag = 1;""" % locals()
    writequery(
        log=self.log,
        sqlQuery=sqlQuery,
        dbConn=self.dbConn
    )

    self.log.debug(
        'completed the ``_import_unmatched_feeder_survey_sources_to_transientbucket`` method')
    return None
def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=[],
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    Adds the key/value pairs of a python dictionary as a row in a MySQL
    table, creating the table and/or any missing columns on the fly.
    Alternatively (``returnInsertOnly=True``) builds and returns the INSERT
    statement without touching the database.

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary
        - ``dbConn`` -- the db connection
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that need combined to create the primary key
        - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
        - ``dateModified`` -- add a modification date and updated flag to the mysql table
        - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip reliability checks. Less robust but a little faster.
        - ``dateCreated`` -- add a timestamp for dateCreated?

    **Return:**
        - the insert statement (and value tuple) if ``returnInsertOnly`` is requested, otherwise ``(None, None)``

    **Usage:**

        To add a python dictionary to a database table, creating the table
        and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                          "uniquekey1": "cheese", "uniqueKey2": "burgers"}
            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a list of value tuples,
        i.e. do not execute the command on the database:

        .. code-block:: python

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )
            print insertCommand, valueTuple
            # OUT: 'INSERT IGNORE INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES
            # (%s, %s, %s, %s, %s)', ('cool', 'super cool',
            # '2016-06-21T12:08:59', 'burgers', 'cheese')

        You can also return a list of single insert statements using
        ``batchInserts = False``. Using ``replace = True`` will also add
        instructions about how to replace duplicate entries in the database
        table if found:

        .. code-block:: python

            inserts = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )
            print inserts
            # OUT: INSERT INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1)
            # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese")
            # ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super
            # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers",
            # uniquekey1="cheese"

    NOTE(review): ``uniqueKeyList`` is a mutable default AND its elements are
    rewritten in place below (space/dash substitution, dec->decl) -- the
    caller's list is mutated as a side effect. Confirm callers rely on this
    before changing.
    """
    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    # COMPILE THE DATETIME-DETECTION REGEX ONLY IF THE CALLER DID NOT SUPPLY
    # A PRE-COMPILED ONE
    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if not replace:
        insertVerb = "INSERT"
    else:
        # NOTE: despite the parameter name, ``replace`` uses INSERT IGNORE +
        # ON DUPLICATE KEY UPDATE (built further down), not MySQL REPLACE
        insertVerb = "INSERT IGNORE"

    if returnInsertOnly == False:
        # TEST THE ARGUMENTS
        if str(type(dbConn).__name__) != "Connection":
            message = 'Please use a valid MySQL DB connection.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(dictionary, dict):
            message = 'Please make sure "dictionary" argument is a dict type.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(uniqueKeyList, list):
            message = 'Please make sure "uniqueKeyList" is a list'
            log.critical(message)
            raise TypeError(message)

        for i in uniqueKeyList:
            if i not in dictionary.keys():
                message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
                log.critical(message)
                raise ValueError(message)

        # VALUES MAY BE SCALARS OR 2-ITEM LISTS OF [value, comment]
        for k, v in dictionary.iteritems():
            # log.debug('k: %s, v: %s' % (k, v,))
            if isinstance(v, list) and len(v) != 2:
                message = 'Please make sure the list values in "dictionary" 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if isinstance(v, list):
                if not (isinstance(v[0], str) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], long) or isinstance(v[0], datetime.date) or v[0] == None):
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)
            else:
                if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, long) or isinstance(v, unicode) or isinstance(v, datetime.date) or v == None):
                    this = type(v)
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
                    )
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)

        if not isinstance(createHelperTables, bool):
            message = 'Please make sure "createHelperTables" is a True or False'
            log.critical(message)
            raise TypeError(message)

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST (bookkeeping columns only;
        # data columns are added lazily below as keys are encountered)
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

    qCreateColumn = ''
    formattedKey = ''
    formattedKeyList = []
    myValues = []

    # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this?
    if dateModified:
        dictionary['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        if replace == False:
            dictionary['updated'] = [0, "this row has been updated"]
        else:
            dictionary['updated'] = [1, "this row has been updated"]

    # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE
    # NAME OF THE KEY, IF IT DOES NOT EXIST
    count = len(dictionary)
    i = 1

    # DROP [None, comment] ENTRIES - NOTHING TO INSERT FOR THEM
    for (key, value) in dictionary.items():
        if (isinstance(value, list) and value[0] is None):
            del dictionary[key]

    # SORT THE DICTIONARY BY KEY
    odictionary = c.OrderedDict(sorted(dictionary.items()))
    for (key, value) in odictionary.iteritems():

        # SANITISE KEY FOR USE AS A MYSQL COLUMN NAME
        formattedKey = key.replace(" ", "_").replace("-", "_")
        # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
        if formattedKey == "dec":
            formattedKey = "decl"
        if formattedKey == "DEC":
            formattedKey = "DECL"

        formattedKeyList.extend([formattedKey])
        if len(key) > 0:
            # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
            # STRING TO MYSQL)
            if isinstance(value, list) and (isinstance(value[0], list)):
                value[0] = yaml.dump(value[0])
                value[0] = str(value[0])
            # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
            # if type(value[0]) == str or type(value[0]) == unicode:
            #     value[0] = value[0].replace('"', """'""")
            # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
            # COMMAND LATER
            if isinstance(value, str):
                value = value.replace('\\', '\\\\')
                value = value.replace('"', '\\"')
                try:
                    udata = value.decode("utf-8", "ignore")
                    value = udata.encode("ascii", "ignore")
                except:
                    log.error('cound not decode value %(value)s' % locals())
                # log.debug('udata: %(udata)s' % locals())

            if isinstance(value, unicode):
                value = value.replace('"', '\\"')
                value = value.encode("ascii", "ignore")

            if isinstance(value, list) and isinstance(value[0], unicode):
                myValues.extend(['%s' % value[0].strip()])
            elif isinstance(value, list):
                myValues.extend(['%s' % (value[0], )])
            else:
                myValues.extend(['%s' % (value, )])

            if returnInsertOnly == False:
                # CHECK IF COLUMN EXISTS YET
                colExists = \
                    "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
                    formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
                try:
                    # log.debug('checking if the column '+formattedKey+' exists
                    # in the '+dbTableName+' table')
                    rows = readquery(
                        log=log,
                        sqlQuery=colExists,
                        dbConn=dbConn,
                    )
                except Exception as e:
                    # NOTE(review): if readquery raises on the very first key,
                    # ``rows`` is unbound and the len(rows) below will raise
                    # NameError -- confirm intended
                    log.error('something went wrong' + str(e) + '\n')

                # IF COLUMN DOESN'T EXIT - GENERATE IT
                # (column type is guessed from this row's value)
                if len(rows) == 0:
                    qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % (
                        dbTableName, formattedKey)
                    if not isinstance(value, list):
                        value = [value]
                    if reDatetime.search(str(value[0])):
                        # log.debug('Ok - a datetime string was found')
                        qCreateColumn += '` datetime DEFAULT NULL'
                    elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
                            == 'feedName' or formattedKey == 'title':
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) < 30:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) >= 30 and len(value[0]) < 80:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], str) or isinstance(value[0], unicode):
                        # LONG STRINGS: OVER-PROVISION THE COLUMN WIDTH
                        columnLength = 450 + len(value[0]) * 2
                        qCreateColumn += '` varchar(' + str(
                            columnLength) + ') DEFAULT NULL'
                    elif isinstance(value[0], int) and abs(value[0]) <= 9:
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], int):
                        qCreateColumn += '` int DEFAULT NULL'
                    elif isinstance(value[0], float) or isinstance(value[0], long):
                        qCreateColumn += '` double DEFAULT NULL'
                    elif isinstance(value[0], bool):
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], list):
                        qCreateColumn += '` varchar(1024) DEFAULT NULL'
                    else:
                        # log.debug('Do not know what format to add this key in
                        # MySQL - removing from dictionary: %s, %s'
                        # % (key, type(value[0])))
                        formattedKeyList.pop()
                        myValues.pop()
                        qCreateColumn = None
                    if qCreateColumn:
                        # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
                        # MYSQL
                        # NOTE(review): ``is not`` relies on CPython returning
                        # the same str object from no-op replace() calls --
                        # works in practice but is an identity, not equality,
                        # test
                        if key is not formattedKey:
                            qCreateColumn += " COMMENT 'original keyword: " + \
                                key + """'"""
                        # CREATE THE COLUMN IF IT DOES NOT EXIST
                        try:
                            log.info('creating the ' + formattedKey +
                                     ' column in the ' + dbTableName + ' table')
                            writequery(
                                log=log,
                                sqlQuery=qCreateColumn,
                                dbConn=dbConn
                            )
                        except Exception as e:
                            # log.debug('qCreateColumn: %s' % (qCreateColumn,
                            # ))
                            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName +
                                      ' table -- ' + str(e) + '\n')

    if returnInsertOnly == False:
        # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
        if len(uniqueKeyList):
            for i in range(len(uniqueKeyList)):
                uniqueKeyList[i] = uniqueKeyList[
                    i].replace(" ", "_").replace("-", "_")
                if uniqueKeyList[i] == "dec":
                    uniqueKeyList[i] = "decl"
                if uniqueKeyList[i] == "DEC":
                    uniqueKeyList[i] = "DECL"

            indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
            for i in range(len(uniqueKeyList) - 1):
                indexName += '_' + uniqueKeyList[i + 1]

            indexName = indexName.lower().replace("  ", " ").replace(" ", "_")

            sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
                dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
            rows = readquery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                quiet=False
            )
            exists = rows[0]['COUNT(*)']
            # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
            if exists == 0:
                if isinstance(uniqueKeyList, list):
                    uniqueKeyList = ','.join(uniqueKeyList)
                addUniqueKey = 'ALTER TABLE `' + dbTableName + \
                    '` ADD unique ' + indexName + \
                    """ (""" + uniqueKeyList + ')'
                # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
                writequery(
                    log=log,
                    sqlQuery=addUniqueKey,
                    dbConn=dbConn
                )

    # BATCH MODE: RETURN A PARAMETERISED INSERT + VALUE TUPLE
    if returnInsertOnly == True and batchInserts == True:
        myKeys = '`,`'.join(formattedKeyList)
        valueString = ("%s, " * len(myValues))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        mv = []
        mv[:] = [None if m == "None" else m for m in myValues]
        valueTuple = tuple(mv)

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k, v in zip(formattedKeyList, mv):
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('!!python/unicode:', '')
        insertCommand = insertCommand.replace('!!python/unicode', '')
        insertCommand = insertCommand.replace('"None"', 'null')

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
    myKeys = '`,`'.join(formattedKeyList)
    myValues = '" ,"'.join(myValues)
    # log.debug(myValues+" ------ PRESTRIP")
    # REMOVE SOME CONVERSION NOISE
    myValues = myValues.replace('time.struct_time', '')
    myValues = myValues.replace(
        '- !!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace(
        '!!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace('dictitems:', '')
    myValues = myValues.replace('dictitems', '')
    myValues = myValues.replace('!!python/unicode:', '')
    myValues = myValues.replace('!!python/unicode', '')
    myValues = myValues.replace('"None"', 'null')
    # myValues = myValues.replace('"None', 'null')
    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        # BUILD THE ON DUPLICATE KEY UPDATE CLAUSE; NULLs need ``is null``
        dupValues = ('"' + myValues).split(" ,")
        dupKeys = formattedKeyList
        dup = dup + " ON DUPLICATE KEY UPDATE "
        for k, v in zip(dupKeys, dupValues):
            dup = """%(dup)s `%(k)s`=%(v)s,""" % locals()

        if dateModified:
            # FLIP ``updated`` AND BUMP ``dateLastModified`` ONLY WHEN THE
            # INCOMING ROW ACTUALLY DIFFERS FROM THE STORED ONE
            dup = """%(dup)s updated=IF(""" % locals()
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", 0, 1), dateLastModified=IF("
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", dateLastModified, NOW())"
        else:
            dup = dup[:-1]

    # log.debug(myValues+" ------ POSTSTRIP")
    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + """, NOW()) %(dup)s """ % locals()

    if not dateCreated:
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = addValue.replace('\\""', '\\" "')
    addValue = addValue.replace('""', "null")
    addValue = addValue.replace('!!python/unicode:', '')
    addValue = addValue.replace('!!python/unicode', '')
    addValue = addValue.replace('"None"', 'null')
    # log.debug(addValue)

    if returnInsertOnly == True:
        return addValue

    message = ""
    try:
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))"
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )
    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None
def convert_dictionary_to_mysql_table(
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=[],
        dbConn=False,
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False,
        batchInserts=True,
        reDatetime=False,
        skipChecks=False,
        dateCreated=True):
    """convert dictionary to mysql table

    Python-3-compatible variant of ``convert_dictionary_to_mysql_table``
    (uses ``six.string_types`` and list()-wrapped dict views). Adds the
    key/value pairs of a python dictionary as a row in a MySQL table,
    creating the table and/or any missing columns on the fly, or
    (``returnInsertOnly=True``) builds and returns the INSERT statement
    without touching the database.

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary
        - ``dbConn`` -- the db connection
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that need combined to create the primary key
        - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
        - ``dateModified`` -- add a modification date and updated flag to the mysql table
        - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required)
        - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples
        - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``)
        - ``skipChecks`` -- skip reliability checks. Less robust but a little faster.
        - ``dateCreated`` -- add a timestamp for dateCreated?

    **Return:**
        - the insert statement (and value tuple) if ``returnInsertOnly`` is requested, otherwise ``(None, None)``

    **Usage:**

        To add a python dictionary to a database table, creating the table
        and/or columns if they don't yet exist:

        .. code-block:: python

            from fundamentals.mysql import convert_dictionary_to_mysql_table
            dictionary = {"a newKey": "cool", "and another": "super cool",
                          "uniquekey1": "cheese", "uniqueKey2": "burgers"}
            convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=False,
                replace=True
            )

        Or just return the insert statement with a list of value tuples,
        i.e. do not execute the command on the database:

        .. code-block:: python

            insertCommand, valueTuple = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=False,
                batchInserts=True
            )
            print(insertCommand, valueTuple)
            # OUT: 'INSERT IGNORE INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES
            # (%s, %s, %s, %s, %s)', ('cool', 'super cool',
            # '2016-06-21T12:08:59', 'burgers', 'cheese')

        You can also return a list of single insert statements using
        ``batchInserts = False``. Using ``replace = True`` will also add
        instructions about how to replace duplicate entries in the database
        table if found:

        .. code-block:: python

            inserts = convert_dictionary_to_mysql_table(
                dbConn=dbConn,
                log=log,
                dictionary=dictionary,
                dbTableName="testing_table",
                uniqueKeyList=["uniquekey1", "uniqueKey2"],
                dateModified=False,
                returnInsertOnly=True,
                replace=True,
                batchInserts=False
            )
            print(inserts)
            # OUT: INSERT INTO `testing_table`
            # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1)
            # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese")
            # ON DUPLICATE KEY UPDATE  a_newKey="cool", and_another="super
            # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers",
            # uniquekey1="cheese"

    NOTE(review): ``uniqueKeyList`` is a mutable default AND its elements are
    rewritten in place below (space/dash substitution, dec->decl) -- the
    caller's list is mutated as a side effect. Confirm callers rely on this
    before changing.
    """
    log.debug('starting the ``convert_dictionary_to_mysql_table`` function')

    # COMPILE THE DATETIME-DETECTION REGEX ONLY IF THE CALLER DID NOT SUPPLY
    # A PRE-COMPILED ONE
    if not reDatetime:
        reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    if not replace:
        insertVerb = "INSERT"
    else:
        # NOTE: despite the parameter name, ``replace`` uses INSERT IGNORE +
        # ON DUPLICATE KEY UPDATE (built further down), not MySQL REPLACE
        insertVerb = "INSERT IGNORE"

    if returnInsertOnly == False:
        # TEST THE ARGUMENTS
        if str(type(dbConn).__name__) != "Connection":
            message = 'Please use a valid MySQL DB connection.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(dictionary, dict):
            message = 'Please make sure "dictionary" argument is a dict type.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(uniqueKeyList, list):
            message = 'Please make sure "uniqueKeyList" is a list'
            log.critical(message)
            raise TypeError(message)

        for i in uniqueKeyList:
            if i not in list(dictionary.keys()):
                message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
                log.critical(message)
                raise ValueError(message)

        # VALUES MAY BE SCALARS OR 2-ITEM LISTS OF [value, comment]
        for k, v in list(dictionary.items()):
            # log.debug('k: %s, v: %s' % (k, v,))
            if isinstance(v, list) and len(v) != 2:
                message = 'Please make sure the list values in "dictionary" 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if isinstance(v, list):
                if not (isinstance(v[0], six.string_types) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], int) or isinstance(v[0], datetime.date) or v[0] == None):
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)
            else:
                if not (isinstance(v, six.string_types) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, datetime.date) or v == None or "int" in str(type(v))):
                    this = type(v)
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
                    )
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)

        if not isinstance(createHelperTables, bool):
            message = 'Please make sure "createHelperTables" is a True or False'
            log.critical(message)
            raise TypeError(message)

        # TEST IF TABLE EXISTS
        if not skipChecks:
            tableExists = table_exists.table_exists(
                dbConn=dbConn,
                log=log,
                dbTableName=dbTableName
            )
        else:
            tableExists = False

        # CREATE THE TABLE IF IT DOES NOT EXIST (bookkeeping columns only;
        # data columns are added lazily below as keys are encountered)
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
                (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
                `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
                `updated` tinyint(4) DEFAULT '0',
                PRIMARY KEY (`primaryId`))
                ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % locals()
            writequery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
            )

    qCreateColumn = ''
    formattedKey = ''
    formattedKeyList = []
    myValues = []

    # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this?
    if dateModified:
        dictionary['dateLastModified'] = [
            str(times.get_now_sql_datetime()), "date row was modified"]
        if replace == False:
            dictionary['updated'] = [0, "this row has been updated"]
        else:
            dictionary['updated'] = [1, "this row has been updated"]

    # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE
    # NAME OF THE KEY, IF IT DOES NOT EXIST
    count = len(dictionary)
    i = 1

    # DROP [None, comment] ENTRIES - NOTHING TO INSERT FOR THEM
    for (key, value) in list(dictionary.items()):
        if (isinstance(value, list) and value[0] is None):
            del dictionary[key]

    # SORT THE DICTIONARY BY KEY
    odictionary = c.OrderedDict(sorted(dictionary.items()))
    for (key, value) in list(odictionary.items()):

        # SANITISE KEY FOR USE AS A MYSQL COLUMN NAME
        formattedKey = key.replace(" ", "_").replace("-", "_")
        # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
        if formattedKey == u"dec":
            formattedKey = u"decl"
        if formattedKey == u"DEC":
            formattedKey = u"DECL"

        formattedKeyList.extend([formattedKey])
        if len(key) > 0:
            # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
            # STRING TO MYSQL)
            if isinstance(value, list) and (isinstance(value[0], list)):
                value[0] = yaml.dump(value[0])
                value[0] = str(value[0])
            # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
            # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
            # COMMAND LATER
            if isinstance(value, str):
                value = value.replace('\\', '\\\\')
                value = value.replace('"', '\\"')
                # NOTE(review): str has no ``decode`` on py3 so this always
                # raises and falls through to pass -- effectively a no-op kept
                # for py2 compatibility
                try:
                    udata = value.decode("utf-8", "ignore")
                    value = udata.encode("ascii", "ignore")
                except:
                    pass
                # log.debug('udata: %(udata)s' % locals())

            if isinstance(value, list) and isinstance(value[0], str):
                myValues.extend(['%s' % value[0].strip()])
            elif isinstance(value, list):
                myValues.extend(['%s' % (value[0], )])
            else:
                myValues.extend(['%s' % (value, )])

            if returnInsertOnly == False:
                # CHECK IF COLUMN EXISTS YET
                colExists = \
                    "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
                    formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
                try:
                    # log.debug('checking if the column '+formattedKey+' exists
                    # in the '+dbTableName+' table')
                    rows = readquery(
                        log=log,
                        sqlQuery=colExists,
                        dbConn=dbConn,
                    )
                except Exception as e:
                    # NOTE(review): if readquery raises on the very first key,
                    # ``rows`` is unbound and the len(rows) below will raise
                    # NameError -- confirm intended
                    log.error('something went wrong' + str(e) + '\n')

                # IF COLUMN DOESN'T EXIT - GENERATE IT
                # (column type is guessed from this row's value)
                if len(rows) == 0:
                    qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % (
                        dbTableName, formattedKey)
                    if not isinstance(value, list):
                        value = [value]
                    if reDatetime.search(str(value[0])):
                        # log.debug('Ok - a datetime string was found')
                        qCreateColumn += '` datetime DEFAULT NULL'
                    elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
                            == 'feedName' or formattedKey == 'title':
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) < 30:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) >= 30 and len(value[0]) < 80:
                        qCreateColumn += '` varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], ("".__class__, u"".__class__)):
                        # LONG STRINGS: OVER-PROVISION THE COLUMN WIDTH
                        columnLength = 450 + len(value[0]) * 2
                        qCreateColumn += '` varchar(' + str(
                            columnLength) + ') DEFAULT NULL'
                    elif isinstance(value[0], int) and abs(value[0]) <= 9:
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], int):
                        qCreateColumn += '` int DEFAULT NULL'
                    elif isinstance(value[0], float) or isinstance(value[0], int):
                        qCreateColumn += '` double DEFAULT NULL'
                    elif isinstance(value[0], bool):
                        qCreateColumn += '` tinyint DEFAULT NULL'
                    elif isinstance(value[0], list):
                        qCreateColumn += '` varchar(1024) DEFAULT NULL'
                    else:
                        # log.debug('Do not know what format to add this key in
                        # MySQL - removing from dictionary: %s, %s'
                        # % (key, type(value[0])))
                        formattedKeyList.pop()
                        myValues.pop()
                        qCreateColumn = None
                    if qCreateColumn:
                        # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
                        # MYSQL
                        # NOTE(review): ``is not`` relies on CPython returning
                        # the same str object from no-op replace() calls --
                        # works in practice but is an identity, not equality,
                        # test
                        if key is not formattedKey:
                            qCreateColumn += " COMMENT 'original keyword: " + \
                                key + """'"""
                        # CREATE THE COLUMN IF IT DOES NOT EXIST
                        try:
                            log.info('creating the ' + formattedKey +
                                     ' column in the ' + dbTableName + ' table')
                            writequery(
                                log=log,
                                sqlQuery=qCreateColumn,
                                dbConn=dbConn
                            )
                        except Exception as e:
                            # log.debug('qCreateColumn: %s' % (qCreateColumn,
                            # ))
                            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName +
                                      ' table -- ' + str(e) + '\n')

    if returnInsertOnly == False:
        # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
        if len(uniqueKeyList):
            for i in range(len(uniqueKeyList)):
                uniqueKeyList[i] = uniqueKeyList[
                    i].replace(" ", "_").replace("-", "_")
                if uniqueKeyList[i] == u"dec":
                    uniqueKeyList[i] = u"decl"
                if uniqueKeyList[i] == u"DEC":
                    uniqueKeyList[i] = u"DECL"

            indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
            for i in range(len(uniqueKeyList) - 1):
                indexName += '_' + uniqueKeyList[i + 1]

            indexName = indexName.lower().replace("  ", " ").replace(" ", "_")

            sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
                dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
            rows = readquery(
                log=log,
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                quiet=False
            )
            exists = rows[0]['COUNT(*)']
            # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
            if exists == 0:
                if isinstance(uniqueKeyList, list):
                    uniqueKeyList = ','.join(uniqueKeyList)
                addUniqueKey = 'ALTER TABLE `' + dbTableName + \
                    '` ADD unique ' + indexName + \
                    """ (""" + uniqueKeyList + ')'
                # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
                writequery(
                    log=log,
                    sqlQuery=addUniqueKey,
                    dbConn=dbConn
                )

    # BATCH MODE: RETURN A PARAMETERISED INSERT + VALUE TUPLE
    if returnInsertOnly == True and batchInserts == True:
        myKeys = '`,`'.join(formattedKeyList)
        valueString = ("%s, " * len(myValues))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
            valueString + """, NOW())"""
        mv = []
        mv[:] = [None if m == u"None" else m for m in myValues]
        valueTuple = tuple(mv)

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k, v in zip(formattedKeyList, mv):
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()

        insertCommand = insertCommand + dup

        insertCommand = insertCommand.replace('\\""', '\\" "')
        insertCommand = insertCommand.replace('""', "null")
        insertCommand = insertCommand.replace('!!python/unicode:', '')
        insertCommand = insertCommand.replace('!!python/unicode', '')
        insertCommand = insertCommand.replace('"None"', 'null')
        insertCommand = insertCommand.replace('"null"', 'null')

        if not dateCreated:
            insertCommand = insertCommand.replace(
                ", dateCreated)", ")").replace(", NOW())", ")")

        return insertCommand, valueTuple

    # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
    myKeys = '`,`'.join(formattedKeyList)
    myValues = '" ,"'.join(myValues)
    # log.debug(myValues+" ------ PRESTRIP")
    # REMOVE SOME CONVERSION NOISE
    myValues = myValues.replace('time.struct_time', '')
    myValues = myValues.replace(
        '- !!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace(
        '!!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace('dictitems:', '')
    myValues = myValues.replace('dictitems', '')
    myValues = myValues.replace('!!python/unicode:', '')
    myValues = myValues.replace('!!python/unicode', '')
    myValues = myValues.replace('"None"', 'null')
    myValues = myValues.replace('"null"', 'null')
    # myValues = myValues.replace('"None', 'null')
    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        # BUILD THE ON DUPLICATE KEY UPDATE CLAUSE; NULLs need ``is null``
        dupValues = ('"' + myValues).split(" ,")
        dupKeys = formattedKeyList
        dup = dup + " ON DUPLICATE KEY UPDATE "
        for k, v in zip(dupKeys, dupValues):
            dup = """%(dup)s `%(k)s`=%(v)s,""" % locals()

        if dateModified:
            # FLIP ``updated`` AND BUMP ``dateLastModified`` ONLY WHEN THE
            # INCOMING ROW ACTUALLY DIFFERS FROM THE STORED ONE
            dup = """%(dup)s updated=IF(""" % locals()
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", 0, 1), dateLastModified=IF("
            for k, v in zip(dupKeys, dupValues):
                if v == "null":
                    dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
                else:
                    dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
            dup = dup[:-5] + ", dateLastModified, NOW())"
        else:
            dup = dup[:-1]

    # log.debug(myValues+" ------ POSTSTRIP")
    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
        myValues + """, NOW()) %(dup)s """ % locals()

    if not dateCreated:
        addValue = addValue.replace(
            ", dateCreated)", ")").replace(", NOW())", ")", 1)

    addValue = addValue.replace('\\""', '\\" "')
    addValue = addValue.replace('""', "null")
    addValue = addValue.replace('!!python/unicode:', '')
    addValue = addValue.replace('!!python/unicode', '')
    addValue = addValue.replace('"None"', 'null')
    addValue = addValue.replace('"null"', 'null')
    # log.debug(addValue)

    if returnInsertOnly == True:
        return addValue

    message = ""
    try:
        # log.debug('adding new data to the %s table; query: %s' %
        # (dbTableName, addValue))"
        writequery(
            log=log,
            sqlQuery=addValue,
            dbConn=dbConn
        )
    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
    return None, None
def convert_sqlite_to_mysql(
        self):
    """*copy the contents of the sqlite database into the mysql database*

    Every user table found in the sqlite database (``sqlite_sequence`` is
    skipped) has its sqlite CREATE statement rewritten as MySQL-compatible
    DDL, the table is created in MySQL (name prefixed with
    ``self.tablePrefix``) and all rows are bulk-inserted.

    See class docstring for usage
    """
    from fundamentals.renderer import list_of_dictionaries
    from fundamentals.mysql import directory_script_runner
    self.log.debug('starting the ``convert_sqlite_to_mysql`` method')

    # NOTE(review): `lite` looks like the sqlite3 module imported at file
    # level -- TODO confirm
    con = lite.connect(self.pathToSqlite)
    # return rows as sqlite3.Row objects so they can be cast to dicts below
    con.row_factory = lite.Row
    cur = con.cursor()

    # GET ALL TABLE NAMES
    cur.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cur.fetchall()

    createStatements = []
    inserts = []
    for table in tables:
        table = table['name']
        # sqlite's internal AUTOINCREMENT bookkeeping table -- never copied
        if table == "sqlite_sequence":
            continue

        # CREATE TABLE collection_books (folder_id, fingerprint, primary key(folder_id, fingerprint));
        # GENEREATE THE MYSQL CREATE STATEMENTS FOR EACH TABLE
        cur.execute(
            "SELECT sql FROM sqlite_master WHERE name = '%(table)s';" % locals())
        createStatement = cur.fetchone()
        # sqlite quotes identifiers with ", MySQL uses backticks
        createStatement = createStatement[0].replace('"', '`') + ";"
        # sqlite allows type-less columns; give those a concrete MySQL type.
        # Only done when no DEFAULT appears, i.e. the bare column-list form.
        if "DEFAULT" not in createStatement:
            if "primary key(" in createStatement:
                tmp = createStatement.split("primary key(")
                tmp[0] = tmp[0].replace(
                    ",", " varchar(150) DEFAULT NULL,")
                createStatement = ("primary key(").join(tmp)
            if "primary key," in createStatement:
                tmp = createStatement.split("primary key,")
                tmp[1] = tmp[1].replace(
                    ",", " varchar(150) DEFAULT NULL,")
                tmp[1] = tmp[1].replace(
                    ");", " varchar(150) DEFAULT NULL);")
                createStatement = ("primary key,").join(tmp)
        # TRANSLATE SQLITE DIALECT TO MYSQL DIALECT
        createStatement = createStatement.replace(
            "INTEGER PRIMARY KEY", "INTEGER AUTO_INCREMENT PRIMARY KEY")
        createStatement = createStatement.replace(
            "AUTOINCREMENT", "AUTO_INCREMENT")
        # sqlite stores booleans as 't'/'f'; MySQL gets '1'/'0'
        createStatement = createStatement.replace(
            "DEFAULT 't'", "DEFAULT '1'")
        createStatement = createStatement.replace(
            "DEFAULT 'f'", "DEFAULT '0'")
        createStatement = createStatement.replace(",'t'", ",'1'")
        createStatement = createStatement.replace(",'f'", ",'0'")
        # PREFIX THE TABLE NAME AND MAKE THE CREATE IDEMPOTENT
        if "CREATE TABLE `" in createStatement:
            createStatement = createStatement.replace(
                "CREATE TABLE `", "CREATE TABLE IF NOT EXISTS `" + self.tablePrefix)
        else:
            createStatement = createStatement.replace(
                "CREATE TABLE ", "CREATE TABLE IF NOT EXISTS " + self.tablePrefix)
        # ADD THE STANDARD BOOKKEEPING COLUMNS USED BY THE INSERT HELPERS
        if ", primary key(" in createStatement:
            createStatement = createStatement.replace(", primary key(", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0',
primary key(""")
        else:
            createStatement = createStatement.replace(");", """,
`dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
`dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
`updated` tinyint(4) DEFAULT '0');
""")
        # TEXT cannot be a MySQL primary key; cap at varchar(100)
        createStatement = createStatement.replace(
            " text primary key", " varchar(100) primary key")
        # per-table fix-ups for known source schemas (presumably a specific
        # upstream sqlite app -- TODO confirm which)
        createStatement = createStatement.replace(
            "`EntryText` TEXT NOT NULL,", "`EntryText` TEXT,")
        createStatement = createStatement.replace(
            "`SelectionText` TEXT NOT NULL", "`SelectionText` TEXT")
        createStatement = createStatement.replace(
            "`Filename` INTEGER NOT NULL,", "`Filename` TEXT NOT NULL,")
        createStatement = createStatement.replace(
            "`SessionPartUUID` TEXT NOT NULL UNIQUE,", "`SessionPartUUID` VARCHAR(100) NOT NULL UNIQUE,")
        createStatement = createStatement.replace(
            "`Name` TEXT PRIMARY KEY NOT NULL", "`Name` VARCHAR(100) PRIMARY KEY NOT NULL")
        # MySQL requires a length on VARCHAR
        createStatement = createStatement.replace(
            " VARCHAR ", " VARCHAR(100) ")
        createStatement = createStatement.replace(
            " VARCHAR,", " VARCHAR(100),")

        # GRAB THE DATA TO ADD TO THE MYSQL DATABASE TABLES
        cur.execute(
            "SELECT * from '%(table)s';" % locals())
        rows = cur.fetchall()
        allRows = []
        for row in rows:
            allRows.append(dict(row))

        # RECURSIVELY CREATE MISSING DIRECTORIES
        if not os.path.exists("/tmp/headjack/"):
            os.makedirs("/tmp/headjack/")

        # CREATE THE TABLE IN THE MYSQL DATABASE
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.dbConn,
        )

        from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables
        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.dbConn,
            log=self.log,
            dictList=allRows,
            dbTableName=self.tablePrefix + table,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"]
        )

        # # DUMP THE DATA INTO A MYSQL DATABASE
        # dataSet = list_of_dictionaries(
        #     log=self.log,
        #     listOfDictionaries=allRows
        # )
        # originalList = dataSet.list
        # now = datetime.now()
        # now = now.strftime("%Y%m%dt%H%M%S%f.sql")
        # mysqlData = dataSet.mysql(
        #     tableName=self.tablePrefix + table, filepath="/tmp/headjack/" +
        #     now, createStatement=createStatement)

        # directory_script_runner(
        #     log=self.log,
        #     pathToScriptDirectory="/tmp/headjack/",
        #     databaseName=self.settings["database settings"]["db"],
        #     loginPath=self.settings["database settings"]["loginPath"],
        #     successRule="delete",
        #     failureRule="failed"
        # )

    con.close()

    self.log.debug('completed the ``convert_sqlite_to_mysql`` method')
    return None
def generate_atlas_lightcurves(
        dbConn,
        log,
        settings):
    """*Generate lightcurve plots for ATLAS forced-photometry sources that need updating*

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- settings for the marshall.

    **Return**

    - None

    Sources are selected from ``pesstoObjects`` whenever new 'ATLAS FP'
    rows have appeared in ``transientBucket`` since the last plot was made
    (``atlas_fp_lightcurve`` timestamp is stale or null). For each source a
    plot is written into its transient cache directory and the timestamp is
    reset.
    """
    log.debug('starting the ``generate_atlas_lightcurves`` function')

    # SELECT OUT THE SOURCES THAT NEED THEIR LCS UPDATED
    # NOTE: many SQL strings below are rendered with `% locals()`, so the
    # surrounding local variable names are load-bearing -- do not rename.
    sqlQuery = u"""
        SELECT a.transientBucketId FROM (SELECT transientBucketId, dateCreated FROM transientBucket WHERE survey = 'ATLAS FP' and limitingMag = 0 ORDER BY dateCreated DESC) a, pesstoObjects p where p.transientBucketId=a.transientBucketId and ((p.atlas_fp_lightcurve < a.dateCreated) or p.atlas_fp_lightcurve is null) GROUP BY a.transientBucketId;
    """ % locals()
    rows = readquery(
        log=log,
        sqlQuery=sqlQuery,
        dbConn=dbConn
    )

    total = len(rows)
    print("Generating ATLAS lightcurves for %(total)s sources" % locals())
    index = 1
    for row in rows:
        # SELECT OUT THE LIGHT CURVE DATA FOR A GIVEN ATLAS TRANSIENT
        transientBucketId = row["transientBucketId"]
        if index > 1:
            # Cursor up one line and clear line (in-place progress display)
            sys.stdout.write("\x1b[1A\x1b[2K")
        # old_div: true division helper, presumably from past.utils (py2/3
        # compatibility shim) -- TODO confirm import at file top
        percent = (old_div(float(index), float(total))) * 100.
        print('%(index)s/%(total)s (%(percent)1.1f%% done): generating ATLAS LC for transientBucketId: %(transientBucketId)s' % locals())
        index += 1
        # pull the forced-photometry epochs for every ATLAS name linked to
        # this transientBucketId (skyfit > 0 filters bad fits)
        sqlQuery = u"""
            SELECT atlas_designation, mjd_obs, filter, marshall_mag as mag, marshall_mag_error as dm, fnu*1e27 as fnu, fnu_error*1e27 as fnu_error, snr, zp, marshall_limiting_mag as limiting_mag FROM fs_atlas_forced_phot WHERE (skyfit > 0) and atlas_designation in (SELECT distinct name FROM transientBucket WHERE survey = 'ATLAS FP' AND transientBucketId = %(transientBucketId)s AND dateDeleted IS NULL) and fnu is not null;
        """ % locals()
        epochs = readquery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

        # FIND THE CACHE DIR FOR THE SOURCE
        cacheDirectory = settings["downloads"][
            "transient cache directory"] + "/" + str(transientBucketId)

        # CREATE THE PLOT FOR THIS ONE ATLAS SOURCE
        create_lc(
            log=log,
            cacheDirectory=cacheDirectory,
            epochs=epochs
        )

        # UPDATE THE OBJECTS FLAG (timestamp marks the LC as freshly built)
        sqlQuery = """update pesstoObjects set atlas_fp_lightcurve = NOW() where transientBucketID = %(transientBucketId)s """ % locals()
        writequery(
            log=log,
            sqlQuery=sqlQuery,
            dbConn=dbConn
        )

    log.debug('completed the ``generate_atlas_lightcurves`` function')
    return None
def add_htm_ids_to_mysql_database_table( raColName, declColName, tableName, dbConn, log, primaryIdColumnName="primaryId", cartesian=False, batchSize=25000, reindex=False): """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table* **Key Arguments:** - ``raColName`` -- ra in sexegesimal - ``declColName`` -- dec in sexegesimal - ``tableName`` -- name of table to add htmid info to - ``dbConn`` -- database hosting the above table - ``log`` -- logger - ``primaryIdColumnName`` -- the primary id for the table - ``cartesian`` -- add cartesian columns. Default *False* - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *2500* - ``reindex`` -- reindex the entire table **Return:** - None **Usage:** .. code-block:: python from HMpTy.mysql import add_htm_ids_to_mysql_database_table add_htm_ids_to_mysql_database_table( raColName="raDeg", declColName="decDeg", tableName="my_big_star_table", dbConn=dbConn, log=log, primaryIdColumnName="primaryId", reindex=False ) """ log.info('starting the ``add_htm_ids_to_mysql_database_table`` function') # TEST TABLE EXIST sqlQuery = """show tables""" rows = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) log.debug( """Checking the table %(tableName)s exists in the database""" % locals()) tableList = [] for row in rows: tableList.extend(row.values()) if tableName not in tableList: message = "The %s table does not exist in the database" % (tableName,) log.critical(message) raise IOError(message) log.debug( """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals()) # TEST COLUMNS EXISTS cursor = dbConn.cursor(ms.cursors.DictCursor) sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,) cursor.execute(sqlQuery) rows = cursor.fetchall() desc = cursor.description existingColumns = [] for i in range(len(desc)): existingColumns.append(desc[i][0]) if (raColName not in existingColumns) or (declColName 
not in existingColumns): message = 'Please make sure you have got the naes of the RA and DEC columns correct' log.critical(message) raise IOError(message) if cartesian: # ACTION(S) ## htmCols = { 'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT', 'cx': 'DOUBLE', 'cy': 'DOUBLE', 'cz': 'DOUBLE' } else: htmCols = { 'htm16ID': 'BIGINT(20)', 'htm13ID': 'INT', 'htm10ID': 'INT' } # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM for key in htmCols.keys(): try: log.debug( 'attempting to check and generate the HTMId columns for the %s db table' % (tableName, )) colExists = \ """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \ % (key, tableName) colExists = readquery( log=log, sqlQuery=colExists, dbConn=dbConn ) switch = 0 if not colExists: if switch == 0: print "Adding the HTMCircle columns to %(tableName)s" % locals() switch = 1 sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \ key + ' ' + htmCols[key] + ' DEFAULT NULL' writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) except Exception as e: log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' % (tableName, str(e))) raise e log.debug( """Counting the number of rows still requiring HTMID information""" % locals()) if reindex: sqlQuery = u""" SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s"; """ % locals() keyname = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn )[0]["INDEX_NAME"] if keyname != "PRIMARY": log.error('To reindex the entire table the primaryID you submit must be unique. 
"%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals()) return sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals( ) elif cartesian: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null""" % locals( ) else: # COUNT ROWS WHERE HTMIDs ARE NOT SET sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL""" % locals( ) log.debug( """SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals()) rowCount = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, quiet=False ) totalCount = rowCount[0]["count"] # ADD HTMIDs IN BATCHES total = totalCount batches = int(total / batchSize) count = 0 lastId = False # NOW GENERATE THE HTMLIds FOR THESE ROWS for i in range(batches + 1): if total == 0: continue count += batchSize if count > batchSize: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount start = time.time() log.debug( """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals()) if reindex: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET if lastId: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize) else: sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize) elif cartesian: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and ((htm10ID is NULL or cx is null)) limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) 
else: # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` > 0 and htm10ID is NULL limit %s""" % ( primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) batch = readquery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) if reindex and len(batch): lastId = batch[-1][primaryIdColumnName] log.debug( """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals()) raList = [] decList = [] pIdList = [] raList[:] = [r[raColName] for r in batch] decList[:] = [r[declColName] for r in batch] pIdList[:] = [r[primaryIdColumnName] for r in batch] from HMpTy import htm mesh16 = htm.HTM(16) mesh13 = htm.HTM(13) mesh10 = htm.HTM(10) log.debug( 'calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) htm16Ids = mesh16.lookup_id(raList, decList) htm13Ids = mesh13.lookup_id(raList, decList) htm10Ids = mesh10.lookup_id(raList, decList) log.debug( 'finshed calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) if cartesian: log.debug( 'calculating cartesian coordinates for batch of %s rows in %s db table' % (batchSize, tableName, )) cx = [] cy = [] cz = [] for r, d in zip(raList, decList): r = math.radians(r) d = math.radians(d) cos_dec = math.cos(d) cx.append(math.cos(r) * cos_dec) cy.append(math.sin(r) * cos_dec) cz.append(math.sin(d)) sqlQuery = "" for h16, h13, h10, pid, cxx, cyy, czz in zip(htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz): sqlQuery += \ """UPDATE `%s` SET htm16ID=%s, htm13ID=%s, htm10ID=%s, cx=%s, cy=%s, cz=%s where `%s` = '%s';\n""" \ % ( tableName, h16, h13, h10, cxx, cyy, czz, primaryIdColumnName, pid ) log.debug( 'finished calculating cartesian coordinates for batch of %s rows in %s db table' % ( batchSize, tableName, )) else: log.debug('building the sqlquery') updates = [] updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s 
where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16, h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)] sqlQuery = "\n".join(updates) log.debug('finshed building the sqlquery') if len(sqlQuery): log.debug( 'starting to update the HTMIds for new objects in the %s db table' % (tableName, )) writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn, ) log.debug( 'finished updating the HTMIds for new objects in the %s db table' % (tableName, )) else: log.debug( 'no HTMIds to add to the %s db table' % (tableName, )) percent = float(count) * 100. / float(totalCount) print "%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals() end = time.time() timediff = end - start timediff = timediff * 1000000. / float(batchSize) print "Update speed: %(timediff)0.2fs/1e6 rows\n" % locals() # APPLY INDEXES IF NEEDED sqlQuery = "" for index in ["htm10ID", "htm13ID", "htm16ID"]: log.debug('adding %(index)s index to %(tableName)s' % locals()) iname = "idx_" + index asqlQuery = u""" SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s'; """ % locals() count = readquery( log=log, sqlQuery=asqlQuery, dbConn=dbConn )[0]["IndexIsThere"] if count == 0: if not len(sqlQuery): sqlQuery += u""" ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC) """ % locals() else: sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals() if len(sqlQuery): writequery( log=log, sqlQuery=sqlQuery + ";", dbConn=dbConn, ) log.debug('finished adding indexes to %(tableName)s' % locals()) if reindex: print "Re-enabling keys within the '%(tableName)s' table" % locals() sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals() writequery( log=log, sqlQuery=sqlQuery, dbConn=dbConn ) print "All HTMIds added to %(tableName)s" % locals() log.info('completed the ``add_htm_ids_to_mysql_database_table`` function') return None
def insert_list_of_dictionaries_into_database_tables(dbConn, log, dictList, dbTableName, uniqueKeyList=[], dateModified=False, batchSize=2500, replace=False): """insert list of dictionaries into database tables **Key Arguments:** - ``dbConn`` -- mysql database connection - ``log`` -- logger - ``dictList`` -- list of python dictionaries to add to the database table - ``dbTableName`` -- name of the database table - ``uniqueKeyList`` -- a list of column names to append as a unique constraint on the database - ``dateModified`` -- add the modification date as a column in the database - ``batchSize`` -- batch the insert commands into *batchSize* batches - ``replace`` -- repalce row if a duplicate is found **Return:** - None **Usage:** .. code-block:: python from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables insert_list_of_dictionaries_into_database_tables( dbConn=dbConn, log=log, dictList=dictList, dbTableName="test_insert_many", uniqueKeyList=["col1", "col3"], dateModified=False, batchSize=2500 ) """ log.info( 'starting the ``insert_list_of_dictionaries_into_database_tables`` function' ) if len(dictList) == 0: log.warning('the dictionary to be added to the database is empty' % locals()) return None if len(dictList): convert_dictionary_to_mysql_table(dbConn=dbConn, log=log, dictionary=dictList[0], dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, replace=replace) total = len(dictList[1:]) batches = int(total / batchSize) start = 0 end = 0 theseBatches = [] for i in range(batches + 1): end = end + batchSize start = i * batchSize thisBatch = dictList[start:end] theseBatches.append(thisBatch) totalCount = total count = 0 for batch in theseBatches: count += len(batch) if count > batchSize: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount print "%(count)s / %(totalCount)s rows inserted into %(dbTableName)s" % locals( ) inserted = False while inserted == 
False: theseInserts = [] for aDict in batch: insertCommand, valueTuple = convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=aDict, dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, returnInsertOnly=True, replace=replace) theseInserts.append(valueTuple) message = "" # log.debug('adding new data to the %s table; query: %s' % # (dbTableName, addValue)) message = writequery(log=log, sqlQuery=insertCommand, dbConn=dbConn, Force=True, manyValueList=theseInserts) if message == "unknown column": sys.exit(0) for aDict in batch: convert_dictionary_to_mysql_table( dbConn=dbConn, log=log, dictionary=aDict, dbTableName=dbTableName, uniqueKeyList=uniqueKeyList, dateModified=dateModified, replace=replace) else: inserted = True log.info( 'completed the ``insert_list_of_dictionaries_into_database_tables`` function' ) return None