def clean_up_columns( self): """clean up columns of the NED table """ self.log.info('starting the ``clean_up_columns`` method') tableName = self.dbTableName print "cleaning up %(tableName)s columns" % locals() sqlQuery = u""" update %(tableName)s set dist_mod_err = null where dist_mod_err = 0; update %(tableName)s set dist_in_ned_flag = null where dist_in_ned_flag = ""; update %(tableName)s set notes = null where notes = ""; update %(tableName)s set redshift = null where redshift = 0; update %(tableName)s set dist_derived_from_sn = null where dist_derived_from_sn = ""; update %(tableName)s set hubble_const = null where hubble_const = 0; update %(tableName)s set lmc_mod = null where lmc_mod = 0; update %(tableName)s set master_row = 0; update %(tableName)s set master_row = 1 where primaryId in (select * from (select distinct primaryId from %(tableName)s group by galaxy_index_id) as alias); """ % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) self.log.info('completed the ``clean_up_columns`` method') return None
def _update_database_helper_table(
        self):
    """stamp this catalogue's row in the helper table with the current time

    **Return:**
        - None
    """
    self.log.info('starting the ``_update_database_helper_table`` method')

    # RECORD WHEN THIS CATALOGUE TABLE WAS LAST REFRESHED
    sqlQuery = u"""
        update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s";
    """ % {"tableName": self.dbTableName}

    dms.execute_mysql_write_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    self.log.info('completed the ``_update_database_helper_table`` method')
    return None
def create_the_master_view( self): """create the master view """ self.log.info('starting the ``create_the_master_view`` method') tableName = self.dbTableName print "creating view for %(tableName)s table" % locals() sqlQuery = u""" DROP VIEW IF EXISTS `view_%(tableName)s_master_recorders`; """ % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) sqlQuery = u""" CREATE VIEW `view_%(tableName)s_master_recorders` AS (SELECT `%(tableName)s`.`primary_ned_id` AS `primary_ned_id`, `%(tableName)s`.`object_type` AS `object_type`, `%(tableName)s`.`raDeg` AS `raDeg`, `%(tableName)s`.`decDeg` AS `decDeg`, `%(tableName)s`.`dist_mpc` AS `dist_mpc`, `%(tableName)s`.`dist_mod` AS `dist_mod`, `%(tableName)s`.`dist_mod_err` AS `dist_mod_err`, `%(tableName)s`.`Method` AS `dist_measurement_method`, `%(tableName)s`.`redshift` AS `redshift`, `%(tableName)s`.`redshift_err` AS `redshift_err`, `%(tableName)s`.`redshift_quality` AS `redshift_quality`, `%(tableName)s`.`major_diameter_arcmin` AS `major_diameter_arcmin`, `%(tableName)s`.`minor_diameter_arcmin` AS `minor_diameter_arcmin`, `%(tableName)s`.`magnitude_filter` AS `magnitude_filter`, `%(tableName)s`.`eb_v` AS `gal_eb_v`, `%(tableName)s`.`hierarchy` AS `hierarchy`, `%(tableName)s`.`morphology` AS `morphology`, `%(tableName)s`.`radio_morphology` AS `radio_morphology`, `%(tableName)s`.`activity_type` AS `activity_type`, `%(tableName)s`.`ned_notes` AS `ned_notes`, `%(tableName)s`.`in_ned` AS `in_ned`, `%(tableName)s`.`primaryId` AS `primaryId` FROM `%(tableName)s` WHERE (`%(tableName)s`.`master_row` = 1)) """ % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) self.log.info('completed the ``create_the_master_view`` method') return None
def _create_crossmatch_table_if_not_existing(
        self):
    """ create the ``tcs_cross_matches`` table in the transients database if it does not yet exist

    **Return:**
        - None
    """
    self.log.info(
        'starting the ``_create_crossmatch_table_if_not_existing`` method')

    sqlQuery = u"""
        CREATE TABLE `tcs_cross_matches` (
          `transient_object_id` bigint(20) unsigned NOT NULL,
          `catalogue_object_id` varchar(30) COLLATE utf8_unicode_ci NOT NULL,
          `catalogue_table_id` smallint(5) unsigned NOT NULL,
          `separation` double DEFAULT NULL,
          `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
          `z` double DEFAULT NULL,
          `scale` double DEFAULT NULL,
          `distance` double DEFAULT NULL,
          `distance_modulus` double DEFAULT NULL,
          `association_type` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `date_added` datetime DEFAULT NULL,
          `physical_separation_kpc` double DEFAULT NULL,
          `catalogue_object_type` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `catalogue_object_subtype` varchar(45) COLLATE utf8_unicode_ci DEFAULT NULL,
          `association_rank` int(11) DEFAULT NULL,
          `catalogue_table_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `catalogue_view_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `rank` int(11) DEFAULT NULL,
          `search_name` varchar(100) COLLATE utf8_unicode_ci DEFAULT NULL,
          `major_axis_arcsec` double DEFAULT NULL,
          `direct_distance` double DEFAULT NULL,
          `direct_distance_scale` double DEFAULT NULL,
          `direct_distance_modulus` double DEFAULT NULL,
          PRIMARY KEY (`id`),
          KEY `key_transient_object_id` (`transient_object_id`),
          KEY `key_catalogue_object_id` (`catalogue_object_id`),
          KEY `idx_separation` (`separation`)
        ) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
    """ % locals()

    try:
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )
    except Exception as e:
        # BUG FIX: was a bare ``except: pass`` which silently swallowed
        # every failure (including lost connections). The expected error
        # here is "table already exists", so log and continue -- this
        # matches the pattern used in ``generate_the_database_table``
        self.log.info(
            '`tcs_cross_matches` table probably already exists - continuing as normal (%s)' % (str(e),))

    self.log.info(
        'completed the ``_create_crossmatch_table_if_not_existing`` method')
    return None
def _update_ned_query_history(
        self):
    """record the NED conesearch coordinates just queried in the query-history table

    **Key Arguments:**
        - ``coordinateList`` - the coordinates that where queried

    **Return:**
        - None
    """
    self.log.info('starting the ``_update_ned_query_history`` method')

    from datetime import datetime
    from dryxPython import mysql as dms

    radius = self.settings["ned stream search radius arcec"]
    queriedAt = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # ONE (RA, DEC, RADIUS, DATE) TUPLE PER "ra dec" COORDINATE STRING
    manyValueList = [
        (c.split(" ")[0], c.split(" ")[1], radius, queriedAt)
        for c in self.coordinateList
    ]

    sqlQuery = u"""
        insert into tcs_helper_ned_query_history (raDeg, decDeg, arcsecRadius, dateQueried) values (%s, %s, %s, %s)
    """
    dms.execute_mysql_write_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log,
        manyValueList=manyValueList
    )

    # INDEX THE NEW ROWS SPATIALLY SO THE HISTORY CAN BE CONESEARCHED
    dms.add_HTMIds_to_mysql_tables.add_HTMIds_to_mysql_tables(
        raColName="raDeg",
        declColName="decDeg",
        tableName="tcs_helper_ned_query_history",
        dbConn=self.cataloguesDbConn,
        log=self.log,
        primaryIdColumnName="primaryId"
    )

    self.log.info('completed the ``_update_ned_query_history`` method')
    return None
def _get_ned_names(
        self):
    """conesearch NED at the stream coordinates and stash any returned names in ``tcs_cat_ned_stream``

    **Return:**
        - None
    """
    self.log.info('starting the ``_get_ned_names`` method')

    from neddy import conesearch

    tableName = self.dbTableName

    # RUN THE NED CONESEARCH OVER EVERY COORDINATE IN THE STREAM
    names, searchParams = conesearch(
        log=self.log,
        radiusArcsec=self.settings["ned stream search radius arcec"],
        nearestOnly=False,
        unclassified=True,
        quiet=False,
        listOfCoordinates=self.coordinateList,
        outputFilePath=False,
        verbose=False
    ).get_crossmatch_names()

    # INSERT IGNORE SKIPS NAMES ALREADY PRESENT IN THE STREAM TABLE
    manyValueList = [(name,) for name in names]
    sqlQuery = u"""
        insert ignore into tcs_cat_ned_stream (ned_name, dateCreated) values (%s, now())
    """
    dms.execute_mysql_write_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log,
        manyValueList=manyValueList)

    self.log.info('completed the ``_get_ned_names`` method')
    return None
def set_flag(
        dbConn,
        log,
        tableName,
        flagColumn,
        flagValue,
        primaryKeyColumn,
        primaryKeyId,
):
    """*Set a flag in a db table to a given value for a single row*

    **Key Arguments:**
        - ``dbConn`` -- db connection
        - ``log`` -- logger
        - ``tableName`` -- db table name
        - ``flagColumn`` -- flag column name
        - ``flagValue`` -- value flag is to be set to
        - ``primaryKeyColumn`` -- primaryKey column name
        - ``primaryKeyId`` -- single id of the row you wish to set the flag for

    **Return:**
        - ``None`` on success, ``-1`` if the update failed
    """
    import dryxPython.mysql as m

    # BUILD THE UPDATE STATEMENT
    # NOTE(review): values are interpolated straight into the SQL string --
    # safe only while flagValue/primaryKeyId are numeric; use a
    # parameterised query if they can ever be user-supplied strings
    sqlQuery = 'update %s set %s = %s where %s = %s' % (
        tableName, flagColumn, str(flagValue), primaryKeyColumn,
        str(primaryKeyId))

    try:
        log.debug('update the ingested flags for ' + tableName)
        m.execute_mysql_write_query(
            sqlQuery,
            dbConn,
            log,
        )
    except Exception as e:
        # FIXED TYPO in log message: "cound" -> "could"
        log.error('could not update the ingested flags for %s, error: %s' %
                  (tableName, str(e)))
        return -1

    return None
def set_master_obj_types( self): """set master obj types **Key Arguments:** # - **Return:** - None **Todo** - @review: when complete, clean set_master_obj_types method - @review: when complete add logging """ self.log.info('starting the ``set_master_obj_types`` method') batchSize = 1250 t = self.dbTableName if "photo" in t: totalRows = 500000000 return else: sqlQuery = u""" select distinct objid from %(t)s where objType = "Q" """ % locals() count = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) totalRows = len(count) count = "" total = totalRows batches = int(total / batchSize) start = 0 end = 0 theseBatches = [] for i in range(batches): end = end + batchSize if end > total: end = total start = i * batchSize if start > 1: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") percent = (float(end) / float(totalRows)) * 100. print "%(end)s / %(totalRows)s (%(percent)1.1f%%) master objIds updated in %(t)s" % locals() sqlQuery = u""" select distinct objid from %(t)s where objType = "Q" limit %(start)s , %(batchSize)s """ % locals() rows = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) sqlQuery = "" for row in rows: objid = row["objid"] sqlQuery = sqlQuery + \ u"""\nupdate %(t)s set objType = "Q" where objId = %(objid)s;""" % locals( ) dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log, Force=False ) self.log.info('completed the ``set_master_obj_types`` method') return None
def generate_the_database_table(
        self):
    """generate the NED-D database table

    Creates the table if it does not already exist; a MySQL error (usually
    "table already exists") is logged and execution continues.

    **Return:**
        - None
    """
    self.log.info('starting the ``generate_the_database_table`` method')

    tableName = self.dbTableName
    sqlQuery = u"""
        CREATE TABLE `%(tableName)s` (
          `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
          `Method` varchar(150) DEFAULT NULL,
          `dateCreated` datetime DEFAULT NULL,
          `dist_derived_from_sn` varchar(150) DEFAULT NULL,
          `dist_in_ned_flag` varchar(10) DEFAULT NULL,
          `dist_index_id` mediumint(9) DEFAULT NULL,
          `dist_mod` double DEFAULT NULL,
          `dist_mod_err` double DEFAULT NULL,
          `dist_mpc` double DEFAULT NULL,
          `galaxy_index_id` mediumint(9) DEFAULT NULL,
          `hubble_const` double DEFAULT NULL,
          `lmc_mod` double DEFAULT NULL,
          `notes` varchar(500) DEFAULT NULL,
          `primary_ned_id` varchar(150) DEFAULT NULL,
          `redshift` double DEFAULT NULL,
          `ref` varchar(150) DEFAULT NULL,
          `ref_date` int(11) DEFAULT NULL,
          `master_row` tinyint(4) DEFAULT '0',
          `major_diameter_arcmin` double DEFAULT NULL,
          `ned_notes` varchar(700) DEFAULT NULL,
          `object_type` varchar(100) DEFAULT NULL,
          `redshift_err` double DEFAULT NULL,
          `redshift_quality` varchar(100) DEFAULT NULL,
          `magnitude_filter` varchar(10) DEFAULT NULL,
          `minor_diameter_arcmin` double DEFAULT NULL,
          `morphology` varchar(50) DEFAULT NULL,
          `hierarchy` varchar(50) DEFAULT NULL,
          `galaxy_morphology` varchar(50) DEFAULT NULL,
          `radio_morphology` varchar(50) DEFAULT NULL,
          `activity_type` varchar(50) DEFAULT NULL,
          `in_ned` tinyint(4) DEFAULT NULL,
          `raDeg` double DEFAULT NULL,
          `decDeg` double DEFAULT NULL,
          `eb_v` double DEFAULT NULL,
          `sdss_coverage` TINYINT DEFAULT NULL,
          PRIMARY KEY (`primaryId`),
          UNIQUE KEY `galaxy_index_id_dist_index_id` (`galaxy_index_id`,`dist_index_id`)
        ) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
    """ % locals()

    try:
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )
    except Exception:
        # BUG FIX: was a bare ``except`` which also swallows SystemExit and
        # KeyboardInterrupt; Exception still covers the expected
        # "table already exists" error
        self.log.info(
            '`%(tableName)s` table already exists - continuing as normal' % locals())

    self.log.info('completed the ``generate_the_database_table`` method')
    return None
def _update_sdss_coverage( self): """ update sdss coverage """ self.log.info('starting the ``_update_sdss_coverage`` method') tableName = self.dbTableName # SELECT THE LOCATIONS NEEDING TO BE CHECKED sqlQuery = u""" select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage from %(tableName)s where sdss_coverage is null and master_row = 1 and in_ned = 1 order by dist_mpc; """ % locals() rows = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) totalCount = len(rows) count = 0 for row in rows: count += 1 if count > 1: # Cursor up three lines and clear sys.stdout.write("\x1b[1A\x1b[2K") sys.stdout.write("\x1b[1A\x1b[2K") sys.stdout.write("\x1b[1A\x1b[2K") if count > totalCount: count = totalCount percent = (float(count) / float(totalCount)) * 100. primaryID = row["primaryID"] raDeg = float(row["raDeg"]) decDeg = float(row["decDeg"]) primary_ned_id = row["primary_ned_id"] # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN time.sleep(1.1) print "%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals() print "NED NAME: ", primary_ned_id sdss_coverage = dat.check_for_sdss_coverage.check_for_sdss_coverage( log=self.log, raDeg=raDeg, decDeg=decDeg ) if sdss_coverage == 999: sdss_coverage_flag = "null" elif sdss_coverage == True: sdss_coverage_flag = 1 elif sdss_coverage == False: sdss_coverage_flag = 0 else: self.log.error('cound not get sdss coverage' % locals()) sys.exit(0) # UPDATE THE DATABASE FLAG sqlQuery = u""" update %(tableName)s set sdss_coverage = %(sdss_coverage_flag)s where primaryID = %(primaryID)s """ % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) self.log.info('completed the ``_update_sdss_coverage`` method') return None
def add_HTMIds_to_mysql_tables(
        raColName,
        declColName,
        tableName,
        dbConn,
        log,
        primaryIdColumnName="primaryId"):
    """*Calculate and append HTMId info to a mysql db table containing ra and dec columns*

    Adds (if needed) the ``htm16ID``, ``htm20ID``, ``cx``, ``cy``, ``cz``
    columns to the table, fills them in batches for every row where
    ``htm16ID`` is still NULL/0, and finally attempts to index the two
    HTMId columns.

    **Key Arguments:**
        - ``raColName`` -- name of the RA column (decimal degrees -- values are fed to ``math.radians`` below)
        - ``declColName`` -- name of the DEC column (decimal degrees)
        - ``tableName`` -- name of table to add htmid info to
        - ``dbConn`` -- database hosting the above table
        - ``log`` -- logger
        - ``primaryIdColumnName`` -- the primary id for the table

    **Return:**
        - ``None`` on success, ``-1`` if a column could not be created or an
          HTMId update failed
    """
    ## IMPORTS ##
    import pymysql as ms
    import dryxPython.mysql as m
    from dryxPython.kws import utils as u

    # TEST TABLE EXIST
    sqlQuery = """show tables"""
    rows = m.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        log=log
    )
    tableList = []
    for row in rows:
        tableList.extend(row.values())
    if tableName not in tableList:
        message = "The %s table does not exist in the database" % (tableName,)
        log.critical(message)
        raise IOError(message)

    # TEST COLUMNS EXISTS (one-row probe; column names come from the cursor
    # description, not the row values)
    cursor = dbConn.cursor(ms.cursors.DictCursor)
    sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,)
    cursor.execute(sqlQuery)
    rows = cursor.fetchall()
    desc = cursor.description
    existingColumns = []
    for i in range(len(desc)):
        existingColumns.append(desc[i][0])
    if (raColName not in existingColumns) or (declColName not in existingColumns):
        message = 'Please make sure you have got the naes of the RA and DEC columns correct'
        log.critical(message)
        raise IOError(message)

    # ACTION(S) ##
    # THE COLUMNS (AND MYSQL TYPES) NEEDED TO STORE HTM LEVEL-16/20 IDS AND
    # THE UNIT-SPHERE CARTESIAN COORDINATES
    htmCols = {
        'htm16ID': 'BIGINT(20)',
        'htm20ID': 'BIGINT(20)',
        'cx': 'DOUBLE',
        'cy': 'DOUBLE',
        'cz': 'DOUBLE',
    }

    # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM
    for key in htmCols.keys():
        try:
            log.debug(
                'attempting to check and generate the HTMId columns for the %s db table' % (tableName, ))
            colExists = \
                """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \
                % (key, tableName)
            colExists = m.execute_mysql_read_query(
                colExists,
                dbConn,
                log,
            )
            # NOTE(review): ``switch`` is reset on every loop iteration, so
            # the message below prints once per missing column rather than
            # once per table -- confirm this is the intended behaviour
            switch = 0
            if not colExists:
                if switch == 0:
                    print "Adding the HTMCircle columns to %(tableName)s" % locals()
                    switch = 1
                sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
                    key + ' ' + htmCols[key] + ' DEFAULT NULL'
                m.execute_mysql_write_query(
                    sqlQuery,
                    dbConn,
                    log,
                )
        except Exception as e:
            log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' %
                         (tableName, str(e)))
            return -1

    # COUNT ROWS WHERE HTMIDs ARE NOT SET
    sqlQuery = """SELECT count(*) as count from %(tableName)s where %(raColName)s is not null and ((htm16ID is NULL or htm16ID = 0))""" % locals(
    )
    rowCount = m.execute_mysql_read_query(
        sqlQuery,
        dbConn,
        log,
    )
    totalCount = rowCount[0]["count"]

    # ADD HTMIDs IN BATCHES
    batchSize = 2500
    total = totalCount
    batches = int(total / batchSize)
    count = 0

    # NOW GENERATE THE HTMLIds FOR THESE ROWS
    # (no OFFSET needed in the select below: each pass sets htm16ID on the
    # rows it touches, shrinking the NULL/0 set for the next pass)
    for i in range(batches + 1):
        if total == 0:
            continue
        count += batchSize
        if count > batchSize:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount
        print "%(count)s / %(totalCount)s htmIds added to %(tableName)s" % locals()

        # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
        sqlQuery = """SELECT %s, %s, %s from %s where %s is not null and ((htm16ID is NULL or htm16ID = 0)) limit %s""" % (
            primaryIdColumnName, raColName, declColName, tableName, raColName, batchSize)
        batch = m.execute_mysql_read_query(
            sqlQuery,
            dbConn,
            log,
        )

        raList = []
        decList = []
        pIdList = []
        raList[:] = [r[raColName] for r in batch]
        decList[:] = [r[declColName] for r in batch]
        pIdList[:] = [r[primaryIdColumnName] for r in batch]

        # HTM LOOKUPS AT LEVELS 16 AND 20 FOR THE WHOLE BATCH
        from HMpTy import htm
        mesh16 = htm.HTM(16)
        mesh20 = htm.HTM(20)
        htm16Ids = mesh16.lookup_id(raList, decList)
        htm20Ids = mesh20.lookup_id(raList, decList)

        # ACCUMULATE ONE UPDATE STATEMENT PER ROW, THEN WRITE THE BATCH IN
        # A SINGLE MULTI-QUERY
        sqlQuery = ""
        for h16, h20, pid, r, d in zip(htm16Ids, htm20Ids, pIdList, raList, decList):
            # CALCULATE CARTESIANS (unit-sphere x, y, z from ra/dec degrees)
            raRad = math.radians(r)
            decRad = math.radians(d)
            cos_dec = math.cos(decRad)
            cx = math.cos(raRad) * cos_dec
            cy = math.sin(raRad) * cos_dec
            cz = math.sin(decRad)

            sqlQuery += \
                """UPDATE %s SET htm16ID=%s, htm20ID=%s, cx=%s, cy=%s, cz=%s where %s = '%s';\n""" \
                % (
                    tableName,
                    h16,
                    h20,
                    cx,
                    cy,
                    cz,
                    primaryIdColumnName,
                    pid
                )

        try:
            if len(sqlQuery):
                log.debug(
                    'attempting to update the HTMIds for new objects in the %s db table' % (tableName, ))
                m.execute_mysql_write_query(
                    sqlQuery,
                    dbConn,
                    log,
                )
            else:
                log.debug(
                    'no HTMIds to add to the %s db table' % (tableName, ))
        except Exception as e:
            log.critical('could not update the HTMIds for new objects in the %s db table - failed with this error: %s ' %
                         (tableName, str(e)))
            return -1

    # APPLY INDEXES IF NEEDED (an existing index raises -- logged and
    # deliberately ignored)
    try:
        sqlQuery = u"""
            ALTER TABLE %(tableName)s ADD INDEX `idx_htm20ID` (`htm20ID` ASC);
            ALTER TABLE %(tableName)s ADD INDEX `idx_htm16ID` (`htm16ID` ASC);
        """ % locals()
        m.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            log=log
        )
    except Exception, e:
        log.info('no index needed on table: %(e)s' % locals())
def convert_dictionary_to_mysql_table(
        dbConn,
        log,
        dictionary,
        dbTableName,
        uniqueKeyList=[],
        createHelperTables=False,
        dateModified=False,
        returnInsertOnly=False,
        replace=False):
    """ *Convert a python dictionary into a mysql table*

    NOTE: ADDED TO FUNDAMENTALS

    Creates the table (and any missing columns, with types inferred from the
    values) and inserts the dictionary as a single row.

    **Key Arguments:**
        - ``log`` -- logger
        - ``dictionary`` -- python dictionary; values may be bare, or ``[value, comment]`` pairs
        - ``dbConn`` -- the db connection
        - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
        - ``uniqueKeyList`` -- a list of column names that need combined to create the primary key
        - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
        - ``dateModified`` -- also add/stamp a ``dateModified`` column
        - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
        - ``replace`` -- use a plain INSERT plus ON DUPLICATE KEY UPDATE instead of INSERT IGNORE

    **Return:**
        - the message from the executed write query, or the tuple
          ``(insertCommand, valueTuple)`` when ``returnInsertOnly`` is True

    NOTE(review): ``uniqueKeyList`` is a mutable default argument and its
    elements are reassigned in place below -- confirm no caller re-uses the
    default between calls.
    """
    # # >IMPORTS ##
    import pymysql as mdb
    import re
    import yaml
    import time
    import datetime
    from dryxPython import commonutils as dcu
    # import ordereddict as c  # REMOVE WHEN PYTHON 2.7 INSTALLED ON PSDB
    import collections as c
    import dryxPython.mysql as dms

    log.debug('starting convert_dictionary_to_mysql_table')

    # WITH ``replace`` A PLAIN INSERT IS USED (DUPLICATES HANDLED BY THE
    # "ON DUPLICATE KEY UPDATE" CLAUSE APPENDED LATER); OTHERWISE
    # DUPLICATES ARE SILENTLY SKIPPED
    if replace:
        insertVerb = "INSERT"
    else:
        insertVerb = "INSERT IGNORE"

    if returnInsertOnly == False:
        # TEST THE ARGUMENTS
        if str(type(dbConn).__name__) != "Connection":
            message = 'Please use a valid MySQL DB connection.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(dictionary, dict):
            message = 'Please make sure "dictionary" argument is a dict type.'
            log.critical(message)
            raise TypeError(message)

        if not isinstance(uniqueKeyList, list):
            message = 'Please make sure "uniqueKeyList" is a list'
            log.critical(message)
            raise TypeError(message)

        for i in uniqueKeyList:
            if i not in dictionary.keys():
                message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
                log.critical(message)
                raise ValueError(message)

        # VALUES MAY BE BARE OR [value, comment] PAIRS -- VALIDATE BOTH FORMS
        for k, v in dictionary.iteritems():
            # log.debug('k: %s, v: %s' % (k, v,))
            if isinstance(v, list) and len(v) != 2:
                message = 'Please make sure the list values in "dictionary" 2 items in length'
                log.critical("%s: in %s we have a %s (%s)" %
                             (message, k, v, type(v)))
                raise ValueError(message)
            if isinstance(v, list):
                if not (isinstance(v[0], str) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], long) or isinstance(v[0], datetime.date) or v[0] == None):
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)
            else:
                if not (isinstance(v, str) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, long) or isinstance(v, unicode) or isinstance(v, datetime.date) or v == None):
                    this = type(v)
                    message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
                    )
                    log.critical("%s: in %s we have a %s (%s)" %
                                 (message, k, v, type(v)))
                    raise ValueError(message)

        if not isinstance(createHelperTables, bool):
            message = 'Please make sure "createHelperTables" is a True or False'
            log.critical(message)
            raise TypeError(message)

        # TEST IF TABLE EXISTS
        tableExists = does_mysql_table_exist(dbConn, log, dbTableName)

        # CREATE THE TABLE IF IT DOES NOT EXIST (columns are added lazily
        # further down as keys are encountered)
        if tableExists is False:
            sqlQuery = """
                CREATE TABLE `%(dbTableName)s` (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter', PRIMARY KEY (`primaryId`)) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
            """ % locals()
            dms.execute_mysql_write_query(
                sqlQuery=sqlQuery,
                dbConn=dbConn,
                log=log
            )

    # PATTERNS USED TO SNIFF VALUE TYPES (feedparser objects, ISO datetimes,
    # time structs)
    reFeedParserClass = re.compile('FeedParserDict')
    reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
    reTypeTime = re.compile('struct_time')
    qCreateColumn = ''
    formattedKey = ''
    formattedKeyList = []
    myValues = []

    # ADD EXTRA COLUMNS TO THE DICTIONARY
    dictionary['dateCreated'] = [
        str(dcu.get_now_sql_datetime()), "date row was created"]
    if dateModified:
        dictionary['dateModified'] = [
            str(dcu.get_now_sql_datetime()), "date row was modified"]

    # ITERATE THROUGH THE DICTIONARY AND GENERATE THE A TABLE COLUMN WITH THE
    # NAME OF THE KEY, IF IT DOES NOT EXIST
    count = len(dictionary)
    i = 1
    # DROP KEYS WHOSE VALUE IS None (py2 ``items()`` returns a list, so
    # deleting while iterating is safe here)
    for (key, value) in dictionary.items():
        if (isinstance(value, list) and value[0] is None):
            del dictionary[key]

    # SORT THE DICTIONARY BY KEY
    odictionary = c.OrderedDict(sorted(dictionary.items()))
    for (key, value) in odictionary.iteritems():

        formattedKey = key.replace(" ", "_").replace("-", "_")
        # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
        if formattedKey == "dec":
            formattedKey = "decl"
        if formattedKey == "DEC":
            formattedKey = "DECL"

        formattedKeyList.extend([formattedKey])
        if len(key) > 0:
            # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
            # STRING TO MYSQL)
            if isinstance(value, list) and (isinstance(value[0], list) or reFeedParserClass.search(str(type(value[0])))):
                value[0] = yaml.dump(value[0])
                value[0] = str(value[0])

            # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
            # if type(value[0]) == str or type(value[0]) == unicode:
            #     value[0] = value[0].replace('"', """'""")

            # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
            # COMMAND LATER
            if isinstance(value, str):
                value = value.replace('\\', '\\\\')
                value = value.replace('"', '\\"')
                # STRIP NON-ASCII CHARACTERS RATHER THAN FAIL THE INSERT
                try:
                    udata = value.decode("utf-8", "ignore")
                    value = udata.encode("ascii", "ignore")
                except:
                    log.error('cound not decode value %(value)s' % locals())
                    # log.debug('udata: %(udata)s' % locals())

            if isinstance(value, unicode):
                value = value.replace('"', '\\"')
                value = value.encode("ascii", "ignore")

            if isinstance(value, list) and isinstance(value[0], unicode):
                myValues.extend(['%s' % value[0].strip()])
            elif isinstance(value, list):
                myValues.extend(['%s' % (value[0], )])
            else:
                myValues.extend(['%s' % (value, )])

            if returnInsertOnly == False:
                # CHECK IF COLUMN EXISTS YET
                colExists = \
                    "SELECT *\
                    FROM information_schema.COLUMNS\
                    WHERE TABLE_SCHEMA=DATABASE()\
                    AND COLUMN_NAME='" \
                    + formattedKey + "'\
                    AND TABLE_NAME='" + dbTableName + """'"""
                try:
                    # log.debug('checking if the column '+formattedKey+' exists
                    # in the '+dbTableName+' table')
                    rows = execute_mysql_read_query(
                        colExists,
                        dbConn,
                        log,
                    )
                except Exception as e:
                    log.error('something went wrong' + str(e) + '\n')

                # IF COLUMN DOESN'T EXIT - GENERATE IT
                # (column type is inferred from the value: datetime-looking
                # strings -> datetime, short strings -> varchar(100), longer
                # strings -> sized varchar, small ints -> tinyint, etc.)
                if len(rows) == 0:
                    qCreateColumn = """ALTER TABLE %s ADD %s""" % (
                        dbTableName, formattedKey)
                    if not isinstance(value, list):
                        value = [value]
                    if reDatetime.search(str(value[0])):
                        # log.debug('Ok - a datetime string was found')
                        qCreateColumn += ' datetime DEFAULT NULL'
                    elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
                            == 'feedName' or formattedKey == 'title':
                        qCreateColumn += ' varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) < 30:
                        qCreateColumn += ' varchar(100) DEFAULT NULL'
                    elif (isinstance(value[0], str) or isinstance(value[0], unicode)) and len(value[0]) >= 30 and len(value[0]) < 80:
                        qCreateColumn += ' varchar(100) DEFAULT NULL'
                    elif isinstance(value[0], str) or isinstance(value[0], unicode):
                        # OVERSIZE THE COLUMN SO LATER, LONGER VALUES FIT
                        columnLength = 450 + len(value[0]) * 2
                        qCreateColumn += ' varchar(' + str(
                            columnLength) + ') DEFAULT NULL'
                    elif isinstance(value[0], int) and abs(value[0]) <= 9:
                        qCreateColumn += ' tinyint DEFAULT NULL'
                    elif isinstance(value[0], int):
                        qCreateColumn += ' int DEFAULT NULL'
                    elif isinstance(value[0], float) or isinstance(value[0], long):
                        qCreateColumn += ' double DEFAULT NULL'
                    elif isinstance(value[0], bool):
                        qCreateColumn += ' tinyint DEFAULT NULL'
                    elif isinstance(value[0], list):
                        qCreateColumn += ' varchar(1024) DEFAULT NULL'
                    else:
                        # log.debug('Do not know what format to add this key in
                        # MySQL - removing from dictionary: %s, %s'
                        #           % (key, type(value[0])))
                        # UNSUPPORTED TYPE: DROP THE KEY/VALUE FROM THE INSERT
                        formattedKeyList.pop()
                        myValues.pop()
                        qCreateColumn = None
                    if qCreateColumn:
                        # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
                        # MYSQL
                        if key is not formattedKey:
                            qCreateColumn += " COMMENT 'original keyword: " + \
                                key + """'"""
                        # CREATE THE COLUMN IF IT DOES NOT EXIST
                        try:
                            log.info('creating the ' + formattedKey + ' column in the ' + dbTableName + ' table')
                            message = execute_mysql_write_query(
                                qCreateColumn,
                                dbConn,
                                log,
                            )
                        except Exception as e:
                            # log.debug('qCreateColumn: %s' % (qCreateColumn,
                            # ))
                            log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName +
                                      ' table -- ' + str(e) + '\n')

    if returnInsertOnly == False:
        # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
        if len(uniqueKeyList):
            for i in range(len(uniqueKeyList)):
                uniqueKeyList[i] = uniqueKeyList[
                    i].replace(" ", "_").replace("-", "_")
                if uniqueKeyList[i] == "dec":
                    uniqueKeyList[i] = "decl"
                if uniqueKeyList[i] == "DEC":
                    uniqueKeyList[i] = "DECL"

            indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
            for i in range(len(uniqueKeyList) - 1):
                indexName += '_' + uniqueKeyList[i + 1]

            indexName = dcu.make_lowercase_nospace(indexName)
            rows = execute_mysql_read_query(
                """SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" +
                dbTableName + """' AND INDEX_NAME = '""" + indexName + """'""",
                dbConn,
                log,
            )
            exists = rows[0]['COUNT(*)']
            # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
            if exists == 0:
                if isinstance(uniqueKeyList, list):
                    uniqueKeyList = ','.join(uniqueKeyList)
                addUniqueKey = 'ALTER TABLE ' + dbTableName + \
                    ' ADD unique ' + indexName + """ (""" + uniqueKeyList + ')'
                # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
                message = execute_mysql_write_query(
                    addUniqueKey,
                    dbConn,
                    log,
                )

    if returnInsertOnly == True:
        # BUILD A PARAMETERISED INSERT AND RETURN IT WITHOUT EXECUTING
        myKeys = ','.join(formattedKeyList)
        valueString = ("%s, " * len(myValues))[:-2]
        insertCommand = insertVerb + """ INTO `""" + dbTableName + \
            """` (""" + myKeys + """) VALUES (""" + valueString + """)"""
        # "None" STRINGS BECOME REAL Nones SO THE DRIVER SENDS SQL NULLs
        mv = []
        mv[:] = [None if m == "None" else m for m in myValues]
        valueTuple = tuple(mv)

        dup = ""
        if replace:
            dup = " ON DUPLICATE KEY UPDATE "
            for k, v in zip(formattedKeyList, mv):
                dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
            dup = dup[:-1]

        insertCommand = insertCommand + dup

        return insertCommand, valueTuple

    # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
    myKeys = ','.join(formattedKeyList)
    myValues = '" ,"'.join(myValues)
    # log.debug(myValues+" ------ PRESTRIP")
    # REMOVE SOME CONVERSION NOISE (yaml/feedparser artefacts left over from
    # the value serialisation above)
    myValues = myValues.replace('time.struct_time', '')
    myValues = myValues.replace(
        '- !!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace(
        '!!python/object/new:feedparser.FeedParserDict', '')
    myValues = myValues.replace('dictitems:', '')
    myValues = myValues.replace('dictitems', '')
    myValues = myValues.replace('!!python/unicode:', '')
    myValues = myValues.replace('!!python/unicode', '')
    myValues = myValues.replace('"None"', 'null')
    myValues = myValues.replace('"None', 'null')
    # RE-CLOSE THE FINAL QUOTE UNLESS THE LAST VALUE BECAME A null
    if myValues[-4:] != 'null':
        myValues += '"'

    dup = ""
    if replace:
        dup = " ON DUPLICATE KEY UPDATE "
        dupValues = ('"' + myValues).split(" ,")
        dupKeys = myKeys.split(",")
        for k, v in zip(dupKeys, dupValues):
            dup = """%(dup)s %(k)s=%(v)s,""" % locals()
        dup = dup[:-1]

    # log.debug(myValues+" ------ POSTSTRIP")
    addValue = insertVerb + """ INTO `""" + dbTableName + \
        """` (""" + myKeys + """) VALUES (\"""" + \
        myValues + """) %(dup)s """ % locals()
    # log.debug(addValue)

    message = ""
    try:
        # log.debug('adding new data to the %s table; query: %s' %
        #           (dbTableName, addValue))
        message = execute_mysql_write_query(
            addValue,
            dbConn,
            log
        )
    except Exception as e:
        log.error("could not add new data added to the table '" +
                  dbTableName + "' : " + str(e) + '\n')

    log.debug('finished convert_dictionary_to_mysql_table')
    return message
def _update_tcs_helper_catalogue_views_info_with_new_views( self): """ update tcs helper catalogue tables info with new tables **Key Arguments:** # - **Return:** - None **Todo** - @review: when complete, clean _update_tcs_helper_catalogue_views_info_with_new_views method - @review: when complete add logging """ self.log.info( 'starting the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method') sqlQuery = u""" SELECT max(id) as thisId FROM tcs_helper_catalogue_views_info; """ % locals() thisId = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) try: highestId = thisId[0]["thisId"] + 1 except: highestId = 1 sqlQuery = u""" SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE='VIEW' AND TABLE_SCHEMA='crossmatch_catalogues' and TABLE_NAME like "tcs_view%%" and TABLE_NAME not like "%%helper%%"; """ % locals() tablesInDatabase = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) sqlQuery = u""" SELECT view_name FROM tcs_helper_catalogue_views_info; """ % locals() tableList = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) tbList = [] for tb in tableList: tbList.append(tb["view_name"]) for tb in tablesInDatabase: if tb["TABLE_NAME"] not in tbList: thisViewName = tb["TABLE_NAME"] print "`%(thisViewName)s` added to `tcs_helper_catalogue_views_info` database table" % locals() sqlQuery = u""" INSERT INTO tcs_helper_catalogue_views_info ( id, view_name ) VALUES ( %(highestId)s, "%(thisViewName)s" )""" % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) highestId += 1 self.log.info( 'completed the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method') return None
def _clean_up_columns(
        self):
    """Normalise columns of the two catalogue helper tables.

    Back-fills ``old_table_name``/``old_view_name``, sets default
    ``version_number``, ``in_ned`` and ``vizier_link`` values for
    stream tables, then derives ``object_type`` and the parent
    ``table_id`` for any helper-view rows still missing them.

    **Return:**
        - None
    """
    self.log.info('starting the ``_clean_up_columns`` method')

    # SIMPLE COLUMN BACK-FILLS / DEFAULTS (order preserved)
    # NOTE(review): these literal strings are never passed through the
    # `%` operator, so `%%` reaches MySQL as a double wildcard -- this
    # still matches like a single `%` in LIKE, but presumably a single
    # `%` was intended; confirm against the query-execution helper.
    sqlQueries = [
        "update tcs_helper_catalogue_tables_info set old_table_name = table_name where old_table_name is null;",
        "update tcs_helper_catalogue_tables_info set version_number = 'stream' where table_name like '%%stream' and version_number is null;",
        """update tcs_helper_catalogue_tables_info set in_ned = 0 where table_name like '%%stream' and in_ned is null;""",
        """update tcs_helper_catalogue_tables_info set vizier_link = 0 where table_name like '%%stream' and vizier_link is null;""",
        "update tcs_helper_catalogue_views_info set old_view_name = view_name where old_view_name is null;",
    ]

    for sqlQuery in sqlQueries:
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    # VIEW OBJECT TYPES
    # derive object_type from the view-name convention:
    # tcs_view_<objectType>_<rest_of_name>
    sqlQuery = u"""
        SELECT view_name FROM crossmatch_catalogues.tcs_helper_catalogue_views_info where legacy_view = 0 and object_type is null;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        view_name = row["view_name"]
        object_type = view_name.replace("tcs_view_", "").split("_")[0]
        sqlQuery = u"""
            update tcs_helper_catalogue_views_info set object_type = "%(object_type)s" where view_name = "%(view_name)s"
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    # MASTER TABLE ID FOR VIEWS
    # rebuild the parent table name (tcs_cat_<rest_of_view_name>) and
    # link each view row to that table's id via a correlated subquery
    sqlQuery = u"""
        SELECT view_name FROM crossmatch_catalogues.tcs_helper_catalogue_views_info where legacy_view = 0 and table_id is null;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        view_name = row["view_name"]
        # drop the "tcs_view_" prefix and the object-type token,
        # keep the remainder as the table-name stem
        table_name = view_name.replace("tcs_view_", "").split("_")[1:]
        table_name = ("_").join(table_name)
        table_name = "tcs_cat_%(table_name)s" % locals()
        print table_name
        sqlQuery = u"""
            update tcs_helper_catalogue_views_info set table_id = (select id from tcs_helper_catalogue_tables_info where table_name = "%(table_name)s") where view_name = "%(view_name)s"
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    self.log.info('completed the ``_clean_up_columns`` method')
    return None
def _update_transient_database(
        self):
    """Write the latest crossmatch results and classifications into the
    transients database.

    For every classified transient: delete its old rows from
    ``tcs_cross_matches``, insert the fresh crossmatches, rank the
    matches by a separation-based score, then write a combined
    "type1/type2/..." classification string (or "ORPHAN") back to the
    transient table configured in ``self.settings``.

    **Return:**
        - None
    """
    self.log.debug('starting the ``_update_transient_database`` method')

    from datetime import datetime, date, time
    now = datetime.now()
    # timestamp recorded in the date_added column of every new row
    now = now.strftime("%Y-%m-%d %H:%M:%S")

    # table/column names come from settings so the code is agnostic to
    # the host survey's schema
    transientTable = self.settings["database settings"][
        "transients"]["transient table"]
    transientTableClassCol = self.settings["database settings"][
        "transients"]["transient classification column"]
    transientTableIdCol = self.settings["database settings"][
        "transients"]["transient primary id column"]

    for c in self.classifications:
        objectType = c["object_classification_new"]
        transientObjectId = c["id"]

        # DELETE PREVIOUS CROSSMATCHES for this transient so the table
        # only ever holds the latest match set
        sqlQuery = u"""
            delete from tcs_cross_matches where transient_object_id = %(transientObjectId)s
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

        # INSERT NEW CROSSMATCHES
        for crossmatch in c["crossmatches"]:
            # map python None -> SQL null literal (mutates the dict in place)
            for k, v in crossmatch.iteritems():
                if v == None:
                    crossmatch[k] = "null"
            if "physical_separation_kpc" not in crossmatch.keys():
                crossmatch["physical_separation_kpc"] = "null"
            # quote the subtype only when it is a real value
            if crossmatch["sourceSubType"] and "null" not in str(crossmatch["sourceSubType"]):
                crossmatch["sourceSubType"] = '"%s"' % (crossmatch[
                    "sourceSubType"],)
            else:
                crossmatch["sourceSubType"] = "null"

            # NOTE(review): values are interpolated straight into the SQL
            # string -- catalogue strings containing quotes would break or
            # inject; consider parameterised queries.
            sqlQuery = u"""
                insert into tcs_cross_matches (
                    transient_object_id,
                    catalogue_object_id,
                    catalogue_table_id,
                    catalogue_object_ra,
                    catalogue_object_dec,
                    original_search_radius_arcsec,
                    separation,
                    z,
                    scale,
                    distance,
                    distance_modulus,
                    date_added,
                    association_type,
                    physical_separation_kpc,
                    catalogue_object_type,
                    catalogue_object_subtype,
                    catalogue_table_name,
                    catalogue_view_name,
                    search_name,
                    major_axis_arcsec,
                    direct_distance,
                    direct_distance_scale,
                    direct_distance_modulus
                )
                values (
                    %s, "%s", %s, %s, %s, %s, %s, %s, %s, %s, %s, "%s", "%s", %s, "%s", %s, "%s", "%s", "%s", %s, %s, %s, %s)
            """ % (crossmatch["transientObjectId"], crossmatch["catalogueObjectId"], crossmatch["catalogueTableId"], crossmatch["sourceRa"], crossmatch["sourceDec"], crossmatch["originalSearchRadius"], crossmatch["separation"], crossmatch["z"], crossmatch["scale"], crossmatch["distance"], crossmatch["distanceModulus"], now, crossmatch["association_type"], crossmatch["physical_separation_kpc"], crossmatch["sourceType"], crossmatch["sourceSubType"], crossmatch["catalogueTableName"], crossmatch["catalogueViewName"], crossmatch["searchName"], crossmatch["xmmajoraxis"], crossmatch["xmdirectdistance"], crossmatch["xmdirectdistancescale"], crossmatch["xmdirectdistanceModulus"])
            dms.execute_mysql_write_query(
                sqlQuery=sqlQuery,
                dbConn=self.transientsDbConn,
                log=self.log
            )

    # RANK THE CROSSMATCHES AND DERIVE A CLASSIFICATION PER TRANSIENT
    for ob in self.transientsMetadataList:
        transId = ob["id"]
        name = ob["name"]
        sqlQuery = u"""
            select id, separation, catalogue_view_name, association_type, physical_separation_kpc, major_axis_arcsec from tcs_cross_matches where transient_object_id = %(transId)s order by separation
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

        rankScores = []
        for row in rows:
            # matches that are very close (<2 arcsec), physically close
            # SN hosts (<20 kpc) or SNe inside a host's major axis get a
            # premium score near 2; everything else is scored by
            # separation. The per-view object_type_accuracy acts as a
            # small tie-breaker.
            if row["separation"] < 2. or (row["physical_separation_kpc"] != "null" and row["physical_separation_kpc"] < 20. and row["association_type"] == "SN") or (row["major_axis_arcsec"] != "null" and row["association_type"] == "SN"):
                rankScore = 2. - \
                    self.colMaps[row["catalogue_view_name"]][
                        "object_type_accuracy"] * 0.1
            else:
                rankScore = row["separation"] + 1. - \
                    self.colMaps[row["catalogue_view_name"]][
                        "object_type_accuracy"] * 0.1
            rankScores.append(rankScore)

        rank = 0
        # NOTE(review): sorted(zip(...)) falls back to comparing the row
        # dicts when two scores tie -- legal in Python 2 only; would
        # raise in Python 3. Also `rows` is rebound inside this loop,
        # which is safe only because sorted() already materialised the
        # pairs -- confirm this is intentional.
        for rs, row in sorted(zip(rankScores, rows)):
            rank += 1
            primaryId = row["id"]
            sqlQuery = u"""
                update tcs_cross_matches set rank = %(rank)s where id = %(primaryId)s
            """ % locals()
            # NOTE(review): an UPDATE issued via the *read* query helper --
            # presumably works, but the write helper looks intended here
            rows = dms.execute_mysql_read_query(
                sqlQuery=sqlQuery,
                dbConn=self.transientsDbConn,
                log=self.log
            )

        # BUILD "typeA/typeB/..." STRING FROM DISTINCT ASSOCIATION TYPES
        sqlQuery = u"""
            select distinct association_type from (select association_type from tcs_cross_matches where transient_object_id = %(transId)s order by rank) as alias;
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )
        classification = ""
        for row in rows:
            classification += row["association_type"] + "/"
        classification = classification[:-1]

        # no crossmatches at all -> flag the transient as an orphan
        if len(classification) == 0:
            classification = "ORPHAN"

        sqlQuery = u"""
            update %(transientTable)s set %(transientTableClassCol)s = "%(classification)s" where %(transientTableIdCol)s = %(transId)s
        """ % locals()
        print """%(name)s: %(classification)s """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

    self.log.debug('completed the ``_update_transient_database`` method')
    return None
def create_master_id( self): """create master id **Key Arguments:** # - **Return:** - None **Todo** - @review: when complete, clean create_master_id method - @review: when complete add logging """ self.log.info('starting the ``create_master_id`` method') batchSize = 1250 t = self.dbTableName if "photo" in t: totalRows = 500000000 else: sqlQuery = u""" select distinct objId from %(t)s where qubMasterFlag = 2 """ % locals() count = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) totalRows = len(count) count = "" total = totalRows batches = int(total / batchSize) start = 0 end = 0 theseBatches = [] for i in range(batches): end = end + batchSize if end > total: end = total start = i * batchSize if start > 1: # Cursor up one line and clear line sys.stdout.write("\x1b[1A\x1b[2K") percent = (float(end) / float(totalRows)) * 100. print "%(end)s / %(totalRows)s (%(percent)1.1f%%) masterFlags updated in %(t)s" % locals() sqlQuery = u""" select distinct objid from %(t)s where qubMasterFlag = 2 limit 0, %(batchSize)s """ % locals() rows = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) sqlQuery = "" for row in rows: objid = row["objid"] sqlQuery = sqlQuery + \ u"""\nupdate %(t)s set qubMasterFlag = 1 where qubPrimaryId = (select * from (SELECT qubPrimaryId FROM %(t)s where objId = %(objid)s order by clean desc limit 1) as alias); update %(t)s set qubMasterFlag = 0 where objId = %(objid)s and qubMasterFlag != 1;""" % locals() dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log, Force=False ) self.log.info('completed the ``create_master_id`` method') return None
def _query_ned_and_add_results_to_database(
        self, batchCount):
    """Query NED for one batch of galaxy names and write the returned
    metadata into the catalogue table.

    **Key Arguments:**
        - ``batchCount`` - the index number of the batch sent to NED
    """
    self.log.info(
        'starting the ``_query_ned_and_add_results_to_database`` method')

    tableName = self.dbTableName

    # QUERY NED WITH BATCH (self.theseIds is the batch of galaxy names)
    totalCount = len(self.theseIds)
    print "requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals()
    search = namesearch(
        log=self.log,
        names=self.theseIds,
        quiet=True
    )
    results = search.get()
    # NOTE(review): `% locals()` here is a no-op (no placeholders in the
    # string) -- harmless but can be dropped
    print "results returned from ned -- starting to add to database" % locals()

    # CLEAN THE RETURNED DATA AND UPDATE DATABASE
    totalCount = len(results)
    count = 0
    sqlQuery = ""
    for thisDict in results:
        thisDict["tableName"] = tableName
        count += 1
        for k, v in thisDict.iteritems():
            # empty value -> SQL null literal.
            # NOTE(review): `len(v)` would raise TypeError on a truthy
            # non-sized value (e.g. a float) -- presumably NED returns
            # everything as strings; confirm.
            if not v or len(v) == 0:
                thisDict[k] = "null"
            # strip NED's uncertainty markers from diameter values
            if k in ["major_diameter_arcmin", "minor_diameter_arcmin"] and (":" in v or "?" in v or "<" in v):
                thisDict[k] = v.replace(":", "").replace(
                    "?", "").replace("<", "")
            # escape double-quotes so the value survives SQL quoting
            if isinstance(v, str) and '"' in v:
                thisDict[k] = v.replace('"', '\\"')

        # only rows NED actually resolved get a metadata update
        if "Input name not" not in thisDict["input_note"] and "Same object as" not in thisDict["input_note"]:
            try:
                thisDict[
                    "raDeg"] = dat.ra_sexegesimal_to_decimal.ra_sexegesimal_to_decimal(thisDict["ra"])
                thisDict[
                    "decDeg"] = dat.declination_sexegesimal_to_decimal.declination_sexegesimal_to_decimal(thisDict["dec"])
            except:
                # NOTE(review): bare except -- would also hide
                # programming errors; a targeted exception type is safer
                thisName = thisDict["input_name"]
                self.log.warning(
                    "Could not convert RA or DEC for NED object %(thisName)s" % locals())
                continue
            # NOTE(review): `%(eb-v)s` works because "eb-v" is a plain
            # mapping key for the % operator -- confirm the NED result
            # dict really uses that hyphenated key
            sqlQuery += u"""
                update %(tableName)s set redshift_quality = "%(redshift_quality)s", redshift = %(redshift)s, hierarchy = "%(hierarchy)s", object_type = "%(object_type)s", major_diameter_arcmin = %(major_diameter_arcmin)s, morphology = "%(morphology)s", magnitude_filter = "%(magnitude_filter)s", ned_notes = "%(ned_notes)s", eb_v = %(eb-v)s, raDeg = %(raDeg)s, radio_morphology = "%(radio_morphology)s", activity_type = "%(activity_type)s", minor_diameter_arcmin = %(minor_diameter_arcmin)s, decDeg = %(decDeg)s, redshift_err = %(redshift_err)s where ned_name = "%(input_name)s";\n
            """ % thisDict

    # quoted "null" placeholders become real SQL nulls
    sqlQuery = sqlQuery.replace('"null"', 'null')

    if len(sqlQuery) != 0:
        # NOTE(review): a batched UPDATE executed via the *read* query
        # helper -- presumably the helper tolerates writes; verify
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )
    else:
        # nothing came back for this batch -- mark every requested name
        # so the batch is not endlessly retried
        for thisId in self.theseIds:
            sqlQuery = u"""
                update %(tableName)s set download_error = 1 where ned_name = "%(thisId)s"
            """ % locals()
            dms.execute_mysql_write_query(
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
                log=self.log
            )

    print "%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % locals()
    if count < totalCount:
        # Cursor up one line and clear line
        sys.stdout.write("\x1b[1A\x1b[2K")

    # record that the catalogue table was touched
    sqlQuery = u"""
        update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s"
    """ % locals()
    dms.execute_mysql_write_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    self.log.info(
        'completed the ``_query_ned_and_add_results_to_database`` method')
    return None
def main(arguments=None):
    """
    *The main function used when ``import-pdfs-into-papers.py`` is run as a single script from the cl, or when installed as a cl command*

    NOTE(review): this function body appears to be mangled/incomplete --
    see the orphan ``elif`` and the undefined names flagged below. It
    needs to be reconciled against the original script before it can run.
    """
    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="DEBUG",
        options_first=False,
        projectName="muppet"
    )
    arguments, settings, log, dbConn = su.setup()

    # UNPACK REMAINING CL ARGUMENTS USING `EXEC` TO SETUP THE VARIABLE NAMES
    # AUTOMATICALLY
    # NOTE(review): exec on CL-derived strings is fragile (quotes in a
    # value break the statement) and a known code-injection smell
    for arg, val in arguments.iteritems():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        if isinstance(val, str) or isinstance(val, unicode):
            exec(varname + " = '%s'" % (val,))
        else:
            exec(varname + " = %s" % (val,))
        if arg == "--dbConn":
            dbConn = val
        log.debug('%s = %s' % (varname, val,))

    # CREATE APPLESCRIPT RETURN STRING WITH THE DETAILS I WANT
    asProperties = {
        "abstract": "abstract",
        "title": "title",
        "tags": "keyword names",
        "papersId": "id",
        "papersUrl": "item url",
        "author": "author names",
        "subtype": "subtitle",
        "rating": "my rating",
        "url": "publication url",
        "path": "full path of primary file item",
        "reference": "formatted reference",
        "originalPath": "original path of primary file item"
    }
    returnString = "return "
    # join the applescript property names with "||" separators; the
    # last property gets no trailing separator
    for i, v in enumerate(asProperties.values()):
        if i + 1 == len(asProperties):
            returnString += """ %(v)s of thisItem""" % locals()
        else:
            returnString += """ %(v)s of thisItem & "||" & """ % locals()

    # NOTE(review): orphan `elif` -- its matching `if action == ...:`
    # (and the enclosing file-iteration loop implied by `continue`,
    # `inputMetadata`, `filePath`, `pdfPath`, `databaseid`, `thisData`,
    # `pdfFolderPath`) is missing from this copy of the function
    elif action == "archive":
        if len(inputMetadata["tags"]) == 0:
            print "%(filePath)s needs tags added" % locals()
            continue
        outputMetadata = import_media_and_return_metadata(
            log=log,
            mediaPath=pdfPath,
            returnString=returnString,
            asProperties=asProperties,
        )
        # CHECK FILE WAS INDEED IMPORTED
        if outputMetadata["originalPath"] != pdfPath:
            originalPath = outputMetadata["originalPath"]
            log.error(
                'this is not the original file: "%(originalPath)s" ' % locals())
            sys.exit(0)
        else:
            updates = ""
            sqlUpdates = ""
            sqlTags = ""
            thisId = outputMetadata["papersId"]
            # build the applescript property updates and the mirror SQL
            # updates from the input metadata
            for k, v in inputMetadata.iteritems():
                if k == "tags":
                    tags = v.split(",")
                    for tag in tags:
                        tag = tag.strip().lower().replace(" ", "-")
                        updates += """
set this to (make new keyword item with properties {name:"%(tag)s"})
add keywords {this} to thisItem
""" % locals()
                        sqlTags += """insert ignore into `media-tags` (mediaTable,mediaId,tag) values("reading-list", %(databaseid)s , "%(tag)s");\n""" % locals(
                        )
                else:
                    if k in asProperties:
                        prop = asProperties[k]
                        updates += """set %(prop)s of thisItem to "%(v)s"\n""" % locals()
                    # first column starts the UPDATE, later ones append
                    if len(sqlUpdates):
                        sqlUpdates += """, %(k)s = "%(v)s" """ % locals(
                        )
                    else:
                        sqlUpdates += """update `reading-list` set %(k)s = "%(v)s" """ % locals(
                        )
            # copy accumulated notes to the clipboard so the user can
            # paste them into Papers during the dialog below
            if len(thisData):
                import pyperclip
                pyperclip.copy(thisData)
            applescript = u"""
tell application "Papers"
    repeat with i from (count of publication items) to 1 by -1
        if id of item i of publication items is equal to "%(thisId)s" then
            set thisItem to item i of publication items
            exit repeat
        end if
    end repeat
    %(updates)s
    set m to selected mode of front library window
    set selected mode of front library window to Library Mode
    tell front library window to set selected publications to (thisItem)
    set i to selected inspector tab of front library window
    set selected inspector tab of front library window to Notes Tab
    set papersId to id of thisItem
end tell
tell application "System Events"
    activate
    set the_results to (display dialog "Paste notes to 'General Note' section on the right" buttons {"cancel", "skip", "notes added"} default button "skip")
    set BUTTON_Returned to button returned of the_results
    if ((BUTTON_Returned) is equal to "notes added") then
        set BUTTON_Returned to papersId
    end if
    return BUTTON_Returned
end tell
""" % locals()
            cmd = "\n".join(
                ["osascript << EOT", applescript, "EOT"])
            p = Popen(cmd, stdout=PIPE, stdin=PIPE, shell=True)
            output = p.communicate()[0].strip()
            # interpret the dialog button the user pressed
            if output.lower() == "cancel":
                print "user cancelled the import script"
                sys.exit(0)
            elif output.lower() == "skip":
                continue
            elif "error" in output.lower():
                print output
                sys.exit(0)
            elif len(output) == 0:
                print "Error in applescript:\n"
                print applescript
                sys.exit(0)
            papersId = output
            # sync the Papers id and notes back to the reading-list DB
            if databaseid and databaseid.lower() != "false":
                sqlUpdates += """, papersId = "%(papersId)s", workflowStage = "archived", notes = "%(thisData)s" where primaryId = %(databaseid)s """ % locals()
                dms.execute_mysql_write_query(
                    sqlQuery=sqlUpdates,
                    dbConn=dbConn,
                    log=log
                )
                dms.execute_mysql_write_query(
                    sqlQuery=sqlTags,
                    dbConn=dbConn,
                    log=log
                )
            # Recursively create missing directories
            if not os.path.exists(pdfFolderPath + "/trash"):
                os.makedirs(pdfFolderPath + "/trash")
            # move the source metadata file into ./trash
            try:
                source = filePath
                destination = ("/").join(filePath.split("/")
                                         [0:-1]) + "/trash/" + filePath.split("/")[-1]
                log.debug("attempting to rename file %s to %s" %
                          (source, destination))
                os.rename(source, destination)
            except Exception, e:
                log.error("could not rename file %s to %s - failed with this error: %s " %
                          (source, destination, str(e),))
                sys.exit(0)
            # move the imported PDF into ./trash
            try:
                source = pdfPath
                destination = ("/").join(pdfPath.split("/")
                                         [0:-1]) + "/trash/" + pdfPath.split("/")[-1]
                log.debug("attempting to rename file %s to %s" %
                          (source, destination))
                os.rename(source, destination)
            except Exception, e:
                log.error("could not rename file %s to %s - failed with this error: %s " %
                          (source, destination, str(e),))
                sys.exit(0)
def _updated_row_counts_in_tcs_helper_catalogue_tables_info( self): """ updated row counts in tcs catalogue tables **Key Arguments:** # - **Return:** - None **Todo** - @review: when complete, clean _updated_row_counts_in_tcs_helper_catalogue_tables_info method - @review: when complete add logging """ self.log.info( 'starting the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method') sqlQuery = u""" select * from tcs_helper_catalogue_tables_info where table_name like "%%stream" or number_of_rows is null and legacy_table = 0 """ % locals() rows = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) for row in rows: tbName = row["table_name"] sqlQuery = u""" update tcs_helper_catalogue_tables_info set number_of_rows = (select count(*) as count from %(tbName)s) where table_name = "%(tbName)s" """ % locals() count = dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log, quiet=True ) print "Row counts updated in `tcs_helper_catalogue_tables_info` database table" sqlQuery = u""" select * from tcs_helper_catalogue_views_info where view_name like "%%stream" or number_of_rows is null and legacy_view = 0 """ % locals() rows = dms.execute_mysql_read_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log ) for row in rows: tbName = row["view_name"] sqlQuery = u""" update tcs_helper_catalogue_views_info set number_of_rows = (select count(*) as count from %(tbName)s) where view_name = "%(tbName)s" """ % locals() count = dms.execute_mysql_write_query( sqlQuery=sqlQuery, dbConn=self.cataloguesDbConn, log=self.log, quiet=True ) self.log.info( 'completed the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method') return None